libMems/FileSML.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: FileSML.h,v 1.11 2004/03/01 02:40:08 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifndef _FileSML_h_
00010 #define _FileSML_h_
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015 
// Compiler warning control using the MSVC-style `#pragma warning` syntax.
// NOTE(review): warning 4996 is disabled between the push and the pop, but no
// code or #include sits between them, so the pop restores the saved warning
// state before the headers below are read and the suppression has no effect
// on them.  If the intent was to silence 4996 for those headers, the pop would
// have to follow them -- confirm the intent before changing anything.
00016 #pragma warning(push)
00017 #pragma warning(disable : 4996)
00018 #pragma warning(pop)
00019 
00020 #include "libGenome/gnSequence.h"
00021 #include "libMems/SortedMerList.h"
00022 #include <boost/iostreams/device/mapped_file.hpp>
00023 #include <fstream>
00024 #include <vector>
00025 #include <string>
00026 
00027 namespace mems {
00028 
00029 // The sequence database size, in bytes, will be:
00030 //     base_count / 4 + base_count * 12
00031 
00032 #define DEFAULT_MEMORY_MINIMUM 20971520  // 20 * 1024 * 1024 bytes (20 MiB); returned by FileSML::MemoryMinimum()
00033 
/**
 * Abstract SortedMerList subclass that carries file-related state: a file
 * name, a std::fstream and a Boost memory-mapped file source.
 *
 * Clone() and GetNeededMemory() are pure virtual, so FileSML cannot be
 * instantiated directly.  Apart from the small inline members, everything
 * here is only declared; the definitions are not part of this listing.
 *
 * NOTE(review): the listing's gutter numbers skip in places (e.g. 00044 to
 * 00051); presumably the original documentation comments were omitted by the
 * listing generator -- consult the real header for the authoritative docs.
 */
00034 class FileSML : public SortedMerList
00035 {
00036 public:
              // Default constructor: runs the SortedMerList base constructor and
              // nothing else (the mutex allocation in the body is commented out).
00037         FileSML() : SortedMerList() {
00038 //              file_mutex = new wxMutex();
00039         };
              // Copy assignment; declared here, defined elsewhere.
00040         FileSML& operator=(const FileSML& sa);
              // Pure virtual: every concrete subclass must provide Clone().
00041         virtual FileSML* Clone() const = 0;
00042         
00043         virtual void Clear();
00044         
              // The operations below are only declared in this header.
              // NOTE(review): judging by the names they load, build, read and merge
              // sorted mer lists -- confirm details against the implementation.
              // Defaults visible here: mersize = DNA_MER_SIZE, Read's offset = 0.
00051         virtual void LoadFile(const std::string& fname);
00062         virtual void BigCreate(const genome::gnSequence& seq, const uint32 split_levels, const uint32 mersize = DNA_MER_SIZE);
00063         virtual void Create(const genome::gnSequence& seq, const uint64 seed );
00064         virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset = 0);
00065         virtual void Merge(SortedMerList& sa, SortedMerList& sa2);
00066 
              // Indexed access returning a bmer by value.
00067         virtual bmer operator[]( gnSeqI index );
00068 
00069         virtual gnSeqI UniqueMerCount();
00070         virtual void SetDescription(const std::string& d);
00071         virtual void SetID(const sarID_t d);
00072         
              // FormatVersion() and MemoryMinimum() are defined inline after the
              // class body in this header.
00073         virtual uint32 FormatVersion();
00074         static uint64 MemoryMinimum();
00075         virtual void RadixSort(std::vector<bmer>& s_array);
00076 
              // Non-virtual counterpart of Create() with the same parameter list.
              // NOTE(review): how it differs from Create() is not visible here.
00077         void dmCreate(const genome::gnSequence& seq, const uint64 seed);

              // Static temporary-path registry: register a path, fetch a path by
              // index, and query the number of paths.
              // NOTE(review): presumably backed by the static tmp_paths member --
              // confirm in the implementation.
00078         static void registerTempPath( const std::string& tmp_path );
00079 
00080         static const char* getTempPath( int pathI );
00081 
00082         static int getTempPathCount();
00083         
              // Read-only reference to the seq_coords member.
00084         const std::vector< int64 >& getUsedCoordinates() const { return seq_coords; };
00085 
00086 protected:
              // truncate defaults to false.
00091         virtual void OpenForWriting( boolean truncate = false );
00097         virtual boolean WriteHeader();
              // Pure virtual: subclasses report the memory needed for a given length.
              // NOTE(review): units of the result are not stated in this listing.
00104         virtual uint64 GetNeededMemory(gnSeqI len) = 0;
00105 
00106         std::string filename;
00107         std::fstream sarfile;
              // Offset added to the start of the mapped data by base() below.
00108         uint64 sarray_start_offset;
00109 
              // Read-only memory-mapped file source.
00110         boost::iostreams::mapped_file_source sardata;
              // Pointer to the mapped data advanced by sarray_start_offset and
              // reinterpreted as smlSeqI_t via a C-style cast.
              // NOTE(review): mapped_file_source::data() yields a const char*, so
              // this cast also drops constness -- callers must not write through it.
00111         smlSeqI_t* base(){ return (smlSeqI_t*)(sardata.data()+sarray_start_offset); }
00112         
              // Static member shared by all instances; a raw char** whose ownership
              // and element count are managed outside this header.
00113         static char** tmp_paths;        
              // Coordinates exposed read-only through getUsedCoordinates().
00114         std::vector< int64 > seq_coords;        
00115 };
00116 
00117 // Format versions 2 and 5 were used previously; the number
00118 // jumps to 100 to avoid confusion with DNAFileSML.
// Returns the current format version number, 100.
00119 inline uint32 FileSML::FormatVersion()
00120 {
00121         const uint32 current_version = 100;
00122         return current_version;
00123 }
00124 
// Returns DEFAULT_MEMORY_MINIMUM as a uint64.
// The value is held in a uint64, matching the declared return type, so a
// minimum above the 32-bit range would not be silently truncated; a mutable
// function-local static is not needed for a constant.
00125 inline uint64 FileSML::MemoryMinimum()
00126 {
00127         const uint64 minimum_bytes = DEFAULT_MEMORY_MINIMUM;
00128         return minimum_bytes;
00129 }
00130 
// Free function in namespace mems; only declared here, defined elsewhere.
// Takes a read-only input sequence (in_seq), a writable output sequence
// (out_seq), a writable vector of int64 coordinates (seq_coords) and an int
// mask_n_length.
// NOTE(review): the name suggests that runs of 'N' at least mask_n_length long
// are masked out of in_seq into out_seq, with the retained coordinates
// recorded in seq_coords -- confirm against the implementation.
00131 void maskNNNNN( const genome::gnSequence& in_seq, genome::gnSequence& out_seq, std::vector< int64 >& seq_coords, int mask_n_length );
00132 
00133 }
00134 
00135 #endif   //_FileSML_h_

Generated on Fri Mar 14 06:01:02 2008 for libMems by doxygen 1.3.6