00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef _FileSML_h_
00010 #define _FileSML_h_
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #pragma warning(push)
00017 #pragma warning(disable : 4996)
00018 #pragma warning(pop)
00019
00020 #include "libGenome/gnSequence.h"
00021 #include "libMems/SortedMerList.h"
00022 #include <boost/iostreams/device/mapped_file.hpp>
00023 #include <fstream>
00024 #include <vector>
00025 #include <string>
00026
00027 namespace mems {
00028
00029
00030
00031
/**
 * Default value returned by FileSML::MemoryMinimum(): 20971520 (20 * 1024 * 1024).
 * Being a macro it is visible globally, not only inside namespace mems.
 * NOTE(review): the unit is presumably bytes (~20 MiB) -- confirm against the users of
 * FileSML::MemoryMinimum().
 */
#define DEFAULT_MEMORY_MINIMUM 20971520
00033
00034 class FileSML : public SortedMerList
00035 {
00036 public:
00037 FileSML() : SortedMerList() {
00038
00039 };
00040 FileSML& operator=(const FileSML& sa);
00041 virtual FileSML* Clone() const = 0;
00042
00043 virtual void Clear();
00044
00051 virtual void LoadFile(const std::string& fname);
00062 virtual void BigCreate(const genome::gnSequence& seq, const uint32 split_levels, const uint32 mersize = DNA_MER_SIZE);
00063 virtual void Create(const genome::gnSequence& seq, const uint64 seed );
00064 virtual boolean Read(std::vector<bmer>& readVector, gnSeqI size, gnSeqI offset = 0);
00065 virtual void Merge(SortedMerList& sa, SortedMerList& sa2);
00066
00067 virtual bmer operator[]( gnSeqI index );
00068
00069 virtual gnSeqI UniqueMerCount();
00070 virtual void SetDescription(const std::string& d);
00071 virtual void SetID(const sarID_t d);
00072
00073 virtual uint32 FormatVersion();
00074 static uint64 MemoryMinimum();
00075 virtual void RadixSort(std::vector<bmer>& s_array);
00076
00077 void dmCreate(const genome::gnSequence& seq, const uint64 seed);
00078 static void registerTempPath( const std::string& tmp_path );
00079
00080 static const char* getTempPath( int pathI );
00081
00082 static int getTempPathCount();
00083
00084 const std::vector< int64 >& getUsedCoordinates() const { return seq_coords; };
00085
00086 protected:
00091 virtual void OpenForWriting( boolean truncate = false );
00097 virtual boolean WriteHeader();
00104 virtual uint64 GetNeededMemory(gnSeqI len) = 0;
00105
00106 std::string filename;
00107 std::fstream sarfile;
00108 uint64 sarray_start_offset;
00109
00110 boost::iostreams::mapped_file_source sardata;
00111 smlSeqI_t* base(){ return (smlSeqI_t*)(sardata.data()+sarray_start_offset); }
00112
00113 static char** tmp_paths;
00114 std::vector< int64 > seq_coords;
00115 };
00116
00117
00118
00119 inline
00120 uint32 FileSML::FormatVersion(){
00121 static uint32 f_version = 100;
00122 return f_version;
00123 }
00124
00125 inline
00126 uint64 FileSML::MemoryMinimum(){
00127 static uint32 m_minimum = DEFAULT_MEMORY_MINIMUM;
00128 return m_minimum;
00129 }
00130
00131 void maskNNNNN( const genome::gnSequence& in_seq, genome::gnSequence& out_seq, std::vector< int64 >& seq_coords, int mask_n_length );
00132
00133 }
00134
00135 #endif //_FileSML_h_