00001 /******************************************************************************* 00002 * $Id: MemHash.h,v 1.23 2004/03/01 02:40:08 darling Exp $ 00003 * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file. 00004 * This file is licensed under the GPL. 00005 * Please see the file called COPYING for licensing details. 00006 * ************** 00007 ******************************************************************************/ 00008 00009 #ifndef _MemHash_h_ 00010 #define _MemHash_h_ 00011 00012 #ifdef HAVE_CONFIG_H 00013 #include "config.h" 00014 #endif 00015 00016 #include <set> 00017 #include <map> 00018 #include <iostream> 00019 00020 #include "libMems/MatchFinder.h" 00021 #include "libMems/Match.h" 00022 #include "libGenome/gnException.h" 00023 #include "libMems/MatchList.h" 00024 #include "libMems/MatchHashEntry.h" 00025 #include "libMems/SlotAllocator.h" 00026 #include "boost/pool/object_pool.hpp" 00027 00028 namespace mems { 00029 00030 static const uint32 DEFAULT_MEM_TABLE_SIZE = 40000; 00031 static const uint32 DEFAULT_REPEAT_TOLERANCE = 0; 00032 static const uint32 DEFAULT_ENUMERATION_TOLERANCE = 1; 00033 00038 class MemHash : public MatchFinder{ 00039 00040 00041 00042 public: 00043 MemHash(); 00044 ~MemHash(); 00045 MemHash(const MemHash& mh); 00046 MemHash& operator=( const MemHash& mh ); 00047 virtual MemHash* Clone() const; 00048 virtual void Clear(); 00049 virtual void ClearSequences(); 00050 00055 virtual void FindMatches( MatchList& match_list ); 00056 virtual void FindMatchesFromPosition( MatchList& match_list, const std::vector<gnSeqI>& start_points ); 00057 00061 virtual boolean CreateMatches(); 00062 00067 virtual uint32 TableSize() const {return table_size;}; 00072 virtual void SetTableSize(uint32 new_table_size); 00076 virtual MatchList GetMatchList() const; 00081 //virtual void GetMatchList( std::vector<Match*>& mem_list ) const; 00082 00087 template< class MatchListType > 00088 void GetMatchList( MatchListType& mem_list ) const; 00089 00094 virtual uint32 MemCount(){return m_mem_count;} 00099 virtual uint32 MemCollisionCount(){return m_collision_count;} 00100 virtual void MemTableCount(std::vector<uint32>& table_count){table_count = mem_table_count;} 00105 virtual void PrintDistribution(std::ostream& os) const; 00106 00111 virtual void LoadFile(std::istream& mem_file); 00115 virtual void WriteFile(std::ostream& mem_file) const; 00116 00125 virtual void SetRepeatTolerance(uint32 repeat_tolerance){m_repeat_tolerance = repeat_tolerance;} 00130 virtual uint32 GetRepeatTolerance() const{return m_repeat_tolerance;} 00139 virtual void SetEnumerationTolerance(uint32 enumeration_tolerance){m_enumeration_tolerance = enumeration_tolerance;} 00144 virtual uint32 GetEnumerationTolerance() const{return m_enumeration_tolerance;} 00145 00149 void SetMatchLog( std::ostream* match_log ){ this->match_log = match_log; } 00150 00151 00152 00153 //end void GetMatchList( std::vector<MatchListType*>& mem_list ); 00154 00155 protected: 00156 virtual boolean EnumerateMatches( IdmerList& match_list ); 00157 virtual boolean HashMatch(IdmerList& match_list); 00158 virtual void SetDirection(MatchHashEntry& mhe); 00159 virtual MatchHashEntry* AddHashEntry(MatchHashEntry& mhe); 00160 virtual uint32 quadratic_li(uint32 listI){return (listI*(listI+1))/2;} 00161 00162 uint32 table_size; 00163 std::vector< std::vector<MatchHashEntry*> > mem_table; 00164 uint32 m_repeat_tolerance; 00165 uint32 m_enumeration_tolerance; 00166 uint64 m_mem_count; 00167 uint64 m_collision_count; 00168 std::vector<uint32> mem_table_count; 00169 00170 std::ostream* match_log; 00171 SlotAllocator<MatchHashEntry>& allocator; 00172 std::vector<MatchHashEntry*> allocated; // used to track what needs to get explicitly destroyed later... 00173 // boost::object_pool<MatchHashEntry> allocator; 00174 MheCompare mhecomp; 00175 }; 00176 00177 00182 template< class MatchListType > 00183 void MemHash::GetMatchList( MatchListType& mem_list ) const { 00184 00185 mem_list.clear(); 00186 typedef typename MatchListType::value_type MatchType; 00187 00188 //Boost to the rescue! use remove_pointer to get at MatchListType's type 00189 typedef typename boost::remove_pointer<MatchType>::type SinPtrMatchType; 00190 SinPtrMatchType mm; 00191 00192 for(uint32 i=0; i < table_size; ++i) 00193 { 00194 std::vector<MatchHashEntry*>::const_iterator iter = mem_table[i].begin(); 00195 for(; iter != mem_table[i].end(); iter++ ) 00196 { 00197 MatchType m = mm.Copy(); 00198 *m = **iter; 00199 mem_list.push_back( m ); 00200 } 00201 } 00202 00203 } 00204 00205 00206 } 00207 00208 #endif //_MemHash_h_
1.3.6