libMems/Aligner.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: Aligner.h,v 1.23 2004/04/19 23:10:13 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifndef _Aligner_h_
00010 #define _Aligner_h_
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015 
00016 #include "libGenome/gnSequence.h"
00017 #include "libMems/DNAMemorySML.h"
00018 #include "libMems/GappedAligner.h"
00019 #include "libMems/MatchList.h"
00020 #include "libMems/Interval.h"
00021 #include "libMems/IntervalList.h"
00022 #include "libMems/MemHash.h"
00023 #include "libMems/MaskedMemHash.h"
00024 #include <map>
00025 #include "libMems/NumericMatrix.h"
00026 #include "libMems/GreedyBreakpointElimination.h"
00027 #include <list>
00028 #include "libMems/LCB.h"
00029 #include "libMUSCLE/threadstorage.h"
00030 
00031 namespace mems {
00032 
00037 class LabeledMem{
00038 public:
00039         Match* mem;
00040         uint label;
00041 };
00042 
00047 class LabeledMemComparator {
00048 public:
00049         LabeledMemComparator( uint seq ){
00050                 m_seq = seq;
00051         }
00052         LabeledMemComparator( LabeledMemComparator& lmc ){
00053                 m_seq = lmc.m_seq;
00054         }
00055         boolean operator()(const LabeledMem& a, const LabeledMem& b) const{
00056                 
00057                 int64 a_start = a.mem->Start( m_seq ), b_start = b.mem->Start( m_seq );
00058                 if( a_start == NO_MATCH || b_start == NO_MATCH ){
00059                         if( b_start != NO_MATCH )
00060                                 return true;
00061                         return false;
00062                 }
00063                 if(a_start < 0)
00064                         a_start = -a_start;
00065 //                      a_start = -a_start + a.mem->Length();
00066                 if(b_start < 0)
00067                         b_start = -b_start;
00068 //                      b_start = -b_start + b.mem->Length();
00069                 int64 diff = a_start - b_start;
00070                 return diff < 0;
00071         }
00072         ~LabeledMemComparator() {
00073         }
00074 protected:
00075         uint m_seq;
00076 private:
00077         LabeledMemComparator();
00078 };
00079 
00084 class PlacementMatch{
00085 public:
00086         Match* mem;
00087         std::list< LabeledMem >::iterator iter;
00088 };
00089 
00094 class PlacementMatchComparator {
00095 public:
00096         PlacementMatchComparator( uint seq ){
00097                 m_seq = seq;
00098         }
00099         PlacementMatchComparator( PlacementMatchComparator& lmc ){
00100                 m_seq = lmc.m_seq;
00101         }
00102         boolean operator()(const PlacementMatch& a, const PlacementMatch& b) const{
00103                 
00104                 int64 a_start = a.mem->Start( m_seq ), b_start = b.mem->Start( m_seq );
00105                 if( a_start == NO_MATCH || b_start == NO_MATCH ){
00106                         if( b_start != NO_MATCH )
00107                                 return true;
00108                         return false;
00109                 }
00110                 if(a_start < 0)
00111                         a_start = -a_start;
00112 //                      a_start = -a_start + a.mem->Length();
00113                 if(b_start < 0)
00114                         b_start = -b_start;
00115 //                      b_start = -b_start + b.mem->Length();
00116 
00117                 int64 diff = a_start - b_start;
00118                 return diff < 0;
00119         }
00120         ~PlacementMatchComparator() {
00121         }
00122 protected:
00123         uint m_seq;
00124 private:
00125         PlacementMatchComparator();
00126 };
00127 
00128 
00130 typedef std::pair< mems::Match*, mems::Match* > search_cache_t;
00131 
00132 
00142 class Aligner {
00143 public:
00148         Aligner( uint seq_count );
00149         Aligner( const Aligner& al );
00150         Aligner& operator=( const Aligner& al );
00151 
00172         void align( MatchList& mlist, IntervalList& interval_list, double LCB_minimum_density, double LCB_minimum_range, boolean recursive, boolean extend_lcbs, boolean gapped_alignment, std::string tree_filename = "" );
00173         
00174         void Recursion( MatchList& r_list, Match* r_begin, Match* r_end, boolean nway_only = false );
00175         void GetBestLCB( MatchList& r_list, MatchList& best_lcb );
00176         void DoSomethingCool( MatchList& mlist, Interval& iv );
00177         
00184         void SetMinRecursionGapLength( gnSeqI min_r_gap );
00185 
00186         void SetMaxExtensionIterations( uint ext_iters ){ this->max_extension_iters = ext_iters; }
00187 
00188         void SearchWithinLCB( MatchList& mlist, std::vector< search_cache_t >& new_cache, bool leftmost = false, bool rightmost = false );
00189         void RecursiveAnchorSearch( MatchList& mlist, gnSeqI minimum_weight, std::vector< MatchList >& LCB_list, boolean entire_genome, std::ostream* status_out = NULL );
00190 
00191         void AlignLCB( MatchList& mlist, Interval& iv );
00192         void SetGappedAligner( GappedAligner& gal );
00194         void SetMaxGappedAlignmentLength( gnSeqI len );
00195 
00197         void SetPermutationOutput( std::string& permutation_filename, int64 permutation_weight );
00198         void WritePermutation( std::vector< LCB >& adjacencies, std::string out_filename );
00199 
00200         void SetRecursive( bool value ){ this->recursive = value; }
00201 protected:
00202         TLS<MemHash> gap_mh;                    
00203         MaskedMemHash nway_mh;  
00204         uint32 seq_count;               
00205         boolean debug;                  
00207         double LCB_minimum_density;
00208         double LCB_minimum_range;
00209 
00210         uint max_extension_iters;       
00212         int64 cur_min_coverage; 
00214         gnSeqI min_recursive_gap_length;        
00216         void consistencyCheck( uint lcb_count, std::vector< LCB >& adjacencies, std::vector< MatchList >& lcb_list, std::vector< int64 >& weights );
00217         
00218         boolean recursive;              
00219         boolean extend_lcbs;    
00220         boolean gapped_alignment;       
00221         boolean currently_recursing;    
00222         boolean collinear_genomes;      
00224         GappedAligner* gal;
00225 
00226         std::string permutation_filename;
00227         int64 permutation_weight;
00228 
00229         std::vector< search_cache_t > search_cache; 
00230 };
00231 
00235 CREATE_EXCEPTION( AlignerError );
00236 
00237 void transposeMatches( MatchList& mlist, uint seqI, const std::vector< int64 >& seq_regions );
00238 
00243 void EliminateOverlaps( MatchList& ml );
00244 
00254 void AaronsLCB( MatchList& mlist, std::set<uint>& breakpoints );
00255 
00256 
00257 void ComputeLCBs( MatchList& meml, std::set<uint>& breakpoints, std::vector<MatchList>& lcb_list, std::vector<int64>& weights );
00258 void computeLCBAdjacencies_v2( std::vector<MatchList>& lcb_list, std::vector< int64 >& weights, std::vector< LCB >& adjacencies );
00259 void computeLCBAdjacencies_v2( IntervalList& iv_list, std::vector< int64 >& weights, std::vector< LCB >& adjacencies );
00260 void scanLeft( int& left_recurseI, std::vector< LCB >& adjacencies, int min_weight, int seqI );
00261 void scanRight( int& right_recurseI, std::vector< LCB >& adjacencies, int min_weight, int seqI );
00262 void GetLCBCoverage( MatchList& lcb, uint64& coverage );
00263 
00264 int64 greedyBreakpointElimination( gnSeqI minimum_weight, std::vector< LCB >& adjacencies, std::vector< int64 >& weights, std::ostream* status_out = NULL );
00265 void filterMatches( std::vector< LCB >& adjacencies, std::vector< MatchList >& lcb_list, std::vector< int64 >& weights );
00266 
00267 void CreateGapSearchList( std::vector< LCB >& adjacencies, const std::vector< genome::gnSequence* >& seq_table, std::vector< std::vector< int64 > >& iv_regions, boolean entire_genome );
00268 void SearchLCBGaps( MatchList& new_matches, const std::vector< std::vector< int64 > >& iv_regions, MaskedMemHash& nway_mh );
00269 
00270 static const uint MIN_ANCHOR_LENGTH = 9;
00271 
00272 
00274 class SearchCacheComparator
00275 {
00276 public:
00277         SearchCacheComparator() : msc(0){};
00278         bool operator()( const search_cache_t& a, const search_cache_t& b ) const
00279         {
00280                 bool lt = true;
00281                 if( a.first == NULL )
00282                 {
00283                         if( b.first == NULL )
00284                                 lt = false;
00285                 }else if( b.first == NULL )
00286                 {
00287                         lt = false;
00288                 }else if( !msc( a.first, b.first ) )
00289                         lt = false;
00290                 else if( a.second == NULL )
00291                 {
00292                         if( b.second == NULL )
00293                                 lt = false;
00294                 }else if( b.second == NULL )
00295                 {
00296                         lt = false;
00297                 }else if( !msc( a.second, b.second ) )
00298                         lt = false;
00299 
00300                 return lt;              
00301         }
00302 protected:
00303         mems::MatchStartComparator<mems::Match> msc;
00304 };
00305 
00306 static SearchCacheComparator cache_comparator;
00307 
00308 
00309 }
00310 
00311 #endif // _Aligner_h_

Generated on Fri Mar 14 06:01:02 2008 for libMems by doxygen 1.3.6