libMems/ProgressiveAligner.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: ProgressiveAligner.h,v 1.23 2004/04/19 23:10:13 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifndef _ProgressiveAligner_h_
00010 #define _ProgressiveAligner_h_
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015 
00016 #include "libMems/SuperInterval.h"
00017 #include "libMems/Aligner.h"
00018 #include "libMems/PhyloTree.h"
00019 #include "libMems/GreedyBreakpointElimination.h"
00020 #include "libMems/CompactGappedAlignment.h"
00021 #include "libMems/Islands.h"
00022 #include <boost/type_traits/remove_pointer.hpp>
00023 #include <boost/multi_array.hpp>
00024 #include "libMems/SeedOccurrenceList.h"
00025 #include "libMems/SubstitutionMatrix.h"
00026 #include "libMems/MatchProjectionAdapter.h"
00027 
00028 namespace mems
00029 {
00030 
00032 extern bool debug_aligner;
00033 
00034 
00036 class AlignmentTreeNode : public TreeNode
00037 {
00038 public:
00039         AlignmentTreeNode() : TreeNode(), refined(false) {};
00040         std::vector< SuperInterval > ordering;  
00041         std::vector< boolean > parents_aligned;         
00042         std::vector< boolean > children_aligned;        
00043         genome::gnSequence* sequence;   
00044         bool refined;   
00045 };
00046 
00047 
00048 double getDefaultBreakpointPenalty( std::vector< genome::gnSequence* >& sequences );
00049 
00053 class ProgressiveAligner : public mems::Aligner
00054 {
00055 public:
00060         ProgressiveAligner( uint seq_count );
00061         ProgressiveAligner( const ProgressiveAligner& al );
00062         ProgressiveAligner& operator=( const ProgressiveAligner& al );
00063         ~ProgressiveAligner();
00064 
00066         void setBreakpointPenalty( double bp_penalty ){ breakpoint_penalty = bp_penalty; }
00068         void setMinimumBreakpointPenalty( double min_bp_penalty ){ min_breakpoint_penalty = min_bp_penalty; }
00070         void setCollinear( boolean collinear ){ this->collinear_genomes = collinear; }
00072         void setPairwiseMatches( mems::MatchList& pair_ml );
00074         void setInputGuideTreeFileName( std::string& fname ){ this->input_guide_tree_fname = fname; }
00076         void setOutputGuideTreeFileName( std::string& fname ){ this->output_guide_tree_fname = fname; }
00078         void SetMaxGappedAlignmentLength( size_t len );
00081         void SetUseCacheDb( bool cbd ){ this->using_cache_db = cbd; }
00082 
00084         void setRefinement( bool refine ){ this->refine = refine; }
00086         void setGappedAlignment( bool do_gapped_alignment ){ this->gapped_alignment = do_gapped_alignment; }
00087 
00088         void setPairwiseScoringScheme( const mems::PairwiseScoringScheme& pss ){ this->subst_scoring = pss; }
00089 
00090         enum LcbScoringScheme
00091         {
00092                 AncestralScoring,
00093                 AncestralSumOfPairsScoring,
00094                 ExtantSumOfPairsScoring
00095         };
00096 
00098         void setLcbScoringScheme( LcbScoringScheme scheme ){ scoring_scheme = scheme; }
00099         LcbScoringScheme getLcbScoringScheme(void){ return scoring_scheme; }
00100 
00101         void setUseSeedFamilies( bool use_seed_families ){ this->use_seed_families = use_seed_families; }
00102         bool getUseSeedFamilies(void){ return this->use_seed_families; }
00103 
00104         void setUseLcbWeightScaling( bool use_weight_scaling ){ this->use_weight_scaling = use_weight_scaling; }
00105         bool getUseLcbWeightScaling(void){ return this->use_weight_scaling; }
00106 
00107         void setBreakpointDistanceScale( double bp_dist_scale ){ this->bp_dist_scale = bp_dist_scale; }
00108         double getBreakpointDistanceScale(void){ return this->bp_dist_scale; }
00109 
00110         void setConservationDistanceScale( double conservation_dist_scale ){ this->conservation_dist_scale = conservation_dist_scale; }
00111         double getConservationDistanceScale(void){ return this->conservation_dist_scale; }
00112 
00113         void setBpDistEstimateMinScore( double min_score ){ this->bp_dist_estimate_score = min_score; }
00114         double getBpDistEstimateMinScore(void){ return this->bp_dist_estimate_score; }
00115 
00117         void getAlignedChildren( node_id_t node, std::vector< node_id_t >& descendants );
00118 
00120         void createAncestralOrdering( std::vector< mems::Interval* >& interval_list, std::vector< SuperInterval >& ancestral_sequence );
00121 
00123         void alignProfileToProfile( node_id_t node1, node_id_t node2, node_id_t ancestor );
00124 
00126         void alignNodes( node_id_t node1, node_id_t node2, node_id_t ancestor );
00127 
00128 
00130         void align( std::vector< genome::gnSequence* >& seq_table, mems::IntervalList& interval_list );
00131 
00132         void getPath( node_id_t first_n, node_id_t last_n, std::vector< node_id_t >& path );
00133         template<class MatchType>
00134         void propagateDescendantBreakpoints( node_id_t node1, uint seqI, std::vector< MatchType* >& iv_list );
00135         void linkSuperIntervals( node_id_t node1, uint seqI, node_id_t ancestor );
00136         void recursiveApplyAncestralBreakpoints( node_id_t ancestor );
00137         void extractAlignment( node_id_t ancestor, size_t super_iv, mems::GappedAlignment& gal );
00138         void extractAlignment( node_id_t ancestor, size_t super_iv, mems::CompactGappedAlignment<>& cga );
00139 
00140         void getPairwiseMatches( const std::vector< node_id_t >& node1_seqs, const std::vector< node_id_t >& node2_seqs, Matrix<mems::MatchList>& pairwise_matches );
00141         void getAncestralMatches( const std::vector< node_id_t > node1_seqs, const std::vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< mems::AbstractMatch* >& ancestral_matches );
00142         void getRepresentativeAncestralMatches( const std::vector< node_id_t > node1_seqs, const std::vector< node_id_t > node2_seqs, node_id_t node1, node_id_t node2, node_id_t ancestor, std::vector< mems::AbstractMatch* >& ancestral_matches );
00143         
00144         // functions for recursive anchor search
00145         
00146         template<class GappedAlignmentType>
00147         void recurseOnPairs( const std::vector<node_id_t>& node1_seqs, 
00148                 const std::vector<node_id_t>& node2_seqs, const GappedAlignmentType& iv, 
00149                 Matrix<mems::MatchList>& matches, Matrix< std::vector< mems::search_cache_t > >& search_cache_db, 
00150                 Matrix< std::vector< mems::search_cache_t > >& new_cache_db,
00151                 boost::multi_array< std::vector< std::vector< int64 > >, 2 >& iv_regions);
00152         void pairwiseAnchorSearch( mems::MatchList& r_list, mems::Match* r_begin, mems::Match* r_end, const mems::AbstractMatch* iv, uint oseqI, uint oseqJ );
00153 
00154         void translateGappedCoordinates( std::vector<mems::AbstractMatch*>& ml, uint seqI, node_id_t extant, node_id_t ancestor );
00155 
00156         void doGappedAlignment( node_id_t ancestor, bool profile_aln );
00157         void refineAlignment( mems::GappedAlignment& gal, node_id_t ancestor, bool profile_aln, AlnProgressTracker& apt );
00158         void FixLeftEnds( node_id_t ancestor );
00159         void ConstructSuperIntervalFromMSA( node_id_t ancestor, size_t ans_siv, mems::GappedAlignment& gal );
00160 
00161         // determines LCBs among each pair of genomes using a somewhat stringent homology 
00162         // criteria.  fills the distance matrix with the number of breakpoints between each pair
00163         void CreatePairwiseBPDistance( boost::multi_array<double, 2>& bp_distmat );
00164 
00165         void constructLcbTrackingMatches( node_id_t ancestral_node, std::vector< mems::AbstractMatch* >& ancestral_matches, std::vector< mems::LcbTrackingMatch< mems::AbstractMatch* > >& tracking_matches );
00166 
00167         void pairwiseScoreTrackingMatches( 
00168                                                 std::vector< mems::TrackingMatch >& tracking_matches, 
00169                                                 std::vector<node_id_t>& node1_descendants, 
00170                                                 std::vector<node_id_t>& node2_descendants,
00171                                                 boost::multi_array< double, 3 >& tm_score_array
00172                                                 );
00173 
00174         void computeAvgAncestralMatchScores( 
00175                                                 std::vector< TrackingMatch >& tracking_matches, 
00176                                                 std::vector<node_id_t>& node1_descendants,
00177                                                 std::vector<node_id_t>& node2_descendants,
00178                                                 boost::multi_array< double, 3 >& tm_score_array
00179                                                 );
00180 
00181         void computeInternalNodeDistances( 
00182                                                 boost::multi_array<double, 2>& bp_dist_mat, 
00183                                                 boost::multi_array<double, 2>& cons_dist_mat, 
00184                                                 std::vector<node_id_t>& node1_descendants,
00185                                                 std::vector<node_id_t>& node2_descendants);
00186 
00187         bool validateSuperIntervals(node_id_t node1, node_id_t node2, node_id_t ancestor);
00188         bool validatePairwiseIntervals(node_id_t node1, node_id_t node2, std::vector<mems::Interval*>& pair_iv);
00189 
00190 
00191         void alignPP(mems::IntervalList& prof1, mems::IntervalList& prof2, mems::IntervalList& interval_list );
00192 
00193 protected:
00194         void getAlignment( mems::IntervalList& interval_list );
00195 
00196         mems::MatchList original_ml;    
00197         PhyloTree< AlignmentTreeNode > alignment_tree;
00198         std::vector< uint > node_sequence_map;
00199         double breakpoint_penalty;
00200         double min_breakpoint_penalty;
00201         std::string input_guide_tree_fname;
00202         std::string output_guide_tree_fname;
00203         boolean debug;
00204         boolean refine;
00205         bool using_cache_db;
00206 
00207         std::vector< SeedOccurrenceList > sol_list;
00208         boost::multi_array<double, 2> bp_distance;      
00209         boost::multi_array<double, 2> conservation_distance;    
00211         LcbScoringScheme scoring_scheme;
00212         bool use_weight_scaling;
00213         bool use_seed_families;
00214 
00215         double bp_dist_scale;
00216         double conservation_dist_scale;
00217 
00218         double bp_dist_estimate_score;  
00220         size_t max_gapped_alignment_length;
00221 
00222         mems::PairwiseScoringScheme subst_scoring;
00223 };
00224 
00225 extern bool debug_aligner;
00226 
00233 void chooseNextAlignmentPair( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t& node1, node_id_t& node2, node_id_t& ancestor );
00234 
00235 void markAligned( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t subject_node, node_id_t neighbor );
00236 
00237 node_id_t createAlignmentTreeRoot( PhyloTree< AlignmentTreeNode >& alignment_tree, node_id_t node1, node_id_t node2 );
00238 
00239 // homogenizes an alignment tree and ordering to prepare for alignment
00240 void prepareAlignmentTree( PhyloTree< AlignmentTreeNode >& alignment_tree );
00241 
00242 inline
00243 ProgressiveAligner::~ProgressiveAligner()
00244 {
00245         for( size_t mI = 0; mI < original_ml.size(); mI++ )
00246                 original_ml[mI]->Free();
00247 }
00248 
00249 template<class T>
00250 class AbsolutComparator
00251 {
00252 public:
00253         boolean operator()(const T& a, const T& b) const
00254         {
00255                 return (genome::absolut(a) < genome::absolut(b));
00256         }
00257 };
00258 
00259 
00260 
00261 template <class MatchVector>
00262 void processNewMatch( uint seqI, MatchVector& new_matches, typename MatchVector::value_type& new_match )
00263 {
00264         new_match->SetStart( seqI, 0 );
00265         if( new_match->Multiplicity() > 1 && new_match->Length(seqI) > 0 )
00266                 new_matches.push_back( new_match );
00267         else
00268         {
00269                 new_match->Free();
00270                 new_match = NULL;
00271         }
00272 }
00273 inline
00274 bool checkConsistent(const AbstractMatch* a, const AbstractMatch* b)
00275 {
00276         bool consistent_overlap = true;
00277         int64 o = (std::numeric_limits<int64>::max)();
00278         int64 inter = 0;
00279         uint seq_count = a->SeqCount();
00280         for( size_t seqI = 0; seqI < seq_count; seqI++ )
00281         {
00282                 if(b->LeftEnd(seqI) == 0 || a->LeftEnd(seqI) == 0)
00283                         continue;
00284                 inter++;
00285                 if(o == (std::numeric_limits<int64>::max)())
00286                         o = b->Start(seqI) - a->Start(seqI);
00287                 if(o != b->Start(seqI) - a->Start(seqI))
00288                         consistent_overlap = false;
00289         }
00290         consistent_overlap = consistent_overlap && inter > 1;
00291         return consistent_overlap;
00292 }
00293 
00301 template <class MatchVector>
00302 void EliminateOverlaps_v2( MatchVector& ml, const std::vector< uint >& seq_ids, bool eliminate_both = false ){
00303         if( ml.size() < 2 )
00304                 return;
00305         uint seq_count = ml[0]->SeqCount();
00306         for( uint sidI = 0; sidI < seq_ids.size(); sidI++ ){
00307                 uint seqI = seq_ids[ sidI ];
00308                 mems::SingleStartComparator<mems::AbstractMatch> msc( seqI );
00309                 std::sort( ml.begin(), ml.end(), msc );
00310                 int64 matchI = 0;
00311                 int64 nextI = 0;
00312                 int64 deleted_count = 0;
00313                 MatchVector new_matches;
00314 
00315                 // scan forward to first defined match
00316                 for(; matchI != ml.size(); matchI++ )
00317                         if( ml[ matchI ]->Start( seqI ) != mems::NO_MATCH )
00318                                 break;
00319 
00320                 for(; matchI < ml.size(); matchI++ ){
00321                         if( ml[ matchI ] == NULL )
00322                                 continue;
00323                         
00324                         for( nextI = matchI + 1; nextI < ml.size(); nextI++ ){
00325                                 if( ml[ nextI ] == NULL )
00326                                         continue;
00327 
00328                                 boolean deleted_matchI = false;
00329                                 // check for overlaps
00330                                 int64 startI = ml[ matchI ]->Start( seqI );
00331                                 int64 lenI = ml[ matchI ]->Length( seqI );
00332                                 int64 startJ = ml[ nextI ]->Start( seqI );
00333                                 int64 diff =  genome::absolut( startJ ) - genome::absolut( startI ) - lenI;
00334 
00335                                 if( diff >= 0 )
00336                                         break;  // there are no more overlaps
00337 
00338                                 diff = -diff;
00339                                 typename MatchVector::value_type new_match;
00340                                 bool mem_iter_smaller = ( ml[ nextI ]->Multiplicity() > ml[ matchI ]->Multiplicity() ) ||
00341                                         ( ml[ nextI ]->Multiplicity() == ml[ matchI ]->Multiplicity() && ml[ nextI ]->Length(seqI) > ml[ matchI ]->Length(seqI) );
00342 
00343                                 bool consistent_overlap = checkConsistent( ml[ matchI ], ml[ nextI ] );
00344 
00345                                 // delete bases from the smaller match
00346                                 if( (!consistent_overlap && eliminate_both) || mem_iter_smaller )
00347                                 {
00348                                         // mem_iter is smaller
00349                                         new_match = ml[matchI]->Copy();
00350                                         // erase base pairs from new_match
00351                                         if( diff >= lenI ){
00352 //                                                      cerr << "Deleting " << **mem_iter << " at the hands of\n" << **next_iter << endl;
00353                                                 ml[ matchI ]->Free();
00354                                                 ml[ matchI ] = NULL;
00355                                                 matchI--;
00356                                                 deleted_matchI = true;
00357                                                 deleted_count++;
00358                                         }else{
00359                                                 ml[ matchI ]->CropRight( diff, seqI );
00360                                                 new_match->CropLeft( new_match->Length(seqI) - diff, seqI );
00361                                         }
00362                                         processNewMatch( seqI, new_matches, new_match );
00363                                 }
00364                                 if( (!consistent_overlap && eliminate_both) || !mem_iter_smaller )
00365                                 {
00366                                         // match_iter is smaller
00367                                         new_match = ml[nextI]->Copy();
00368                                         // erase base pairs from new_match
00369                                         if( diff >= ml[ nextI ]->Length(seqI) ){
00370 //                                                      cerr << "Deleting " << **next_iter << " at the hands of\n" << **mem_iter << endl;
00371                                                 ml[ nextI ]->Free();
00372                                                 ml[ nextI ] = NULL;
00373                                                 deleted_count++;
00374                                         }else{
00375                                                 ml[ nextI ]->CropLeft( diff, seqI );
00376                                                 new_match->CropRight( new_match->Length(seqI) - diff, seqI );
00377                                         }
00378                                         processNewMatch( seqI, new_matches, new_match );
00379                                 }
00380                                 if( deleted_matchI )
00381                                         break;
00382                         }
00383                 }
00384 
00385                 if( deleted_count > 0 ){
00386                         size_t cur = 0;
00387                         for( size_t mI = 0; mI < ml.size(); ++mI )
00388                                 if( ml[mI] != NULL )
00389                                         ml[cur++] = ml[mI];
00390                         ml.erase( ml.begin() + cur, ml.end() );
00391                 }
00392                 ml.insert( ml.end(), new_matches.begin(), new_matches.end() );
00393                 new_matches.clear();
00394         }
00395 }
00396 
00397 template <class MatchVector>
00398 void EliminateOverlaps_v2( MatchVector& ml, bool eliminate_both = false )
00399 {
00400         if( ml.size() < 2 )
00401                 return; // can't eliminate overlaps between fewer than 2 matches
00402         uint seq_count = ml[0]->SeqCount();
00403         std::vector< uint > seq_ids( seq_count );
00404         for( uint i = 0; i < seq_count; ++i )
00405                 seq_ids[i] = i;
00406         EliminateOverlaps_v2( ml, seq_ids, eliminate_both );
00407 };
00408 
00409 template< class MatchVector >
00410 uint64 SimpleGetLCBCoverage( MatchVector& lcb ){
00411         typename MatchVector::iterator match_iter = lcb.begin();
00412         uint64 coverage = 0;
00413         bool debug = true;
00414         for( ; match_iter != lcb.end(); ++match_iter ){
00415                 double maxlen = 0;
00416                 double minlen = 0;
00417                 for( uint seqI = 0; seqI < (*match_iter)->SeqCount(); seqI++ )
00418                 {
00419                         if( (*match_iter)->LeftEnd(seqI) != mems::NO_MATCH )
00420                         {
00421                                 maxlen += (double)(*match_iter)->Length(seqI);
00422                                 if( (*match_iter)->Length(seqI) > minlen )
00423                                         minlen = (double)(*match_iter)->Length(seqI);
00424                         }
00425                 }
00426                 double score = exp( ((*match_iter)->AlignmentLength() - minlen) / (maxlen - minlen) );
00427                 score *= maxlen;
00428                 coverage += (uint64)score;
00429         }
00430         return coverage;
00431 }
00432 
00433 template< class MatchVectorType >
00434 void addUnalignedIntervals_v2( MatchVectorType& iv_list, std::set< uint > seq_set, std::vector<gnSeqI> seq_lengths )
00435 {
00436         std::vector< mems::LCB > adjacencies;
00437         uint lcbI;
00438         uint seqI;
00439         uint seq_count = seq_lengths.size();
00440 
00441 
00442         if( seq_set.size() == 0 )
00443         {
00444                 // if an empty seq set was passed then assume all seqs
00445                 // should be processed
00446                 for( seqI = 0; seqI < seq_count; seqI++ )
00447                         seq_set.insert( seqI );
00448         }
00449         std::vector< std::vector< typename MatchVectorType::value_type > > ymmv;
00450         for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
00451                 ymmv.push_back( std::vector< typename MatchVectorType::value_type >( 1, iv_list[ivI] ) );
00452 
00453         std::vector< double > scores( iv_list.size(), 0 );
00454         computeLCBAdjacencies_v3( ymmv, scores, adjacencies );
00455 
00456         std::vector< int > rightmost;
00457         for( seqI = 0; seqI < seq_count; seqI++ ){
00458                 rightmost.push_back( -1 );
00459         }
00460 
00461         for( lcbI = 0; lcbI <= adjacencies.size(); lcbI++ ){
00462                 std::set< uint >::iterator seq_set_iterator = seq_set.begin();
00463                 for( ; seq_set_iterator != seq_set.end(); seq_set_iterator++ ){
00464                         seqI = *seq_set_iterator;
00465                         // scan left
00466                         int leftI;
00467                         if( lcbI < adjacencies.size() ){
00468 // left is always to the left!!
00469                                 leftI = adjacencies[ lcbI ].left_adjacency[ seqI ];
00470                         }else
00471                                 leftI = rightmost[ seqI ];
00472 
00473                         int rightI = lcbI < adjacencies.size() ? lcbI : -1;
00474 // right is always to the right!!
00475                         if( lcbI < adjacencies.size() )
00476                                 if( adjacencies[ lcbI ].right_adjacency[ seqI ] == -1 )
00477                                         rightmost[ seqI ] = lcbI;
00478                         
00479                         int64 left_start, right_start;
00480                         mems::getGapBounds( seq_lengths, adjacencies, seqI, leftI, rightI, left_start, right_start );
00481                         int64 gap_len =  genome::absolut( right_start ) - genome::absolut( left_start );
00482                         if( gap_len > 0 ){
00483                                 mems::Match mm( seq_count );
00484                                 mems::Match* m = mm.Copy();
00485                                 for( uint seqJ = 0; seqJ < seq_count; seqJ++ ){
00486                                         m->SetStart( seqJ, 0 );
00487                                 }
00488                                 m->SetStart( seqI, left_start );
00489                                 m->SetLength( gap_len );
00490                                 mems::Interval iv;
00491                                 std::vector< mems::AbstractMatch* > tmpvec(1, m);
00492                                 iv.SetMatches( tmpvec );
00493                                 iv_list.push_back( iv.Copy() );
00494                         }
00495                 }
00496         }
00497 }
00498 
00499 inline
00500 void projectIntervalList( mems::IntervalList& iv_list, std::vector< uint >& projection, std::vector< std::vector< mems::MatchProjectionAdapter* > >& LCB_list, std::vector< mems::LCB >& projected_adjs )
00501 {
00502         std::vector< size_t > proj(projection.size());
00503         for( size_t i = 0; i < projection.size(); ++i )
00504                 proj[i] = projection[i];
00505         std::vector< mems::MatchProjectionAdapter* > mpa_list;
00506         // construct pairwise Interval projections
00507         for( size_t corI = 0; corI < iv_list.size(); corI++ )
00508         {
00509                 size_t projI = 0;
00510                 for( ; projI < projection.size(); ++projI )
00511                         if( iv_list[corI].LeftEnd(projection[projI]) == mems::NO_MATCH )
00512                                 break;
00513                 if( projI != projection.size() )
00514                         continue;
00515                 mems::MatchProjectionAdapter mpa_tmp( &iv_list[corI], proj );
00516                 mpa_list.push_back( mpa_tmp.Copy() );
00517                 if( mpa_list.back()->Orientation(0) == mems::AbstractMatch::reverse )
00518                         mpa_list.back()->Invert();
00519         }
00520         std::vector< gnSeqI > breakpoints;
00521         IdentifyBreakpoints( mpa_list, breakpoints );
00522         ComputeLCBs_v2( mpa_list, breakpoints, LCB_list );
00523         std::vector< double > lcb_scores( LCB_list.size(), 0 );
00524         computeLCBAdjacencies_v3( LCB_list, lcb_scores, projected_adjs );
00525 }
00526 
00527 
00528 template< class MatchType = mems::AbstractMatch >
00529 class GenericMatchSeqManipulator
00530 {
00531 public:
00532         GenericMatchSeqManipulator( uint seq ) : m_seq(seq) {}
00533         gnSeqI LeftEnd(MatchType*& m) const{ return m->LeftEnd(m_seq); }
00534         gnSeqI Length(MatchType*& m) const{ return m->Length(m_seq); }
00535         void CropLeft(MatchType*& m, gnSeqI amount ) const{ m->CropLeft(amount, m_seq); }
00536         void CropRight(MatchType*& m, gnSeqI amount ) const{ m->CropRight(amount, m_seq); }
00537         template< typename ContainerType >
00538         void AddCopy(ContainerType& c, MatchType*& m) const{ c.push_back( m->Copy() ); }
00539 private:
00540         uint m_seq;
00541 };
00542 
00543 typedef GenericMatchSeqManipulator<> AbstractMatchSeqManipulator;
00544 
00545 class SuperIntervalManipulator
00546 {
00547 public:
00548         gnSeqI LeftEnd(const SuperInterval& siv) const{ return siv.LeftEnd(); }
00549         gnSeqI Length(const SuperInterval& siv) const{ return siv.Length(); }
00550         void CropLeft( SuperInterval& siv, gnSeqI amount ) const{ siv.CropLeft( amount );}
00551         void CropRight( SuperInterval& siv, gnSeqI amount ) const{ siv.CropRight( amount );}
00552         template< typename ContainerType >
00553         void AddCopy(ContainerType& c, const SuperInterval& siv) const{ c.push_back( siv ); }
00554 };
00555 
00556 
00557 // iv_list is a container class that contains pointers to intervals or 
00558 // matches of some sort
00559 // precondition: both bp_list and intervals *must* be sorted
00560 template< class T, class Maniplator >
00561 void applyBreakpoints( std::vector< gnSeqI >& bp_list, std::vector<T>& iv_list, Maniplator& manip )
00562 {
00563 
00564         size_t iv_count = iv_list.size();
00565         size_t bpI = 0;
00566         size_t ivI = 0;
00567         while( ivI < iv_count && bpI < bp_list.size() )
00568         {
00569                 if( manip.LeftEnd(iv_list[ivI]) == NO_MATCH )
00570                 {
00571                         ++ivI;
00572                         continue;       // undefined in seqI, so no breakpoint here
00573                 }
00574                 //  -(ivI)----
00575                 //  -------|--
00576                 if( manip.LeftEnd(iv_list[ivI]) + manip.Length(iv_list[ivI]) <= bp_list[bpI] )
00577                 {
00578                         ++ivI;
00579                         continue;
00580                 }
00581                 //  -----(ivI)-
00582                 //  --|--------
00583                 if( bp_list[bpI] <= manip.LeftEnd(iv_list[ivI]) )
00584                 {
00585                         ++bpI;
00586                         continue;
00587                 }
00588 
00589                 // if split_at isn't 0 then we need to split cur_iv
00590                 // put the left side in the new list and crop cur_iv
00591                 gnSeqI crop_amt = bp_list[bpI] - manip.LeftEnd(iv_list[ivI]);
00592                 manip.AddCopy( iv_list, iv_list[ivI] );
00593                 T& left_iv = iv_list.back();
00594 
00595                 manip.CropLeft( iv_list[ivI], crop_amt );
00596                 manip.CropRight( left_iv, manip.Length(left_iv)-crop_amt );
00597                 // restore ordering
00598                 size_t nextI = ivI + 1;
00599                 while( nextI < iv_count && manip.LeftEnd( iv_list[nextI-1] ) > manip.LeftEnd( iv_list[nextI] ) )
00600                 {
00601                         std::swap( iv_list[nextI-1], iv_list[nextI] );
00602                         nextI++;
00603                 }
00604 
00605 // assume that crop works correctly and that it's okay to pass matches with NO_MATCH            
00606 
00607                 if( manip.Length( iv_list[ivI] ) == 0 )
00608                 {
00609                         std::cerr << "Big fat generic zero 1\n";
00610                         genome::breakHere();
00611                 }
00612                 if( manip.Length( left_iv ) == 0 )
00613                 {
00614                         std::cerr << "Big fat generic zero 2\n";
00615                         genome::breakHere();
00616                 }
00617                 if( manip.LeftEnd( iv_list[ivI] ) == 0 )
00618                 {
00619                         std::cerr << "uh oh\n";
00620                         genome::breakHere();
00621                 }
00622                 if( manip.LeftEnd( left_iv ) == 0 )
00623                 {
00624                         std::cerr << "uh oh 2\n";
00625                         genome::breakHere();
00626                 }
00627 
00628         }
00629 }
00630 
00631 
00632 }
00633 
00634 //namespace std {
00635 //      void swap( PhyloTree<mems::AlignmentTreeNode>& a, PhyloTree<mems::AlignmentTreeNode>& b);
00636 //}
00637 
00638 #endif // _ProgressiveAligner_h_

Generated on Fri Mar 14 06:01:04 2008 for libMems by doxygen 1.3.6