00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef __Interval_h__
00010 #define __Interval_h__
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include "libGenome/gnClone.h"
00017 #include "libGenome/gnDebug.h"
00018 #include "libMems/SparseAbstractMatch.h"
00019 #include "libMems/gnAlignedSequences.h"
00020 #include "libMems/AbstractGappedAlignment.h"
00021 #include "libMems/Match.h"
00022 #include "libMems/GappedAlignment.h"
00023 #include <iostream>
00024 #include <vector>
00025 #include "libMems/twister.h"
00026
00027
00028
00029 namespace mems {
00030
00031
00032 template< typename ListType, typename RanIt, typename Ty >
00033 void insert( ListType& the_list, std::reverse_iterator<RanIt>& riter, Ty& val )
00034 {
00035 the_list.insert( riter.base(), val );
00036 ++riter;
00037 }
00038 template< typename ListType, typename Ty >
00039 void insert( ListType& the_list, const typename ListType::iterator& iter, Ty& val )
00040 {
00041 the_list.insert( iter, val );
00042 }
00043
00044
00045 template< class GappedBaseImpl = AbstractGappedAlignment< SparseAbstractMatch<> > >
00046 class GenericInterval : public GappedBaseImpl
00047 {
00048 public:
00049 GenericInterval(){};
00050
00051
00052
00054 template<typename BidIt>
00055 GenericInterval( BidIt it_begin, const BidIt& it_end ) : GappedBaseImpl( (*it_begin)->SeqCount(), 0 )
00056 {
00057 std::vector<gnSeqI> pos((*it_begin)->SeqCount(), NO_MATCH);
00058 for( ; it_begin != it_end; ++it_begin )
00059 this->matches.push_back( (*it_begin)->Copy() );
00060 CalculateOffset();
00061 addUnalignedRegions();
00062 CalculateAlignmentLength();
00063 ValidateMatches();
00064 }
00065
00066 GenericInterval( const GenericInterval& iv );
00067 ~GenericInterval();
00068 GenericInterval& operator=( const GenericInterval& iv );
00069
00070 GenericInterval* Clone() const;
00071 GenericInterval* Copy() const;
00072 virtual void Free();
00073
00075 template< class MatchVector >
00076 void SetMatches( MatchVector& matches )
00077 {
00078
00079 Match m( matches[0]->SeqCount() );
00080 std::vector<AbstractMatch*> tmp(1, &m);
00081 *this = GenericInterval( tmp.begin(), tmp.end() );
00082
00083
00084 for( std::size_t mI = 0; mI < this->matches.size(); mI++ )
00085 this->matches[mI]->Free();
00086
00087
00088 this->matches.resize(matches.size());
00089 std::copy(matches.begin(), matches.end(), this->matches.begin());
00090
00091 CalculateOffset();
00092 addUnalignedRegions();
00093 CalculateAlignmentLength();
00094 ValidateMatches();
00095
00096
00097 matches.clear();
00098 }
00099
00101 template< class MatchVector >
00102 void SetMatchesTemp( MatchVector& matches )
00103 {
00104
00105
00106 for( std::size_t mI = 0; mI < this->matches.size(); mI++ )
00107 this->matches[mI]->Free();
00108
00109
00110 this->matches.resize(matches.size());
00111 std::copy(matches.begin(), matches.end(), this->matches.begin());
00112
00113 CalculateOffset();
00114 addUnalignedRegions();
00115 CalculateAlignmentLength();
00116 ValidateMatches();
00117
00118
00119 matches.clear();
00120 }
00124 template<typename BaseImpl> friend std::ostream& operator<<(std::ostream& os, const GenericInterval<BaseImpl>& iv);
00125
00129 template<typename BaseImpl> friend std::istream& operator>>(std::istream& is, const GenericInterval<BaseImpl>& iv);
00130
00131
00132 void Invert();
00133 void CropStart(gnSeqI crop_amount);
00134 void CropEnd(gnSeqI crop_amount);
00135 void MoveStart(int64 move_amount);
00136 void MoveEnd(int64 move_amount);
00137
00138 virtual void CalculateOffset();
00139
00140 void add( AbstractMatch* am ){ matches.push_back( am->Copy() ); }
00141
00146 virtual void GetAlignedSequences( gnAlignedSequences& gnas, const std::vector< genome::gnSequence* >& seq_table ) const;
00147
00148 void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
00149
00150 void CropLeft( gnSeqI amount, uint seqI );
00151 void CropRight( gnSeqI amount, uint seqI );
00152
00153 void SetAlignment( const std::vector< std::string >& seq_align );
00154
00155
00156 const std::vector<AbstractMatch*>& GetMatches() const{ return matches; }
00157 void StealMatches( std::vector<AbstractMatch*>& matches );
00158
00160 void Marble( gnSeqI size );
00161
00162 void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
00163
00164 bool IsGap( uint seq, gnSeqI col ) const;
00165
00167 void ValidateMatches() const;
00168
00169 void swap( GenericInterval& other ){ swap(&other); }
00170
00171 protected:
00172
00173 void swap( GenericInterval* other ){
00174 std::swap( matches, other->matches );
00175 GappedBaseImpl::swap( other );
00176 }
00177 std::vector< AbstractMatch* > matches;
00178 private:
00179 void addUnalignedRegions();
00180 void FindMatchPos( uint seqI, gnSeqI pos, size_t& matchI, gnSeqI& match_pos );
00181 void GetColumnAndMatch( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column, size_t& matchI, gnSeqI& match_col ) const;
00182 void CalculateAlignmentLength();
00183 };
00184
00185 typedef GenericInterval<> Interval;
00186
00187
00188 template<class GappedBaseImpl>
00189 GenericInterval<GappedBaseImpl>* GenericInterval<GappedBaseImpl>::Copy() const
00190 {
00191 return m_allocateAndCopy( *this );
00192 }
00193 template<class GappedBaseImpl>
00194 void GenericInterval<GappedBaseImpl>::Free()
00195 {
00196 m_free(this);
00197 }
00198
00199 template<class GappedBaseImpl>
00200 GenericInterval<GappedBaseImpl>::~GenericInterval()
00201 {
00202 for( std::size_t mI = 0; mI < matches.size(); mI++ )
00203 matches[mI]->Free();
00204 }
00205
00206 template<class GappedBaseImpl>
00207 void GenericInterval<GappedBaseImpl>::StealMatches( std::vector<AbstractMatch*>& matches ){
00208 matches = this->matches;
00209 this->matches.clear();
00210 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00211 {
00212 this->SetLeftEnd( seqI, NO_MATCH );
00213 this->SetLength( 0, seqI );
00214 }
00215 this->SetAlignmentLength(0);
00216 }
00217
00218 template<class GappedBaseImpl>
00219 GenericInterval<GappedBaseImpl>::GenericInterval( const GenericInterval<GappedBaseImpl>& iv )
00220 {
00221 *this = iv;
00222 }
00223
00224 template<class GappedBaseImpl>
00225 GenericInterval<GappedBaseImpl>& GenericInterval<GappedBaseImpl>::operator=( const GenericInterval& iv )
00226 {
00227 GappedBaseImpl::operator=( iv );
00228 for( std::size_t mI = 0; mI < matches.size(); mI++ )
00229 matches[mI]->Free();
00230 matches.clear();
00231 for( std::size_t mI = 0; mI < iv.matches.size(); mI++ )
00232 matches.push_back( iv.matches[mI]->Copy() );
00233 return *this;
00234 }
00235
00236 template<class GappedBaseImpl>
00237 GenericInterval<GappedBaseImpl>* GenericInterval<GappedBaseImpl>::Clone() const
00238 {
00239 return new GenericInterval( *this );
00240 }
00241
00242
00243 static bool debug_interval = false;
00244
00245 template<class GappedBaseImpl>
00246 void GenericInterval<GappedBaseImpl>::ValidateMatches() const
00247 {
00248 if( !debug_interval )
00249 return;
00250 if( matches.size() == 0 )
00251 {
00252
00253
00254 return;
00255 }
00256 for( uint seqI = 0; seqI < matches[0]->SeqCount(); ++seqI )
00257 {
00258 gnSeqI prev_rend = this->LeftEnd(seqI);
00259 if( this->Orientation(seqI) == AbstractMatch::forward )
00260 {
00261 for( size_t mI = 0; mI < matches.size(); ++mI )
00262 {
00263 if( matches[mI]->LeftEnd(seqI) != NO_MATCH )
00264 {
00265 if( prev_rend != matches[mI]->LeftEnd(seqI) )
00266 {
00267 std::cerr << "iv broken\n";
00268 std::cerr << "seqI: " << seqI << "\t prev_rend: " << prev_rend << std::endl;
00269 std::cerr << "mI: " << mI << "\tlend: " << matches[mI]->LeftEnd(seqI) << std::endl;
00270 genome::breakHere();
00271 }
00272 prev_rend = matches[mI]->RightEnd(seqI) + 1;
00273 }
00274 }
00275 }else if( this->Orientation(seqI) == AbstractMatch::reverse )
00276 {
00277 for( size_t mI = matches.size(); mI > 0; mI-- )
00278 {
00279 if( matches[mI-1]->LeftEnd(seqI) != NO_MATCH )
00280 {
00281 if( prev_rend != matches[mI-1]->LeftEnd(seqI) )
00282 {
00283 std::cerr << "iv broken 2\n";
00284 genome::breakHere();
00285 }
00286 prev_rend = matches[mI-1]->RightEnd(seqI) + 1;
00287 }
00288 }
00289 }
00290
00291 if( this->Orientation(seqI) != AbstractMatch::undefined && this->Length(seqI) == 0 )
00292 {
00293 genome::breakHere();
00294 std::cerr << "ERROR: confused interval\n";
00295 }
00296 }
00297 }
00298
00299 template<class GappedBaseImpl>
00300 void GenericInterval<GappedBaseImpl>::GetColumnAndMatch( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column, size_t& matchI, gnSeqI& match_col ) const
00301 {
00302
00303 gnSeqI col_pos = 0;
00304 size_t mI = 0;
00305 pos.clear();
00306 for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
00307 {
00308 if( this->LeftEnd(seqI) == NO_MATCH )
00309 pos.push_back(NO_MATCH);
00310 else if( this->Orientation(seqI) == AbstractMatch::forward )
00311 pos.push_back(this->LeftEnd(seqI));
00312 else
00313 pos.push_back(this->RightEnd(seqI)+1);
00314 }
00315
00316 column = std::vector<bool>(this->SeqCount(), false);
00317
00318 for( ; mI < matches.size(); ++mI )
00319 {
00320 uint seqI = 0;
00321
00322 gnSeqI diff = matches[mI]->AlignmentLength();
00323 diff = col_pos + diff <= col ? diff : col - col_pos;
00324
00325 for( seqI = 0; seqI < this->SeqCount(); ++seqI )
00326 if( this->Orientation(seqI) == AbstractMatch::forward )
00327 pos[seqI] += diff;
00328 else if( this->Orientation(seqI) == AbstractMatch::reverse )
00329 pos[seqI] -= diff;
00330
00331 col_pos += diff;
00332
00333 if( col_pos >= col && diff < matches[mI]->AlignmentLength() )
00334 {
00335 std::vector<gnSeqI> m_pos;
00336 matches[mI]->GetColumn( diff, m_pos, column );
00337 for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
00338 if( m_pos[seqI] != NO_MATCH )
00339 pos[seqI] = m_pos[seqI];
00340 matchI = mI;
00341 match_col = diff;
00342 break;
00343 }
00344 }
00345 }
00346
00347 template<typename ListType, typename Iter>
00348 void AddGapMatches( ListType& the_list, const Iter& first, const Iter& last,
00349 uint seqI, int64 left_end, int64 right_end,
00350 AbstractMatch::orientation seq_orient, uint seq_count )
00351 {
00352 Iter iter = first;
00353 int64 pos = left_end-1;
00354 for( ; iter != last; ++iter )
00355 {
00356 if( (*iter)->LeftEnd(seqI) != NO_MATCH )
00357 {
00358 gnSeqI len = (*iter)->LeftEnd(seqI)-pos-1;
00359 if( len > 4000000000u )
00360 {
00361 std::cerr << "triplebogus interval data\n";
00362 std::cerr << "(*iter)->LeftEnd(" << seqI << "): " << (*iter)->LeftEnd(seqI) << std::endl;
00363 std::cerr << "pos: " << pos << std::endl;
00364 genome::breakHere();
00365 }
00366
00367 if( len > 0 )
00368 {
00369 Match tmp(seq_count);
00370 Match* new_m = tmp.Copy();
00371 new_m->SetLeftEnd(seqI, pos + 1);
00372 new_m->SetOrientation(seqI, seq_orient);
00373 new_m->SetLength(len);
00374 pos = (*iter)->RightEnd(seqI);
00375 insert(the_list, iter, new_m);
00376 }else
00377 pos = (*iter)->RightEnd(seqI);
00378 }
00379 }
00380
00381 if( right_end != pos )
00382 {
00383 Match tmp(seq_count);
00384 Match* new_m = tmp.Copy();
00385 new_m->SetLeftEnd(seqI, pos+1);
00386 new_m->SetLength(right_end-pos-1);
00387 insert(the_list, iter, new_m);
00388 }
00389 }
00390
00391
00392 template<class GappedBaseImpl>
00393 void GenericInterval<GappedBaseImpl>::Marble( gnSeqI size )
00394 {
00395 if( this->SeqCount() > 2 )
00396 throw "I can't handle that many at once\n";
00397 if( this->Multiplicity() < 2 )
00398 return;
00399
00400
00401 std::list<AbstractMatch*> mlist;
00402 mlist.insert( mlist.end(), matches.begin(), matches.end() );
00403 std::list<AbstractMatch*>::iterator m_iter = mlist.begin();
00404 for(; m_iter != mlist.end(); ++m_iter )
00405 {
00406 if( (*m_iter)->Multiplicity() != 1 || (*m_iter)->AlignmentLength() <= size )
00407 continue;
00408
00409 uint seqI = 0;
00410 for( ; seqI < (*m_iter)->SeqCount(); seqI++ )
00411 if( (*m_iter)->LeftEnd(seqI) != NO_MATCH )
00412 break;
00413 AbstractMatch* left_iv = (*m_iter)->Copy();
00414 left_iv->CropEnd( left_iv->AlignmentLength() - size );
00415 (*m_iter)->CropStart( size );
00416 m_iter = mlist.insert( m_iter, left_iv );
00417 }
00418 matches.clear();
00419 matches.insert( matches.end(), mlist.begin(), mlist.end() );
00420 this->ValidateMatches();
00421
00422
00423 std::vector< std::vector<AbstractMatch*>::iterator > seq_iter( this->SeqCount(), matches.begin() );
00424 std::vector< AbstractMatch* > interleaved(matches.size());
00425 std::vector<AbstractMatch*>::iterator anchor = matches.begin();
00426 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00427 {
00428 if( this->LeftEnd(seqI) == NO_MATCH )
00429 continue;
00430 for( ; seq_iter[seqI] != matches.end() && (*seq_iter[seqI])->LeftEnd(seqI) == NO_MATCH; ++seq_iter[seqI] );
00431 }
00432 for( ; anchor != matches.end() && (*anchor)->Multiplicity() < this->SeqCount(); ++anchor );
00433 size_t cur = 0;
00434 while(true)
00435 {
00436
00437 uint seqI = 0;
00438 do{
00439 for( seqI = 0; seqI < this->SeqCount(); seqI++ )
00440 {
00441 if( seq_iter[seqI] == anchor && anchor != matches.end() )
00442 {
00443 for( uint seqJ = 0; seqJ < this->SeqCount(); seqJ++ )
00444 {
00445
00446 while( seq_iter[seqJ] != anchor )
00447 {
00448 interleaved[cur++] = *(seq_iter[seqJ]);
00449 for( ++seq_iter[seqJ]; seq_iter[seqJ] != matches.end() && (*seq_iter[seqJ])->LeftEnd(seqJ) == NO_MATCH; ++seq_iter[seqJ] );
00450 }
00451
00452 for( ++seq_iter[seqJ]; seq_iter[seqJ] != matches.end() && (*seq_iter[seqJ])->LeftEnd(seqJ) == NO_MATCH; ++seq_iter[seqJ] );
00453 }
00454
00455 interleaved[cur++] = *anchor;
00456 for( ++anchor; anchor != matches.end() && (*anchor)->Multiplicity() < this->SeqCount(); ++anchor );
00457
00458 break;
00459 }
00460 }
00461 }while( seqI < this->SeqCount() );
00462
00463 size_t diff1 = anchor - seq_iter[0];
00464 size_t diff2 = anchor - seq_iter[1];
00465 if( diff1 == 0 && diff2 == 0 )
00466 break;
00467
00468
00469 double samp = RandTwisterDouble();
00470
00471 if( diff2 == 0 || (samp < .5 && diff1 > 0) )
00472 {
00473 interleaved[cur++] = *(seq_iter[0]);
00474 for( ++seq_iter[0]; seq_iter[0] != matches.end() && (*seq_iter[0])->LeftEnd(0) == NO_MATCH; ++seq_iter[0] );
00475 }else{
00476 interleaved[cur++] = *(seq_iter[1]);
00477 for( ++seq_iter[1]; seq_iter[1] != matches.end() && (*seq_iter[1])->LeftEnd(1) == NO_MATCH; ++seq_iter[1] );
00478 }
00479 }
00480 matches = interleaved;
00481 this->ValidateMatches();
00482 }
00483
00484
00485 template<class GappedBaseImpl>
00486 void GenericInterval<GappedBaseImpl>::CropStart(gnSeqI crop_amount)
00487 {
00488 if( crop_amount > this->AlignmentLength() )
00489 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00490 if( crop_amount == 0 )
00491 return;
00492
00493 std::vector<bool> col;
00494 std::vector<gnSeqI> pos;
00495 size_t matchI = 0;
00496 gnSeqI match_col;
00497 this->GetColumnAndMatch( crop_amount, pos, col, matchI, match_col );
00498
00499
00500 for( size_t mI = 0; mI < matchI; ++mI )
00501 matches[mI]->Free();
00502 matches.erase(matches.begin(), matches.begin()+matchI);
00503
00504
00505 matches[0]->CropStart(match_col);
00506
00507 this->CalculateOffset();
00508 this->ValidateMatches();
00509 }
00510
00511 template<class GappedBaseImpl>
00512 void GenericInterval<GappedBaseImpl>::CropEnd(gnSeqI crop_amount)
00513 {
00514 if( crop_amount > this->AlignmentLength() )
00515 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00516 if( crop_amount == 0 )
00517 return;
00518 std::vector<bool> col;
00519 std::vector<gnSeqI> pos;
00520 size_t matchI = 0;
00521 gnSeqI match_col;
00522 this->GetColumnAndMatch( this->AlignmentLength()-crop_amount, pos, col, matchI, match_col );
00523
00524
00525 size_t plusmatch = match_col == 0 ? 0 : 1;
00526 for( size_t mI = matchI+plusmatch; mI < matches.size(); ++mI )
00527 matches[mI]->Free();
00528 matches.erase(matches.begin()+matchI+plusmatch, matches.end());
00529
00530
00531 if( matches.size() > 0 && plusmatch == 1 )
00532 matches.back()->CropEnd(matches.back()->AlignmentLength() - match_col);
00533
00534 this->CalculateOffset();
00535 this->ValidateMatches();
00536 }
00537
00538 template<class GappedBaseImpl>
00539 void GenericInterval<GappedBaseImpl>::GetAlignment( std::vector< bitset_t >& align_matrix ) const
00540 {
00541 gnSeqI cur_col = 0;
00542 align_matrix = std::vector< bitset_t >( this->SeqCount(), bitset_t(this->AlignmentLength(),false) );
00543 for( uint matchI = 0; matchI < matches.size(); ++matchI ){
00544 std::vector< bitset_t > aln_mat;
00545 matches[matchI]->GetAlignment( aln_mat );
00546 for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
00547 {
00548 if( matches[matchI]->LeftEnd(seqI) == NO_MATCH || matches[matchI]->Length(seqI) == 0 )
00549 continue;
00550
00551 size_t ct = 0;
00552 gnSeqI len = matches[matchI]->Length(seqI);
00553 for( bitset_t::size_type pos = aln_mat[seqI].find_first(); ct < len; pos = aln_mat[seqI].find_next(pos) )
00554 {
00555 align_matrix[seqI].set( cur_col + pos );
00556 ct++;
00557 }
00558 }
00559 cur_col += matches[matchI]->AlignmentLength();
00560 }
00561 }
00562
00563 template<class GappedBaseImpl>
00564 void GenericInterval<GappedBaseImpl>::CropLeft( gnSeqI amount, uint seqI )
00565 {
00566 if( amount > this->Length(seqI) )
00567 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00568 if( this->LeftEnd(seqI) == NO_MATCH || amount == 0 )
00569 return;
00570
00571
00572 gnSeqI pre_len = this->Length(seqI);
00573 gnSeqI pre_lend = this->LeftEnd(seqI);
00574
00575 gnSeqI match_pos;
00576 size_t mI;
00577 this->FindMatchPos(seqI, amount, mI, match_pos);
00578 if( matches[mI]->Orientation(seqI) == this->Orientation(seqI) )
00579 matches[mI]->CropLeft(match_pos, seqI);
00580 else
00581 matches[mI]->CropRight(match_pos, seqI);
00582
00583 if( matches[mI]->Length(seqI) == 0 )
00584 std::cerr << "Big fat zero 1\n";
00585
00586
00587 if( this->Orientation(seqI) == AbstractMatch::forward )
00588 {
00589 for( size_t m = 0; m < mI; m++ )
00590 matches[m]->Free();
00591 matches.erase(matches.begin(), matches.begin()+mI);
00592 }else{
00593 for( size_t m = mI+1; m < matches.size(); m++ )
00594 matches[m]->Free();
00595 matches.erase(matches.begin()+mI+1, matches.end());
00596 }
00597
00598 this->CalculateOffset();
00599 this->ValidateMatches();
00600
00601 if( this->Length(seqI) != pre_len - amount )
00602 {
00603 std::cerr << "Error intercroplef\n";
00604 std::cerr << "pre len: " << pre_len << std::endl;
00605 std::cerr << "pre lend: " << pre_lend << std::endl;
00606 std::cerr << "amount: " << amount << std::endl;
00607 std::cerr << "LeftEnd(seqI) " << this->LeftEnd(seqI) << std::endl;
00608 std::cerr << "Length(seqI) " << this->Length(seqI) << std::endl;
00609 std::cerr << "AlignmentLength() " << this->AlignmentLength() << std::endl;
00610 genome::breakHere();
00611 }
00612 }
00613
00614 template<class GappedBaseImpl>
00615 void GenericInterval<GappedBaseImpl>::CropRight( gnSeqI amount, uint seqI )
00616 {
00617 if( amount > this->Length(seqI) )
00618 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00619
00620 if( this->LeftEnd(seqI) == NO_MATCH || amount == 0 )
00621 return;
00622
00623
00624 gnSeqI pre_len = this->Length(seqI);
00625 gnSeqI pre_lend = this->LeftEnd(seqI);
00626
00627 gnSeqI left_amount = this->Length(seqI) - amount;
00628 gnSeqI match_pos;
00629 size_t mI;
00630 this->FindMatchPos(seqI, left_amount, mI, match_pos);
00631 if( matches[mI]->Orientation(seqI) == this->Orientation(seqI) )
00632 matches[mI]->CropRight(matches[mI]->Length(seqI)-match_pos, seqI);
00633 else
00634 matches[mI]->CropLeft(matches[mI]->Length(seqI)-match_pos, seqI);
00635
00636 if( matches[mI]->Length(seqI) == 0 )
00637 mI += this->Orientation(seqI) == AbstractMatch::forward ? -1 : 1;
00638
00639
00640 if( this->Orientation(seqI) == AbstractMatch::forward )
00641 {
00642 for( size_t m = mI+1; m < matches.size(); m++ )
00643 matches[m]->Free();
00644 matches.erase(matches.begin()+(mI+1), matches.end());
00645 }else{
00646 for( size_t m = 0; m < mI; m++ )
00647 matches[m]->Free();
00648 matches.erase(matches.begin(), matches.begin()+mI);
00649 }
00650
00651 this->CalculateOffset();
00652 this->ValidateMatches();
00653
00654 if( this->Length(seqI) != pre_len - amount )
00655 {
00656 std::cerr << "Error intercropright\n";
00657 std::cerr << "pre len: " << pre_len << std::endl;
00658 std::cerr << "pre lend: " << pre_lend << std::endl;
00659 std::cerr << "amount: " << amount << std::endl;
00660 std::cerr << "LeftEnd(seqI) " << this->LeftEnd(seqI) << std::endl;
00661 std::cerr << "Length(seqI) " << this->Length(seqI) << std::endl;
00662 std::cerr << "AlignmentLength() " << this->AlignmentLength() << std::endl;
00663 genome::breakHere();
00664 }
00665 }
00666
00667 template<class GappedBaseImpl>
00668 void GenericInterval<GappedBaseImpl>::MoveStart(int64 move_amount)
00669 {
00670 GappedBaseImpl::MoveStart(move_amount);
00671 for( size_t mI = 0; mI < matches.size(); mI++ )
00672 matches[mI]->MoveStart(move_amount);
00673 }
00674
00675 template<class GappedBaseImpl>
00676 void GenericInterval<GappedBaseImpl>::MoveEnd(int64 move_amount)
00677 {
00678 GappedBaseImpl::MoveEnd(move_amount);
00679 for( size_t mI = 0; mI < matches.size(); mI++ )
00680 matches[mI]->MoveEnd(move_amount);
00681 }
00682
00683
00684 template< class MatchVector >
00685 void FindBoundaries( const MatchVector& matches, std::vector<gnSeqI>& left_ends, std::vector<gnSeqI>& lengths, std::vector<bool>& orientations )
00686 {
00687 uint seqI;
00688 boolean zero_exists = false;
00689 uint seq_count = matches.front()->SeqCount();
00690 left_ends = std::vector<gnSeqI>( seq_count, NO_MATCH );
00691 lengths = std::vector<gnSeqI>( seq_count, 0 );
00692 orientations = std::vector<bool>( seq_count, false );
00693
00694
00695 uint matchI = 0;
00696 for(; matchI != matches.size(); ++matchI )
00697 {
00698 zero_exists = false;
00699 for( seqI = 0; seqI < seq_count; ++seqI )
00700 {
00701 if( left_ends[seqI] == NO_MATCH && matches[matchI]->Orientation(seqI) == AbstractMatch::forward )
00702 {
00703 left_ends[seqI] = matches[ matchI ]->LeftEnd(seqI);
00704 orientations[seqI] = true;
00705 }
00706 else if( left_ends[seqI] == NO_MATCH )
00707 zero_exists = true;
00708 }
00709 if( !zero_exists )
00710 break;
00711 }
00712
00713
00714 for( matchI = matches.size(); matchI > 0; matchI-- )
00715 {
00716 zero_exists = false;
00717 for( seqI = 0; seqI < seq_count; ++seqI )
00718 {
00719 if( lengths[seqI] == 0 &&
00720 matches[ matchI - 1 ]->Orientation(seqI) == AbstractMatch::forward )
00721 {
00722 lengths[seqI] = matches[matchI - 1]->LeftEnd(seqI) + matches[matchI - 1]->Length(seqI) - left_ends[seqI];
00723 }
00724 if( left_ends[seqI] != NO_MATCH && lengths[seqI] == 0 )
00725 zero_exists = true;
00726 if( lengths[seqI] > 1000000000 )
00727 {
00728 std::cerr << "matches[matchI - 1]->Length(" << seqI << ") = " << matches[matchI - 1]->Length(seqI) << std::endl;
00729 std::cerr << "matches[matchI - 1]->LeftEnd(" << seqI << ") = " << matches[matchI - 1]->LeftEnd(seqI) << std::endl;
00730
00731 std::cerr << "oh skeethockey mushrooms\n";
00732 genome::breakHere();
00733 }
00734 }
00735 if( !zero_exists )
00736 break;
00737 }
00738
00739
00740 for( matchI = matches.size(); matchI > 0; matchI-- )
00741 {
00742 zero_exists = false;
00743 for( seqI = 0; seqI < seq_count; ++seqI )
00744 {
00745 if( left_ends[seqI] == NO_MATCH && matches[ matchI - 1 ]->Orientation(seqI) == AbstractMatch::reverse )
00746 left_ends[seqI] = matches[matchI - 1]->LeftEnd(seqI);
00747 if( left_ends[seqI] == NO_MATCH )
00748 zero_exists = true;
00749 }
00750 if( !zero_exists )
00751 break;
00752 }
00753
00754
00755 for( matchI = 0; matchI != matches.size(); ++matchI )
00756 {
00757 zero_exists = false;
00758 for( seqI = 0; seqI < seq_count; ++seqI )
00759 {
00760 if( lengths[seqI] == 0 &&
00761 matches[matchI]->Orientation(seqI) == AbstractMatch::reverse )
00762 {
00763 lengths[seqI] = matches[matchI]->Length(seqI)+(matches[matchI]->LeftEnd(seqI) - left_ends[seqI]);
00764 }
00765 if( lengths[seqI] == 0 )
00766 zero_exists = true;
00767 if( lengths[seqI] > 1000000000 )
00768 {
00769 std::cerr << "matches[" << matchI << "]->Length(" << seqI << ") = " << matches[matchI]->Length(seqI) << std::endl;
00770 std::cerr << "matches[" << matchI << "]->LeftEnd(" << seqI << ") = " << matches[matchI]->LeftEnd(seqI) << std::endl;
00771
00772 std::cerr << "oh skeethockey mushrooms too\n";
00773 genome::breakHere();
00774 }
00775 }
00776 if( !zero_exists )
00777 break;
00778 }
00779 }
00780
00781
00782 template<class GappedBaseImpl>
00783 void GenericInterval<GappedBaseImpl>::addUnalignedRegions()
00784 {
00785 std::list<AbstractMatch*> new_matches(matches.begin(), matches.end());
00786
00787 for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
00788 {
00789 if( this->LeftEnd(seqI) == NO_MATCH )
00790 continue;
00791 if(this->Orientation(seqI) == AbstractMatch::forward)
00792 AddGapMatches( new_matches, new_matches.begin(), new_matches.end(), seqI, this->LeftEnd(seqI), this->RightEnd(seqI), this->Orientation(seqI), this->SeqCount() );
00793 else
00794 AddGapMatches( new_matches, new_matches.rbegin(), new_matches.rend(), seqI, this->LeftEnd(seqI), this->RightEnd(seqI), this->Orientation(seqI), this->SeqCount() );
00795 }
00796 matches.clear();
00797 matches.insert(matches.end(), new_matches.begin(), new_matches.end());
00798 }
00799
00800 template<class GappedBaseImpl>
00801 void GenericInterval<GappedBaseImpl>::Invert(){
00802 GappedBaseImpl::Invert();
00803 for( uint matchI = 0; matchI < matches.size(); ++matchI )
00804 matches[ matchI ]->Invert();
00805
00806 std::reverse( matches.begin(), matches.end() );
00807 }
00808
00809 template<class GappedBaseImpl>
00810 void GenericInterval<GappedBaseImpl>::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
00811 {
00812 size_t matchI;
00813 gnSeqI match_col;
00814 this->GetColumnAndMatch( col, pos, column, matchI, match_col );
00815 }
00816
00817 template<class GappedBaseImpl>
00818 void GenericInterval<GappedBaseImpl>::FindMatchPos( uint seqI, gnSeqI pos, size_t& matchI, gnSeqI& match_pos )
00819 {
00820 match_pos = pos;
00821 int diff_amt = 0;
00822 int incr = 1;
00823 matchI = 0;
00824 size_t end_mI = matches.size();
00825 if( this->Orientation(seqI) == AbstractMatch::reverse )
00826 {
00827 diff_amt = -1;
00828 incr = -1;
00829 matchI = matches.size();
00830 end_mI = 0;
00831 }
00832
00833 for( ; matchI != end_mI; matchI+=incr )
00834 {
00835 if( matches[matchI+diff_amt]->LeftEnd(seqI) == NO_MATCH )
00836 continue;
00837 if( matches[matchI+diff_amt]->Length(seqI) <= match_pos )
00838 match_pos -= matches[matchI+diff_amt]->Length(seqI);
00839 else
00840 break;
00841 }
00842
00843 if( this->Orientation(seqI) == AbstractMatch::reverse )
00844 matchI--;
00845 }
00846
00847 template<class GappedBaseImpl>
00848 void GenericInterval<GappedBaseImpl>::CalculateOffset(){
00849 std::vector<gnSeqI> left_end( this->SeqCount(), NO_MATCH );
00850 std::vector<gnSeqI> length( this->SeqCount(), 0 );
00851 std::vector<bool> orientation;
00852 if( this->matches.size() > 0 )
00853 FindBoundaries( this->matches, left_end, length, orientation );
00854 for( uint seqI = 0; seqI < this->SeqCount(); seqI++ )
00855 {
00856 if( left_end[seqI] != 0 )
00857 {
00858 this->SetLeftEnd(seqI, left_end[seqI]);
00859 this->SetLength(length[seqI], seqI);
00860 if( orientation[seqI] )
00861 this->SetOrientation(seqI, AbstractMatch::forward);
00862 else
00863 this->SetOrientation(seqI, AbstractMatch::reverse);
00864 }else if( this->LeftEnd(seqI) != NO_MATCH )
00865 {
00866 this->SetLength(0, seqI);
00867 this->SetLeftEnd(seqI, NO_MATCH);
00868 }
00869
00870 }
00871
00872 this->CalculateAlignmentLength();
00873 }
00874
00875 template<class GappedBaseImpl>
00876 void GenericInterval<GappedBaseImpl>::SetAlignment( const std::vector< std::string >& seq_align )
00877 {
00878 GappedAlignment* ga = new GappedAlignment(seq_align.size(), seq_align[0].size());
00879 matches.clear();
00880 matches.push_back(ga);
00881 ga->SetAlignment(seq_align);
00882 for( uint seqI = 0; seqI < this->SeqCount(); ++seqI )
00883 {
00884 ga->SetStart(seqI, this->Start(seqI));
00885 ga->SetLength(this->Length(seqI), seqI);
00886 }
00887 }
00888
00889
00893 template<class GappedBaseImpl>
00894 std::ostream& operator<<(std::ostream& os, const GenericInterval<GappedBaseImpl>& cr){
00895 try{
00896 for( uint matchI = 0; matchI < cr.matches.size(); ++matchI ){
00897 const AbstractMatch* m = cr.matches[ matchI ];
00898 const GappedAlignment* clust = dynamic_cast< const GappedAlignment* >( m );
00899 if( clust != NULL )
00900 os << *clust;
00901 const Match* match = dynamic_cast< const Match* >( m );
00902 if( match != NULL )
00903 os << *match;
00904 os << std::endl;
00905 }
00906 }catch(...){
00907 std::cerr << "Exceptional handler\n";
00908 }
00909 return os;
00910 }
00911
00912 template<class GappedBaseImpl>
00913 void GenericInterval<GappedBaseImpl>::CalculateAlignmentLength()
00914 {
00915 gnSeqI aln_len = 0;
00916
00917 for( size_t mI = 0; mI < matches.size(); ++mI )
00918 aln_len += matches[mI]->AlignmentLength();
00919 this->SetAlignmentLength(aln_len);
00920 }
00921
00922 template<class GappedBaseImpl>
00923 void GenericInterval<GappedBaseImpl>::GetAlignedSequences( gnAlignedSequences& gnas, const std::vector< genome::gnSequence* >& seq_table ) const
00924 {
00925 gnas.names.clear();
00926 for( uint seqI = 0; seqI < seq_table.size(); ++seqI ){
00927 std::string name;
00928 if( seq_table[ seqI ]->contigListSize() > 0 )
00929 name = seq_table[ seqI ]->contigName( 0 );
00930 gnas.names.push_back( name );
00931 gnas.positions.push_back(this->Start(seqI));
00932 }
00933 mems::GetAlignment( *this, seq_table, gnas.sequences );
00934 }
00935
00936 template<class GappedBaseImpl>
00937 bool GenericInterval<GappedBaseImpl>::IsGap( uint seq, gnSeqI col ) const
00938 {
00939 std::vector<gnSeqI> pos;
00940 std::vector<bool> column;
00941 GetColumn(col, pos, column);
00942 return column[seq];
00943 }
00944
00945 }
00946
00947 namespace std {
00948 template<> inline
00949 void swap( mems::Interval& a, mems::Interval& b )
00950 {
00951 a.swap(b);
00952 }
00953 }
00954
00955 #endif // __Interval_h__