00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifdef HAVE_CONFIG_H
00010 #include "config.h"
00011 #endif
00012
00013 #include "libMems/ProgressiveAligner.h"
00014 #include "libMems/Backbone.h"
00015 #include "libMems/Islands.h"
00016 #include "libMems/CompactGappedAlignment.h"
00017
00018 #include <boost/property_map.hpp>
00019 #include <boost/graph/graph_traits.hpp>
00020 #include <boost/graph/adjacency_list.hpp>
00021 #include <boost/graph/topological_sort.hpp>
00022 #include <boost/graph/johnson_all_pairs_shortest.hpp>
00023 #include <boost/graph/depth_first_search.hpp>
00024 #include <boost/graph/undirected_dfs.hpp>
00025
00026 using namespace std;
00027 using namespace genome;
00028 namespace mems {
00029
00030
00031 template< typename MatchVector >
00032 void getBpList( MatchVector& mvect, uint seq, vector< gnSeqI >& bp_list )
00033 {
00034 bp_list.clear();
00035 for( size_t ivI = 0; ivI < mvect.size(); ivI++ )
00036 {
00037 if( mvect[ivI]->LeftEnd(seq) == NO_MATCH )
00038 continue;
00039 bp_list.push_back( mvect[ivI]->LeftEnd(seq) );
00040 bp_list.push_back( mvect[ivI]->RightEnd(seq)+1 );
00041 }
00042 std::sort( bp_list.begin(), bp_list.end() );
00043 }
00044
00045 template< typename MatchVector >
00046 void createMap( const MatchVector& mv_from, const MatchVector& mv_to, vector< size_t >& map )
00047 {
00048 typedef typename MatchVector::value_type MatchPtr;
00049 vector< pair< MatchPtr, size_t > > m1(mv_from.size());
00050 vector< pair< MatchPtr, size_t > > m2(mv_to.size());
00051 for( size_t i = 0; i < mv_from.size(); ++i )
00052 m1[i] = make_pair( mv_from[i], i );
00053 for( size_t i = 0; i < mv_to.size(); ++i )
00054 m2[i] = make_pair( mv_to[i], i );
00055 std::sort( m1.begin(), m1.end() );
00056 std::sort( m2.begin(), m2.end() );
00057 map.resize( m1.size() );
00058 for( size_t i = 0; i < m1.size(); ++i )
00059 map[m1[i].second] = m2[i].second;
00060 }
00061
00062 typedef pair< size_t, Interval* > iv_tracker_t;
00063 class IvTrackerComp
00064 {
00065 public:
00066 IvTrackerComp( uint seq ) : ssc( seq ) {}
00067 bool operator()( const iv_tracker_t& a, const iv_tracker_t& b )
00068 {
00069 return ssc(a.second, b.second);
00070 }
00071 private:
00072 SingleStartComparator<Interval> ssc;
00073 };
00074
00075 const int LEFT_NEIGHBOR = -1;
00076 const int RIGHT_NEIGHBOR = 1;
00077 typedef vector< size_t > neighbor_t;
00078
00079 neighbor_t& getNeighbor( pair< neighbor_t, neighbor_t >& entry, int direction )
00080 {
00081 if( direction == RIGHT_NEIGHBOR )
00082 return entry.first;
00083 else
00084 return entry.second;
00085 }
00086
00087
00088 void collapseCollinear( IntervalList& iv_list )
00089 {
00090 if( iv_list.size() == 0 )
00091 return;
00092 const size_t seq_count = iv_list.seq_table.size();
00093 std::vector< Interval* > iv_ptrs(iv_list.size());
00094 size_t lilI = 0;
00095 for( size_t i = 0; i < iv_list.size(); ++i )
00096 {
00097
00098 if( iv_list[i].Multiplicity() < 2 )
00099 continue;
00100 iv_ptrs[lilI++] = &iv_list[i];
00101 }
00102 iv_ptrs.resize(lilI);
00103 const size_t NEIGHBOR_UNKNOWN = (std::numeric_limits<size_t>::max)();
00104 neighbor_t lefties_tmp( seq_count, NEIGHBOR_UNKNOWN );
00105 pair< neighbor_t, neighbor_t > neighbor_pair( lefties_tmp, lefties_tmp );
00106 vector< pair< neighbor_t, neighbor_t > > neighbor_list( iv_ptrs.size(), neighbor_pair );
00107 vector< iv_tracker_t > iv_tracker( iv_ptrs.size() );
00108 for( size_t i = 0; i < iv_ptrs.size(); ++i )
00109 {
00110 iv_tracker[i] = make_pair( i, iv_ptrs[i] );
00111 }
00112 for( size_t seqI = 0; seqI < seq_count; ++seqI )
00113 {
00114 IvTrackerComp ivc( seqI );
00115 sort( iv_tracker.begin(), iv_tracker.end(), ivc );
00116 size_t prev_i = NEIGHBOR_UNKNOWN;
00117 size_t cur_i = NEIGHBOR_UNKNOWN;
00118 for( size_t i = 0; i < iv_tracker.size(); ++i )
00119 {
00120 if( iv_tracker[i].second->LeftEnd(seqI) == NO_MATCH )
00121 continue;
00122 if( cur_i != NEIGHBOR_UNKNOWN )
00123 {
00124 neighbor_list[cur_i].first[seqI] = prev_i;
00125 neighbor_list[cur_i].second[seqI] = iv_tracker[i].first;
00126 }
00127 prev_i = cur_i;
00128 cur_i = iv_tracker[i].first;
00129 }
00130
00131 if( cur_i != NEIGHBOR_UNKNOWN )
00132 {
00133 neighbor_list[cur_i].first[seqI] = prev_i;
00134 neighbor_list[cur_i].second[seqI] = NEIGHBOR_UNKNOWN;
00135 }
00136 }
00137
00138
00139 for( int d = -1; d < 2; d+= 2 )
00140 {
00141 size_t unknown_count = 0;
00142 for( size_t nI = 0; nI < neighbor_list.size(); ++nI )
00143 {
00144 size_t nayb = NEIGHBOR_UNKNOWN;
00145 size_t seqI = 0;
00146 bool parity = false;
00147 size_t ct = 0;
00148 for( ; seqI < seq_count; ++seqI )
00149 {
00150 if( iv_ptrs[nI]->Orientation(seqI) == AbstractMatch::undefined )
00151 continue;
00152 int orient = iv_ptrs[nI]->Orientation(seqI) == AbstractMatch::forward ? 1 : -1;
00153
00154 if( nayb == NEIGHBOR_UNKNOWN )
00155 {
00156 nayb = getNeighbor( neighbor_list[nI], d * orient * -1 )[seqI];
00157 if( nayb != NEIGHBOR_UNKNOWN )
00158 parity = iv_ptrs[nI]->Orientation(seqI) == iv_ptrs[nayb]->Orientation(seqI);
00159 }
00160 else if( nayb != getNeighbor( neighbor_list[nI], d * orient * -1 )[seqI] )
00161 break;
00162 else if( parity != (iv_ptrs[nI]->Orientation(seqI) == iv_ptrs[nayb]->Orientation(seqI)) )
00163 break;
00164 if( nayb != NEIGHBOR_UNKNOWN )
00165 ct++;
00166 }
00167 if( seqI < seq_count || ct < iv_ptrs[nI]->Multiplicity() )
00168 continue;
00169 if( nayb == NEIGHBOR_UNKNOWN )
00170 continue;
00171
00172
00173 uint fs = iv_ptrs[nI]->FirstStart();
00174 gnSeqI nI_lend_fs = iv_ptrs[nI]->LeftEnd(fs);
00175 gnSeqI nayb_lend_fs = iv_ptrs[nayb]->LeftEnd(fs);
00176 AbstractMatch::orientation o = iv_ptrs[nI]->Orientation(fs);
00177 vector< AbstractMatch* > nI_matches;
00178 iv_ptrs[nI]->StealMatches( nI_matches );
00179 vector< AbstractMatch* > nayb_matches;
00180 iv_ptrs[nayb]->StealMatches( nayb_matches );
00181 if( !parity )
00182 {
00183 std::reverse( nI_matches.begin(), nI_matches.end() );
00184 for( size_t i = 0; i < nI_matches.size(); ++i )
00185 nI_matches[i]->Invert();
00186 o = o == AbstractMatch::forward ? AbstractMatch::reverse : AbstractMatch::forward;
00187 }
00188 if( (o == AbstractMatch::forward && nI_lend_fs > nayb_lend_fs) ||
00189 (o == AbstractMatch::reverse && nI_lend_fs < nayb_lend_fs))
00190 nayb_matches.insert( nayb_matches.end(), nI_matches.begin(), nI_matches.end() );
00191 else
00192 nayb_matches.insert( nayb_matches.begin(), nI_matches.begin(), nI_matches.end() );
00193
00194 iv_ptrs[nayb]->SetMatches( nayb_matches );
00195
00196
00197 seqI = 0;
00198 for( ; seqI < seq_count; ++seqI )
00199 {
00200 if( getNeighbor( neighbor_list[nI], -1 )[seqI] == NEIGHBOR_UNKNOWN &&
00201 getNeighbor( neighbor_list[nI], 1 )[seqI] == NEIGHBOR_UNKNOWN )
00202 continue;
00203 int orient = iv_ptrs[nayb]->Orientation(seqI) == AbstractMatch::forward ? 1 : -1;
00204 size_t other_nayb = getNeighbor( neighbor_list[nI], d * orient * (parity ? 1 : -1) )[seqI];
00205 if( other_nayb != NEIGHBOR_UNKNOWN )
00206 {
00207 if( getNeighbor( neighbor_list[other_nayb], 1 )[seqI] == nI )
00208 getNeighbor( neighbor_list[other_nayb], 1 )[seqI] = nayb;
00209 else if( getNeighbor( neighbor_list[other_nayb], -1 )[seqI] == nI )
00210 getNeighbor( neighbor_list[other_nayb], -1 )[seqI] = nayb;
00211 else
00212 {
00213 cerr << "serious programmer error\n";
00214 genome::breakHere();
00215 }
00216 }
00217 if( getNeighbor( neighbor_list[nayb], 1 )[seqI] == nI )
00218 getNeighbor( neighbor_list[nayb], 1 )[seqI] = other_nayb;
00219 else if( getNeighbor( neighbor_list[nayb], -1 )[seqI] == nI )
00220 getNeighbor( neighbor_list[nayb], -1 )[seqI] = other_nayb;
00221 else
00222 {
00223 cerr << "inexcusable programmer error\n";
00224 genome::breakHere();
00225 }
00226 neighbor_list[nI].first[seqI] = NEIGHBOR_UNKNOWN;
00227 neighbor_list[nI].second[seqI] = NEIGHBOR_UNKNOWN;
00228 }
00229 }
00230 }
00231
00232 IntervalList new_list;
00233 new_list.seq_filename = iv_list.seq_filename;
00234 new_list.seq_table = iv_list.seq_table;
00235 new_list.resize( iv_ptrs.size() );
00236 size_t newI = 0;
00237 for( size_t ivI = 0; ivI < iv_ptrs.size(); ++ivI )
00238 {
00239 vector< AbstractMatch* > matches;
00240 iv_ptrs[ivI]->StealMatches( matches );
00241 if( matches.size() > 0 )
00242 new_list[newI++].SetMatches( matches );
00243 }
00244 new_list.resize(newI);
00245 swap( iv_list, new_list );
00246 addUnalignedRegions(iv_list);
00247 }
00248
00249
00250 void checkForAllGapColumns( IntervalList& iv_list )
00251 {
00252
00253 for( size_t ivI = 0; ivI < iv_list.size(); ivI++ )
00254 {
00255 vector< string > aln;
00256 mems::GetAlignment( iv_list[ivI], iv_list.seq_table, aln );
00257 for( size_t colI = 0; colI < aln[0].size(); ++colI )
00258 {
00259 size_t rowI = 0;
00260 for( ; rowI < aln.size(); ++rowI )
00261 if( aln[rowI][colI] != '-' )
00262 break;
00263 if( rowI == aln.size() )
00264 {
00265 cerr << "ERROR! IV " << ivI << " COLUMN " << colI << " IS ALL GAPS!\n";
00266 }
00267 }
00268 }
00269 }
00270
00271
00272
00273 typedef boost::multi_array< vector< pair< size_t, size_t > >, 3 > pairwise_genome_hss_t;
00274
00275 void translateToPairwiseGenomeHSS( const hss_array_t& hss_array, pairwise_genome_hss_t& hss_cols )
00276 {
00277 uint seq_count = hss_array.shape()[0];
00278 uint iv_count = hss_array.shape()[2];
00279 hss_cols.resize( boost::extents[seq_count][seq_count][iv_count] );
00280
00281
00282 for( size_t seqI = 0; seqI < seq_count; ++seqI )
00283 {
00284 for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
00285 {
00286 for( size_t ivI = 0; ivI < iv_count; ++ivI )
00287 {
00288 const hss_list_t& cur_list = hss_array[seqI][seqJ][ivI];
00289 hss_cols[seqI][seqJ][ivI].resize( cur_list.size() );
00290 for( size_t hssI = 0; hssI < cur_list.size(); hssI++ )
00291 {
00292 hss_cols[seqI][seqJ][ivI][hssI].first = cur_list[hssI].left_col;
00293 hss_cols[seqI][seqJ][ivI][hssI].second = cur_list[hssI].right_col;
00294 }
00295 }
00296 }
00297 }
00298 }
00299
00300
00301 double computeGC( std::vector< gnSequence* >& seq_table )
00302 {
00303 const uint8* tab = SortedMerList::BasicDNATable();
00304 size_t counts[4];
00305 for( int i = 0; i < 4; i++ )
00306 counts[i] = 0;
00307 for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
00308 {
00309 std::string seq;
00310 seq_table[seqI]->ToString( seq );
00311 for( size_t cI = 0; cI < seq.size(); cI++ )
00312 counts[ tab[ seq[cI] ] ]++;
00313 }
00314 return double(counts[1]+counts[2]) / double(counts[1]+counts[2] + counts[0]+counts[3]);
00315 }
00316
00317 void makeAllPairwiseGenomeHSS( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_ptrs, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, const Params& hmm_params )
00318 {
00319 uint seq_count = iv_list.seq_table.size();
00320
00321 for( size_t seqI = 0; seqI < seq_count; ++seqI )
00322 {
00323 for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
00324 {
00325 vector< uint > projection;
00326 projection.push_back( seqI );
00327 projection.push_back( seqJ );
00328 vector< vector< MatchProjectionAdapter* > > LCB_list;
00329 vector< LCB > projected_adjs;
00330 projectIntervalList( iv_list, projection, LCB_list, projected_adjs );
00331
00332 IntervalList pair_ivs;
00333 pair_ivs.seq_table.push_back( iv_list.seq_table[seqI] );
00334 pair_ivs.seq_table.push_back( iv_list.seq_table[seqJ] );
00335 pair_ivs.resize( LCB_list.size() );
00336 for( size_t lcbI = 0; lcbI < LCB_list.size(); ++lcbI )
00337 pair_ivs[lcbI].SetMatches( LCB_list[lcbI] );
00338 LCB_list.clear();
00339
00340 vector< CompactGappedAlignment<>* > pair_cgas( pair_ivs.size() );
00341 for( size_t lcbI = 0; lcbI < pair_ivs.size(); ++lcbI )
00342 {
00343 CompactGappedAlignment<> tmp_cga;
00344 pair_cgas[lcbI] = tmp_cga.Copy();
00345 new (pair_cgas[lcbI])CompactGappedAlignment<>( pair_ivs[lcbI] );
00346 }
00347
00348 vector< CompactGappedAlignment<>* > hss_list;
00349
00350 hss_array_t hss_array;
00351 findHssHomologyHMM( pair_cgas, pair_ivs.seq_table, hss_array, hmm_params, true, true );
00352 HssArrayToCga(pair_cgas, pair_ivs.seq_table, hss_array, hss_list);
00353
00354 for( size_t cgaI = 0; cgaI < pair_cgas.size(); ++cgaI )
00355 pair_cgas[cgaI]->Free();
00356 pair_cgas.clear();
00357
00358
00359 vector< gnSeqI > bp_list;
00360 getBpList( iv_ptrs, seqI, bp_list );
00361 GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm(0);
00362 SingleStartComparator< CompactGappedAlignment<> > ssc(0);
00363 std::sort(hss_list.begin(), hss_list.end(), ssc );
00364 applyBreakpoints( bp_list, hss_list, gmsm );
00365
00366 getBpList( iv_ptrs, seqJ, bp_list );
00367 GenericMatchSeqManipulator< CompactGappedAlignment<> > gmsm1(1);
00368 SingleStartComparator< CompactGappedAlignment<> > ssc1(1);
00369 std::sort(hss_list.begin(), hss_list.end(), ssc1 );
00370 applyBreakpoints( bp_list, hss_list, gmsm1 );
00371
00372
00373 std::sort(hss_list.begin(), hss_list.end(), ssc );
00374
00375 SingleStartComparator< CompactGappedAlignment<> > ivcomp(seqI);
00376 std::sort( iv_ptrs.begin(), iv_ptrs.end(), ivcomp );
00377 vector< size_t > iv_map;
00378 createMap( iv_ptrs, iv_orig_ptrs, iv_map );
00379 size_t ivI = 0;
00380 while( ivI < iv_ptrs.size() && iv_ptrs[ivI]->LeftEnd(0) == NO_MATCH )
00381 ++ivI;
00382 for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
00383 {
00384 if( hss_list[hssI]->LeftEnd(0) == NO_MATCH || hss_list[hssI]->Length(0) == 0 )
00385 continue;
00386 if( ivI == iv_ptrs.size() )
00387 {
00388 cerr << "huh?\n";
00389 cerr << hss_list[hssI]->LeftEnd(0) << endl;
00390 cerr << hss_list[hssI]->RightEnd(0) << endl;
00391 cerr << iv_ptrs.back()->LeftEnd(seqI) << endl;
00392 cerr << iv_ptrs.back()->RightEnd(seqI) << endl;
00393 }
00394 while( ivI < iv_ptrs.size() &&
00395 (iv_ptrs[ivI]->LeftEnd(seqI) == NO_MATCH ||
00396 hss_list[hssI]->LeftEnd(0) > iv_ptrs[ivI]->RightEnd(seqI) ) )
00397 ++ivI;
00398 if( ivI == iv_ptrs.size() )
00399 {
00400 cerr << "hssI fit!!\n";
00401 genome::breakHere();
00402 }
00403
00404 if( iv_ptrs[ivI]->LeftEnd(seqJ) == NO_MATCH ||
00405 iv_ptrs[ivI]->RightEnd(seqJ) < hss_list[hssI]->LeftEnd(1) ||
00406 hss_list[hssI]->RightEnd(1) < iv_ptrs[ivI]->LeftEnd(seqJ) )
00407 continue;
00408
00409 if( hss_list[hssI]->RightEnd(0) < iv_ptrs[ivI]->LeftEnd(seqI) )
00410 {
00411 cerr << "huh 2?\n";
00412 cerr << hss_list[hssI]->LeftEnd(0) << endl;
00413 cerr << hss_list[hssI]->RightEnd(0) << endl;
00414 cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
00415 cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
00416 hssI++;
00417 continue;
00418 }
00419
00420 vector< pair< size_t, size_t > >& cur_hss_cols = hss_cols[seqI][seqJ][iv_map[ivI]];
00421
00422 gnSeqI left_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->LeftEnd(0) );
00423 gnSeqI right_col = iv_ptrs[ivI]->SeqPosToColumn( seqI, hss_list[hssI]->RightEnd(0) );
00424 if(left_col > right_col && iv_ptrs[ivI]->Orientation(seqI) == AbstractMatch::reverse )
00425 {
00426 swap(left_col, right_col);
00427 }
00428 else if(left_col > right_col)
00429 {
00430 cerr << "bad cols\n";
00431 cerr << hss_list[hssI]->LeftEnd(0) << endl;
00432 cerr << hss_list[hssI]->RightEnd(0) << endl;
00433 cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
00434 cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
00435 genome::breakHere();
00436 }
00437
00438 if( left_col > 2000000000 || right_col > 2000000000 )
00439 {
00440 cerr << "huh 2?\n";
00441 cerr << hss_list[hssI]->LeftEnd(0) << endl;
00442 cerr << hss_list[hssI]->RightEnd(0) << endl;
00443 cerr << iv_ptrs[ivI]->LeftEnd(seqI) << endl;
00444 cerr << iv_ptrs[ivI]->RightEnd(seqI) << endl;
00445 genome::breakHere();
00446 }
00447 cur_hss_cols.push_back( make_pair( left_col, right_col ) );
00448 }
00449 for( size_t hssI = 0; hssI < hss_list.size(); ++hssI )
00450 hss_list[hssI]->Free();
00451 }
00452 }
00453 }
00454
00455 void mergePairwiseHomologyPredictions( vector< CompactGappedAlignment<>* >& iv_orig_ptrs, pairwise_genome_hss_t& hss_cols, vector< vector< ULA* > >& ula_list )
00456 {
00457 uint seq_count = hss_cols.shape()[0];
00458 uint iv_count = hss_cols.shape()[2];
00459
00460
00461
00462
00463
00464
00465
00466 ula_list.resize( iv_count );
00467 for( size_t ivI = 0; ivI < iv_count; ++ivI )
00468 {
00469 vector< ULA* >& iv_ulas = ula_list[ivI];
00470 for( size_t seqI = 0; seqI < seq_count; ++seqI )
00471 {
00472 for( size_t seqJ = seqI+1; seqJ < seq_count; ++seqJ )
00473 {
00474 vector< pair< size_t, size_t > >& cur_hss_cols = hss_cols[seqI][seqJ][ivI];
00475 vector< ULA* > cur_ulas( cur_hss_cols.size() );
00476 ULA tmp_ula(seq_count);
00477 for( size_t hssI = 0; hssI < cur_hss_cols.size(); ++hssI )
00478 {
00479 cur_ulas[hssI] = tmp_ula.Copy();
00480 cur_ulas[hssI]->SetStart(seqI, cur_hss_cols[hssI].first+1);
00481 cur_ulas[hssI]->SetStart(seqJ, cur_hss_cols[hssI].first+1);
00482 cur_ulas[hssI]->SetLength( cur_hss_cols[hssI].second - cur_hss_cols[hssI].first + 1 );
00483 }
00484
00485 vector< gnSeqI > iv_bp_list;
00486 vector< gnSeqI > cur_bp_list;
00487 SingleStartComparator<ULA> ulacompI(seqI);
00488 std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
00489 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
00490 getBpList( iv_ulas, seqI, iv_bp_list );
00491 getBpList( cur_ulas, seqI, cur_bp_list );
00492 GenericMatchSeqManipulator< ULA > gmsm(seqI);
00493 applyBreakpoints( iv_bp_list, cur_ulas, gmsm );
00494 applyBreakpoints( cur_bp_list, iv_ulas, gmsm );
00495
00496 SingleStartComparator<ULA> ulacompJ(seqJ);
00497 std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompJ );
00498 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompJ );
00499 getBpList( iv_ulas, seqJ, iv_bp_list );
00500 getBpList( cur_ulas, seqJ, cur_bp_list );
00501 GenericMatchSeqManipulator< ULA > gmsmJ(seqJ);
00502 applyBreakpoints( iv_bp_list, cur_ulas, gmsmJ );
00503 applyBreakpoints( cur_bp_list, iv_ulas, gmsmJ );
00504
00505
00506 std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
00507 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
00508 getBpList( iv_ulas, seqI, iv_bp_list );
00509 getBpList( cur_ulas, seqI, cur_bp_list );
00510 applyBreakpoints( iv_bp_list, cur_ulas, gmsm );
00511 applyBreakpoints( cur_bp_list, iv_ulas, gmsm );
00512
00513 std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompI );
00514 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
00515
00516
00517 size_t iv_ulas_size = iv_ulas.size();
00518 size_t ivuI = 0;
00519 size_t curuI = 0;
00520 vector< ULA* > added_to( cur_ulas.size(), NULL );
00521 vector< ULA* > to_delete;
00522 while( ivuI < iv_ulas_size && curuI < cur_ulas.size() )
00523 {
00524 if( iv_ulas[ivuI]->LeftEnd(seqI) == cur_ulas[curuI]->LeftEnd(seqI) )
00525 {
00526 if( added_to[curuI] == iv_ulas[ivuI] )
00527 {
00528
00529 }else if( added_to[curuI] == NULL )
00530 {
00531 iv_ulas[ivuI]->SetLeftEnd(seqJ, cur_ulas[curuI]->LeftEnd(seqJ));
00532 added_to[curuI] = iv_ulas[ivuI];
00533 }else{
00534 ULA* merge = added_to[curuI];
00535 for( size_t seqK = 0; seqK < seq_count; ++seqK )
00536 {
00537 if( merge->Start(seqK) == NO_MATCH )
00538 continue;
00539 iv_ulas[ivuI]->SetStart( seqK, merge->Start(seqK) );
00540 }
00541 to_delete.push_back( merge );
00542 }
00543 ivuI++;
00544 }else if( iv_ulas[ivuI]->LeftEnd(seqI) < cur_ulas[curuI]->LeftEnd(seqI) )
00545 {
00546 ivuI++;
00547 }else
00548 curuI++;
00549 }
00550
00551
00552 std::sort( to_delete.begin(), to_delete.end() );
00553 vector< ULA* >::iterator last = std::unique( to_delete.begin(), to_delete.end() );
00554 to_delete.erase( last, to_delete.end() );
00555 vector< ULA* > new_iv_ulas( iv_ulas.size() - to_delete.size() );
00556 std::sort( iv_ulas.begin(), iv_ulas.end() );
00557 std::set_difference( iv_ulas.begin(), iv_ulas.end(), to_delete.begin(), to_delete.end(), new_iv_ulas.begin() );
00558 swap( iv_ulas, new_iv_ulas );
00559 for( size_t delI = 0; delI < to_delete.size(); ++delI )
00560 to_delete[delI]->Free();
00561
00562 vector< ULA* > orig_ula_order = cur_ulas;
00563
00564 std::sort( iv_ulas.begin(), iv_ulas.end(), ulacompJ );
00565 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompJ );
00566
00567 vector< size_t > added_map;
00568 createMap( cur_ulas, orig_ula_order, added_map );
00569
00570 ivuI = 0;
00571 curuI = 0;
00572 to_delete.clear();
00573 while( ivuI < iv_ulas_size && curuI < cur_ulas.size() )
00574 {
00575 if( iv_ulas[ivuI]->LeftEnd(seqJ) == cur_ulas[curuI]->LeftEnd(seqJ) )
00576 {
00577 if( added_to[added_map[curuI]] == iv_ulas[ivuI] )
00578 {
00579
00580 }else if( added_to[added_map[curuI]] == NULL )
00581 {
00582 iv_ulas[ivuI]->SetLeftEnd(seqI, cur_ulas[curuI]->LeftEnd(seqI));
00583 added_to[added_map[curuI]] = iv_ulas[ivuI];
00584 }else{
00585 ULA* merge = added_to[added_map[curuI]];
00586 for( size_t seqK = 0; seqK < seq_count; ++seqK )
00587 {
00588 if( merge->Start(seqK) == NO_MATCH )
00589 continue;
00590 iv_ulas[ivuI]->SetStart( seqK, merge->Start(seqK) );
00591 }
00592 to_delete.push_back( merge );
00593 }
00594 ivuI++;
00595 }else if( iv_ulas[ivuI]->LeftEnd(seqJ) < cur_ulas[curuI]->LeftEnd(seqJ) )
00596 {
00597 ivuI++;
00598 }else
00599 {
00600 curuI++;
00601 }
00602 }
00603
00604
00605
00606 std::sort( cur_ulas.begin(), cur_ulas.end(), ulacompI );
00607 for( curuI = 0; curuI < cur_ulas.size(); ++curuI )
00608 {
00609 if( added_to[curuI] == NULL )
00610 iv_ulas.push_back( cur_ulas[curuI] );
00611 else
00612 cur_ulas[curuI]->Free();
00613 }
00614
00615 std::sort( to_delete.begin(), to_delete.end() );
00616 last = std::unique( to_delete.begin(), to_delete.end() );
00617 to_delete.erase( last, to_delete.end() );
00618 new_iv_ulas = vector< ULA* >( iv_ulas.size() - to_delete.size() );
00619 std::sort( iv_ulas.begin(), iv_ulas.end() );
00620 std::set_difference( iv_ulas.begin(), iv_ulas.end(), to_delete.begin(), to_delete.end(), new_iv_ulas.begin() );
00621 swap( iv_ulas, new_iv_ulas );
00622 for( size_t delI = 0; delI < to_delete.size(); ++delI )
00623 to_delete[delI]->Free();
00624 }
00625 }
00626 }
00627
00628
00629 for( size_t ivI = 0; ivI < ula_list.size(); ++ivI )
00630 {
00631 for( size_t mI = 0; mI < ula_list[ivI].size(); ++mI )
00632 {
00633 size_t seqI = ula_list[ivI][mI]->FirstStart();
00634 std::vector<gnSeqI> l_pos;
00635 std::vector<bool> l_column;
00636 std::vector<gnSeqI> r_pos;
00637 std::vector<bool> r_column;
00638 gnSeqI left_col = ula_list[ivI][mI]->LeftEnd(seqI)-1;
00639 gnSeqI right_col = ula_list[ivI][mI]->RightEnd(seqI)-1;
00640 iv_orig_ptrs[ivI]->GetColumn(left_col, l_pos, l_column);
00641 iv_orig_ptrs[ivI]->GetColumn(right_col, r_pos, r_column);
00642 for( ; seqI < ula_list[ivI][mI]->SeqCount(); ++seqI )
00643 {
00644 if( ula_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
00645 continue;
00646 if( l_pos[seqI] == r_pos[seqI] && !l_column[seqI] && !r_column[seqI] )
00647 ula_list[ivI][mI]->SetStart(seqI, NO_MATCH);
00648 }
00649 if( ula_list[ivI][mI]->Multiplicity() < 2 )
00650 {
00651 ula_list[ivI][mI]->Free();
00652 ula_list[ivI][mI] = NULL;
00653 }
00654 }
00655
00656 std::vector< ULA* >::iterator last = std::remove( ula_list[ivI].begin(), ula_list[ivI].end(), (ULA*)NULL );
00657 ula_list[ivI].erase( last, ula_list[ivI].end() );
00658 }
00659 }
00660
00661 void unalignIslands( IntervalList& iv_list, vector< CompactGappedAlignment<>* >& iv_orig_ptrs, vector< vector< ULA* > >& ula_list )
00662 {
00663 uint seq_count = iv_list.seq_table.size();
00664
00665 for( size_t ivI = 0; ivI < ula_list.size(); ++ivI )
00666 {
00667 vector< AbstractMatch* > new_matches(ula_list[ivI].size());
00668 for( size_t mI = 0; mI < ula_list[ivI].size(); ++mI )
00669 {
00670 size_t seqI = ula_list[ivI][mI]->FirstStart();
00671 gnSeqI left_col = ula_list[ivI][mI]->LeftEnd(seqI)-1;
00672 CompactGappedAlignment<> tmp_cga;
00673 CompactGappedAlignment<>* new_cga = tmp_cga.Copy();
00674 iv_orig_ptrs[ivI]->copyRange(*new_cga, left_col, ula_list[ivI][mI]->Length(seqI));
00675 for( seqI = 0; seqI < ula_list[ivI][mI]->SeqCount(); ++seqI )
00676 {
00677 if( ula_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
00678 new_cga->SetLeftEnd(seqI, NO_MATCH);
00679 }
00680 new_cga->CondenseGapColumns();
00681 new_matches[mI] = new_cga;
00682 }
00683 if( new_matches.size() > 0 )
00684 {
00685
00686 vector< vector< AbstractMatch* > > disjoint_subsets;
00687 {
00688
00689
00690 vector< uint > seq_map( seq_count );
00691 for( size_t sI = 0; sI < seq_map.size(); ++sI )
00692 seq_map[sI] = sI;
00693 for( size_t mI = 0; mI < new_matches.size(); ++mI )
00694 {
00695 uint sI = new_matches[mI]->FirstStart();
00696 uint map_to = seq_map[sI];
00697 while( map_to != seq_map[map_to] )
00698 map_to = seq_map[map_to];
00699 seq_map[sI] = map_to;
00700 for( ++sI; sI < seq_count; ++sI )
00701 {
00702 if( new_matches[mI]->LeftEnd(sI) == NO_MATCH )
00703 continue;
00704 uint map_from = seq_map[sI];
00705 while( map_from != seq_map[map_from] )
00706 map_from = seq_map[map_from];
00707 seq_map[map_from] = map_to;
00708 }
00709 }
00710 vector< vector< AbstractMatch* > > mapped_lists( seq_count );
00711 for( size_t mI = 0; mI < new_matches.size(); ++mI )
00712 {
00713 uint sI = new_matches[mI]->FirstStart();
00714 uint map_to = seq_map[sI];
00715 while( map_to != seq_map[map_to] )
00716 map_to = seq_map[map_to];
00717 mapped_lists[map_to].push_back( new_matches[mI] );
00718 }
00719 for( uint sI = 0; sI < seq_count; ++sI )
00720 {
00721 if( mapped_lists[sI].size() > 0 )
00722 disjoint_subsets.push_back( mapped_lists[sI] );
00723 }
00724 }
00725
00726 for( size_t dI = 0; dI < disjoint_subsets.size(); ++dI )
00727 {
00728 vector< AbstractMatch* >& cur_d_matches = disjoint_subsets[dI];
00729 vector< AbstractMatch* > orig_order = cur_d_matches;
00730
00731 vector< size_t > id_map;
00732 typedef boost::adjacency_list< boost::vecS, boost::vecS, boost::directedS, boost::property<boost::vertex_color_t, boost::default_color_type> > Graph;
00733 typedef boost::graph_traits<Graph>::vertex_descriptor Vertex;
00734 typedef std::pair< int, int > Pair;
00735 vector< Pair > edges;
00736 for( size_t seqI = 0; seqI < seq_count; ++seqI )
00737 {
00738 SingleStartComparator<AbstractMatch> ssc(seqI);
00739 std::sort( cur_d_matches.begin(), cur_d_matches.end(), ssc );
00740 createMap( cur_d_matches, orig_order, id_map );
00741 int prev = -1;
00742 int first = -1;
00743 bool reverse = false;
00744 for( int mI = 0; mI < cur_d_matches.size(); ++mI )
00745 {
00746 if( cur_d_matches[mI]->LeftEnd(seqI) == NO_MATCH )
00747 continue;
00748 if( prev != -1 )
00749 {
00750 Pair edge( id_map[prev], id_map[mI] );
00751 if( reverse )
00752 swap( edge.first, edge.second );
00753 edges.push_back(edge);
00754 }else
00755 {
00756 reverse = cur_d_matches[mI]->Start(seqI) < 0;
00757 first = mI;
00758 }
00759 prev = mI;
00760 }
00761 if( prev != -1 && !reverse )
00762 edges.push_back( Pair( id_map[prev], cur_d_matches.size() ) );
00763 else if( prev != -1 && reverse )
00764 edges.push_back( Pair( id_map[first], cur_d_matches.size() ) );
00765 }
00766 std::sort( edges.begin(), edges.end() );
00767 vector< Pair >::iterator ee_iter = std::unique( edges.begin(), edges.end() );
00768 edges.erase( ee_iter, edges.end() );
00769 Pair* edge_array = new Pair[edges.size()];
00770 for( size_t eI = 0; eI < edges.size(); ++eI )
00771 edge_array[eI] = edges[eI];
00772 typedef boost::graph_traits<Graph>::vertices_size_type v_size_t;
00773 Graph G(edge_array, edge_array + edges.size(), v_size_t(edges.size()));
00774 typedef std::vector< Vertex > container;
00775 container c;
00776 topological_sort(G, std::back_inserter(c));
00777 cur_d_matches.clear();
00778 for ( container::reverse_iterator ii=c.rbegin(); ii!=c.rend(); ++ii)
00779 {
00780 if( *ii < orig_order.size() )
00781 cur_d_matches.push_back( orig_order[ *ii ] );
00782 }
00783 if( dI == 0 )
00784 iv_list[ivI].SetMatches(cur_d_matches);
00785 else
00786 {
00787 Interval new_iv( cur_d_matches.begin(), cur_d_matches.end() );
00788 iv_list.push_back(new_iv);
00789 }
00790 delete[] edge_array;
00791 }
00792 }
00793 else
00794 {
00795 iv_orig_ptrs[ivI]->Free();
00796 iv_orig_ptrs[ivI] = NULL;
00797 }
00798 }
00799
00800
00801
00802 size_t givI = 0;
00803 for( size_t iI = 0; iI < iv_orig_ptrs.size(); ++iI )
00804 {
00805 if( iv_orig_ptrs[iI] != NULL )
00806 {
00807 swap( iv_list[givI], iv_list[iI] );
00808 iv_orig_ptrs[iI]->Free();
00809 iv_orig_ptrs[iI] = NULL;
00810 givI++;
00811 }
00812 }
00813
00814 for( size_t iI = iv_orig_ptrs.size(); iI < iv_list.size(); ++iI )
00815 swap( iv_list[givI++], iv_list[iI] );
00816 iv_list.erase( iv_list.begin()+givI, iv_list.end() );
00817
00818
00819 collapseCollinear( iv_list );
00820 }
00821
00822 void createBackboneList( const IntervalList& iv_list, backbone_list_t& ula_list )
00823 {
00824 ula_list.resize( iv_list.size() );
00825 for( size_t ivI = 0; ivI < iv_list.size(); ++ivI )
00826 {
00827 if( iv_list[ivI].Multiplicity() < 2 )
00828 continue;
00829 const vector< AbstractMatch* >& matches = iv_list[ivI].GetMatches();
00830 int64 right_col = 0;
00831 int64 left_col = 0;
00832 for( size_t mI = 0; mI < matches.size(); ++mI )
00833 {
00834 left_col = right_col;
00835 right_col += matches[mI]->AlignmentLength();
00836 if( matches[mI]->Multiplicity() < 2 )
00837 continue;
00838 ULA tmp_ula(matches[mI]->SeqCount());
00839 ULA* mula = tmp_ula.Copy();
00840 for( size_t seqI = 0; seqI < matches[mI]->SeqCount(); ++seqI )
00841 if( matches[mI]->LeftEnd(seqI) != NO_MATCH )
00842 mula->SetLeftEnd( seqI, left_col+1 );
00843 mula->SetLength( right_col - left_col );
00844 ula_list[ivI].push_back(mula);
00845 }
00846
00847 for( size_t ulaI = 1; ulaI < ula_list[ivI].size(); ulaI++ )
00848 {
00849 size_t seqI = 0;
00850 for( ; seqI < ula_list[ivI][ulaI]->SeqCount(); ++seqI )
00851 {
00852 int64 s1 = ula_list[ivI][ulaI-1]->Start(seqI);
00853 int64 s2 = ula_list[ivI][ulaI]->Start(seqI);
00854 if( s1 == mems::NO_MATCH && s2 == mems::NO_MATCH )
00855 continue;
00856 if( s1 == mems::NO_MATCH && s2 != mems::NO_MATCH )
00857 break;
00858 if( s1 != mems::NO_MATCH && s2 == mems::NO_MATCH )
00859 break;
00860 int64 r1 = ula_list[ivI][ulaI-1]->RightEnd(seqI);
00861 if( r1 + 1 != s2 )
00862 break;
00863 }
00864 if( seqI == ula_list[ivI][ulaI]->SeqCount() )
00865 {
00866
00867 ula_list[ivI][ulaI]->ExtendStart( ula_list[ivI][ulaI-1]->Length() );
00868 ula_list[ivI][ulaI-1]->SetLength(0);
00869 }
00870 }
00871
00872 vector< ULA* > condensed_list;
00873 for( size_t ulaI = 0; ulaI < ula_list[ivI].size(); ulaI++ )
00874 {
00875 if( ula_list[ivI][ulaI]->Length() > 0 )
00876 condensed_list.push_back(ula_list[ivI][ulaI]);
00877 else
00878 ula_list[ivI][ulaI]->Free();
00879 }
00880 swap( ula_list[ivI], condensed_list );
00881 }
00882 }
00883
00884 void detectAndApplyBackbone( AbstractMatch* m, vector< gnSequence* >& seq_table, CompactGappedAlignment<>*& result, backbone_list_t& bb_list, const Params& hmm_params, boolean left_homologous, boolean right_homologous )
00885 {
00886 vector< AbstractMatch* > mlist( 1, m );
00887 uint seq_count = seq_table.size();
00888
00889
00890 pairwise_genome_hss_t hss_cols(boost::extents[seq_count][seq_count][1]);
00891
00892
00893 vector< CompactGappedAlignment<>* > iv_ptrs(1);
00894 CompactGappedAlignment<> tmp_cga;
00895 iv_ptrs[0] = tmp_cga.Copy();
00896 new (iv_ptrs[0])CompactGappedAlignment<>( *m );
00897
00898 vector< CompactGappedAlignment<>* > iv_orig_ptrs(iv_ptrs);
00899 hss_array_t island_array, hss_array;
00900
00901 findHssHomologyHMM( mlist, seq_table, island_array, hmm_params, left_homologous, right_homologous );
00902 translateToPairwiseGenomeHSS( island_array, hss_cols );
00903
00904
00905
00906 backbone_list_t ula_list;
00907 mergePairwiseHomologyPredictions( iv_orig_ptrs, hss_cols, ula_list );
00908
00909
00910 IntervalList iv_list;
00911 iv_list.seq_table = seq_table;
00912 iv_list.resize(1);
00913 vector<AbstractMatch*> asdf(1, iv_orig_ptrs.front()->Copy() );
00914 iv_list[0].SetMatches( asdf );
00915
00916 unalignIslands( iv_list, iv_orig_ptrs, ula_list );
00917
00918
00919 for( size_t ulaI = 0; ulaI < ula_list.size(); ++ulaI )
00920 for( size_t i = 0; i < ula_list[ulaI].size(); ++i )
00921 ula_list[ulaI][i]->Free();
00922 ula_list.clear();
00923
00924
00925 createBackboneList( iv_list, ula_list );
00926
00927 iv_orig_ptrs.clear();
00928
00929 bb_list.clear();
00930 bb_list = ula_list;
00931
00932
00933
00934 result = tmp_cga.Copy();
00935 if( iv_list.size() > 0 )
00936 new (result)CompactGappedAlignment<>( iv_list[0] );
00937 }
00938
00939
00940 void detectAndApplyBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const Params& hmm_params )
00941 {
00942
00943 collapseCollinear( iv_list );
00944
00945 uint seq_count = iv_list.seq_table.size();
00946 boost::multi_array< vector< CompactGappedAlignment<>* >, 2> island_array;
00947
00948
00949 pairwise_genome_hss_t hss_cols(boost::extents[seq_count][seq_count][iv_list.size()]);
00950
00951
00952 vector< CompactGappedAlignment<>* > iv_ptrs(iv_list.size());
00953 for( size_t i = 0; i < iv_list.size(); ++i )
00954 {
00955 CompactGappedAlignment<> tmp_cga;
00956 iv_ptrs[i] = tmp_cga.Copy();
00957 new (iv_ptrs[i])CompactGappedAlignment<>( iv_list[i] );
00958 }
00959 vector< CompactGappedAlignment<>* > iv_orig_ptrs(iv_ptrs);
00960
00961 makeAllPairwiseGenomeHSS( iv_list, iv_ptrs, iv_orig_ptrs, hss_cols, hmm_params );
00962
00963 backbone_list_t ula_list;
00964
00965
00966 mergePairwiseHomologyPredictions( iv_orig_ptrs, hss_cols, ula_list );
00967
00968
00969 unalignIslands( iv_list, iv_orig_ptrs, ula_list );
00970
00971
00972 addUnalignedRegions( iv_list );
00973
00974
00975 for( size_t ulaI = 0; ulaI < ula_list.size(); ++ulaI )
00976 for( size_t i = 0; i < ula_list[ulaI].size(); ++i )
00977 ula_list[ulaI][i]->Free();
00978 ula_list.clear();
00979
00980
00981 createBackboneList( iv_list, ula_list );
00982
00983 iv_orig_ptrs.clear();
00984
00985 bb_list.clear();
00986 bb_list = ula_list;
00987
00988 checkForAllGapColumns( iv_list );
00989 }
00990
00991 void writeBackboneColumns( ostream& bb_out, backbone_list_t& bb_list )
00992 {
00993
00994
00995
00996 for( size_t ivI = 0; ivI < bb_list.size(); ++ivI )
00997 {
00998 for( size_t mI = 0; mI < bb_list[ivI].size(); ++mI )
00999 {
01000 size_t seqI = bb_list[ivI][mI]->FirstStart();
01001 bb_out << ivI << '\t' << bb_list[ivI][mI]->LeftEnd(seqI) << '\t' << bb_list[ivI][mI]->Length();
01002 for( ; seqI < bb_list[ivI][mI]->SeqCount(); ++seqI )
01003 {
01004 if( bb_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
01005 continue;
01006 bb_out << '\t' << seqI;
01007 }
01008 bb_out << endl;
01009 }
01010 }
01011 }
01012
01013 void writeBackboneSeqCoordinates( backbone_list_t& bb_list, IntervalList& iv_list, ostream& bb_out )
01014 {
01015 if( bb_list.size() == 0 )
01016 return;
01017
01018 uint seq_count = 0;
01019 for( size_t bbI = 0; bbI < bb_list.size(); ++bbI )
01020 if( bb_list[bbI].size() > 0 )
01021 {
01022 seq_count = bb_list[bbI].front()->SeqCount();
01023 break;
01024 }
01025
01026
01027
01028 for( size_t seqI = 0; seqI < seq_count; ++seqI )
01029 {
01030 if( seqI > 0 )
01031 bb_out << '\t';
01032 bb_out << "seq_" << seqI << "_leftend\t";
01033 bb_out << "seq_" << seqI << "_rightend";
01034 }
01035 bb_out << endl;
01036 for( size_t ivI = 0; ivI < bb_list.size(); ++ivI )
01037 {
01038
01039
01040
01041 CompactGappedAlignment<> iv_cga( iv_list[ivI] );
01042 for( size_t mI = 0; mI < bb_list[ivI].size(); ++mI )
01043 {
01044 uint fs = bb_list[ivI][mI]->FirstStart();
01045
01046 vector< gnSeqI > left_pos;
01047 vector< bool > left_cols;
01048 iv_cga.GetColumn( bb_list[ivI][mI]->LeftEnd(fs)-1, left_pos, left_cols );
01049 vector< gnSeqI > right_pos;
01050 vector< bool > right_cols;
01051 iv_cga.GetColumn( bb_list[ivI][mI]->RightEnd(fs)-1, right_pos, right_cols );
01052 for( size_t seqI = 0; seqI < bb_list[ivI][mI]->SeqCount(); ++seqI )
01053 {
01054 if( seqI > 0 )
01055 bb_out << '\t';
01056 if( bb_list[ivI][mI]->LeftEnd(seqI) == NO_MATCH )
01057 {
01058 bb_out << "0\t0";
01059 continue;
01060 }else{
01061 int64 leftI = left_pos[seqI];
01062 int64 rightI = right_pos[seqI];
01063 if( iv_cga.Orientation(seqI) == AbstractMatch::forward && leftI != 0 && !left_cols[seqI] )
01064 leftI++;
01065 if( iv_cga.Orientation(seqI) == AbstractMatch::reverse && rightI != 0 && !right_cols[seqI] )
01066 rightI++;
01067 if( iv_cga.Orientation(seqI) == AbstractMatch::reverse )
01068 {
01069 swap( leftI, rightI );
01070 }
01071 if( rightI + 1 == leftI )
01072 {
01073 bb_out << "0\t0";
01074 continue;
01075 }
01076 if( leftI > rightI )
01077 {
01078 cerr << "oh crahpey!\n";
01079 cerr << "leftI: " << leftI << endl;
01080 cerr << "rightI: " << rightI << endl;
01081 cerr << "seqI: " << seqI << endl;
01082 cerr << "ivI: " << ivI << endl;
01083 }
01084 if( leftI == 0 )
01085 leftI = iv_cga.LeftEnd(seqI);
01086 if( rightI == iv_cga.RightEnd(seqI)+1 )
01087 rightI--;
01088 if( iv_cga.Orientation(seqI) == AbstractMatch::reverse )
01089 {
01090 leftI *= -1;
01091 rightI *= -1;
01092 }
01093 bb_out << leftI << '\t' << rightI;
01094 }
01095 }
01096 bb_out << endl;
01097 }
01098 }
01099 }
01100
01101
01102 }
01103