libMems/Backbone.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: Backbone.h,v 1.7 2004/03/01 02:40:08 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifndef __Backbone_h__
00010 #define __Backbone_h__
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015 
00016 #include "libGenome/gnSequence.h"
00017 #include "libMems/SubstitutionMatrix.h"
00018 #include "libMems/IntervalList.h"
00019 #include "libMems/NumericMatrix.h"
00020 #include "libMems/MatchList.h"
00021 #include "libMems/GappedAlignment.h"
00022 #include "libMems/CompactGappedAlignment.h"
00023 #include "libMems/Aligner.h"
00024 #include "libMems/Islands.h"
00025 #include <boost/multi_array.hpp>
00026 
00027 #include <sstream>
00028 #include <vector>
00029 
00030 namespace mems {
00031 
00032 // this is a 99.9% score threshold derived from the EVD of
00033 // simulations of homolgous sequence diverged to .75 substitutions per site and .05 indels per site
00034 const mems::score_t DEFAULT_ISLAND_SCORE_THRESHOLD = 2727;
00035 
00036 typedef mems::UngappedLocalAlignment< mems::HybridAbstractMatch<> > ULA;
00037 typedef std::vector< std::vector< ULA* > > backbone_list_t;
00038 
00040 double computeGC( std::vector< genome::gnSequence* >& seq_table );
00041 
00045 void collapseCollinear( IntervalList& iv_list );
00046 
00050 void checkForAllGapColumns( IntervalList& iv_list );
00051 
00065 void detectAndApplyBackbone( AbstractMatch* m, std::vector< genome::gnSequence* >& seq_table, CompactGappedAlignment<>*& result, backbone_list_t& bb_list, const Params& hmm_params, boolean left_homologous = false, boolean right_homologous = false );
00066 // void detectAndApplyBackbone( AbstractMatch* m, std::vector< genome::gnSequence* >& seq_table, CompactGappedAlignment<>*& result, backbone_list_t& bb_list, const PairwiseScoringScheme& subst_scoring, score_t score_threshold = DEFAULT_ISLAND_SCORE_THRESHOLD, const float pGoHomo = 0.004, const float pGoUnrelated = 0.004, std::vector<double>* pEmitHomo = NULL, std::vector<double>* pEmitUnrelated = NULL, boolean left_homologous = false, boolean right_homologous = false );
00067 
00072 void detectAndApplyBackbone( IntervalList& iv_list, backbone_list_t& bb_list, const Params& hmm_params );
00073 
00077 void writeBackboneColumns( std::ostream& bb_out, backbone_list_t& bb_list );
00078 
00082 void writeBackboneSeqCoordinates( backbone_list_t& bb_list, IntervalList& iv_list, std::ostream& bb_out );
00083 
00084 
00085 
00086 typedef std::vector< std::pair< int64, int64 > > bb_seqentry_t;
00087 typedef struct bb_entry_s
00088 {
00089         bb_seqentry_t bb_seq;
00090         ULA bb_cols;
00091         size_t iv;
00092 } bb_entry_t;
00093 
00094 inline
00095 void printBbSeq( std::ostream& os, const bb_seqentry_t& bbseq )
00096 {
00097         for( size_t i = 0; i < bbseq.size(); ++i )
00098         {
00099                 if( i > 0 )
00100                         os << '\t';
00101                 os << "(" << bbseq[i].first << ", " << bbseq[i].second << ")";
00102         }
00103 }
00104 
00105 inline
00106 void readBackboneSeqFile( std::istream& bbseq_input, std::vector< bb_seqentry_t >& backbone )
00107 {
00108         std::string cur_line;
00109         std::getline( bbseq_input, cur_line );  // read off the header line
00110         while( std::getline( bbseq_input, cur_line ) )
00111         {
00112                 bb_seqentry_t bb;
00113                 std::stringstream line_str( cur_line );
00114                 int64 lpos = 0;
00115                 while( line_str >> lpos )
00116                 {
00117                         int64 rpos = 0;
00118                         line_str >> rpos;
00119                         bb.push_back( std::make_pair( lpos, rpos ) );
00120                 }
00121                 backbone.push_back(bb);
00122         }
00123 }
00124 
00125 inline
00126 void readBackboneColsFile( std::istream& bbcol_input, std::vector< std::pair< size_t, ULA > >& bb_list )
00127 {
00128         std::string cur_line;
00129         while( std::getline( bbcol_input, cur_line ) )
00130         {
00131                 ULA tmp_ula;
00132                 size_t ivI;
00133                 std::stringstream ss( cur_line );
00134                 ss >> ivI;
00135                 size_t left_col;
00136                 size_t len;
00137                 ss >> left_col;
00138                 ss >> len;
00139                 gnSeqI bbseq;
00140                 while( ss >> bbseq )
00141                 {
00142                         tmp_ula.SetStart( bbseq, left_col );
00143                 }
00144                 tmp_ula.SetLength( len );
00145                 bb_list.push_back( std::make_pair( ivI, tmp_ula ) );
00146         }
00147 }
00148 
00149 
00150 }
00151 
00152 #endif  // __Backbone_h__
00153 

Generated on Fri Mar 14 06:01:02 2008 for libMems by doxygen 1.3.6