00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef __GappedAlignment_h__
00010 #define __GappedAlignment_h__
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include "libGenome/gnFilter.h"
00017 #include "libGenome/gnSequence.h"
00018 #include "libMems/SparseAbstractMatch.h"
00019 #include "libMems/AbstractGappedAlignment.h"
00020 #include "libMems/Memory.h"
00021 #include <iostream>
00022
00023 namespace mems {
00024
00025 class GappedAlignment : public AbstractGappedAlignment< SparseAbstractMatch<> >
00026 {
00027 public:
00028 GappedAlignment();
00029 GappedAlignment( uint seq_count, gnSeqI align_length );
00030
00031 GappedAlignment* Clone() const { return new GappedAlignment( *this ); }
00032 GappedAlignment* Copy() const;
00033 virtual void Free();
00034
00035 void SetAlignment( const std::vector< std::string >& seq_align );
00036
00040 friend std::ostream& operator<<(std::ostream& os, const GappedAlignment& ga);
00041
00045 friend std::istream& operator>>(std::istream& is, GappedAlignment& ga);
00046
00047
00048 virtual void Invert();
00049 virtual void CropStart(gnSeqI crop_amount);
00050 virtual void CropEnd(gnSeqI crop_amount);
00051
00052 virtual void CropLeft(gnSeqI crop_amount, uint seqI);
00053 virtual void CropRight(gnSeqI crop_amount, uint seqI);
00054
00055 void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
00056
00057 friend const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );
00058
00059 void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
00060
00065 virtual AbstractMatch* Split( gnSeqI before_column );
00066
00067 virtual bool IsGap( uint seq, gnSeqI col ) const;
00068
00069 void swap( GappedAlignment& other ){ swap(&other); }
00070
00071 protected:
00072
00073 void swap( GappedAlignment* other ){
00074 std::swap( align_matrix, other->align_matrix );
00075 AbstractGappedAlignment< SparseAbstractMatch<> >::swap( other );
00076 }
00077
00078 std::vector< std::string > align_matrix;
00079
00080 void CropStartCoords(gnSeqI crop_amount);
00081 void CropEndCoords(gnSeqI crop_amount);
00082 };
00083
00084
00085 inline
00086 GappedAlignment* GappedAlignment::Copy() const
00087 {
00088 return m_allocateAndCopy( *this );
00089 }
00090 inline
00091 void GappedAlignment::Free()
00092 {
00093 m_free(this);
00094 }
00095
00096 inline
00097 void GappedAlignment::Invert(){
00098 const genome::gnFilter* rc_filter = genome::gnFilter::DNAComplementFilter();
00099 for(uint startI = 0; startI < SeqCount(); startI++)
00100 rc_filter->ReverseFilter( align_matrix[ startI ] );
00101 AbstractGappedAlignment< SparseAbstractMatch<> >::Invert();
00102 }
00103
00104 inline
00105 void GappedAlignment::CropStartCoords(gnSeqI crop_amount){
00106 if( crop_amount > AlignmentLength() )
00107 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00108 for( uint i=0; i < SeqCount(); i++ ){
00109 gnSeqI char_count = 0;
00110 for( gnSeqI cropI = 0; cropI < crop_amount; cropI++ )
00111 if( align_matrix[i][cropI] != '-' )
00112 char_count++;
00113 if( Start(i) > 0 )
00114 SetStart(i, Start(i) + char_count);
00115 SetLength(Length(i)-char_count, i);
00116 if( Length(i) == 0 )
00117 SetLeftEnd(i, NO_MATCH);
00118 }
00119 SetAlignmentLength( AlignmentLength() - crop_amount );
00120 }
00121
00122 inline
00123 void GappedAlignment::CropStart(gnSeqI crop_amount){
00124 CropStartCoords(crop_amount);
00125 for( uint i=0; i < SeqCount(); i++ )
00126 align_matrix[ i ] = align_matrix[ i ].substr( crop_amount );
00127
00128 }
00129
00130 inline
00131 void GappedAlignment::CropEndCoords(gnSeqI crop_amount){
00132 if( crop_amount > AlignmentLength() )
00133 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00134 SetAlignmentLength( AlignmentLength() - crop_amount );
00135
00136 for( uint i=0; i < SeqCount(); i++ ){
00137 gnSeqI char_count = 0;
00138 for( gnSeqI cropI = align_matrix[i].length() - crop_amount; cropI < align_matrix[i].length(); cropI++ )
00139 if( align_matrix[i][cropI] != '-' )
00140 char_count++;
00141 if( Start(i) < 0 )
00142 SetStart(i, Start(i)-char_count);
00143 SetLength(Length(i)-char_count, i);
00144 if( Length(i) == 0 )
00145 SetLeftEnd(i, NO_MATCH);
00146 }
00147 }
00148
00149 inline
00150 void GappedAlignment::CropEnd(gnSeqI crop_amount){
00151 CropEndCoords(crop_amount);
00152
00153
00154
00155
00156
00157
00158 std::vector< std::string > new_matrix(SeqCount());
00159 for( uint i=0; i < SeqCount(); i++ )
00160 new_matrix[ i ] = align_matrix[ i ].substr( 0, AlignmentLength() );
00161 std::swap( new_matrix, align_matrix );
00162 }
00163
00164 inline
00165 void GappedAlignment::CropLeft(gnSeqI crop_amount, uint seqI)
00166 {
00167
00168 size_t left_col = 0;
00169 if( Orientation(seqI) == AbstractMatch::forward )
00170 {
00171 for( ; crop_amount > 0 && left_col < align_matrix[seqI].size(); ++left_col )
00172 if( align_matrix[seqI][left_col] != '-' )
00173 --crop_amount;
00174
00175 CropStart(left_col);
00176 }else{
00177 left_col = align_matrix[seqI].size();
00178 for( ; crop_amount > 0 && left_col > 0; --left_col )
00179 if( align_matrix[seqI][left_col-1] != '-' )
00180 --crop_amount;
00181 CropEnd(AlignmentLength()-left_col);
00182 }
00183 }
00184
00185 inline
00186 void GappedAlignment::CropRight(gnSeqI crop_amount, uint seqI)
00187 {
00188
00189 Invert();
00190 CropLeft(crop_amount, seqI);
00191 Invert();
00192 }
00193
00194 inline
00195 void GappedAlignment::GetAlignment( std::vector< bitset_t >& align_matrix ) const
00196 {
00197 align_matrix = std::vector< bitset_t >( this->align_matrix.size(), bitset_t(this->AlignmentLength(), false) );
00198 for( size_t seqI = 0; seqI < this->align_matrix.size(); seqI++ )
00199 {
00200 if( LeftEnd(seqI) == NO_MATCH )
00201 continue;
00202 for( std::string::size_type charI = 0; charI < this->align_matrix[seqI].size(); charI++ )
00203 if( this->align_matrix[seqI][charI] != '-' )
00204 align_matrix[seqI].set(charI);
00205 }
00206 }
00207
00208 inline
00209 AbstractMatch* GappedAlignment::Split( gnSeqI before_column )
00210 {
00211 GappedAlignment ga_tmp(SeqCount(), AlignmentLength());
00212 GappedAlignment* ga = ga_tmp.Copy();
00213
00214 for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
00215 {
00216 ga->SetStart( seqI, Start(seqI) );
00217 ga->SetLength( Length(seqI), seqI );
00218 }
00219 std::swap(ga->align_matrix, align_matrix);
00220 ga->CropStartCoords(before_column);
00221 std::swap(ga->align_matrix, align_matrix);
00222
00223 ga->align_matrix.resize(SeqCount());
00224 for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
00225 ga->align_matrix[seqI] = align_matrix[seqI].substr( before_column );
00226 ga->SetAlignmentLength( AlignmentLength()-before_column );
00227 CropEnd(AlignmentLength()-before_column);
00228
00229 return ga;
00230 }
00231
00232 const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );
00233 inline
00234 const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table )
00235 {
00236 return ga.align_matrix;
00237 }
00238
00239 inline
00240 void GappedAlignment::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
00241 {
00242 pos = std::vector<gnSeqI>(SeqCount(), NO_MATCH);
00243 column = std::vector<bool>(SeqCount(), false);
00244 for( uint seqI = 0; seqI < SeqCount(); seqI++ )
00245 {
00246 if( align_matrix[seqI][col] != '-' )
00247 column[seqI] = true;
00248
00249 gnSeqI count = 0;
00250 for( size_t colI = 0; colI <= col; colI++ )
00251 if( align_matrix[seqI][colI] != '-' )
00252 count++;
00253
00254 if( count > 0 )
00255 {
00256 if( Orientation(seqI) == forward )
00257 pos[seqI] = LeftEnd(seqI) + count - 1;
00258 else if( Orientation(seqI) == reverse )
00259 pos[seqI] = RightEnd(seqI) - count + 1;
00260 }
00261 }
00262 }
00263
00264 inline
00265 bool GappedAlignment::IsGap( uint seq, gnSeqI col ) const
00266 {
00267 return align_matrix[seq][col] == '-';
00268 }
00269
00270 }
00271
00272
00273 namespace std {
00274 template<> inline
00275 void swap( mems::GappedAlignment& a, mems::GappedAlignment& b )
00276 {
00277 a.swap(b);
00278 }
00279 }
00280
00281
00282 #endif // __GappedAlignment_h__
00283