libMems/GappedAlignment.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: GappedAlignment.h,v 1.12 2004/04/19 23:10:50 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifndef __GappedAlignment_h__
00010 #define __GappedAlignment_h__
00011 
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015 
00016 #include "libGenome/gnFilter.h"
00017 #include "libGenome/gnSequence.h"
00018 #include "libMems/SparseAbstractMatch.h"
00019 #include "libMems/AbstractGappedAlignment.h"
00020 #include "libMems/Memory.h"
00021 #include <iostream>
00022 
00023 namespace mems {
00024 
00025 class GappedAlignment : public AbstractGappedAlignment< SparseAbstractMatch<> >
00026 {
00027 public:
00028         GappedAlignment();
00029         GappedAlignment( uint seq_count, gnSeqI align_length );
00030         
00031         GappedAlignment* Clone() const { return new GappedAlignment( *this ); }
00032         GappedAlignment* Copy() const;
00033         virtual void Free();
00034         
00035         void SetAlignment( const std::vector< std::string >& seq_align );
00036 
00040         friend std::ostream& operator<<(std::ostream& os, const GappedAlignment& ga); //write to source.
00041 
00045         friend std::istream& operator>>(std::istream& is, GappedAlignment& ga); //read from source
00046 
00047         // Inherited methods from AbstractMatch:
00048         virtual void Invert();
00049         virtual void CropStart(gnSeqI crop_amount);
00050         virtual void CropEnd(gnSeqI crop_amount);
00051 
00052         virtual void CropLeft(gnSeqI crop_amount, uint seqI);
00053         virtual void CropRight(gnSeqI crop_amount, uint seqI);
00054 
00055         void GetAlignment( std::vector< bitset_t >& align_matrix ) const;
00056 
00057         friend const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );
00058 
00059         void GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const;
00060 
00065         virtual AbstractMatch* Split( gnSeqI before_column );
00066 
00067         virtual bool IsGap( uint seq, gnSeqI col ) const;
00068 
00069         void swap( GappedAlignment& other ){ swap(&other); }
00070 
00071 protected:
00072         // for use by derived classes in order to swap contents
00073         void swap( GappedAlignment* other ){
00074                 std::swap( align_matrix, other->align_matrix );
00075                 AbstractGappedAlignment< SparseAbstractMatch<> >::swap( other );
00076         }
00077 
00078         std::vector< std::string > align_matrix;
00079 
00080         void CropStartCoords(gnSeqI crop_amount);
00081         void CropEndCoords(gnSeqI crop_amount);
00082 };
00083 
00084 
00085 inline
00086 GappedAlignment* GappedAlignment::Copy() const
00087 {
00088         return m_allocateAndCopy( *this );
00089 }
00090 inline
00091 void GappedAlignment::Free()
00092 {
00093         m_free(this);
00094 }
00095 
00096 inline
00097 void GappedAlignment::Invert(){
00098         const genome::gnFilter* rc_filter = genome::gnFilter::DNAComplementFilter();
00099         for(uint startI = 0; startI < SeqCount(); startI++)
00100                 rc_filter->ReverseFilter( align_matrix[ startI ] );
00101         AbstractGappedAlignment< SparseAbstractMatch<> >::Invert();
00102 }
00103 
00104 inline
00105 void GappedAlignment::CropStartCoords(gnSeqI crop_amount){
00106         if( crop_amount > AlignmentLength() )
00107                 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00108         for( uint i=0; i < SeqCount(); i++ ){
00109                 gnSeqI char_count = 0;
00110                 for( gnSeqI cropI = 0; cropI < crop_amount; cropI++ )
00111                         if( align_matrix[i][cropI] != '-' )
00112                                 char_count++;
00113                 if( Start(i) > 0 )
00114                         SetStart(i, Start(i) + char_count);
00115                 SetLength(Length(i)-char_count, i);
00116                 if( Length(i) == 0 )
00117                         SetLeftEnd(i, NO_MATCH);
00118         }
00119         SetAlignmentLength( AlignmentLength() - crop_amount );
00120 }
00121 
00122 inline
00123 void GappedAlignment::CropStart(gnSeqI crop_amount){
00124         CropStartCoords(crop_amount);
00125         for( uint i=0; i < SeqCount(); i++ )
00126                 align_matrix[ i ] = align_matrix[ i ].substr( crop_amount );
00127 
00128 }
00129 
00130 inline
00131 void GappedAlignment::CropEndCoords(gnSeqI crop_amount){
00132         if( crop_amount > AlignmentLength() )
00133                 Throw_gnEx( genome::SeqIndexOutOfBounds() );
00134         SetAlignmentLength( AlignmentLength() - crop_amount );
00135 
00136         for( uint i=0; i < SeqCount(); i++ ){
00137                 gnSeqI char_count = 0;
00138                 for( gnSeqI cropI = align_matrix[i].length() - crop_amount; cropI < align_matrix[i].length(); cropI++ )
00139                         if( align_matrix[i][cropI] != '-' )
00140                                 char_count++;
00141                 if( Start(i) < 0 )
00142                         SetStart(i, Start(i)-char_count);
00143                 SetLength(Length(i)-char_count, i);
00144                 if( Length(i) == 0 )
00145                         SetLeftEnd(i, NO_MATCH);
00146         }
00147 }
00148 
00149 inline
00150 void GappedAlignment::CropEnd(gnSeqI crop_amount){
00151         CropEndCoords(crop_amount);
00152         // this code doesn't free up memory in Windows release builds
00153 //      for( uint i=0; i < SeqCount(); i++ )
00154 //      {
00155 //              align_matrix[ i ].resize( AlignmentLength() );
00156 //              align_matrix[ i ].reserve( AlignmentLength() );
00157 //      }
00158         std::vector< std::string > new_matrix(SeqCount());
00159         for( uint i=0; i < SeqCount(); i++ )
00160                 new_matrix[ i ] = align_matrix[ i ].substr( 0, AlignmentLength() );
00161         std::swap( new_matrix, align_matrix );
00162 }
00163 
00164 inline
00165 void GappedAlignment::CropLeft(gnSeqI crop_amount, uint seqI)
00166 {
00167         // count "crop_amount" characters into seqI and crop there
00168         size_t left_col = 0;
00169         if( Orientation(seqI) == AbstractMatch::forward )
00170         {
00171                 for( ; crop_amount > 0 && left_col < align_matrix[seqI].size(); ++left_col )
00172                         if( align_matrix[seqI][left_col] != '-' )
00173                                 --crop_amount;
00174 
00175                 CropStart(left_col);
00176         }else{
00177                 left_col = align_matrix[seqI].size();
00178                 for( ; crop_amount > 0 && left_col > 0; --left_col )
00179                         if( align_matrix[seqI][left_col-1] != '-' )
00180                                 --crop_amount;
00181                 CropEnd(AlignmentLength()-left_col);
00182         }
00183 }
00184 
00185 inline
00186 void GappedAlignment::CropRight(gnSeqI crop_amount, uint seqI)
00187 {
00188         // TODO: remove the dependency on Invert() since it will be slow
00189         Invert();
00190         CropLeft(crop_amount, seqI);
00191         Invert();
00192 }
00193 
00194 inline
00195 void GappedAlignment::GetAlignment( std::vector< bitset_t >& align_matrix ) const
00196 {
00197         align_matrix = std::vector< bitset_t >( this->align_matrix.size(), bitset_t(this->AlignmentLength(), false) );
00198         for( size_t seqI = 0; seqI < this->align_matrix.size(); seqI++ )
00199         {
00200                 if( LeftEnd(seqI) == NO_MATCH )
00201                         continue;
00202                 for( std::string::size_type charI = 0; charI < this->align_matrix[seqI].size(); charI++ )
00203                         if( this->align_matrix[seqI][charI] != '-' )
00204                                 align_matrix[seqI].set(charI);
00205         }
00206 }
00207 
00208 inline
00209 AbstractMatch* GappedAlignment::Split( gnSeqI before_column )
00210 {
00211         GappedAlignment ga_tmp(SeqCount(), AlignmentLength());
00212         GappedAlignment* ga = ga_tmp.Copy();
00213 
00214         for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
00215         {
00216                 ga->SetStart( seqI, Start(seqI) );
00217                 ga->SetLength( Length(seqI), seqI );
00218         }
00219         std::swap(ga->align_matrix, align_matrix);
00220         ga->CropStartCoords(before_column);
00221         std::swap(ga->align_matrix, align_matrix);
00222 
00223         ga->align_matrix.resize(SeqCount());
00224         for( size_t seqI = 0; seqI < SeqCount(); seqI++ )
00225                 ga->align_matrix[seqI] = align_matrix[seqI].substr( before_column );
00226         ga->SetAlignmentLength( AlignmentLength()-before_column );
00227         CropEnd(AlignmentLength()-before_column);
00228 
00229         return ga;
00230 }
00231 
00232 const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table );
00233 inline
00234 const std::vector<std::string>& GetAlignment( const GappedAlignment& ga, const std::vector< genome::gnSequence* >& seq_table )
00235 {
00236         return ga.align_matrix;
00237 }
00238 
00239 inline
00240 void GappedAlignment::GetColumn( gnSeqI col, std::vector<gnSeqI>& pos, std::vector<bool>& column ) const
00241 {
00242         pos = std::vector<gnSeqI>(SeqCount(), NO_MATCH);
00243         column = std::vector<bool>(SeqCount(), false);
00244         for( uint seqI = 0; seqI < SeqCount(); seqI++ )
00245         {
00246                 if( align_matrix[seqI][col] != '-' )
00247                         column[seqI] = true;
00248 
00249                 gnSeqI count = 0;
00250                 for( size_t colI = 0; colI <= col; colI++ )
00251                         if( align_matrix[seqI][colI] != '-' )
00252                                 count++;
00253 
00254                 if( count > 0 )
00255                 {
00256                         if( Orientation(seqI) == forward )
00257                                 pos[seqI] = LeftEnd(seqI) + count - 1;
00258                         else if( Orientation(seqI) == reverse )
00259                                 pos[seqI] = RightEnd(seqI) - count + 1;
00260                 }
00261         }
00262 }
00263 
00264 inline
00265 bool GappedAlignment::IsGap( uint seq, gnSeqI col ) const
00266 {
00267         return align_matrix[seq][col] == '-';
00268 }
00269 
00270 }
00271 
00272 
00273 namespace std {
00274 template<> inline
00275 void swap( mems::GappedAlignment& a, mems::GappedAlignment& b )
00276 {
00277         a.swap(b);
00278 }
00279 }
00280 
00281 
00282 #endif // __GappedAlignment_h__
00283 

Generated on Fri Mar 14 06:01:02 2008 for libMems by doxygen 1.3.6