00001
00002
00003
00004
00005
00006
00007
00008
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00025 #ifndef __gnAlignedSequences_h__
00026 #define __gnAlignedSequences_h__
00027
00028 #ifdef HAVE_CONFIG_H
00029 #include "config.h"
00030 #endif
00031
00032 #include "libGenome/gnSequence.h"
00033 #include "libGenome/gnFilter.h"
00034 #include <list>
00035 #include <fstream>
00036 #include <vector>
00037
00038 namespace mems {
00039
00040
/**
 * Integer constant with the value 60.
 * NOTE(review): presumably the number of alignment columns written per line
 * for the MEGA format; its use is not visible in this header -- confirm
 * against the implementation file.
 */
00041 const int MEGA_ALIGN_COLUMNS = 60;
00042
/**
 * Holds a set of aligned sequences together with their names, and declares
 * routines for loading an alignment from several file formats and writing it
 * back out.
 *
 * Only declarations are visible in this header (plus three inline
 * definitions after the class).  Behavior notes below that go beyond the
 * signatures are inferred from names and are explicitly marked as
 * assumptions to be confirmed against the implementation file.
 *
 * NOTE(review): a copy constructor is declared but no copy-assignment
 * operator is, so assignment falls back to the compiler-generated
 * member-wise copy.  The public member alignedSequences stores raw
 * std::string pointers and the inline destructor defined after the class has
 * an empty body -- confirm who owns those strings.
 */
00048 class gnAlignedSequences
00049 {
00050 public:
/** Default constructor (defined out of line). */
00054 gnAlignedSequences();
/**
 * Copy constructor (defined out of line).
 * @param toCopy the object to copy from
 */
00058 gnAlignedSequences(const gnAlignedSequences &toCopy);
/** Destructor; defined inline after the class with an empty body. */
00062 ~gnAlignedSequences();
00063
00064
/**
 * Returns a reference to a vector of format-name strings.
 * NOTE(review): presumably the names accepted by isSupportedFormat() and
 * output() -- confirm.
 */
00068 static const std::vector< std::string >& getSupportedFormats();
00069
/**
 * @param format_name the format name to test
 * @return NOTE(review): presumably true when format_name is one of the
 *         supported formats; `boolean` is a project-defined type -- confirm.
 */
00073 static boolean isSupportedFormat( const std::string& format_name );
00074
/**
 * Writes this alignment to a stream; does not modify the object (const).
 * @param format_name name selecting the output format
 * @param os          destination stream
 * NOTE(review): handling of an unrecognized format_name is not visible
 * here -- confirm.
 */
00079 void output( const std::string& format_name, std::ostream& os ) const;
00080
00081
/**
 * The constructFrom* family: each takes a file name by value.
 * NOTE(review): presumably each loads this object from the named file in
 * the format given by the function name (ClustalW, Phylip, MSF, Nexus,
 * MEGA).  They return void, so how a read failure is reported is not
 * visible here -- confirm.
 */
00086 void constructFromClustalW(std::string alignedFileName);
00091 void constructFromPhylip(std::string alignedFileName);
00096 void constructFromMSF(std::string alignedFileName);
00101 void constructFromNexus(std::string alignedFileName);
00106 void constructFromMega(std::string alignedFileName);
00107
/**
 * Stream-based counterpart of the constructFrom* functions.
 * @param align_stream input stream to read from
 * NOTE(review): presumably expects "relaxed Nexus" formatted input --
 * confirm.
 */
00113 void constructFromRelaxedNexus( std::istream& align_stream );
00114
/**
 * Stores the given name in alignedSequenceFileName (see the inline
 * definition after the class).
 * @param name the file name to store
 */
00119 void assignFileName(std::string name);
00120
00121
/**
 * The output* family: each writes to the given stream and returns bool.
 * NOTE(review): presumably each emits the alignment in the format given by
 * the function name and returns true on success -- confirm.
 * outputWithConsensus() is the only one not declared const.
 */
00127 bool outputPhylip(std::ostream& os) const;
00133 bool outputClustalW(std::ostream& os) const;
00139 bool outputMSF(std::ostream& os) const;
00145 bool outputNexus(std::ostream& os) const;
00151 bool outputMega(std::ostream& os) const;
00158 bool outputCodon(std::ostream& os) const;
00165 bool outputWithConsensus(std::ostream& os);
00166
00167
/**
 * Returns a new gnAlignedSequences by value.
 * @param start first position of the requested range
 * @param stop  last position of the requested range
 * NOTE(review): whether positions are 0- or 1-based and whether stop is
 * inclusive is not visible here -- confirm.
 */
00176 gnAlignedSequences getAlignedSegment(unsigned start, unsigned stop);
/**
 * Returns a new gnAlignedSequences by value.
 * @param readingFrame  NOTE(review): presumably the reading frame to use
 * @param startCodon    NOTE(review): presumably the first codon to take
 * @param codonMultiple NOTE(review): presumably a stride over codons
 * All three meanings are inferred from the names -- confirm.
 */
00187 gnAlignedSequences getCodons(int readingFrame, int startCodon, int codonMultiple);
00188
/** Returns a copy of alignedSequenceFileName (see the inline definition). */
00193 std::string getAlignedSequenceFileName();
/**
 * Returns a gnSeqI (project-defined type); does not modify the object.
 * NOTE(review): the name suggests a size of the aligned sequences; whether
 * that is the sequence count or the alignment length is not visible --
 * confirm.
 */
00198 gnSeqI alignedSeqsSize() const;
00199
/**
 * Two overloads selecting a sequence by name or by index.
 * NOTE(review): presumably removes that sequence and returns true if one
 * was removed -- confirm.
 */
00205 bool removeAlignedSeq(std::string seqName);
00211 bool removeAlignedSeq(unsigned index);
00212
/**
 * @param toConcat alignment to concatenate; passed by value, so the
 *                 argument is copied at the call
 * NOTE(review): how sequences in toConcat are matched to those already
 * held is not visible here -- confirm.
 */
00217 void concatenateAlignedSequences(gnAlignedSequences toConcat);
00218
/**
 * @param variableSites         output parameter (non-const reference)
 * @param countGapsAsMismatches NOTE(review): presumably makes gap
 *                              characters count as differences -- confirm
 */
00224 void extractVariableSites(gnAlignedSequences &variableSites, bool countGapsAsMismatches);
00225
/**
 * NOTE(review): presumably merges sequences with identical content; the
 * meaning of the bool result is not visible here -- confirm.
 */
00232 bool collapseIdenticalSequences();
/**
 * Returns a std::vector<char> by value.
 * @param offset NOTE(review): presumably an alignment column index, with
 *               the result holding one character per sequence -- confirm
 */
00239 std::vector <char> operator[]( const int offset );
00240
/**
 * Two public overloads taking the sequence either as a std::string or as a
 * genome::gnSequence, plus its name; both arguments are non-const
 * references.
 * NOTE(review): presumably appends the sequence to this alignment; whether
 * the arguments are modified is not visible here -- confirm.
 */
00246 void addSequence(std::string& seqToAdd, std::string& seqName);
00252 void addSequence(genome::gnSequence& seqToAdd, std::string& seqName);
00253
// Public data members: callers can read and modify these directly.
// List of pairs of raw std::string pointers.
// NOTE(review): presumably (name, sequence) pairs; ownership of the pointed-to
// strings is not visible here -- confirm.
00254 std::list <std::pair <std::string*, std::string*> > alignedSequences;
// NOTE(review): presumably sequences[i] and names[i] describe the same entry,
// with positions[i] an associated coordinate (int64 is a project-defined
// type) -- confirm.
00255 std::vector< std::string > sequences;
00256 std::vector< std::string > names;
00257 std::vector< int64 > positions;
// TODO(review): purpose not evident from the declaration (takes an index of
// project type uint, returns void) -- document after checking the
// implementation.
00258 void seq( uint seqI );
00259
00260 private:
00261
/**
 * The read*Alignment family; each returns bool.
 * NOTE(review): presumably each parses one format (true on success), the
 * parameterless ones reading from the file named by
 * alignedSequenceFileName -- confirm.
 */
00266 bool readRelaxedNexusAlignment( std::istream& align_stream );
00271 bool readClustalWAlignment();
00276 bool readPhylipAlignment();
00281 bool readMSFAlignment();
00286 bool readNexusAlignment();
00291 bool readMegaAlignment();
00292
/**
 * The construct*AlignedSequenceList family; each takes an already
 * constructed std::ifstream by reference and returns bool.
 * NOTE(review): presumably each fills the sequence containers from the
 * stream and returns true on success -- confirm.
 */
00298 bool constructClustalWAlignedSequenceList(std::ifstream& alignmentFile);
00304 bool constructPhylipAlignedSequenceList(std::ifstream& alignmentFile);
00310 bool constructMSFAlignedSequenceList(std::ifstream& alignmentFile);
00316 bool constructNexusAlignedSequenceList(std::ifstream& alignmentFile);
00322 bool constructMegaAlignedSequenceList(std::ifstream& alignmentFile);
00323
/**
 * @param sequenceName name to look for
 * @param sequenceItr  iterator over alignedSequences, passed by non-const
 *                     reference
 * NOTE(review): presumably returns true when the name is found and leaves
 * sequenceItr at the matching entry -- confirm.
 */
00330 bool sequenceNameInList(std::string sequenceName, std::list <std::pair <std::string*, std::string*> >::iterator &sequenceItr);
00331
/**
 * Overload returning an int.
 * NOTE(review): presumably the index of the matching name; the value
 * returned when there is no match is not visible here -- confirm.
 */
00337 int sequenceNameInList( std::string& sequenceName );
00338
/**
 * NOTE(review): presumably fills the `consensus` member from the held
 * sequences; meaning of the bool result not visible here -- confirm.
 */
00343 bool buildConsensus();
00344
/**
 * Private overload; unlike the public ones it takes everything by value.
 * @param seqToAdd          sequence to add
 * @param seqName           its name
 * @param consensusStart    NOTE(review): presumably a start position
 *                          relative to the consensus -- confirm
 * @param originalConsensus NOTE(review): presumably the consensus before
 *                          this addition -- confirm
 */
00352 void addSequence(genome::gnSequence seqToAdd, std::string seqName, int consensusStart, std::string originalConsensus);
00353
/**
 * Helpers operating on a [start, stop] position range.
 * NOTE(review): presumably these copy segments from `alignment` into this
 * object (the ReplaceGaps variant treating gap characters differently) and
 * remove a range from this object, respectively; index base and
 * inclusiveness are not visible here -- confirm.
 */
00360 void addAllSegments(gnAlignedSequences &alignment, unsigned start, unsigned stop);
00368 void addAllSegmentsReplaceGaps(gnAlignedSequences &alignment, unsigned start, unsigned stop);
00374 void removeAllSegments(unsigned start, unsigned stop);
00375
/**
 * @param base a single character
 * @return an int; NOTE(review): presumably an index assigned to that base
 *         character -- the mapping is not visible here, confirm.
 */
00381 int determineBaseIndex(char base);
00382
/**
 * @param line one line of text, passed by value
 * NOTE(review): presumably tests whether the line holds coordinate
 * information -- confirm.
 */
00388 bool coordinates(std::string line);
00389
// File name written by assignFileName() and returned by
// getAlignedSequenceFileName().
00390 std::string alignedSequenceFileName;
00391
// NOTE(review): presumably the consensus string produced by buildConsensus()
// -- confirm.
00392 std::string consensus;
// TODO(review): meaning not evident from the declaration -- document after
// checking the implementation.
00393 std::vector <int> indexPositions;
00394 };
00395
00396
00397
/**
 * Destructor with an empty body: nothing beyond the members' own destructors
 * runs.
 * NOTE(review): the class holds raw std::string pointers in
 * alignedSequences; if the class owns those strings they are not released
 * here -- confirm ownership.
 */
00398 inline
00399 gnAlignedSequences::~gnAlignedSequences() {}
00400
/**
 * Stores a copy of the given name in alignedSequenceFileName.  Only the
 * member is assigned; no file is opened or read here.
 * @param name the file name to remember (taken by value)
 */
00401 inline
00402 void gnAlignedSequences::assignFileName(std::string name) {alignedSequenceFileName=name;}
00403
/**
 * Returns a copy of alignedSequenceFileName.
 * NOTE(review): the function only reads a member yet is not declared const,
 * so it cannot be called on a const gnAlignedSequences; making it const would
 * also require changing the in-class declaration.
 * @return the stored file name, by value
 */
00404 inline
00405 std::string gnAlignedSequences::getAlignedSequenceFileName() {return alignedSequenceFileName;}
00406
00407 }
00408
00409 #endif // __gnAlignedSequences_h__