libMems/gnAlignedSequences.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: gnAlignedSequences.h,v 1.5 2004/02/27 23:08:55 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * This file is licensed under the GPL.
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00010 // File:            gnAlignedSequences.h
00011 // Purpose:         Aligned Sequences class
00012 // Description:     Provides an alignment interface for any number of alignable
00013 //                  sequences (the data of each of which is contained in a
00014 //                  genome::gnSequence object).
00015 //                  Currently only compatible with ClustalW alignment files.
00016 // Revisions:       
00017 // Version:         A
00018 // Created:         August 3, 2000, 11:55am
00019 // Author:          Brian Gettler
00020 // Last Edited:     May 3, 2001, 4:25pm
00021 // Modified by:     
00022 // Copyright:       (c)
00023 // Licences:         
00025 #ifndef __gnAlignedSequences_h__
00026 #define __gnAlignedSequences_h__
00027 
00028 #ifdef HAVE_CONFIG_H
00029 #include "config.h"
00030 #endif
00031 
00032 #include "libGenome/gnSequence.h"
00033 #include "libGenome/gnFilter.h"
00034 #include <list>
00035 #include <fstream>
00036 #include <vector>
00037 
00038 namespace mems {
00039 
00040 // The number of characters in each row of an alignment file.
// NOTE(review): the name suggests this row width is specific to MEGA-format
// files -- confirm against the MEGA reader/writer in the implementation file.
00041 const int MEGA_ALIGN_COLUMNS = 60;
00042 
// gnAlignedSequences: holds a set of named, aligned sequences and declares
// members to load an alignment (ClustalW, Phylip, MSF, Nexus, Mega, relaxed
// Nexus), write it to a stream, and derive new alignments from it.
//
// NOTE(review): the file header says sequence data is held in
// genome::gnSequence objects and that only ClustalW files are supported, but
// the members below store std::string data and declare loaders for several
// formats -- the header text may be out of date; confirm against the .cpp.
//
// Only declarations are visible in this header. Anything below that goes beyond
// what the signatures show is marked as an assumption to be confirmed.
00048 class gnAlignedSequences// : blClone
00049 {
00050 public:
              // Default constructor.
00054         gnAlignedSequences();
              // Copy constructor; toCopy is taken by const reference.
00058         gnAlignedSequences(const gnAlignedSequences &toCopy);
              // Destructor; defined inline after the class with an empty body.
00062         ~gnAlignedSequences();
00063 
00064 
              // Static. Returns a const reference to a vector of strings
              // (presumably the format names accepted by isSupportedFormat() and
              // output() -- confirm).
00068         static const std::vector< std::string >& getSupportedFormats();
00069 
              // Static. Takes a format name and returns a `boolean` (a type not
              // declared in this header). Presumably true when format_name is one
              // of getSupportedFormats() -- confirm.
00073         static boolean isSupportedFormat( const std::string& format_name );
00074 
              // Const member taking a format name and an output stream. Presumably
              // writes this alignment to os in that format by forwarding to the
              // matching output*() member -- confirm.
00079         void output( const std::string& format_name, std::ostream& os ) const;
00080 
00081 // sequence alignment loading
              // Each constructFrom*() below takes the alignment file name by value
              // (the string is copied per call). Presumably each loads this object
              // from a file in the named format via the matching private
              // read*Alignment() member -- confirm in the .cpp.
00086         void constructFromClustalW(std::string alignedFileName);
00091         void constructFromPhylip(std::string alignedFileName);
00096         void constructFromMSF(std::string alignedFileName);
00101         void constructFromNexus(std::string alignedFileName);
00106         void constructFromMega(std::string alignedFileName);
00107         
              // Unlike the loaders above, takes an input stream rather than a file
              // name.
00113         void constructFromRelaxedNexus( std::istream& align_stream );
00114 
              // Sets alignedSequenceFileName to name (defined inline after the
              // class).
00119         void assignFileName(std::string name);
00120 
00121 // output
              // Each output*() takes the destination stream and returns bool
              // (presumably true on success -- confirm). All are const except
              // outputWithConsensus().
00127         bool outputPhylip(std::ostream& os) const;
00133         bool outputClustalW(std::ostream& os) const;
00139         bool outputMSF(std::ostream& os) const;
00145         bool outputNexus(std::ostream& os) const;
00151         bool outputMega(std::ostream& os) const;
00158         bool outputCodon(std::ostream& os) const;
              // Non-const, unlike the other output*() members; presumably because
              // it (re)builds the consensus member before writing -- confirm.
00165         bool outputWithConsensus(std::ostream& os);
00166 
00167 // alignment manipulators that create new gnAlignedSequences
              // Returns a new gnAlignedSequences by value. Whether stop is
              // inclusive and whether positions are 0- or 1-based is not visible
              // here -- confirm before relying on either.
00176         gnAlignedSequences getAlignedSegment(unsigned start, unsigned stop);
              // Returns a new gnAlignedSequences by value. The exact meaning of
              // readingFrame, startCodon and codonMultiple is defined by the
              // implementation -- TODO document once confirmed.
00187         gnAlignedSequences getCodons(int readingFrame, int startCodon, int codonMultiple);
00188         
              // Returns a copy of alignedSequenceFileName (defined inline after the
              // class). Not const-qualified.
00193         std::string getAlignedSequenceFileName();
              // Const. Returns a gnSeqI (a type not declared in this header).
              // NOTE(review): could be the alignment length or the number of
              // sequences -- confirm which.
00198         gnSeqI alignedSeqsSize() const;
00199 
              // Two overloads: the sequence to remove is selected by name or by
              // index. Both return bool (presumably whether a sequence was
              // removed -- confirm).
00205         bool removeAlignedSeq(std::string seqName);
00211         bool removeAlignedSeq(unsigned index);
00212 
              // toConcat is passed by value, so the argument is copied on each call.
00217         void concatenateAlignedSequences(gnAlignedSequences toConcat);
00218         
              // variableSites is a non-const reference, i.e. an object the call may
              // modify. countGapsAsMismatches presumably decides whether a gap makes
              // a column count as variable -- confirm.
00224         void extractVariableSites(gnAlignedSequences &variableSites, bool countGapsAsMismatches);
00225 
              // Returns bool; the meaning of the result is not visible here.
00232         bool collapseIdenticalSequences();
              // Returns a vector<char> by value for the given offset (presumably one
              // alignment column, one char per sequence -- confirm). Not a const
              // member: the const qualifier is commented out.
00239         std::vector <char> operator[]( const int offset ); //const;
00240         
              // Both overloads take seqToAdd and seqName by non-const reference, so
              // temporaries and const objects cannot be passed. Whether the
              // arguments are modified is not visible here.
00246         void addSequence(std::string& seqToAdd, std::string& seqName);
00252         void addSequence(genome::gnSequence& seqToAdd, std::string& seqName);
00253 
              // Public data members: callers can read and modify these directly.
              // alignedSequences holds pairs of raw std::string pointers.
              // NOTE(review): presumably (name, sequence) -- confirm the order.
              // Ownership is not visible here, and the destructor body is empty.
00254         std::list <std::pair <std::string*, std::string*> > alignedSequences;
              // NOTE(review): sequences, names and positions look like parallel
              // per-sequence arrays -- confirm. int64 is not declared in this header.
00255         std::vector< std::string > sequences;
00256         std::vector< std::string > names;
00257         std::vector< int64 > positions;         
              // Purpose not evident from the declaration (takes an index, returns
              // void) -- TODO document.
00258         void seq( uint seqI );
00259 
00260 private:
00261 
              // Format-specific readers, each returning bool.
              // readRelaxedNexusAlignment() reads from the supplied stream; the
              // others take no arguments (presumably they open
              // alignedSequenceFileName -- confirm).
00266         bool readRelaxedNexusAlignment( std::istream& align_stream );
00271         bool readClustalWAlignment();
00276         bool readPhylipAlignment();
00281         bool readMSFAlignment();
00286         bool readNexusAlignment();
00291         bool readMegaAlignment();
00292 
              // Each takes a std::ifstream by reference and returns bool; presumably
              // parses the file into this object's sequence containers -- confirm.
00298         bool constructClustalWAlignedSequenceList(std::ifstream& alignmentFile);
00304         bool constructPhylipAlignedSequenceList(std::ifstream& alignmentFile);
00310         bool constructMSFAlignedSequenceList(std::ifstream& alignmentFile);
00316         bool constructNexusAlignedSequenceList(std::ifstream& alignmentFile);
00322         bool constructMegaAlignedSequenceList(std::ifstream& alignmentFile);
00323 
              // sequenceItr is a non-const reference to an iterator over the
              // alignedSequences list type; presumably set to the matching entry
              // when the result is true -- confirm.
00330         bool sequenceNameInList(std::string sequenceName, std::list <std::pair <std::string*, std::string*> >::iterator &sequenceItr);
00331 
              // Overload returning int; presumably an index for sequenceName with
              // some not-found value -- confirm.
00337         int sequenceNameInList( std::string& sequenceName );
00338 
              // Returns bool; presumably fills the consensus member -- confirm.
00343         bool buildConsensus();
00344 
              // Private four-argument overload of addSequence(); every argument is
              // passed by value.
00352         void addSequence(genome::gnSequence seqToAdd, std::string seqName, int consensusStart, std::string originalConsensus);
00353 
              // Segment helpers over a start/stop range. The two add* members also
              // take another alignment by non-const reference. Range conventions
              // (inclusive/exclusive, 0/1-based) are not visible here.
00360         void addAllSegments(gnAlignedSequences &alignment, unsigned start, unsigned stop);
00368         void addAllSegmentsReplaceGaps(gnAlignedSequences &alignment, unsigned start, unsigned stop);
00374         void removeAllSegments(unsigned start, unsigned stop);
00375         
              // Maps a base character to an int; the mapping itself is defined in
              // the implementation file.
00381         int determineBaseIndex(char base);
00382         
              // Takes a line of text by value and returns bool; purpose not evident
              // from the declaration -- TODO document.
00388         bool coordinates(std::string line);
00389 
              // Written by assignFileName() and returned by
              // getAlignedSequenceFileName().
00390         std::string alignedSequenceFileName;
              // Commented-out declaration of a same-named member that used
              // gnSequence pointers instead of string pointers:
00391 //      list <pair <string*, genome::gnSequence*> > alignedSequences;
              // NOTE(review): presumably produced by buildConsensus() -- confirm.
00392         std::string consensus;
00393         std::vector <int> indexPositions; // 1->n if a standard alignment, variable for variable sites
00394 }; // gnAlignedSequences
00395 
00396 
00397 // Destructor: the body is empty, so no explicit cleanup is performed here;
// members are released only by their own destructors.
// NOTE(review): the public alignedSequences list stores raw std::string*
// pairs and nothing here deletes them -- confirm who owns those strings.
00398 inline
00399 gnAlignedSequences::~gnAlignedSequences() {}
00400 
// Stores a copy of `name` as this object's alignment file name. Only the
// alignedSequenceFileName member is assigned; no file is opened or read here.
00401 inline
00402 void gnAlignedSequences::assignFileName(std::string name) {alignedSequenceFileName=name;}
00403 
// Returns, by value, a copy of the file name held in alignedSequenceFileName.
// Not declared const, so it cannot be called on a const gnAlignedSequences.
00404 inline
00405 std::string gnAlignedSequences::getAlignedSequenceFileName() {return alignedSequenceFileName;}
00406 
00407 }
00408 
00409 #endif  // __gnAlignedSequences_h__

Generated on Fri Mar 14 06:01:03 2008 for libMems by doxygen 1.3.6