libMems/PhyloTree.h

Go to the documentation of this file.
00001 #ifdef HAVE_CONFIG_H
00002 #include "config.h"
00003 #endif
00004 
00005 #ifndef __PhyloTree_h__
00006 #define __PhyloTree_h__
00007 
00008 #include <vector>
00009 #include <string>
00010 #include <iostream>
00011 #include <sstream>
00012 #include <stack>
00013 
00014 //typedef unsigned int node_id_t;
00015 typedef size_t node_id_t;
00016 class TreeNode 
00017 {
00018 public:
00019         TreeNode() : distance(0) {};
00020         std::string name;       
00021         double distance;        
00022         std::vector< node_id_t > parents;       
00023         std::vector< node_id_t > children;      
00024 };
00025 
00026 template< class T >
00027 class PhyloTree 
00028 {
00029 public:
00030         PhyloTree();
00031         PhyloTree( const PhyloTree<T>& pt );
00032         PhyloTree<T>& operator=( const PhyloTree<T>& pt );
00033         ~PhyloTree();
00034         double weight;  
00035         node_id_t root; 
00036         std::vector< T > nodes; 
00037         void clear();
00041         void readTree( std::istream& tree_file );
00045         void writeTree( std::ostream& os ) const;
00049         double getHeight() const;
00053         double getHeight( node_id_t nodeI ) const;
00054 
00055         T& operator[]( const unsigned i ){ return nodes[i]; }
00056         const T& operator[]( const unsigned i ) const{ return nodes[i]; }
00057         size_t size() const{ return nodes.size(); }
00058         void push_back( T& t ){ nodes.push_back(t); }
00059         T& back() { return nodes.back(); }
00060         const T& back() const{ return nodes.back(); }
00061         void resize( const unsigned s ){ nodes.resize(s); }
00062 
00063 
00064         void swap( PhyloTree<T>& other )
00065         {
00066                 std::swap( weight, other.weight );
00067                 std::swap( root, other.root );
00068                 nodes.swap( other.nodes );
00069         }
00070 protected:
00071 };
00072 
00073 
00074 template< class T >
00075 PhyloTree<T>::PhyloTree()
00076 {
00077         weight = 0;
00078         root = 0;
00079 }
00080 
00081 template< class T >
00082 PhyloTree<T>::PhyloTree( const PhyloTree<T>& pt ) :
00083 nodes( pt.nodes ),
00084 weight( pt.weight ),
00085 root( pt.root )
00086 {}
00087 
00088 template< class T >
00089 PhyloTree<T>& PhyloTree<T>::operator=( const PhyloTree<T>& pt )
00090 {
00091         nodes = pt.nodes;
00092         weight = pt.weight;
00093         root = pt.root;
00094         return *this;
00095 }
00096 
00097 template< class T >
00098 PhyloTree<T>::~PhyloTree()
00099 {}
00100 
00101 template< class T >
00102 void PhyloTree<T>::clear()
00103 {
00104         nodes.clear();
00105         weight = 0;
00106         root = 0;
00107 }
00108 
00109 
00114 template< class T >
00115 void PhyloTree<T>::readTree( std::istream& tree_file )
00116 {
00117         std::string line;
00118         clear();
00119         if( !std::getline( tree_file, line ) )
00120                 return;
00121         // look for either a ; or a matched number of parenthesis, if
00122         // not found then read another line
00123         while(true){
00124                 int paren_count = 0;
00125                 for( size_t charI = 0; charI < line.size(); charI++ )
00126                 {
00127                         if( line[charI] == '(' )
00128                                 paren_count++;
00129                         if( line[charI] == ')' )
00130                                 paren_count--;
00131                 }
00132                 if( paren_count == 0 )
00133                         break;
00134                 if( paren_count != 0 ){
00135                         std::string another_line;
00136                         if( !std::getline( tree_file, another_line ) )
00137                                 return;
00138                         line += another_line;
00139                 }
00140         }
00141 
00142         std::stringstream line_str( line );
00143 
00144         // look for a weight
00145         std::string::size_type open_bracket_pos = line.find( "[" );
00146         std::string::size_type bracket_pos = line.find( "]" );
00147         if( open_bracket_pos != std::string::npos && bracket_pos != std::string::npos && 
00148                 open_bracket_pos < bracket_pos && bracket_pos < line.find( "(" ) ){
00149                 // read in a weight
00150                 getline( line_str, line, '[' );
00151                 getline( line_str, line, ']' );
00152                 std::stringstream weight_str( line );
00153                 weight_str >> weight;
00154         }
00155         
00156         // ready to begin parsing the tree data.
00157         std::string tree_line;
00158         std::getline( line_str, tree_line, ';' );
00159         uint read_state = 0;    
00160         uint section_start = 0;
00161         std::stack< node_id_t > node_stack;
00162         std::stringstream blen_str;
00163         T new_node;
00164         new_node.distance = 0;  // default the distance to 0
00165         bool already_read_name = false;
00166         bool blen_found = false;
00167         for( uint charI = 0; charI < tree_line.size(); charI++ ){
00168                 switch( tree_line[ charI ] ){
00169                         // if this is an open parens then simply create a new
00170                         // parent node and push it on the parent stack
00171                         case '(':
00172                                 if( node_stack.size() > 0 ){
00173                                         new_node.parents.clear();
00174                                         new_node.parents.push_back( node_stack.top() );
00175                                         (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00176                                 }
00177                                 node_stack.push( (node_id_t)(*this).size() );
00178                                 nodes.push_back( new_node );
00179                                 read_state = 1;
00180                                 section_start = charI + 1;
00181                                 break;
00182                         case ')':
00183                                 if( blen_found )
00184                                 {
00185                                         // read off a branch length
00186                                         blen_str.clear();
00187                                         blen_str.str( tree_line.substr( section_start, charI - section_start ) );
00188                                         blen_str >> (*this)[ node_stack.top() ].distance;
00189                                 }else{
00190                                         // read off a name, if possible
00191                                         if( read_state == 1 ){
00192                                                 new_node.parents.clear();
00193                                                 new_node.parents.push_back( node_stack.top() );
00194                                                 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00195                                                 node_stack.push( (node_id_t)(*this).size() );
00196                                                 nodes.push_back( new_node );
00197                                                 read_state = 2; // pop this node after reading its branch length
00198                                         }
00199                                         (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00200                                 }
00201                                 if( read_state == 2 )
00202                                         node_stack.pop();
00203                                 section_start = charI + 1;
00204                                 blen_found = false;
00205 
00206                                 // pop off the top of the node stack
00207                                 read_state = 2;
00208                                 break;
00209                         case ',':
00210                                 if( blen_found ){
00211                                         // read off a branch length
00212                                         blen_str.clear();
00213                                         blen_str.str( tree_line.substr( section_start, charI - section_start ) );
00214                                         blen_str >> (*this)[ node_stack.top() ].distance;
00215                                 }else{
00216                                         // read off a name, if possible
00217                                         if( read_state == 1 ){
00218                                                 new_node.parents.clear();
00219                                                 new_node.parents.push_back( node_stack.top() );
00220                                                 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00221                                                 node_stack.push( (node_id_t)(*this).size() );
00222                                                 nodes.push_back( new_node );
00223                                                 read_state = 2; // pop this node after reading its name
00224                                         }
00225                                         (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00226                                 }
00227                                 if( read_state == 2 )
00228                                         node_stack.pop();
00229                                 section_start = charI + 1;
00230                                 read_state = 1; // indicates that we'll be creating a new node when we hit :
00231                                 blen_found = false;
00232                                 break;
00233                         case ':':
00234                                 // read off a name, if possible
00235                                 if( read_state == 1 ){
00236                                         new_node.parents.clear();
00237                                         new_node.parents.push_back( node_stack.top() );
00238                                         (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00239                                         node_stack.push( (node_id_t)(*this).size() );
00240                                         nodes.push_back( new_node );
00241                                         read_state = 2; // pop this node after reading its branch length
00242                                 }
00243                                 (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00244                                 section_start = charI + 1;
00245                                 blen_found = true;
00246                                 break;
00247                         default:
00248                                 break;
00249                 }
00250         }
00251 
00252 }
00253 
00254 
00255 template< class T >
00256 void PhyloTree<T>::writeTree( std::ostream& os ) const{
00257         std::stack< node_id_t > node_stack;
00258         std::stack< uint > child_stack;
00259         node_stack.push( root );
00260         child_stack.push( 0 );
00261         bool write_branch_lengths = false;
00262         for( size_t nodeI = 0; nodeI < this->size(); nodeI++ )
00263         {
00264                 if( (*this)[nodeI].distance != 0 )
00265                 {
00266                         write_branch_lengths = true;
00267                         break;
00268                 }
00269         }
00270 
00271         if( (*this).weight != 0 )
00272                 os << "[" << weight << "]";
00273         os << "(";
00274 
00275         while( node_stack.size() > 0 ) {
00276                 if( (*this)[ node_stack.top() ].children.size() != 0 ){
00277                         // this is a parent node
00278                         // if we have scanned all its children then pop it
00279                         if( child_stack.top() == (*this)[ node_stack.top() ].children.size() ){
00280                                 os << ")";
00281                                 if( node_stack.size() > 1 && write_branch_lengths )
00282                                         os << ":" << (*this)[ node_stack.top() ].distance;
00283                                 node_stack.pop();
00284                                 child_stack.pop();
00285                                 continue;
00286                         }
00287                         // try to recurse to its children
00288                         // if the child is a parent as well spit out a paren
00289                         node_id_t child = (*this)[ node_stack.top() ].children[ child_stack.top() ];
00290                         node_stack.push( child );
00291                         child_stack.top()++;
00292                         // print a comma to separate multiple children
00293                         if( child_stack.top() > 1 )
00294                                 os << ",";
00295                         if( (*this)[ child ].children.size() > 0 ){
00296                                 child_stack.push( 0 );
00297                                 os << "(";
00298                         }
00299                         continue;
00300                 }
00301                 
00302                 // this is a leaf node
00303                 os << (*this)[ node_stack.top() ].name;
00304                 if( write_branch_lengths )
00305                         os << ":" << (*this)[ node_stack.top() ].distance;
00306                 
00307                 // pop the child
00308                 node_stack.pop();
00309         }
00310         os << ";" << std::endl;
00311 }
00312 
00313 
00314 template< class T >
00315 double PhyloTree<T>::getHeight() const
00316 {
00317         return getHeight( root );
00318 }
00319 
00320 template< class T >
00321 double PhyloTree<T>::getHeight( node_id_t nodeI ) const
00322 {
00323         if( (*this)[ nodeI ].children.size() == 0 )
00324                 return (*this)[ nodeI ].distance;
00325         return (*this)[ nodeI ].distance + getHeight( (*this)[ nodeI ].children[ 0 ] );
00326 }
00327 
00328 
00330 template< class TreeType >
00331 void getDescendants( TreeType& alignment_tree, node_id_t node, std::vector< node_id_t >& descendants )
00332 {
00333         // do a depth first search
00334         std::stack< node_id_t > node_stack;
00335         node_stack.push( node );
00336         descendants.clear();
00337         while( node_stack.size() > 0 )
00338         {
00339                 node_id_t cur_node = node_stack.top();
00340                 node_stack.pop();
00341                 if( alignment_tree[cur_node].children.size() > 0 )
00342                 {
00343                         node_stack.push(alignment_tree[cur_node].children[0]);
00344                         node_stack.push(alignment_tree[cur_node].children[1]);
00345                 }
00346                 descendants.push_back(cur_node);
00347         }
00348 }
00349 
00350 
00352 template< class TreeType >
00353 void getLeaves( TreeType& tree, node_id_t node, std::vector< node_id_t >& leaves )
00354 {
00355         // do a depth first search
00356         std::stack< node_id_t > node_stack;
00357         node_stack.push( node );
00358         leaves.clear();
00359         while( node_stack.size() > 0 )
00360         {
00361                 node_id_t cur_node = node_stack.top();
00362                 node_stack.pop();
00363                 if( tree[cur_node].children.size() > 0 )
00364                 {
00365                         node_stack.push(tree[cur_node].children[0]);
00366                         node_stack.push(tree[cur_node].children[1]);
00367                 }else
00368                         leaves.push_back(cur_node);
00369         }
00370 }
00371 
00372 namespace std {
00373 
00374 template< class T > inline
00375 void swap( PhyloTree<T>& a, PhyloTree<T>& b )
00376 {
00377         a.swap(b);
00378 }
00379 
00380 template<> inline void swap( PhyloTree<TreeNode>& a, PhyloTree<TreeNode>& b){ a.swap(b); }
00381 }
00382 
00383 #endif // __PhyloTree_h__

Generated on Fri Mar 14 06:01:03 2008 for libMems by doxygen 1.3.6