00001 #ifdef HAVE_CONFIG_H
00002 #include "config.h"
00003 #endif
00004
00005 #ifndef __PhyloTree_h__
00006 #define __PhyloTree_h__
00007
00008 #include <vector>
00009 #include <string>
00010 #include <iostream>
00011 #include <sstream>
00012 #include <stack>
00013
00014
00015 typedef size_t node_id_t;
00016 class TreeNode
00017 {
00018 public:
00019 TreeNode() : distance(0) {};
00020 std::string name;
00021 double distance;
00022 std::vector< node_id_t > parents;
00023 std::vector< node_id_t > children;
00024 };
00025
00026 template< class T >
00027 class PhyloTree
00028 {
00029 public:
00030 PhyloTree();
00031 PhyloTree( const PhyloTree<T>& pt );
00032 PhyloTree<T>& operator=( const PhyloTree<T>& pt );
00033 ~PhyloTree();
00034 double weight;
00035 node_id_t root;
00036 std::vector< T > nodes;
00037 void clear();
00041 void readTree( std::istream& tree_file );
00045 void writeTree( std::ostream& os ) const;
00049 double getHeight() const;
00053 double getHeight( node_id_t nodeI ) const;
00054
00055 T& operator[]( const unsigned i ){ return nodes[i]; }
00056 const T& operator[]( const unsigned i ) const{ return nodes[i]; }
00057 size_t size() const{ return nodes.size(); }
00058 void push_back( T& t ){ nodes.push_back(t); }
00059 T& back() { return nodes.back(); }
00060 const T& back() const{ return nodes.back(); }
00061 void resize( const unsigned s ){ nodes.resize(s); }
00062
00063
00064 void swap( PhyloTree<T>& other )
00065 {
00066 std::swap( weight, other.weight );
00067 std::swap( root, other.root );
00068 nodes.swap( other.nodes );
00069 }
00070 protected:
00071 };
00072
00073
00074 template< class T >
00075 PhyloTree<T>::PhyloTree()
00076 {
00077 weight = 0;
00078 root = 0;
00079 }
00080
00081 template< class T >
00082 PhyloTree<T>::PhyloTree( const PhyloTree<T>& pt ) :
00083 nodes( pt.nodes ),
00084 weight( pt.weight ),
00085 root( pt.root )
00086 {}
00087
00088 template< class T >
00089 PhyloTree<T>& PhyloTree<T>::operator=( const PhyloTree<T>& pt )
00090 {
00091 nodes = pt.nodes;
00092 weight = pt.weight;
00093 root = pt.root;
00094 return *this;
00095 }
00096
00097 template< class T >
00098 PhyloTree<T>::~PhyloTree()
00099 {}
00100
00101 template< class T >
00102 void PhyloTree<T>::clear()
00103 {
00104 nodes.clear();
00105 weight = 0;
00106 root = 0;
00107 }
00108
00109
00114 template< class T >
00115 void PhyloTree<T>::readTree( std::istream& tree_file )
00116 {
00117 std::string line;
00118 clear();
00119 if( !std::getline( tree_file, line ) )
00120 return;
00121
00122
00123 while(true){
00124 int paren_count = 0;
00125 for( size_t charI = 0; charI < line.size(); charI++ )
00126 {
00127 if( line[charI] == '(' )
00128 paren_count++;
00129 if( line[charI] == ')' )
00130 paren_count--;
00131 }
00132 if( paren_count == 0 )
00133 break;
00134 if( paren_count != 0 ){
00135 std::string another_line;
00136 if( !std::getline( tree_file, another_line ) )
00137 return;
00138 line += another_line;
00139 }
00140 }
00141
00142 std::stringstream line_str( line );
00143
00144
00145 std::string::size_type open_bracket_pos = line.find( "[" );
00146 std::string::size_type bracket_pos = line.find( "]" );
00147 if( open_bracket_pos != std::string::npos && bracket_pos != std::string::npos &&
00148 open_bracket_pos < bracket_pos && bracket_pos < line.find( "(" ) ){
00149
00150 getline( line_str, line, '[' );
00151 getline( line_str, line, ']' );
00152 std::stringstream weight_str( line );
00153 weight_str >> weight;
00154 }
00155
00156
00157 std::string tree_line;
00158 std::getline( line_str, tree_line, ';' );
00159 uint read_state = 0;
00160 uint section_start = 0;
00161 std::stack< node_id_t > node_stack;
00162 std::stringstream blen_str;
00163 T new_node;
00164 new_node.distance = 0;
00165 bool already_read_name = false;
00166 bool blen_found = false;
00167 for( uint charI = 0; charI < tree_line.size(); charI++ ){
00168 switch( tree_line[ charI ] ){
00169
00170
00171 case '(':
00172 if( node_stack.size() > 0 ){
00173 new_node.parents.clear();
00174 new_node.parents.push_back( node_stack.top() );
00175 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00176 }
00177 node_stack.push( (node_id_t)(*this).size() );
00178 nodes.push_back( new_node );
00179 read_state = 1;
00180 section_start = charI + 1;
00181 break;
00182 case ')':
00183 if( blen_found )
00184 {
00185
00186 blen_str.clear();
00187 blen_str.str( tree_line.substr( section_start, charI - section_start ) );
00188 blen_str >> (*this)[ node_stack.top() ].distance;
00189 }else{
00190
00191 if( read_state == 1 ){
00192 new_node.parents.clear();
00193 new_node.parents.push_back( node_stack.top() );
00194 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00195 node_stack.push( (node_id_t)(*this).size() );
00196 nodes.push_back( new_node );
00197 read_state = 2;
00198 }
00199 (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00200 }
00201 if( read_state == 2 )
00202 node_stack.pop();
00203 section_start = charI + 1;
00204 blen_found = false;
00205
00206
00207 read_state = 2;
00208 break;
00209 case ',':
00210 if( blen_found ){
00211
00212 blen_str.clear();
00213 blen_str.str( tree_line.substr( section_start, charI - section_start ) );
00214 blen_str >> (*this)[ node_stack.top() ].distance;
00215 }else{
00216
00217 if( read_state == 1 ){
00218 new_node.parents.clear();
00219 new_node.parents.push_back( node_stack.top() );
00220 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00221 node_stack.push( (node_id_t)(*this).size() );
00222 nodes.push_back( new_node );
00223 read_state = 2;
00224 }
00225 (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00226 }
00227 if( read_state == 2 )
00228 node_stack.pop();
00229 section_start = charI + 1;
00230 read_state = 1;
00231 blen_found = false;
00232 break;
00233 case ':':
00234
00235 if( read_state == 1 ){
00236 new_node.parents.clear();
00237 new_node.parents.push_back( node_stack.top() );
00238 (*this)[ node_stack.top() ].children.push_back( (node_id_t)(*this).size() );
00239 node_stack.push( (node_id_t)(*this).size() );
00240 nodes.push_back( new_node );
00241 read_state = 2;
00242 }
00243 (*this)[ node_stack.top() ].name = tree_line.substr( section_start, charI - section_start );
00244 section_start = charI + 1;
00245 blen_found = true;
00246 break;
00247 default:
00248 break;
00249 }
00250 }
00251
00252 }
00253
00254
00255 template< class T >
00256 void PhyloTree<T>::writeTree( std::ostream& os ) const{
00257 std::stack< node_id_t > node_stack;
00258 std::stack< uint > child_stack;
00259 node_stack.push( root );
00260 child_stack.push( 0 );
00261 bool write_branch_lengths = false;
00262 for( size_t nodeI = 0; nodeI < this->size(); nodeI++ )
00263 {
00264 if( (*this)[nodeI].distance != 0 )
00265 {
00266 write_branch_lengths = true;
00267 break;
00268 }
00269 }
00270
00271 if( (*this).weight != 0 )
00272 os << "[" << weight << "]";
00273 os << "(";
00274
00275 while( node_stack.size() > 0 ) {
00276 if( (*this)[ node_stack.top() ].children.size() != 0 ){
00277
00278
00279 if( child_stack.top() == (*this)[ node_stack.top() ].children.size() ){
00280 os << ")";
00281 if( node_stack.size() > 1 && write_branch_lengths )
00282 os << ":" << (*this)[ node_stack.top() ].distance;
00283 node_stack.pop();
00284 child_stack.pop();
00285 continue;
00286 }
00287
00288
00289 node_id_t child = (*this)[ node_stack.top() ].children[ child_stack.top() ];
00290 node_stack.push( child );
00291 child_stack.top()++;
00292
00293 if( child_stack.top() > 1 )
00294 os << ",";
00295 if( (*this)[ child ].children.size() > 0 ){
00296 child_stack.push( 0 );
00297 os << "(";
00298 }
00299 continue;
00300 }
00301
00302
00303 os << (*this)[ node_stack.top() ].name;
00304 if( write_branch_lengths )
00305 os << ":" << (*this)[ node_stack.top() ].distance;
00306
00307
00308 node_stack.pop();
00309 }
00310 os << ";" << std::endl;
00311 }
00312
00313
00314 template< class T >
00315 double PhyloTree<T>::getHeight() const
00316 {
00317 return getHeight( root );
00318 }
00319
00320 template< class T >
00321 double PhyloTree<T>::getHeight( node_id_t nodeI ) const
00322 {
00323 if( (*this)[ nodeI ].children.size() == 0 )
00324 return (*this)[ nodeI ].distance;
00325 return (*this)[ nodeI ].distance + getHeight( (*this)[ nodeI ].children[ 0 ] );
00326 }
00327
00328
00330 template< class TreeType >
00331 void getDescendants( TreeType& alignment_tree, node_id_t node, std::vector< node_id_t >& descendants )
00332 {
00333
00334 std::stack< node_id_t > node_stack;
00335 node_stack.push( node );
00336 descendants.clear();
00337 while( node_stack.size() > 0 )
00338 {
00339 node_id_t cur_node = node_stack.top();
00340 node_stack.pop();
00341 if( alignment_tree[cur_node].children.size() > 0 )
00342 {
00343 node_stack.push(alignment_tree[cur_node].children[0]);
00344 node_stack.push(alignment_tree[cur_node].children[1]);
00345 }
00346 descendants.push_back(cur_node);
00347 }
00348 }
00349
00350
00352 template< class TreeType >
00353 void getLeaves( TreeType& tree, node_id_t node, std::vector< node_id_t >& leaves )
00354 {
00355
00356 std::stack< node_id_t > node_stack;
00357 node_stack.push( node );
00358 leaves.clear();
00359 while( node_stack.size() > 0 )
00360 {
00361 node_id_t cur_node = node_stack.top();
00362 node_stack.pop();
00363 if( tree[cur_node].children.size() > 0 )
00364 {
00365 node_stack.push(tree[cur_node].children[0]);
00366 node_stack.push(tree[cur_node].children[1]);
00367 }else
00368 leaves.push_back(cur_node);
00369 }
00370 }
00371
00372 namespace std {
00373
00374 template< class T > inline
00375 void swap( PhyloTree<T>& a, PhyloTree<T>& b )
00376 {
00377 a.swap(b);
00378 }
00379
00380 template<> inline void swap( PhyloTree<TreeNode>& a, PhyloTree<TreeNode>& b){ a.swap(b); }
00381 }
00382
00383 #endif // __PhyloTree_h__