libMems/MuscleInterface.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: MuscleInterface.cpp,v 1.27 2004/03/01 02:40:08 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * Please see the file called COPYING for licensing, copying, and modification
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifdef HAVE_CONFIG_H
00010 #include "config.h"
00011 #endif
00012 
00013 #include "libMems/MuscleInterface.h"
00014 
00015 #include "libGenome/gnFilter.h"
00016 #include "libGenome/gnFASSource.h"
00017 #include "libGenome/gnStringTools.h"
00018 #include "libMUSCLE/muscle.h"
00019 #include "libMUSCLE/params.h"
00020 #include "libMUSCLE/msa.h"
00021 #include "libMUSCLE/seq.h"
00022 #include "libMUSCLE/seqvect.h"
00023 #include "libMUSCLE/tree.h"
00024 #include "libMUSCLE/clust.h"
00025 #include "libMUSCLE/profile.h"
00026 #include "libMUSCLE/distfunc.h"
00027 #include "libMUSCLE/clustsetdf.h"
00028 #include "libMUSCLE/textfile.h"
00029 #include "libMUSCLE/types.h"
00030 #include "boost/algorithm/string/erase.hpp"
00031 #include "boost/algorithm/string/case_conv.hpp"
00032 
00033 #include <sstream>
00034 #include <fstream>
00035 
00036 // this gets defined in muscle.cpp, but not declared in any headers
00037 extern void MUSCLE(SeqVect &v, MSA &msaOut);
00038 extern void RefineW(const MSA &msaIn, MSA &msaOut);
00039 
00040 using namespace std;
00041 using namespace genome;
00042 namespace mems {
00043 
00044 bool debug_muscle = false;
00045 
00046 bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error );
00047 
00048 char** parseCommand( const string& cmd );
00049 char** parseCommand( const string& cmd ){
00050         // first count tokens
00051 
00052         // tokenize on "
00053         stringstream qs( cmd );
00054         string cur_str;
00055         boolean in_quote = true;
00056         int token_count = 0;
00057         vector< string > cmd_tokens;
00058         while( getline( qs, cur_str, '"' ) ){
00059                 // never start out in a quote
00060                 in_quote = !in_quote;
00061                 if( cur_str.length() == 0 )
00062                         continue;
00063                 if( in_quote ){
00064                         cmd_tokens.push_back( cur_str );
00065                 }else{
00066                         stringstream ss( cur_str );
00067                         string asdf;
00068                         while( ss >> asdf )
00069                                 cmd_tokens.push_back( asdf );
00070                 }
00071         }
00072         char ** cmd_array = new char*[ cmd_tokens.size() + 1 ];
00073         for( int tokI = 0; tokI < cmd_tokens.size(); tokI++ ){
00074                 cmd_array[ tokI ] = new char[ cmd_tokens[ tokI ].length() + 1 ];
00075                 strcpy( cmd_array[ tokI ], cmd_tokens[ tokI ].c_str() );
00076         }
00077         cmd_array[ cmd_tokens.size() ] = NULL;
00078         return cmd_array;
00079 }
00080 
00081 #if !defined(WIN32)
00082 #include <unistd.h>
00083 #include <fcntl.h>
00084 #include <errno.h>
00085 #include <sys/types.h>
00086 #include <sys/wait.h>
00087 
00088 // unix pipelined execution code
00089 bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error ){
00090         int stdin_pipe[2], stdout_pipe[2], stderr_pipe[2];
00091         boolean success = false;
00092         pid_t sid;
00093         pid_t pid1;
00094         const char* fail;
00095         char buf[1024];
00096         ssize_t bread = 0;
00097         int rval = 0;
00098 
00099         if((sid = setsid()) < 0)sid = getpgrp();
00100         
00101         if((sid < 0 && (fail = "sid"))
00102          || (pipe(stdin_pipe) < 0 && (fail = "stdin"))
00103          || (pipe(stdout_pipe) < 0 && (fail = "stdout"))
00104 //       || (pipe(stderr_pipe) < 0 && (fail = "stderr"))
00105         )
00106     {
00107                 fprintf(stderr, "Ouch, the world just collapsed (%s).\n", fail);
00108                 perror("muscle:");
00109                 goto cleanup;
00110         }
00111         
00112         fcntl(stdin_pipe[0], F_SETFL, fcntl(stdin_pipe[0], F_GETFL) & ~O_NONBLOCK);
00113         fcntl(stdin_pipe[1], F_SETFL, fcntl(stdin_pipe[1], F_GETFL) & ~O_NONBLOCK);
00114         fcntl(stdout_pipe[0], F_SETFL, fcntl(stdout_pipe[0], F_GETFL) & ~O_NONBLOCK);
00115         fcntl(stdout_pipe[1], F_SETFL, fcntl(stdout_pipe[1], F_GETFL) & ~O_NONBLOCK);
00116 /*      fcntl(stderr_pipe[0], F_SETFL, fcntl(stderr_pipe[0], F_GETFL) & ~O_NONBLOCK);
00117         fcntl(stderr_pipe[1], F_SETFL, fcntl(stderr_pipe[1], F_GETFL) & ~O_NONBLOCK);
00118 */      
00119         if((pid1 = fork()) < 0)goto cleanup;    
00120         if(pid1)
00121                 setpgid(pid1, sid);
00122         else
00123         {
00124                 dup2(stdin_pipe[0], 0);
00125                 dup2(stdout_pipe[1], 1);
00126 //              dup2(stderr_pipe[1], 2);
00127                 close( stdin_pipe[0] );
00128                 close( stdin_pipe[1] );
00129                 close( stdout_pipe[0] );
00130                 close( stdout_pipe[1] );
00131 //              close( stderr_pipe[0] );
00132 //              close( stderr_pipe[1] );
00133                 execvp(cmd_argv[0], cmd_argv);
00134                 _exit(errno);
00135         }
00136         rval = write( stdin_pipe[1], input.c_str(), input.size() );
00137         if( rval == -1 )
00138                 perror( "write: " );
00139         if( close( stdin_pipe[1] ) )
00140                 perror( "close stdin_w: " );
00141         if( close( stdin_pipe[0] ) )
00142                 perror( "close stdin_r: " );
00143 
00144         close( stdout_pipe[1] );
00145         // read the alignment
00146         while(true){
00147                 bzero( buf, sizeof(buf) );
00148                 bread = read( stdout_pipe[0], buf, 1023 );
00149                 if( bread == 0 )
00150                         break;  // reached EOF
00151                 if( bread == -1 ){
00152                         perror("muscle read: " );
00153                 }
00154                 output += buf;
00155         }
00156         wait( NULL );
00157         success = true;
00158         
00159 cleanup:
00160         close(stdin_pipe[0]);
00161         close(stdin_pipe[1]);
00162         close(stdout_pipe[0]);
00163         close(stdout_pipe[1]);
00164 //      close(stderr_pipe[0]);
00165 //      close(stderr_pipe[1]);
00166         return success;
00167 };
00168 
00169 
00170 #else
00171 
00172 //windows piping code
00173 #include <windows.h>
00174 #define bzero(a) memset(a,0,sizeof(a)) //easier -- shortcut
00175 
00176 bool IsWinNT()  //check if we're running NT
00177 {
00178   OSVERSIONINFO osv;
00179   osv.dwOSVersionInfoSize = sizeof(osv);
00180   GetVersionEx(&osv);
00181   return (osv.dwPlatformId == VER_PLATFORM_WIN32_NT);
00182 }
00183 
00184 void ErrorMessage(char *str)  //display detailed error info
00185 {
00186   LPVOID msg;
00187   FormatMessage(
00188     FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM,
00189     NULL,
00190     GetLastError(),
00191     MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
00192     (LPTSTR) &msg,
00193     0,
00194     NULL
00195   );
00196   printf("%s: %s\n",str,msg);
00197   LocalFree(msg);
00198 }
00199 
00200 bool pipeExec( char** cmd_argv, const string& command, const string& input, string& output, string& error ){
00201 
00202         char buf[1024];           //i/o buffer
00203 
00204         STARTUPINFO si;
00205         SECURITY_ATTRIBUTES sa;
00206         SECURITY_DESCRIPTOR sd;               //security information for pipes
00207         PROCESS_INFORMATION pi;
00208         HANDLE newstdin_w,newstdout_w,newstderr_w,newstdin_r,newstdout_r,newstderr_r;
00209         HANDLE read_stdout,read_stderr,write_stdin;  //pipe handles
00210         boolean success = false;
00211 
00212         if (IsWinNT())        //initialize security descriptor (Windows NT)
00213         {
00214                 InitializeSecurityDescriptor(&sd,SECURITY_DESCRIPTOR_REVISION);
00215                 SetSecurityDescriptorDacl(&sd, true, NULL, false);
00216                 sa.lpSecurityDescriptor = &sd;
00217         }
00218         else sa.lpSecurityDescriptor = NULL;
00219 
00220         sa.nLength = sizeof(SECURITY_ATTRIBUTES);
00221         sa.bInheritHandle = true;         //allow inheritable handles
00222 
00223         if (!CreatePipe(&newstdin_r,&newstdin_w,&sa,0))   //create stdin pipe
00224         {
00225                 ErrorMessage("CreatePipe");
00226                 goto finito;
00227         }
00228         if (!CreatePipe(&newstdout_r,&newstdout_w,&sa,0))  //create stdout pipe
00229         {
00230                 ErrorMessage("CreatePipe");
00231                 goto finito;
00232         }
00233         if (!CreatePipe(&newstderr_r,&newstderr_w,&sa,0))  //create stdout pipe
00234         {
00235                 ErrorMessage("CreatePipe");
00236                 goto finito;
00237         }
00238         // Duplicate the write handle to the pipe so it is not inherited. 
00239         boolean fSuccess = DuplicateHandle(GetCurrentProcess(), newstdin_w, 
00240                 GetCurrentProcess(), &write_stdin, 0, 
00241                 FALSE,                  // not inherited 
00242                 DUPLICATE_SAME_ACCESS); 
00243         if (! fSuccess){
00244                 ErrorMessage("DuplicateHandle failed"); 
00245                 goto finito;
00246         }
00247         CloseHandle(newstdin_w); 
00248         newstdin_w = INVALID_HANDLE_VALUE;
00249 
00250         // Duplicate the read handle to the pipe so it is not inherited. 
00251         fSuccess = DuplicateHandle(GetCurrentProcess(), newstdout_r, 
00252                 GetCurrentProcess(), &read_stdout, 0, 
00253                 FALSE,                  // not inherited 
00254                 DUPLICATE_SAME_ACCESS); 
00255         if (! fSuccess){
00256                 ErrorMessage("DuplicateHandle failed"); 
00257                 goto finito;
00258         }
00259         CloseHandle(newstdout_r); 
00260         newstdout_r = INVALID_HANDLE_VALUE;
00261 
00262         // Duplicate the read handle to the pipe so it is not inherited. 
00263         fSuccess = DuplicateHandle(GetCurrentProcess(), newstderr_r, 
00264                 GetCurrentProcess(), &read_stderr, 0, 
00265                 FALSE,                  // not inherited 
00266                 DUPLICATE_SAME_ACCESS); 
00267         if (! fSuccess){
00268                 ErrorMessage("DuplicateHandle failed"); 
00269                 goto finito;
00270         }
00271         CloseHandle(newstderr_r); 
00272         newstderr_r = INVALID_HANDLE_VALUE;
00273 
00274         GetStartupInfo(&si);      //set startupinfo for the spawned process
00275         /*
00276                 The dwFlags member tells CreateProcess how to make the process.
00277                 STARTF_USESTDHANDLES validates the hStd* members. STARTF_USESHOWWINDOW
00278                 validates the wShowWindow member.
00279         */
00280         si.dwFlags = STARTF_USESTDHANDLES|STARTF_USESHOWWINDOW;
00281         si.wShowWindow = SW_HIDE;
00282         si.hStdOutput = newstdout_w;
00283         si.hStdError = newstderr_w;     //set the new handles for the child process
00284         si.hStdInput = newstdin_r;
00285 
00286         //spawn the child process
00287         char* cmd = new char[ command.length() + 1 ];
00288         strcpy( cmd, command.c_str() );
00289         if (!CreateProcess(NULL,cmd,NULL,NULL,TRUE,0,
00290                                                 NULL,NULL,&si,&pi))
00291         {
00292                 delete cmd;
00293                 ErrorMessage("CreateProcess");
00294                 goto finito;
00295         }
00296         delete cmd;
00297 
00298         unsigned long exit=0;  //process exit code
00299         unsigned long bread;   //bytes read
00300         unsigned long avail;   //bytes available
00301 
00302         WriteFile(write_stdin, input.c_str(), input.size(), &bread, NULL); //send data to stdin
00303         CloseHandle(write_stdin);
00304         write_stdin = INVALID_HANDLE_VALUE;
00305 
00306         // Wait until child process exits.
00307         while( true ){
00308                 GetExitCodeProcess( pi.hProcess, &exit );
00309                 if( exit != STILL_ACTIVE )
00310                         WaitForSingleObject( pi.hProcess, INFINITE );
00311                         
00312                 // read anything that came to stdout
00313                 PeekNamedPipe(read_stdout,buf,1023,&bread,&avail,NULL);
00314                 if( avail == 0 )
00315                         Sleep(5);       // didn't get anything, so take a break to avoid hogging the CPU...
00316                 while( avail > 0 ){
00317                         bzero(buf);
00318                         int read_size = 1023 < avail ? 1023 : avail;
00319                         ReadFile(read_stdout,buf,read_size,&bread,NULL);  //read the stdout pipe
00320                         avail -= bread;
00321                         output += buf;
00322                 }
00323 
00324                 // read anything that came to stderr
00325                 PeekNamedPipe(read_stderr,buf,1023,&bread,&avail,NULL);
00326                 while( avail > 0 ){
00327                         bzero(buf);
00328                         int read_size = 1023 < avail ? 1023 : avail;
00329                         ReadFile(read_stderr,buf,read_size,&bread,NULL);  //read the stdout pipe
00330                         avail -= bread;
00331                         error += buf;
00332                 }
00333 
00334                 if( exit != STILL_ACTIVE )
00335                         break;
00336         }
00337         // Wait until child process exits.
00338     WaitForSingleObject( pi.hProcess, INFINITE );
00339         success = true;
00340 
00341         //clean up and exit
00342 finito:
00343     if( pi.hThread != INVALID_HANDLE_VALUE )
00344                 CloseHandle(pi.hThread);
00345     if( pi.hProcess != INVALID_HANDLE_VALUE )
00346                 CloseHandle(pi.hProcess);
00347     if( newstdin_r != INVALID_HANDLE_VALUE )
00348                 CloseHandle(newstdin_r);
00349     if( newstdout_w != INVALID_HANDLE_VALUE )
00350                 CloseHandle(newstdout_w);
00351     if( newstderr_w != INVALID_HANDLE_VALUE )
00352                 CloseHandle(newstderr_w);
00353     if( read_stdout != INVALID_HANDLE_VALUE )
00354                 CloseHandle(read_stdout);
00355     if( read_stderr != INVALID_HANDLE_VALUE )
00356                 CloseHandle(read_stderr);
00357     if( write_stdin != INVALID_HANDLE_VALUE )
00358                 CloseHandle(write_stdin);
00359         return success;
00360 }
00361 
00362 #endif
00363 
00364 
00365 
00366 MuscleInterface& MuscleInterface::getMuscleInterface()
00367 {
00368         static MuscleInterface m_ci;
00369 
00370         return m_ci;
00371 }
00372 
00373 MuscleInterface::MuscleInterface() : GappedAligner() {
00374         muscle_path = "muscle_aed";
00375         muscle_arguments = "-stable -quiet -seqtype DNA";
00376         muscle_cmdline = parseCommand( muscle_path + " " + muscle_arguments );
00377         max_alignment_length = 12500;
00378 }
00379 
00380 void MuscleInterface::ParseMusclePath( const char* argv0 ){
00381         // get the execution path
00382         string path_str = argv0;
00383         // trim quotes
00384         if( path_str[0] == '"' )
00385                 path_str = path_str.substr( 1, path_str.size() - 2 );
00386         standardizePathString( path_str );
00387         string::size_type i = path_str.rfind('/');
00388         if( i != string::npos )
00389                 path_str.erase(i+1, path_str.length() - (i+1));
00390         else
00391                 path_str.clear();
00392         SetMusclePath( '"' + path_str + "muscle_aed\"");
00393 }
00394 
00395 void MuscleInterface::SetMusclePath( const string& path ){
00396         muscle_path = path;
00397         ClearCommandLine();
00398         muscle_cmdline = parseCommand( muscle_path + " " + muscle_arguments );
00399 }
00400 
00401 void MuscleInterface::SetExtraMuscleArguments( const string& args )
00402 {
00403         extra_muscle_arguments = args;
00404 }
00405 
00406 void MuscleInterface::SetMuscleArguments( const string& args )
00407 {
00408         ClearCommandLine();
00409         muscle_arguments = args + " " + extra_muscle_arguments;
00410         muscle_cmdline = parseCommand( muscle_path + " " + args + " " + extra_muscle_arguments );
00411 }
00412 
00413 MuscleInterface& MuscleInterface::operator=( const MuscleInterface& ci ){
00414         GappedAligner::operator =( ci );
00415         return *this;
00416 }
00417 
00418 //tjt: not the best way of doing this, should have just one Align function that takes an AbstractMatch*,
00419         //     not both Match* & AbstractMatch* in separate, nearly identical functions..
00420         //     Such a change would involve changes to GappedAligner, and would require some additional care taken
00421         //     with SeqCount & Multiplicity, as well as seq_table[ seqI ]->length()/seq_table[ 0 ]->length(i),
00422         //     for now, leave like this. hopefully sooner than later, make pretty!
00423 boolean MuscleInterface::Align( GappedAlignment& cr, Match* r_begin, Match* r_end, vector< gnSequence* >& seq_table ){
00424         gnSeqI gap_size = 0;
00425         boolean create_ok = true;
00426         uint seq_count = seq_table.size();
00427         //seq_count = r_begin->Multiplicity();
00428         uint seqI;
00429         uint align_seqs = 0;
00430         vector< string > tmp_mat = vector< string >( seq_count );
00431 try{
00432 
00433 // 
00434 //      Get the sequence in the intervening gaps between these two matches
00435 //
00436         vector< string > seq_data;
00437         vector< int64 > starts;
00438         vector< uint > seqs;
00439         const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
00440         
00441         for( seqI = 0; seqI < seq_count; seqI++ ){
00442 
00443                 // skip this sequence if it's undefined
00444                 if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH ) ||
00445                         (r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
00446                         starts.push_back( NO_MATCH );
00447                         continue;
00448                 }
00449 
00450                 // determine the size of the gap
00451                 int64 gap_start = 0;
00452                 int64 gap_end = 0;
00453                 getInterveningCoordinates( seq_table, r_begin, r_end, seqI, gap_start, gap_end );
00454 
00455                 int64 diff = gap_end - gap_start;
00456                 if( diff <= 0 || diff > max_alignment_length ){
00457                         starts.push_back( NO_MATCH );
00458                         continue;       // skip this sequence if it's either too big or too small
00459                 }
00460                 seqs.push_back( seqI );
00461 // the gnSequence pointers are shared across threads and have a common ifstream
00462                 // extract sequence data
00463                 if( r_end == NULL || r_end->Start( seqI ) > 0 ){
00464                         starts.push_back( gap_start );
00465                         seq_data.push_back( seq_table[ seqI ]->ToString( diff , gap_start ) );
00466                 }else{
00467                         // reverse complement the sequence data.
00468                         starts.push_back( -gap_start );
00469                         string cur_seq_data = seq_table[ seqI ]->ToString( diff , gap_start );
00470                         rc_filter->ReverseFilter( cur_seq_data );
00471                         seq_data.push_back( cur_seq_data );
00472                 }
00473         }
00474 
00475         if( seqs.size() <= 1 )
00476                 create_ok = false;
00477 
00478         if( create_ok ){
00479 //              SetMuscleArguments( " -quiet -stable -seqtype DNA " );
00480                 vector< string > aln_matrix;
00481                 if( !CallMuscleFast( aln_matrix, seq_data ) ){
00482                         cout << "Muscle was unable to align:\n";
00483                         if( r_begin )
00484                                 cout << "Left match: " << *r_begin << endl;
00485                         if( r_end )
00486                                 cout << "Right match: " << *r_end << endl;
00487                         return false;
00488                 }
00489 
00490                 gnSeqI aln_length = aln_matrix.size() == 0 ? 0 : aln_matrix[0].length();
00491                 cr = GappedAlignment( seq_count, aln_length );
00492                 vector< string > aln_mat = vector< string >( seq_count );
00493 
00494                 // set sequence starts
00495                 for( uint seqI = 0; seqI < seqs.size(); seqI++ ){
00496                         cr.SetLength( seq_data[ seqI ].size(), seqs[ seqI ] );
00497                         aln_mat[ seqs[ seqI ] ] = aln_matrix[ seqI ];
00498                 }
00499                 for( uint seqI = 0; seqI < seq_count; seqI++ ){
00500                         cr.SetStart( seqI, starts[ seqI ] );
00501                         if( aln_mat[ seqI ].length() != aln_length )
00502                                 aln_mat[ seqI ] = string( aln_length, '-' );
00503                 }
00504 
00505                 cr.SetAlignment( aln_mat );
00506 
00507                 return true;
00508         }
00509 }catch(exception& e){
00510         cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
00511         cerr << e.what();
00512 }
00513         return false;
00514 }
00515 
00516 static int failure_count = 0;
00517 
00518 boolean MuscleInterface::Align( GappedAlignment& cr, AbstractMatch* r_begin, AbstractMatch* r_end, vector< gnSequence* >& seq_table ){
00519         gnSeqI gap_size = 0;
00520         boolean create_ok = true;
00521         //tjt: set the seq_count to a match m's multiplicity
00522         //     even though all components n of match m could be 
00523         //     less than the k sequences
00524         //     if n == k, then perhaps there is 1 match component per sequence
00525         //     if k = 1, n == repeat match multiplicity, where n >= 2
00526         //     
00527         uint seq_count = r_begin->Multiplicity();
00528         uint seqI;
00529         uint align_seqs = 0;
00530         vector< string > tmp_mat = vector< string >( seq_count );
00531 try{
00532 
00533 // 
00534 //      Get the sequence in the intervening gaps between these two matches
00535 //
00536         vector< string > seq_data;
00537         vector< int64 > starts;
00538         vector< uint > seqs;
00539         const gnFilter* rc_filter = gnFilter::DNAComplementFilter();
00540         
00541         //std::cout << "getting regions between match components to align" << std::endl;
00542         for( seqI = 0; seqI < seq_count; seqI++ ){
00543 
00544                 // skip this sequence if it's undefined
00545                 if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH ) ||
00546                         (r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
00547                         starts.push_back( NO_MATCH );
00548                         continue;
00549                 }
00550 
00551                 // determine the size of the gap
00552                 int64 gap_start = 0;
00553                 int64 gap_end = 0;
00554                 // skip this sequence if it's undefined
00555                 if( (r_end != NULL && r_end->Start( seqI ) == NO_MATCH) ||
00556                         (r_begin != NULL && r_begin->Start( seqI ) == NO_MATCH) ){
00557                         gap_start = 0;
00558                         gap_end = 0;
00559                         return true;
00560                 }
00561                                 
00562                 // determine the size of the gap
00563                 gap_end = r_end != NULL ? r_end->Start( seqI ) : seq_table[ seqI ]->length() + 1;
00564                 gap_start = r_begin != NULL ? r_begin->End( seqI ) + 1 : 1;
00565                 if( gap_end < 0 || gap_start < 0 ){
00566                         gap_end   = r_begin != NULL ? -r_begin->Start( seqI ) : seq_table[ 0 ]->length() + 1;
00567                         gap_start = r_end != NULL ? -r_end->Start( seqI ) + r_end->Length( seqI ) : 1;
00568                 }
00569                 if( gap_end <= 0 || gap_start <= 0 ){
00570                         // if either is still < 0 then there's a problem...
00571                         genome::ErrorMsg( "Error constructing intervening coordinates" );
00572                 }
00573 
00574                 int64 diff = gap_end - gap_start;
00575                 
00576                 //diff <= 0 ||
00577                 if( diff <= 0 || diff > max_alignment_length ){
00578                         starts.push_back( NO_MATCH );
00579                         continue;       // skip this sequence if it's either too big or too small
00580                 }
00581                 seqs.push_back( seqI );
00582 
00583                 // extract sequence data
00584                 if (0 )
00585                 {
00586                         starts.push_back( gap_start );
00587                         seq_data.push_back( "A" );
00588                         std::cout << "A" << std::endl;
00589                         diff = 1;
00590                 }
00591 // the gnSequence pointers are shared across threads and have a common ifstream
00592                 if( r_end == NULL || r_end->Start( seqI ) > 0 ){
00593                         starts.push_back( gap_start );
00594                         //std::cout << seq_table[ 0 ]->ToString( diff , gap_start ) << std::endl;
00595                         //tjt: all sequences are concatenated together into 1 seq_table entry
00596                         //
00597                         seq_data.push_back( seq_table[ 0 ]->ToString( diff , gap_start ) );
00598                 }else{
00599                         // reverse complement the sequence data.
00600                         starts.push_back( -gap_start );
00601                         //tjt: all sequences are concatenated together into 1 seq_table entry
00602                         //     
00603                         string cur_seq_data = seq_table[ 0 ]->ToString( diff , gap_start );
00604                         rc_filter->ReverseFilter( cur_seq_data );
00605                         seq_data.push_back( cur_seq_data );
00606                         //std::cout << cur_seq_data << std::endl;
00607                 }
00608         }
00609 
00610         if( seqs.size() <= 1 )
00611                 create_ok = false;
00612 
00613         if( create_ok ){
00614 //              SetMuscleArguments( " -quiet -stable -seqtype DNA " );
00615                 vector< string > aln_matrix;
00616                 if( !CallMuscleFast( aln_matrix, seq_data ) ){
00617                         cout << "Muscle was unable to align:\n";
00618                         //if( r_begin )
00619                         //      cout << "Left match: " << *r_begin << endl;
00620                         //if( r_end )
00621                         //      cout << "Right match: " << *r_end << endl;
00622                         return false;
00623                 }
00624 
00625                 gnSeqI aln_length = aln_matrix.size() == 0 ? 0 : aln_matrix[0].length();
00626                 cr = GappedAlignment( seq_count, aln_length );
00627                 vector< string > aln_mat = vector< string >( seq_count );
00628 
00629                 // set sequence starts
00630                 for( uint seqI = 0; seqI < seqs.size(); seqI++ ){
00631                         cr.SetLength( seq_data[ seqI ].size(), seqs[ seqI ] );
00632                         aln_mat[ seqs[ seqI ] ] = aln_matrix[ seqI ];
00633                 }
00634                 for( uint seqI = 0; seqI < seq_count; seqI++ ){
00635                         cr.SetStart( seqI, starts[ seqI ] );
00636                         if( aln_mat[ seqI ].length() != aln_length )
00637                                 aln_mat[ seqI ] = string( aln_length, '-' );
00638                 }
00639 
00640                 cr.SetAlignment( aln_mat );
00641 
00642                 return true;
00643         }
00644 }catch(exception& e){
00645         cerr << "At: " << __FILE__ << ":" << __LINE__ << endl;
00646         cerr << e.what();
00647 }
00648         return false;
00649 }
00650 
00651 boolean MuscleInterface::CallMuscle( vector< string >& aln_matrix, const vector< string >& seq_table )
00652 {
00653         gnSequence seq;
00654 
00655         try{
00656                 ostringstream input_seq_stream;
00657                 //istringstream muscle_input_seq_stream;
00658                 for( uint seqI = 0; seqI < seq_table.size(); seqI++ ){
00659                         seq += seq_table[ seqI ];
00660                         seq.setContigName( seqI, "seq" );
00661                 }
00662                 gnFASSource::Write( seq, input_seq_stream, false, true );               
00663                 // now open a pipe to Muscle
00664                 string muscle_cmd = muscle_path + " " + muscle_arguments;
00665                 string output;
00666                 string error;
00667                 boolean success = pipeExec( muscle_cmdline, muscle_cmd, input_seq_stream.str(), output, error );
00668                 if( !success || output.size() == 0 )
00669                 {
00670                         throw "b0rk3d";
00671                 }
00672 
00673                 istringstream output_aln_stream( output );
00674                 string cur_line;
00675 
00676                 // parse the fasta output
00677                 while( getline( output_aln_stream, cur_line ) ){
00678                         if( cur_line[0] == '>' ){
00679                                 aln_matrix.push_back( "" );
00680                                 continue;
00681                         }
00682                         gnSeqI len = cur_line.size();
00683                         len = cur_line[ len - 1 ] == '\r' ? len - 1 : len;
00684                         uint seqI = aln_matrix.size() - 1;
00685                         aln_matrix[ seqI ] += cur_line.substr( 0, len );
00686                 }
00687 
00688                 return true;
00689         }catch( gnException& gne ){
00690         }catch( exception& e ){
00691         }catch(...){
00692         }
00693         cerr << "muscle failed!  saving failed input data to muscle_failure_" << failure_count << ".txt\n";
00694         cerr << "Please contact the Mauve developers about this problem\n";
00695         stringstream debug_fname;
00696         debug_fname << "muscle_failure_" << failure_count++ << ".txt";
00697         ofstream debug_file( debug_fname.str().c_str() );
00698         gnFASSource::Write(seq, debug_file, false);
00699         debug_file.close();
00700         return false;
00701 }
00702 
00703 // version 2 of this code: attempt to call muscle without performing costly disk I/O!!
00704 boolean MuscleInterface::CallMuscleFast( vector< string >& aln_matrix, const vector< string >& seq_table )
00705 {
00706         g_SeqType.get() = SEQTYPE_DNA;  // we're operating on DNA
00707         g_uMaxIters.get() = 1;                  // and we don't want to refine the alignment...yet
00708         g_bStable.get() = true;                 // we want output seqs in the same order as input
00709         g_bQuiet.get() = true;                  // and don't print anything to the console
00710         g_SeqWeight1.get() = SEQWEIGHT_ClustalW;        // not sure what weighting scheme works best for DNA
00711 
00712         SetMaxIters(g_uMaxIters.get());
00713         SetSeqWeightMethod(g_SeqWeight1.get());
00714 
00715         // now construct a SeqVect containing input sequences
00716         SeqVect sv;
00717         const char* seqname = "seq00000";
00718         for( size_t seqI = 0; seqI < seq_table.size(); seqI++ )
00719         {
00720                 Seq curseq;
00721                 curseq.SetId(seqI);
00722                 curseq.SetName(seqname);
00723                 curseq.resize(seq_table[seqI].size());
00724                 std::copy(seq_table[seqI].begin(), seq_table[seqI].end(), curseq.begin());
00725                 sv.AppendSeq(curseq);
00726         }
00727 
00728         MSA msaTmp;
00729         MUSCLE(sv,msaTmp);
00730 
00731         // now extract the alignment
00732         aln_matrix.clear();
00733         aln_matrix.resize(msaTmp.GetSeqCount());
00734         for( size_t seqI = 0; seqI < msaTmp.GetSeqCount(); seqI++ )
00735         {
00736                 unsigned indie = msaTmp.GetSeqIndex(seqI);
00737                 const char* buf = msaTmp.GetSeqBuffer(indie);
00738                 string curseq(buf, msaTmp.GetColCount());
00739                 swap(aln_matrix[seqI],curseq);
00740         }
00741         return true;    // how can it possibly fail? :)
00742 }
00743 
00744 bool MuscleInterface::Refine( GappedAlignment& ga, size_t windowsize )
00745 {
00746         const vector< string >& seq_table = GetAlignment( ga, vector< gnSequence* >() );
00747         vector< string > aln_table;
00748         for( uint seqI = 0; seqI < ga.SeqCount(); seqI++ )
00749         {
00750                 if( ga.LeftEnd(seqI) != NO_MATCH )
00751                 {
00752                         aln_table.push_back( seq_table[seqI] );
00753                 }
00754         }
00755         vector< string > aln_matrix;
00756         if( windowsize == 0 )
00757                 SetMuscleArguments( " -quiet -refine -seqtype DNA " );
00758         else
00759         {
00760                 stringstream sstr;
00761                 sstr << " -quiet -seqtype DNA -refinew -refinewindow " << windowsize << " ";
00762                 SetMuscleArguments( sstr.str() );
00763         }
00764         bool success = CallMuscle( aln_matrix, aln_table );
00765         if( success )
00766         {
00767                 aln_table.clear();
00768                 uint alnI = 0;
00769                 for( uint seqI = 0; seqI < ga.SeqCount(); seqI++ )
00770                 {
00771                         if( ga.LeftEnd(seqI) != NO_MATCH )
00772                                 aln_table.push_back( aln_matrix[alnI++] );
00773                         else
00774                                 aln_table.push_back( string( aln_matrix[0].size(), '-' ) );
00775                 }
00776                 ga.SetAlignment( aln_table );
00777         }
00778         return success;
00779 }
00780 
00781 void msaFromSeqTable(MSA& msa, const vector< string >& seq_table, unsigned id_base = 0)
00782 {
00783         msa.SetSize(seq_table.size(), seq_table[0].size());
00784         for( uint seqI = 0; seqI < seq_table.size(); seqI++ )
00785         {
00786                 stringstream ss;
00787                 ss << "seq" << seqI;
00788                 msa.SetSeqName(seqI, ss.str().c_str());
00789                 msa.SetSeqId(seqI,seqI+id_base);
00790                 for(size_t i = 0; i < seq_table[seqI].size(); i++)
00791                         msa.SetChar(seqI, i, seq_table[seqI][i]);
00792         }
00793 }
00794 
00795 
00796 bool MuscleInterface::RefineFast( GappedAlignment& ga, size_t windowsize )
00797 {
00798         const vector< string >& seq_table = GetAlignment( ga, vector< gnSequence* >() );
00799         vector< string > aln_table;
00800         for( uint seqI = 0; seqI < ga.SeqCount(); seqI++ )
00801         {
00802                 if( ga.LeftEnd(seqI) != NO_MATCH )
00803                 {
00804                         aln_table.push_back( seq_table[seqI] );
00805                 }
00806         }
00807 
00808         g_SeqType.get() = SEQTYPE_DNA;  // we're operating on DNA
00809         g_uMaxIters.get() = 1;                  // and we don't want to refine the alignment...yet
00810         g_bStable.get() = true;                 // we want output seqs in the same order as input
00811         g_bQuiet.get() = true;                  // and don't print anything to the console
00812         g_SeqWeight1.get() = SEQWEIGHT_ClustalW;        // not sure what weighting scheme works best for DNA
00813 
00814         g_uRefineWindow.get() = windowsize;
00815         g_uWindowTo.get() = 0;
00816 
00817         SetMaxIters(g_uMaxIters.get());
00818         SetSeqWeightMethod(g_SeqWeight1.get());
00819 
00820         MSA::SetIdCount(seq_table.size());
00821 
00822         // create an MSA
00823         MSA msa;
00824         msaFromSeqTable(msa, seq_table);
00825 
00826         SetAlpha(ALPHA_DNA);
00827         msa.FixAlpha();
00828         SetPPScore(PPSCORE_SPN);
00829         SetMuscleInputMSA(msa);
00830 
00831         Tree GuideTree;
00832         TreeFromMSA(msa, GuideTree, g_Cluster2.get(), g_Distance2.get(), g_Root2.get());
00833         SetMuscleTree(GuideTree);
00834 
00835         MSA msaOut;
00836         MSA* finalMsa;
00837 
00838         if(windowsize == 0)
00839         {
00840                 if (g_bAnchors.get())
00841                         RefineVert(msa, GuideTree, g_uMaxIters.get());
00842                 else
00843                         RefineHoriz(msa, GuideTree, g_uMaxIters.get(), false, false);
00844                 finalMsa = &msa;
00845         }else{
00846                 RefineW(msa, msaOut);
00847                 finalMsa = &msaOut;
00848         }
00849 
00850 
00851         ValidateMuscleIds(*finalMsa);
00852         ValidateMuscleIds(GuideTree);
00853 
00854         // now extract the alignment
00855         vector< string > aln_matrix;
00856         aln_matrix.resize(finalMsa->GetSeqCount());
00857         for( size_t seqI = 0; seqI < finalMsa->GetSeqCount(); seqI++ )
00858         {
00859                 unsigned indie = finalMsa->GetSeqIndex(seqI);
00860                 const char* buf = finalMsa->GetSeqBuffer(indie);
00861                 string curseq(buf, finalMsa->GetColCount());
00862                 swap(aln_matrix[seqI],curseq);
00863         }
00864 
00865         ga.SetAlignment( aln_matrix );
00866         return true;
00867 }
00868 
00869 
00870 void stripGapColumns( std::vector< std::string >& aln )
00871 {
00872         size_t cur_col = 0;
00873         size_t gap_seq = 0;
00874         for( size_t colI = 0; colI < aln[0].size(); colI++ )
00875         {
00876                 gap_seq = 0;
00877                 for( ; gap_seq < aln.size(); gap_seq++ )
00878                         if( aln[gap_seq][colI] != '-' )
00879                                 break;
00880                 if( gap_seq != aln.size() )
00881                 {
00882                         for( gap_seq = 0; gap_seq < aln.size(); gap_seq++ )
00883                                 aln[gap_seq][cur_col] = aln[gap_seq][colI];
00884                         cur_col++;
00885                 }
00886         }
00887         for( gap_seq = 0; gap_seq < aln.size(); gap_seq++ )
00888                 aln[gap_seq].resize(cur_col);
00889 }
00890 
00891 void stripGaps( std::string& str )
00892 {
00893         std::string::iterator striter = std::remove(str.begin(), str.end(), '-');
00894         str.resize(striter - str.begin());
00895 }
00896 
00897 bool MuscleInterface::ProfileAlign( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored )
00898 {
00899         try{
00900                 const vector< string >& aln1 = GetAlignment( ga1, vector< gnSequence* >() );
00901                 const vector< string >& aln2 = GetAlignment( ga2, vector< gnSequence* >() );
00902                 vector< uint > order;
00903                 ostringstream input_seq_stream;
00904                 gnSequence seq;
00905                 vector< string > aln11( ga1.Multiplicity() );
00906                 vector< string > aln22( ga2.Multiplicity() );
00907                 size_t curI = 0;
00908                 for( uint seqI = 0; seqI < aln1.size(); seqI++ )
00909                 {
00910                         if( ga1.LeftEnd(seqI) != NO_MATCH )
00911                         {
00912                                 aln11[curI++] = aln1[seqI];
00913                                 order.push_back(seqI);
00914                         }
00915                 }
00916                 curI = 0;
00917                 for( uint seqI = 0; seqI < aln2.size(); seqI++ )
00918                 {
00919                         if( ga2.LeftEnd(seqI) != NO_MATCH )
00920                         {
00921                                 aln22[curI++] = aln2[seqI];
00922                                 order.push_back(seqI);
00923                         }
00924                 }
00925 // strip the gap columns only if we're doing unanchored PP alignment
00926                 if( !anchored )
00927                 {
00928                         stripGapColumns(aln11);
00929                         stripGapColumns(aln22);
00930                 }
00931                 for( uint seqI = 0; seqI < aln11.size(); seqI++ )
00932                 {
00933                         seq += aln11[ seqI ];
00934                         seq.setContigName( seq.contigListLength()-1, "seq" );
00935                 }
00936 
00937                 gnFASSource::Write( seq, input_seq_stream, false, true );
00938                 input_seq_stream << "=\n";
00939 
00940                 gnSequence seq2;
00941                 for( uint seqI = 0; seqI < aln22.size(); seqI++ )
00942                 {
00943                         seq2 += aln22[ seqI ];
00944                         seq2.setContigName( seq2.contigListLength()-1, "seq" );
00945                 }
00946 
00947                 gnFASSource::Write( seq2, input_seq_stream, false, true );
00948                 input_seq_stream << "=\n";
00949 
00950                 if( debug_muscle )
00951                 {
00952                         // for debugging: write the anchored profiles to a file
00953                         stringstream debug_fname;
00954                         debug_fname << "muscle_debug_" << failure_count++ << ".txt";
00955                         ofstream debug_file( debug_fname.str().c_str() );
00956                         debug_file << input_seq_stream.str();
00957                         debug_file.close();
00958                 }
00959 
00960                 // now open a pipe to Muscle
00961                 string musc_args = "-quiet -seqtype DNA -profile -ProfileOnStdIn ";
00962                 if( anchored )
00963                         musc_args += "-AnchoredPP ";
00964                 SetMuscleArguments( musc_args );
00965                 string output;
00966                 string error;
00967                 string muscle_cmd = muscle_path + " " + muscle_arguments;
00968                 if( debug_muscle )
00969                 {
00970                         cerr << "Running " << muscle_cmd << endl;
00971                 }
00972                 boolean success = pipeExec( muscle_cmdline, muscle_cmd, input_seq_stream.str(), output, error );
00973                 if( !success || output.size() == 0 )
00974                 {
00975                         if( output.size() == 0 )
00976                                 cerr << "\nmuscle nothing\n";
00977                         else
00978                                 cerr << "\nunsuccessful muscle\n";
00979                         return false;
00980                 }
00981 
00982                 istringstream output_aln_stream( output );
00983                 string cur_line;
00984 
00985                 // parse the fasta output
00986                 vector< string > aln_matrix( ga1.SeqCount() );
00987                 int ordI = -1;
00988                 while( getline( output_aln_stream, cur_line ) ){
00989                         if( cur_line[0] == '>' ){
00990                                 ordI++;
00991                                 continue;
00992                         }
00993                         gnSeqI len = cur_line.size();
00994                         len = cur_line[ len - 1 ] == '\r' ? len - 1 : len;
00995                         uint seqI = aln_matrix.size() - 1;
00996                         aln_matrix[ order[ordI] ] += cur_line.substr( 0, len );
00997                 }
00998                 for( size_t i = 0; i < aln_matrix.size(); i++ )
00999                 {
01000                         if( aln_matrix[i].size() == 0 )
01001                                 aln_matrix[i].resize( aln_matrix[order[0]].size(), '-' );
01002                 }
01003 
01004                 aln.SetAlignment( aln_matrix );
01005                 for( uint seqI = 0; seqI < ga1.SeqCount(); seqI++ )
01006                         if( ga1.LeftEnd(seqI) != NO_MATCH )
01007                         {
01008                                 aln.SetLeftEnd(seqI, ga1.LeftEnd(seqI));
01009                                 aln.SetLength(ga1.Length(seqI), seqI);
01010                         }
01011                 for( uint seqI = 0; seqI < ga2.SeqCount(); seqI++ )
01012                         if( ga2.LeftEnd(seqI) != NO_MATCH )
01013                         {
01014                                 aln.SetLeftEnd(seqI, ga2.LeftEnd(seqI));
01015                                 aln.SetLength(ga2.Length(seqI), seqI);
01016                         }
01017                 return true;
01018         }catch( gnException& gne ){
01019         }catch( exception& e ){
01020         }catch(...){
01021         }
01022         return false;
01023 }
01024 
01025 
01026 bool MuscleInterface::ProfileAlignFast( const GappedAlignment& ga1, const GappedAlignment& ga2, GappedAlignment& aln, bool anchored )
01027 {
01028         try{
01029                 const vector< string >& aln1 = GetAlignment( ga1, vector< gnSequence* >() );
01030                 const vector< string >& aln2 = GetAlignment( ga2, vector< gnSequence* >() );
01031                 vector< uint > order;
01032                 vector< string > aln11( ga1.Multiplicity() );
01033                 vector< string > aln22( ga2.Multiplicity() );
01034                 size_t curI = 0;
01035                 for( uint seqI = 0; seqI < aln1.size(); seqI++ )
01036                 {
01037                         if( ga1.LeftEnd(seqI) != NO_MATCH )
01038                         {
01039                                 aln11[curI++] = aln1[seqI];
01040                                 order.push_back(seqI);
01041                         }
01042                 }
01043                 curI = 0;
01044                 for( uint seqI = 0; seqI < aln2.size(); seqI++ )
01045                 {
01046                         if( ga2.LeftEnd(seqI) != NO_MATCH )
01047                         {
01048                                 aln22[curI++] = aln2[seqI];
01049                                 order.push_back(seqI);
01050                         }
01051                 }
01052 // strip the gap columns only if we're doing unanchored PP alignment
01053                 if( !anchored )
01054                 {
01055                         stripGapColumns(aln11);
01056                         stripGapColumns(aln22);
01057                 }
01058 
01059                 g_SeqType.get() = SEQTYPE_DNA;  // we're operating on DNA
01060                 g_uMaxIters.get() = 1;                  // and we don't want to refine the alignment...yet
01061                 g_bStable.get() = true;                 // we want output seqs in the same order as input
01062                 g_bQuiet.get() = true;                  // and don't print anything to the console
01063                 g_SeqWeight1.get() = SEQWEIGHT_ClustalW;        // not sure what weighting scheme works best for DNA
01064 
01065                 SetMaxIters(g_uMaxIters.get());
01066                 SetSeqWeightMethod(g_SeqWeight1.get());
01067 
01068                 MSA::SetIdCount(order.size());
01069 
01070                 MSA msa1;
01071                 MSA msa2;
01072                 MSA msaOut;
01073                 msaFromSeqTable(msa1, aln11);
01074                 msaFromSeqTable(msa2, aln22, msa1.GetSeqCount());
01075 
01076                 SetAlpha(ALPHA_DNA);
01077                 msa1.FixAlpha();
01078                 msa2.FixAlpha();
01079                 SetPPScore(PPSCORE_SPN);
01080 
01081                 if(anchored)
01082                 {
01083                         AnchoredProfileProfile(msa1, msa2, msaOut);
01084                 }else{
01085                         ProfileProfile(msa1, msa2, msaOut);
01086                 }
01087 
01088                 // get the output
01089                 vector< string > aln_matrix( aln1.size() );
01090                 for( size_t seqI = 0; seqI < msaOut.GetSeqCount(); seqI++ )
01091                 {
01092                         unsigned indie = msaOut.GetSeqIndex(seqI);
01093                         const char* buf = msaOut.GetSeqBuffer(indie);
01094                         string curseq(buf, msaOut.GetColCount());
01095                         swap(aln_matrix[order[indie]],curseq);
01096 
01097                         // debugging, check that sequences came out in the same order they went in!
01098 /*                      string inseq = aln1[order[indie]];
01099                         string outseq = aln_matrix[order[indie]];
01100                         stripGaps(inseq);
01101                         stripGaps(outseq);
01102                         if(inseq != outseq)
01103                         {
01104                                 unsigned indie = msaOut.GetSeqIndex(seqI);
01105                                 cerr << "bad indie " << indie << endl;
01106                                 genome::breakHere();
01107                         }
01108 */
01109                 }
01110                 // fill empty seqs with gaps
01111                 for( size_t seqI = 0; seqI < aln_matrix.size(); seqI++ )
01112                         if(aln_matrix[seqI].size() == 0)
01113                                 aln_matrix[seqI].resize(msaOut.GetColCount(), '-');
01114 
01115                 aln.SetAlignment( aln_matrix );
01116                 for( uint seqI = 0; seqI < ga1.SeqCount(); seqI++ )
01117                         if( ga1.LeftEnd(seqI) != NO_MATCH )
01118                         {
01119                                 aln.SetLeftEnd(seqI, ga1.LeftEnd(seqI));
01120                                 aln.SetLength(ga1.Length(seqI), seqI);
01121                         }
01122                 for( uint seqI = 0; seqI < ga2.SeqCount(); seqI++ )
01123                         if( ga2.LeftEnd(seqI) != NO_MATCH )
01124                         {
01125                                 aln.SetLeftEnd(seqI, ga2.LeftEnd(seqI));
01126                                 aln.SetLength(ga2.Length(seqI), seqI);
01127                         }
01128                 return true;
01129 
01130         }catch( gnException& gne ){
01131         }catch( exception& e ){
01132         }catch(...){
01133         }
01134         return false;
01135 }
01136 
01137 
01138 void MuscleInterface::CreateTree( const NumericMatrix<double>& distances, const std::string& tree_filename  )
01139 {
01140         DistFunc df;
01141         df.SetCount( distances.rows() );
01142         for( size_t i = 0; i < distances.rows(); i++ )
01143                 for( size_t j = 0; j < distances.rows(); j++ )
01144                         df.SetDist( i, j, distances(i,j) );
01145 
01146         for( size_t i = 0; i < distances.rows(); i++ )
01147         {
01148                 stringstream ss;
01149                 ss << "seq";
01150                 ss << i + 1;
01151                 df.SetName( i, ss.str().c_str() );
01152                 df.SetId( i, i );
01153         }
01154         ClustSetDF csdf( df );
01155         Clust crusty;
01156         crusty.Create( csdf, CLUSTER_NeighborJoining );
01157         Tree tt;
01158         tt.FromClust( crusty );
01159         TextFile tf( tree_filename.c_str(), true );
01160         tt.ToFile( tf );
01161 }
01162 
01163 
01164 }

Generated on Fri Mar 14 06:01:03 2008 for libMems by doxygen 1.3.6