libMems/MatchHashEntry.cpp

Go to the documentation of this file.
00001 /*******************************************************************************
00002  * $Id: MatchHashEntry.cpp,v 1.9 2004/03/01 02:40:08 darling Exp $
00003  * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
00004  * Please see the file called COPYING for licensing, copying, and modification
00005  * Please see the file called COPYING for licensing details.
00006  * **************
00007  ******************************************************************************/
00008 
00009 #ifdef HAVE_CONFIG_H
00010 #include "config.h"
00011 #endif
00012 
00013 #include "libMems/MatchHashEntry.h"
00014 #include "libGenome/gnException.h"
00015 #include "libGenome/gnDebug.h"
00016 
00017 using namespace std;
00018 using namespace genome;
00019 namespace mems {
00020 
00021 boolean MatchHashEntry::offset_lessthan(const MatchHashEntry& a, const MatchHashEntry& b){
00022         return a.m_offset < b.m_offset;
00023 }
00024 
00025 boolean MatchHashEntry::start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
00026         int32 start_diff = a->FirstStart() - b->FirstStart();
00027         if(start_diff == 0){
00028                 uint32 m_count = a->SeqCount();
00029                 m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
00030                 for(uint32 seqI = seq_compare_start; seqI < m_count; seqI++){
00031                         int64 a_start = a->Start(seqI), b_start = b->Start(seqI);
00032                         if(a_start < 0)
00033                                 a_start = -a_start + a->Length() - a->m_mersize;
00034                         if(b_start < 0)
00035                                 b_start = -b_start + b->Length() - b->m_mersize;
00036                         int64 diff = a_start - b_start;
00037                         if(a_start == NO_MATCH || b_start == NO_MATCH)
00038                                 continue;
00039                         else if(diff == 0)
00040                                 continue;
00041                         else
00042                                 return diff < 0;
00043                 }
00044         }
00045         return start_diff < 0;
00046 }
00047 
00048 boolean MatchHashEntry::strict_start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
00049         int start_diff = a->FirstStart() - b->FirstStart();
00050         if(start_diff == 0){
00051                 uint m_count = a->SeqCount();
00052                 m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
00053                 for(uint seqI = 0; seqI < m_count; seqI++){
00054                         int64 a_start = a->Start(seqI), b_start = b->Start(seqI);
00055                         if(a_start < 0)
00056                                 a_start = -a_start + a->Length() - a->m_mersize;
00057                         if(b_start < 0)
00058                                 b_start = -b_start + b->Length() - b->m_mersize;
00059                         int64 diff = a_start - b_start;
00060                         if(diff == 0)
00061                                 continue;
00062                         else
00063                                 return diff < 0;
00064                 }
00065         }
00066         return start_diff < 0;
00067 }
00068 
00069 
00070 //ignores mem_no_matches
00071 int64 MatchHashEntry::start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
00072         uint m_count = a.SeqCount();
00073         m_count = m_count <= b.SeqCount() ? m_count : b.SeqCount();
00074         for(uint seqI = 0; seqI < m_count; seqI++){
00075                 int64 a_start = a.Start(seqI), b_start = b.Start(seqI);
00076                 if(a_start < 0)
00077                         a_start = -a_start + a.Length() - a.m_mersize;
00078                 if(b_start < 0)
00079                         b_start = -b_start + b.Length() - b.m_mersize;
00080                 int64 diff = a_start - b_start;
00081                 if(a_start == NO_MATCH || b_start == NO_MATCH)
00082                         continue;
00083                 else if(diff == 0)
00084                         continue;
00085                 else
00086                         return diff;
00087         }
00088         return 0;
00089 }
00090 
00091 int64 MatchHashEntry::end_to_start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
00092         MatchHashEntry tmp_a = a;
00093         tmp_a.CropStart(tmp_a.Length()-1);
00094         return MatchHashEntry::start_compare(tmp_a, b);
00095 }
00096 
00097 
00098 MatchHashEntry::MatchHashEntry() : 
00099 Match(),
00100 m_extended( false ),
00101 m_mersize( 0 )
00102 {
00103 }
00104 
00105 
00106 MatchHashEntry::MatchHashEntry(uint32 seq_count, const gnSeqI mersize, MemType m_type) : 
00107  Match( seq_count ),
00108  m_mersize( mersize )
00109 {
00110         m_extended = m_type == extended;
00111 }
00112 
00113 
00114 MatchHashEntry* MatchHashEntry::Clone() const{
00115         return new MatchHashEntry(*this);
00116 }
00117 
00118 MatchHashEntry& MatchHashEntry::operator=(const MatchHashEntry& mhe)
00119 {
00120         Match::operator=( mhe );
00121         m_extended = mhe.m_extended;
00122         m_mersize = 0;
00123         m_offset = mhe.m_offset;
00124 
00125         return *this;
00126 }
00127 
00128 boolean MatchHashEntry::operator==(const MatchHashEntry& mhe) const
00129 {
00130         if(m_seq_count != mhe.m_seq_count)
00131                 return false;
00132         if(m_mersize != mhe.m_mersize)
00133                 return false;
00134         if(m_extended != mhe.m_extended)
00135                 return false;
00136         if( m_offset != mhe.m_offset )
00137                 return false;
00138         return Match::operator ==(mhe);
00139 }
00140 
00141 void MatchHashEntry::CalculateOffset()
00142 {
00143         if( SeqCount() == 0 )
00144                 return;
00145 
00146         int64 tmp_off = 0;
00147         m_offset = 0;
00148 
00149         uint seqI = FirstStart();
00150         int64 ref_start = Start(seqI);
00151 
00152         for(seqI++; seqI < SeqCount(); seqI++){
00153                 if(Start(seqI) != NO_MATCH){
00154                         tmp_off = Start(seqI) - ref_start;
00155                         if( Start(seqI) < 0 )
00156                                 tmp_off -= (int64)Length( seqI );
00157                         m_offset += tmp_off;
00158                 }
00159         }
00160 }
00161 
00162 // checks if mhe is _perfectly_ contained in this match.
00163 // all offsets in all sequences must be aligned to each other
00164 boolean MatchHashEntry::Contains(const MatchHashEntry& mhe) const{
00165         uint i;
00166         int64 diff_i;
00167         int64 diff;
00168         uint seq_count = mhe.SeqCount();
00169         //check for a consistent number of genomes and
00170         //identical generalized offsets
00171         if(SeqCount() != seq_count || m_offset != mhe.m_offset)
00172                 return false;
00173 
00174         i = mhe.FirstStart();
00175         diff = mhe.Start(i) - Start(i);
00176         if(Start(i) == NO_MATCH)
00177                 return false;
00178 
00179         //check for containment properties
00180         if(diff < 0 || Length() < mhe.Length() + diff)
00181                 return false;
00182 
00183         //everything is ok so far, check for alignment
00184         int64 diff_rc = (int64)mhe.Length() - (int64)Length() + diff;
00185         for(i++; i < seq_count; i++){
00186                 //check for consistent alignment between all genomes
00187                 //in the case of revcomp, diff_i must equal diff_rc
00188                 diff_i = mhe.Start(i) - Start(i);
00189 
00190                 //it's ok if neither matches in a sequence
00191                 if(mhe.Start(i) == NO_MATCH && Start(i) == NO_MATCH)
00192                         continue;
00193                 else if(mhe.Start(i) < 0 && diff_rc == diff_i)
00194                         continue;
00195                 else if(diff != diff_i )
00196                         return false;
00197         }
00198         //it was contained.
00199         return true;
00200 }
00201 
00202 
00203 } // namespace mems

Generated on Fri Mar 14 06:01:03 2008 for libMems by doxygen 1.3.6