00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifdef HAVE_CONFIG_H
00010 #include "config.h"
00011 #endif
00012
00013 #include "libMems/MatchHashEntry.h"
00014 #include "libGenome/gnException.h"
00015 #include "libGenome/gnDebug.h"
00016
00017 using namespace std;
00018 using namespace genome;
00019 namespace mems {
00020
00021 boolean MatchHashEntry::offset_lessthan(const MatchHashEntry& a, const MatchHashEntry& b){
00022 return a.m_offset < b.m_offset;
00023 }
00024
00025 boolean MatchHashEntry::start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
00026 int32 start_diff = a->FirstStart() - b->FirstStart();
00027 if(start_diff == 0){
00028 uint32 m_count = a->SeqCount();
00029 m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
00030 for(uint32 seqI = seq_compare_start; seqI < m_count; seqI++){
00031 int64 a_start = a->Start(seqI), b_start = b->Start(seqI);
00032 if(a_start < 0)
00033 a_start = -a_start + a->Length() - a->m_mersize;
00034 if(b_start < 0)
00035 b_start = -b_start + b->Length() - b->m_mersize;
00036 int64 diff = a_start - b_start;
00037 if(a_start == NO_MATCH || b_start == NO_MATCH)
00038 continue;
00039 else if(diff == 0)
00040 continue;
00041 else
00042 return diff < 0;
00043 }
00044 }
00045 return start_diff < 0;
00046 }
00047
00048 boolean MatchHashEntry::strict_start_lessthan_ptr(const MatchHashEntry* a, const MatchHashEntry* b){
00049 int start_diff = a->FirstStart() - b->FirstStart();
00050 if(start_diff == 0){
00051 uint m_count = a->SeqCount();
00052 m_count = m_count <= b->SeqCount() ? m_count : b->SeqCount();
00053 for(uint seqI = 0; seqI < m_count; seqI++){
00054 int64 a_start = a->Start(seqI), b_start = b->Start(seqI);
00055 if(a_start < 0)
00056 a_start = -a_start + a->Length() - a->m_mersize;
00057 if(b_start < 0)
00058 b_start = -b_start + b->Length() - b->m_mersize;
00059 int64 diff = a_start - b_start;
00060 if(diff == 0)
00061 continue;
00062 else
00063 return diff < 0;
00064 }
00065 }
00066 return start_diff < 0;
00067 }
00068
00069
00070
00071 int64 MatchHashEntry::start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
00072 uint m_count = a.SeqCount();
00073 m_count = m_count <= b.SeqCount() ? m_count : b.SeqCount();
00074 for(uint seqI = 0; seqI < m_count; seqI++){
00075 int64 a_start = a.Start(seqI), b_start = b.Start(seqI);
00076 if(a_start < 0)
00077 a_start = -a_start + a.Length() - a.m_mersize;
00078 if(b_start < 0)
00079 b_start = -b_start + b.Length() - b.m_mersize;
00080 int64 diff = a_start - b_start;
00081 if(a_start == NO_MATCH || b_start == NO_MATCH)
00082 continue;
00083 else if(diff == 0)
00084 continue;
00085 else
00086 return diff;
00087 }
00088 return 0;
00089 }
00090
00091 int64 MatchHashEntry::end_to_start_compare(const MatchHashEntry& a, const MatchHashEntry& b){
00092 MatchHashEntry tmp_a = a;
00093 tmp_a.CropStart(tmp_a.Length()-1);
00094 return MatchHashEntry::start_compare(tmp_a, b);
00095 }
00096
00097
00098 MatchHashEntry::MatchHashEntry() :
00099 Match(),
00100 m_extended( false ),
00101 m_mersize( 0 )
00102 {
00103 }
00104
00105
00106 MatchHashEntry::MatchHashEntry(uint32 seq_count, const gnSeqI mersize, MemType m_type) :
00107 Match( seq_count ),
00108 m_mersize( mersize )
00109 {
00110 m_extended = m_type == extended;
00111 }
00112
00113
00114 MatchHashEntry* MatchHashEntry::Clone() const{
00115 return new MatchHashEntry(*this);
00116 }
00117
00118 MatchHashEntry& MatchHashEntry::operator=(const MatchHashEntry& mhe)
00119 {
00120 Match::operator=( mhe );
00121 m_extended = mhe.m_extended;
00122 m_mersize = 0;
00123 m_offset = mhe.m_offset;
00124
00125 return *this;
00126 }
00127
00128 boolean MatchHashEntry::operator==(const MatchHashEntry& mhe) const
00129 {
00130 if(m_seq_count != mhe.m_seq_count)
00131 return false;
00132 if(m_mersize != mhe.m_mersize)
00133 return false;
00134 if(m_extended != mhe.m_extended)
00135 return false;
00136 if( m_offset != mhe.m_offset )
00137 return false;
00138 return Match::operator ==(mhe);
00139 }
00140
00141 void MatchHashEntry::CalculateOffset()
00142 {
00143 if( SeqCount() == 0 )
00144 return;
00145
00146 int64 tmp_off = 0;
00147 m_offset = 0;
00148
00149 uint seqI = FirstStart();
00150 int64 ref_start = Start(seqI);
00151
00152 for(seqI++; seqI < SeqCount(); seqI++){
00153 if(Start(seqI) != NO_MATCH){
00154 tmp_off = Start(seqI) - ref_start;
00155 if( Start(seqI) < 0 )
00156 tmp_off -= (int64)Length( seqI );
00157 m_offset += tmp_off;
00158 }
00159 }
00160 }
00161
00162
00163
00164 boolean MatchHashEntry::Contains(const MatchHashEntry& mhe) const{
00165 uint i;
00166 int64 diff_i;
00167 int64 diff;
00168 uint seq_count = mhe.SeqCount();
00169
00170
00171 if(SeqCount() != seq_count || m_offset != mhe.m_offset)
00172 return false;
00173
00174 i = mhe.FirstStart();
00175 diff = mhe.Start(i) - Start(i);
00176 if(Start(i) == NO_MATCH)
00177 return false;
00178
00179
00180 if(diff < 0 || Length() < mhe.Length() + diff)
00181 return false;
00182
00183
00184 int64 diff_rc = (int64)mhe.Length() - (int64)Length() + diff;
00185 for(i++; i < seq_count; i++){
00186
00187
00188 diff_i = mhe.Start(i) - Start(i);
00189
00190
00191 if(mhe.Start(i) == NO_MATCH && Start(i) == NO_MATCH)
00192 continue;
00193 else if(mhe.Start(i) < 0 && diff_rc == diff_i)
00194 continue;
00195 else if(diff != diff_i )
00196 return false;
00197 }
00198
00199 return true;
00200 }
00201
00202
00203 }