00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef __SparseAbstractMatch_h__
00010 #define __SparseAbstractMatch_h__
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #include "libGenome/gnClone.h"
00017 #include "libGenome/gnDefs.h"
00018 #include "libMems/AbstractMatch.h"
00019 #include <vector>
00020 #include <limits>
00021
00022 namespace mems {
00023
00024
00029 template< class gnSeqIAlloc=std::allocator<gnSeqI>, class uintAlloc=std::allocator<uint> >
00030 class SparseAbstractMatch : public AbstractMatch {
00031 public:
00032 SparseAbstractMatch() : m_seq_count(0) {}
00037 SparseAbstractMatch(const uint seq_count );
00038
00039
00040
00041
00042
00043 int64 Start(uint seqI) const;
00044 void SetStart(uint seqI, int64 startI);
00045 uint Multiplicity() const{return (uint)seq_ids.size();}
00046 uint SeqCount() const{return m_seq_count;}
00047 uint FirstStart() const;
00048 virtual void Invert();
00049
00050 gnSeqI LeftEnd(uint seqI) const;
00051 orientation Orientation(uint seqI) const;
00052 void SetLeftEnd(uint seqI, gnSeqI position);
00053 void SetOrientation(uint seqI, orientation o);
00054
00055
00056 virtual void MoveStart(int64 move_amount);
00057 virtual void MoveEnd(int64 move_amount);
00058
00059 virtual boolean operator==( const SparseAbstractMatch& sam ) const;
00060
00061 virtual uint UsedSeq( uint seqI ) const;
00062 protected:
00063
00064 std::vector<uint, uintAlloc > seq_ids;
00065 uint m_seq_count;
00066 std::vector<gnSeqI, gnSeqIAlloc > leftend;
00067 bitset_t orient;
00068 uint SeqToIndex( uint seqI ) const;
00069
00070
00071 void swap( SparseAbstractMatch* other );
00072 };
00073
00074
00075 template< class gnSeqIAlloc, class uintAlloc >
00076 SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SparseAbstractMatch(const uint seq_count ) :
00077 m_seq_count(seq_count)
00078 {}
00079
00080 template< class gnSeqIAlloc, class uintAlloc >
00081 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::swap( SparseAbstractMatch* other )
00082 {
00083 std::swap(seq_ids, other->seq_ids);
00084 std::swap(m_seq_count, other->m_seq_count);
00085 std::swap(leftend, other->leftend);
00086 std::swap(orient, other->orient);
00087 }
00088
00089 template< class gnSeqIAlloc, class uintAlloc >
00090 uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::FirstStart() const
00091 {
00092 uint minI = (std::numeric_limits<uint>::max)();
00093 for( std::size_t i = 0; i < seq_ids.size(); ++i )
00094 minI = seq_ids[i] < minI ? seq_ids[i] : minI;
00095 return minI;
00096 }
00097
00098 template< class gnSeqIAlloc, class uintAlloc >
00099 uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SeqToIndex( uint seqI ) const
00100 {
00101 uint posI = 0;
00102 for( ; posI < seq_ids.size(); ++posI )
00103 if( seq_ids[posI] == seqI )
00104 break;
00105 return posI;
00106 }
00107
00108
00109 template< class gnSeqIAlloc, class uintAlloc >
00110 int64 SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Start(uint seqI) const
00111 {
00112 uint posI = SeqToIndex( seqI );
00113 if( posI >= seq_ids.size() )
00114 return NO_MATCH;
00115 int64 s = leftend[posI];
00116 return orient.test(posI)? -s : s;
00117 }
00118
00119
00120 template< class gnSeqIAlloc, class uintAlloc >
00121 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetStart(uint seqI, int64 startI)
00122 {
00123 uint posI = SeqToIndex( seqI );
00124 if( startI == NO_MATCH && posI >= seq_ids.size() )
00125 return;
00126 if( startI == NO_MATCH )
00127 {
00128 seq_ids.erase( seq_ids.begin() + posI );
00129 leftend.erase( leftend.begin() + posI );
00130 for( size_t i = posI; i + 1 < orient.size(); ++i )
00131 orient.set( i, orient.test( i + 1 ) );
00132 orient.resize( orient.size()-1 );
00133 return;
00134 }
00135 if( posI >= seq_ids.size() )
00136 {
00137 seq_ids.push_back(seqI);
00138 leftend.push_back(genome::absolut(startI));
00139 orient.resize( orient.size() + 1, (startI < 0) );
00140 }else{
00141 leftend[posI] = genome::absolut(startI);
00142 orient.set(posI, startI < 0);
00143 }
00144 }
00145
00146
00147 template< class gnSeqIAlloc, class uintAlloc >
00148 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Invert()
00149 {
00150 orient.flip();
00151 }
00152
00153
00154
00155 template< class gnSeqIAlloc, class uintAlloc >
00156 gnSeqI SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::LeftEnd(uint seqI) const
00157 {
00158 uint posI = SeqToIndex( seqI );
00159 return posI < leftend.size() ? leftend[posI] : 0;
00160 }
00161
00162
00163 template< class gnSeqIAlloc, class uintAlloc >
00164 AbstractMatch::orientation SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::Orientation(uint seqI) const
00165 {
00166 uint posI = SeqToIndex( seqI );
00167 if( posI < leftend.size() && leftend[posI] != NO_MATCH )
00168 return orient.test(posI) ? reverse : forward;
00169 return undefined;
00170 }
00171
00172
00173 template< class gnSeqIAlloc, class uintAlloc >
00174 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetLeftEnd(uint seqI, gnSeqI position)
00175 {
00176 uint posI = SeqToIndex( seqI );
00177 if( position == NO_MATCH && posI >= seq_ids.size() )
00178 return;
00179 if( posI >= leftend.size() )
00180 {
00181 seq_ids.push_back(seqI);
00182 leftend.push_back(position);
00183 orient.resize( orient.size() + 1 );
00184 }else if( position == NO_MATCH )
00185 {
00186 seq_ids.erase( seq_ids.begin() + posI );
00187 leftend.erase( leftend.begin() + posI );
00188 for( size_t i = posI; i + 1 < orient.size(); ++i )
00189 orient.set( i, orient.test( i + 1 ) );
00190 orient.resize( orient.size()-1 );
00191 return;
00192 }
00193
00194 leftend[posI]=position;
00195 }
00196
00197 template< class gnSeqIAlloc, class uintAlloc >
00198 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::SetOrientation(uint seqI, orientation o)
00199 {
00200 uint posI = SeqToIndex( seqI );
00201
00202 if( posI >= orient.size() )
00203 throw "ArrayIndexOutOfBounds!\n";
00204 orient.set(posI, o == reverse);
00205 }
00206
00207 template< class gnSeqIAlloc, class uintAlloc >
00208 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::MoveStart(int64 move_amount)
00209 {
00210 for( uint i=0; i < leftend.size(); ++i )
00211 if( orient.test(i) == false && leftend[i] != NO_MATCH )
00212 leftend[i] += move_amount;
00213 }
00214
00215 template< class gnSeqIAlloc, class uintAlloc >
00216 void SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::MoveEnd(int64 move_amount)
00217 {
00218 for( uint i=0; i < leftend.size(); ++i )
00219 if( orient.test(i) && leftend[i] != NO_MATCH )
00220 leftend[i] += move_amount;
00221 }
00222
00223 template< class gnSeqIAlloc, class uintAlloc >
00224 boolean SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::operator==( const SparseAbstractMatch< gnSeqIAlloc, uintAlloc >& sam ) const
00225 {
00226 for( uint i=0; i < leftend.size(); ++i ){
00227 if( leftend[i] != sam.leftend[i] ||
00228 (leftend[i] != 0 && orient.test(i) != sam.orient.test(i)))
00229 return false;
00230 }
00231 return true;
00232 }
00233
00234 template< class gnSeqIAlloc, class uintAlloc >
00235 uint SparseAbstractMatch< gnSeqIAlloc, uintAlloc >::UsedSeq( uint seqI ) const
00236 {
00237 uint count = 0;
00238 for( uint i = 0; i < leftend.size(); i++ )
00239 {
00240 if(leftend[i] != 0)
00241 count++;
00242 if( count > seqI )
00243 return i;
00244 }
00245 return (std::numeric_limits<uint>::max)();
00246 }
00247
00248 }
00249
00250 #endif // __SparseAbstractMatch_h__