00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef _SeedMasks_h_
00010 #define _SeedMasks_h_
00011
00012 #ifdef HAVE_CONFIG_H
00013 #include "config.h"
00014 #endif
00015
00016 #ifdef __cplusplus
00017 #include <cmath>
00018 #else
00019 #include <math.h>
00020 #endif
00021 #include "libGenome/gnDefs.h"
00022
00023
00024
00025
00026
00027
00032 #ifdef __cplusplus
00033 static
00034 #endif
00035 uint32** seedMasks();
00036
00041 #ifdef __cplusplus
00042 inline static
00043 #endif
00044 uint32** seedMasks(){
00045 static uint32 seed_masks_3[] =
00046 {
00047 0,0xb,
00048 0,0, 0,0, 0,0, 0,0, 0,0, };
00049 static uint32 seed_masks_4[] =
00050 {
00051 0,0x3b,
00052 0,0, 0,0, 0,0, 0,0, 0,0, };
00053 static uint32 seed_masks_5[] =
00054 {
00055 0,0x6b,
00056 0,0x139,
00057 0,0x193,
00058 0,0x6b,
00059 0,0, 0,0, };
00060 static uint32 seed_masks_6[] =
00061 {
00062 0,0x58D,
00063 0,0x653,
00064 0,0x1AB,
00065 0,0xdb,
00066 0,0, 0,0, };
00067 static uint32 seed_masks_7[] =
00068 {
00069 0,0x1953,
00070 0,0x588d,
00071 0,0x688b,
00072 0,0x17d,
00073 0,0x164d,
00074 0,0, 0,0, };
00075 static uint32 seed_masks_8[] =
00076 {
00077 0,0x3927,
00078 0,0x1CA7,
00079 0,0x6553,
00080 0,0xb6d,
00081 0,0, 0,0, };
00082 static uint32 seed_masks_9[] =
00083 {
00084 0,0x7497,
00085 0,0x1c927,
00086 0,0x72a7,
00087 0,0x6fb,
00088 0,0x16ed,
00089 0,0, };
00090 static uint32 seed_masks_10[] =
00091 {
00092 0,0x1d297,
00093 0,0x3A497,
00094 0,0xE997,
00095 0,0x6D5B,
00096 0,0, 0,0, };
00097 static uint32 seed_masks_11[] =
00098 {
00099 0,0x7954f,
00100 0,0x75257,
00101 0,0x1c9527,
00102 0,0x5bed,
00103 0,0x5b26d,
00104 0,0, };
00105 static uint32 seed_masks_12[] =
00106 {
00107 0,0x7954f,
00108 0,0x3D32F,
00109 0,0x768B7,
00110 0,0x5B56D,
00111 0,0, 0,0, };
00112 static uint32 seed_masks_13[] =
00113 {
00114 0,0x792a4f,
00115 0,0x1d64d7,
00116 0,0x1d3597,
00117 0,0x1b7db,
00118 0,0x75ad7,
00119 0,0, };
00120 static uint32 seed_masks_14[] =
00121 {
00122 0,0x1e6acf,
00123 0,0xF59AF,
00124 0,0x3D4CAF,
00125 0,0x35AD6B,
00126 0,0, 0,0, };
00127 static uint32 seed_masks_15[] =
00128 {
00129 0,0x7ac9af,
00130 0,0x7b2a6f,
00131 0,0x79aacf,
00132 0,0x16df6d,
00133 0,0x6b5d6b,
00134 0,0, };
00135 static uint32 seed_masks_16[] =
00136 {
00137 0,0xf599af,
00138 0,0xEE5A77,
00139 0,0x7CD59F,
00140 0,0xEB5AD7,
00141 0,0, 0,0, };
00142 static uint32 seed_masks_17[] =
00143 {
00144 0,0x6dbedb,
00145 0,0, 0,0, 0,0, 0,0, 0,0, };
00146 static uint32 seed_masks_18[] =
00147 {
00148 0,0x3E6B59F,
00149 0,0x3EB335F,
00150 0,0x7B3566F,
00151 0,0, 0,0, 0,0, };
00152
00153 static uint32 seed_masks_19[] =
00154 {
00155 0,0x7b974ef,
00156 0,0x7d6735f,
00157 0,0x1edd74f,
00158 0,0, 0,0, 0,0, };
00159 static uint32 seed_masks_20[] =
00160 {
00161 0,0x1F59B35F,
00162 0,0x3EDCEDF,
00163 0,0xFAE675F,
00164 0,0, 0,0, 0,0, };
00165 static uint32 seed_masks_21[] =
00166 {
00167 0,0x7ddaddf,
00168 0,0xaeb3f,
00169 0,0x7eb76bf,
00170 0,0, 0,0, 0,0, };
00171
00172 static uint32 seed_masks_22[] =
00173 {
00174 0,0x003fffff,
00175 0,0, 0,0, 0,0, 0,0, 0,0, };
00176 static uint32 seed_masks_23[] =
00177 {
00178 0,0x007fffff,
00179 0,0, 0,0, 0,0, 0,0, 0,0, };
00180 static uint32 seed_masks_24[] =
00181 {
00182 0,0x00ffffff,
00183 0,0, 0,0, 0,0, 0,0, 0,0, };
00184 static uint32 seed_masks_25[] =
00185 {
00186 0,0x01ffffff,
00187 0,0, 0,0, 0,0, 0,0, 0,0, };
00188 static uint32 seed_masks_26[] =
00189 {
00190 0,0x03ffffff,
00191 0,0, 0,0, 0,0, 0,0, 0,0, };
00192 static uint32 seed_masks_27[] =
00193 {
00194 0,0x07ffffff,
00195 0,0, 0,0, 0,0, 0,0, 0,0, };
00196 static uint32 seed_masks_28[] =
00197 {
00198 0,0x0fffffff,
00199 0,0, 0,0, 0,0, 0,0, 0,0, };
00200 static uint32 seed_masks_29[] =
00201 {
00202 0,0x1fffffff,
00203 0,0, 0,0, 0,0, 0,0, 0,0, };
00204 static uint32 seed_masks_30[] =
00205 {
00206 0,0x3fffffff,
00207 0,0, 0,0, 0,0, 0,0, 0,0, };
00208 static uint32 seed_masks_31[] =
00209 {
00210 0,0x7fffffff,
00211 0,0, 0,0, 0,0, 0,0, 0,0, };
00212
00213 static uint32 no_seeds[] =
00214 {
00215 0,0,
00216 0,0,
00217 0,0,
00218 0,0,
00219 0,0,
00220 0,0,
00221 };
00222
00223 static uint32* seed_masks[] =
00224 {
00225 no_seeds,
00226 no_seeds,
00227 no_seeds,
00228 seed_masks_3,
00229 seed_masks_4,
00230 seed_masks_5,
00231 seed_masks_6,
00232 seed_masks_7,
00233 seed_masks_8,
00234 seed_masks_9,
00235 seed_masks_10,
00236 seed_masks_11,
00237 seed_masks_12,
00238 seed_masks_13,
00239 seed_masks_14,
00240 seed_masks_15,
00241 seed_masks_16,
00242 seed_masks_17,
00243 seed_masks_18,
00244 seed_masks_19,
00245 seed_masks_20,
00246 seed_masks_21,
00247 seed_masks_22,
00248 seed_masks_23,
00249 seed_masks_24,
00250 seed_masks_25,
00251 seed_masks_26,
00252 seed_masks_27,
00253 seed_masks_28,
00254 seed_masks_29,
00255 seed_masks_30,
00256 seed_masks_31,
00257 };
00258
00259 return seed_masks;
00260 }
00261
00262 static const int CODING_SEED = 3;
00263 static const int SOLID_SEED = INT_MAX;
00264
00268 #ifdef __cplusplus
00269 static
00270 #endif
00271 int64 getSolidSeed( int weight );
00272
00273 #ifdef __cplusplus
00274 inline static
00275 #endif
00276 int64 getSolidSeed( int weight ){
00277 int64 seed = 1;
00278 seed <<= weight;
00279 seed--;
00280 return seed;
00281 };
00282
00283
00284
00289 #ifdef __cplusplus
00290 static int64 getSeed( int weight, int seed_rank = 0 );
00291 #else
00292 int64 getSeed( int weight, int seed_rank );
00293 #endif
00294
00295 #ifdef __cplusplus
00296 inline static
00297 #endif
00298 int64 getSeed( int weight, int seed_rank ){
00299 uint32** masks;
00300 int high;
00301 int low;
00302 int i = 1;
00303 int64 seed = 0;
00304 if( seed_rank == SOLID_SEED )
00305 return getSolidSeed( weight );
00306
00307 masks = seedMasks();
00308 if(weight > 31)
00309 return getSolidSeed(32);
00310 if( seed_rank > 5 )
00311 return getSolidSeed(weight);
00312 if( masks[weight][seed_rank*2+1] == 0 )
00313 return getSolidSeed(weight);
00314 high = masks[ weight ][ seed_rank*2 ];
00315 low = masks[ weight ][ seed_rank*2 + 1 ];
00316
00317 seed |= high;
00318 seed <<= 32;
00319 seed |= low;
00320 return seed;
00321 };
00322
00323
00327 #ifdef __cplusplus
00328 static
00329 #endif
00330 int getSeedLength( int64 seed );
00331
00332 #ifdef __cplusplus
00333 inline static
00334 #endif
00335 int getSeedLength( int64 seed ){
00336 int right_bit = -1;
00337 int left_bit = -1;
00338 uint bitI = 0;
00339 for( ; bitI < 64; ++bitI ){
00340 if( (seed & 1) == 1 ){
00341 left_bit = bitI;
00342 if( right_bit == -1 )
00343 right_bit = bitI;
00344 }
00345 seed >>= 1;
00346 }
00347 if( left_bit != -1 )
00348 return left_bit - right_bit + 1;
00349 return 0;
00350 }
00351
00355 #ifdef __cplusplus
00356 static
00357 #endif
00358 int getSeedWeight( int64 seed );
00359
00360 #ifdef __cplusplus
00361 inline static
00362 #endif
00363 int getSeedWeight( int64 seed ){
00364 int weight = 0;
00365 uint bitI = 0;
00366 for( ; bitI < 64; ++bitI ){
00367 if( (seed & 1) == 1 ){
00368 ++weight;
00369 }
00370 seed >>= 1;
00371 }
00372 return weight;
00373 }
00374
00375 const uint MIN_DNA_SEED_WEIGHT = 5;
00376 const uint MAX_DNA_SEED_WEIGHT = 31;
00377
00381 #ifdef __cplusplus
00382 static
00383 #endif
00384 uint getDefaultSeedWeight( gnSeqI avg_sequence_length );
00385
00386 #ifdef __cplusplus
00387 inline static
00388 #endif
00389 uint getDefaultSeedWeight( gnSeqI avg_sequence_length ){
00390 uint mer_size = (uint)((log( (double)avg_sequence_length ) / log( 2.0 ))/1.5);
00391
00392 if( !(mer_size & 0x1 ) )
00393 ++mer_size;
00394 mer_size = mer_size < MIN_DNA_SEED_WEIGHT ? 0 : mer_size;
00395 if( avg_sequence_length == 0 )
00396 mer_size = 0;
00397
00398
00399 mer_size = mer_size > MAX_DNA_SEED_WEIGHT ? MAX_DNA_SEED_WEIGHT : mer_size;
00400 return mer_size;
00401 }
00402
00403
00404 #endif // _SeedMasks_h_