Commit 0444c09f authored by Carsten Kemena

small middle

parent 5e2544b7
......@@ -87,7 +87,7 @@ RadiantDB::getDomID(const PrefixType &prefix, const CodedSuffix &suffix, bool &p
}
return 0;
}
/*
void
RadiantDB::write_(const fs::path &path, const std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > &database) const
{
......@@ -111,4 +111,4 @@ RadiantDB::write_(const fs::path &path, const std::unordered_map<PrefixType, std
}
fout.close();
fout_index.close();
}
}*/
......@@ -24,10 +24,9 @@ class RadiantDB
{
private:
fs::path prefix_;
std::unordered_map<PrefixType, std::streampos> index_;
mutable std::unordered_map<PrefixType, std::vector<SuffixAcc> > database_;
std::unordered_map<PrefixType, std::vector<SuffixAcc> > database_;
SuffixAcc noSuffix_;
mutable std::ifstream dbFile_;
std::ifstream dbFile_;
void
write_(const fs::path &path, const std::unordered_map<PrefixType, std::map<CodedSuffix, unsigned short > > &database) const;
......
......@@ -21,8 +21,15 @@
namespace BSDL = BioSeqDataLib;
typedef uint64_t SuffixType;
typedef uint32_t PrefixType;
typedef uint16_t PrefixType;
typedef uint32_t SuffixType;
// Sizing constants for the bit-packed word encoding.
// NOTE(review): sizes appear to be counted in alphabet letters, since each is
// multiplied by ALPHABET_BIT_NUM below to obtain a bit count — confirm.

// Multiplier applied to a size to obtain a bit count (presumably bits per encoded letter).
const short ALPHABET_BIT_NUM = 3;
// Size of the prefix part of a word.
const short PREFIX_SIZE = 5;
// Size of the suffix part of a word.
const short SUFFIX_SIZE = 10;
// Size of a whole word: prefix size plus suffix size (5 + 10 = 15).
constexpr short WORD_SIZE = PREFIX_SIZE + SUFFIX_SIZE;
// Prefix size expressed in bits: 5 * 3 = 15.
constexpr PrefixType PREFIX_SHIFT = PREFIX_SIZE * ALPHABET_BIT_NUM;
// Suffix size expressed in bits: 10 * 3 = 30.
constexpr SuffixType SUFFIX_SHIFT = SUFFIX_SIZE * ALPHABET_BIT_NUM;
/**
* Conversion of an amino acid into a binary representation. Binary representation has been chosen
......@@ -68,14 +75,7 @@ static const std::unordered_map<char, PrefixType> alphabet2bit8 =
{'V', 7}, {'C', 7}
};
static const std::unordered_map<char, PrefixType> ALPHABET_2_BIT = alphabet2bit20;
// Sizing constants for the bit-packed word encoding.
// NOTE(review): sizes appear to be counted in alphabet letters, since each is
// multiplied by ALPHABET_BIT_NUM below to obtain a bit count — confirm.

// Multiplier applied to a size to obtain a bit count (presumably bits per encoded letter).
const short ALPHABET_BIT_NUM = 5;
// Size of the prefix part of a word.
const short PREFIX_SIZE = 6;
// Size of the suffix part of a word.
const short SUFFIX_SIZE = 12;
// Size of a whole word: prefix size plus suffix size (6 + 12 = 18).
constexpr short WORD_SIZE = PREFIX_SIZE + SUFFIX_SIZE;
// Prefix size expressed in bits: 6 * 5 = 30.
constexpr PrefixType PREFIX_SHIFT = PREFIX_SIZE * ALPHABET_BIT_NUM;
// Suffix size expressed in bits: 12 * 5 = 60.
constexpr SuffixType SUFFIX_SHIFT = SUFFIX_SIZE * ALPHABET_BIT_NUM;
static const std::unordered_map<char, PrefixType> ALPHABET_2_BIT = alphabet2bit8;
struct CodedSuffix {
SuffixType suffix;
......@@ -87,14 +87,6 @@ struct CodedSuffix {
CodedSuffix (SuffixType s, bool p): suffix(s), position(p)
{}
template<class Archive>
void serialize(Archive &ar, __attribute__ ((unused)) const unsigned int version)
{
ar & suffix;
ar & position;
}
} __attribute__((packed));
......@@ -109,13 +101,6 @@ struct SuffixAcc
SuffixAcc() : suffix(0, 0), accession(0)
{}
template<class Archive>
void serialize(Archive &ar, __attribute__ ((unused)) const unsigned int version)
{
ar & suffix;
ar & accession;
}
} __attribute__((packed));
inline bool operator<(const CodedSuffix &l, const CodedSuffix &r)
......
......@@ -155,8 +155,8 @@ void splitSequence(S &seq, D &db, unsigned int windowSize, bool reverse)
bool last = false;
size_t k = 0;
size_t limit = (seq.size() >= windowSize) ? seq.size() - windowSize + 1: 0 ;
if (reverse && (limit > (windowSize*2)))
limit = windowSize * 2;
if (seq.size() > (windowSize*2))
limit = seq.size()/2;
while (k < limit)
{
if (last)
......@@ -232,7 +232,7 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
std::cout << "Number of families: " << families.size() << std::endl;
//const int windowSize = 18;
const int windowSize = 18;
size_t famNumber = 0;
std::vector<D> threadDBs;
......@@ -251,8 +251,8 @@ turnFile2db(fs::path &inFile, D &database, bool reverse)
{
// split sequence into words and store in database
auto &seq = seqSet[j];
if (seq.size() >= WORD_SIZE)
splitSequence(seq, dbTmp, WORD_SIZE, reverse);
if (seq.size() >= windowSize)
splitSequence(seq, dbTmp, windowSize, reverse);
}
merge2dbs(dbTmp, threadDB);
......
......@@ -185,9 +185,10 @@ readDatabase(const fs::path &inFile, std::unordered_map<PrefixType, vector<Suffi
void
assignWords(const fs::path &inFile, BSDL::SequenceSet<BSDL::Sequence<> > &seqSet, vector<multiset<pair<size_t, pair<unsigned short, bool> > > > &assignments, bool reverse)
{
auto start = std::chrono::steady_clock::now();
RadiantDB database;
database.read(inFile);
auto middle = std::chrono::steady_clock::now();
size_t nSeqs = seqSet.size();
assignments.resize(nSeqs);
PrefixType prefix;
......@@ -228,6 +229,9 @@ assignWords(const fs::path &inFile, BSDL::SequenceSet<BSDL::Sequence<> > &seqSet
}
}
}
auto end = std::chrono::steady_clock::now();
std::cout << (std::chrono::duration_cast< std::chrono::milliseconds>(middle - start)).count() << "\n";
std::cout << (std::chrono::duration_cast< std::chrono::milliseconds>(end - middle)).count() << "\n";
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment