Commit 165a9062 authored by Carsten Kemena

Added first test; fixed the -1 shift in the output of databases built from domain annotation files.

parent b19337ba
......@@ -25,19 +25,12 @@
using namespace std;
DBAccess::DBAccess(const fs::path &prefix, const fs::path &matrix)
DBAccess::DBAccess(const fs::path &prefix)
{
open(prefix);
setMatrix(matrix);
}
/**
 * @brief Sets the similarity matrix (DSM) to be used for the alignment.
 *
 * Reads the matrix from the given file into similarityMatrix_ and then
 * calls useNegative(true) on it.
 *
 * @param matrix The filename of the DSM to be used.
 */
void
DBAccess::setMatrix(const fs::path &matrix)
{
similarityMatrix_.read(matrix);
similarityMatrix_.useNegative(true);
}
void DBAccess::open(const fs::path &prefix)
......@@ -52,7 +45,7 @@ void DBAccess::open(const fs::path &prefix)
}
void
DBAccess::readSequences_(RadsQueryResult &results, const std::vector<std::string> &domainNames)
DBAccess::readSequences_(std::vector<TargetSequence> &sequences, const std::vector<std::string> &domainNames)
{
size_t nDomains = domainNames.size();
std::string line;
......@@ -66,8 +59,8 @@ DBAccess::readSequences_(RadsQueryResult &results, const std::vector<std::string
string name(line.begin(), line.begin()+pos);
char *tmp;
c_line += pos;
results.targets.back().targetSequences.emplace_back(std::move(name), strtoul(c_line, &tmp, 10));
auto &da = results.targets.back().targetSequences.back().da;
sequences.emplace_back(std::move(name), strtoul(c_line, &tmp, 10));
auto &da = sequences.back().da;
size_t start, end;
double eval;
c_line = tmp;
......@@ -154,7 +147,7 @@ DBAccess::search(BSDL::AlignmentMatrix<int, BSDL::DSM> &matrix, bool all, bool c
results.targets.emplace_back(std::move(targetDA), std::move(alnStrings.first), std::move(alnStrings.second), score, normScore, priority);
// add all sequences having the same domain arrangement
readSequences_(results, domains);
readSequences_(results.targets.back().targetSequences, domains);
queryDA.reconstruct();
}
......
......@@ -99,16 +99,18 @@ struct RadsQueryResult
}
};
/**
* \brief Class to access a RADS database and perform searches.
*
*/
class DBAccess
{
private:
SQLiteDB index_; // The object used to connect to the sqlite database.
AlgorithmPack::Input arrangementDB_; // The file object containing the domain arrangements
BSDL::DSM similarityMatrix_; // The similarity matrix to be used.
void
readSequences_(RadsQueryResult &results, const std::vector<std::string> &vectorNames);
readSequences_(std::vector<TargetSequence> &sequences, const std::vector<std::string> &vectorNames);
public:
......@@ -125,7 +127,7 @@ class DBAccess
* @param prefix The prefix of the database path.
* @param matrix The filename of the DSM to be used.
*/
DBAccess(const fs::path &prefix, const fs::path&matrix);
DBAccess(const fs::path &prefix);
/**
* @brief Opens the database
......@@ -135,14 +137,6 @@ class DBAccess
void
open(const fs::path &prefix);
/**
* @brief Set the Matrix object to be used for the alignment.
*
* @param matrix The filename of the DSM to be used.
*/
void
setMatrix(const fs::path &matrix);
/**
* @brief Searches in the database for matching sequences.
*
......
......@@ -203,7 +203,7 @@ DBCreator::readInterPro(const fs::path &interProFile, const std::string &include
string start(m[1].first, m[1].second);
string end(m[2].first, m[2].second);
string score(m[3].first, m[3].second);
da.emplace_back(domainAcc, stoul(start), stoul(end), stod(score));
da.emplace_back(domainAcc, stoul(start)-1, stoul(end)-1, stod(score));
}
}
// end of protein, store in dbContent
......
......@@ -84,9 +84,9 @@ printResult(const RadsQueryResult &results, AP::Output &outS, bool listAlignment
buf << hit.score << "\t" << hit.normalized << "\t" << seq.targetName << "\t" << seq.length << "\t";
const auto &da = seq.da;
size_t len = da.size();
buf << da[0].accession() << " " << da[0].start() << " " << da[0].end();
buf << da[0].accession() << " " << da[0].start()+1 << " " << da[0].end()+1;
for (size_t i = 1; i<len; ++i)
buf << " " << da[i].accession() << " " << da[i].start() << " " << da[i].end();
buf << " " << da[i].accession() << " " << da[i].start()+1 << " " << da[i].end()+1;
if (listAlignments)
buf << "\t" << alnNumber;
buf << "\n";
......@@ -211,7 +211,7 @@ main(int argc, char *argv[])
return EXIT_FAILURE;
}
DBAccess db(prefix, matrixName);
DBAccess db(prefix);
BSDL::DomainArrangementSet<BSDL::Domain> querySet;
//read query arrangement
......
......@@ -13,7 +13,7 @@ FUNCTION(PREPEND var prefix)
SET(${var} "${listVar}" PARENT_SCOPE)
ENDFUNCTION(PREPEND)
SET(tests_src ./unitTests/tests.cpp ../src/external/SQLiteDB.cpp ${BSDL_src})
SET(tests_src ./unitTests/tests.cpp ../src/DBAccess.cpp ../src/external/SQLiteDB.cpp ${BSDL_src})
SET(tests_exe tests)
ADD_EXECUTABLE(${tests_exe} ${tests_src})
target_link_libraries(${tests_exe}
......
# RADS version 2.3.0
# RADS Output v1
# run at Fri Jun 29 09:02:21 2018
# run at Fri Jun 29 14:25:11 2018
#
# query file: -
# database: /local/home/ckeme_01/projects/domainWorld/RADS/tests/integrationTests/annotation
......@@ -17,8 +17,8 @@ Domain arrangement: PF02543 PF16861
# score | normalized | SeqID | sequence length | domain arrangement | aln
# -------------------------------------------------------------------
200 1.00 A0A010 530 PF02543 9 62 PF16861 361 523 1
200 1.00 A0A009 530 PF02543 9 62 PF02543 103 311 PF16861 361 523 2
200 1.00 A0A010 530 PF02543 10 63 PF16861 362 524 1
200 1.00 A0A009 530 PF02543 10 63 PF02543 104 312 PF16861 362 524 2
# -------------------------------------------------------------------
......
......@@ -23,87 +23,54 @@
#include <boost/test/unit_test.hpp>
//#include "../../src/db.hpp"
#include "../../src/DBAccess.hpp"
#include "../../libs/BioSeqDataLib/src/DomainModule.hpp"
#include "../../libs/BioSeqDataLib/src/utility/Settings.hpp"
#ifndef DB_TEST_HPP_
#define DB_TEST_HPP_
#ifndef DBACCESS_TEST_HPP_
#define DBACCESS_TEST_HPP_
BOOST_AUTO_TEST_SUITE(DB_Test)
BOOST_AUTO_TEST_SUITE(DBACCESS_Test)
namespace BSDL=BioSeqDataLib;
BOOST_AUTO_TEST_CASE( cleanDA_TEST)
BOOST_AUTO_TEST_CASE( search_TEST)
{
}
/*
BOOST_AUTO_TEST_CASE( cleanDA_TEST)
{
BSDL::DomainArrangement<BSDL::Domain> da;
da.emplace_back("G3DSA:3.40.50.620", 155, 297, 2.2e-10);
da.emplace_back("G3DSA:3.40.50.620", 357, 425, 2.2e-10);
da.emplace_back("G3DSA:3.60.20.10", 47, 139, 2.5e-12);
da.emplace_back("PF00733", 166, 412, 0.00022);
da.emplace_back("SSF52402", 154, 303, 5.5e-13);
da.emplace_back("SSF52402", 358, 422, 5.5e-13);
da.emplace_back("SSF56235", 46, 143, 7.14e-14);
BSDL::DomainArrangement<BSDL::Domain> da2 = da;
std::set<std::string> querySet = {"PF13537", "SSF52402"};
cleanDA(da, querySet, 10);
BOOST_CHECK_EQUAL(da.size(), 3);
BOOST_CHECK_EQUAL(da[0].accession(), "SSF56235");
BOOST_CHECK_EQUAL(da[1].accession(), "SSF52402");
BOOST_CHECK_EQUAL(da[2].accession(), "SSF52402");
cleanDA(da2, {}, 10);
BOOST_CHECK_EQUAL(da2.size(), 2);
BOOST_CHECK_EQUAL(da2[0].accession(), "SSF56235");
BOOST_CHECK_EQUAL(da2[1].accession(), "PF00733");
}*/
DBAccess db("../tests/data/test");
BSDL::Settings settings;
settings.readSettings();
auto matrixName = settings["dsm"] / "pfam-31.dsm";
BioSeqDataLib::DSM simMat;
simMat.read(matrixName);
simMat.useNegative(true);
BSDL::AlignmentMatrix<int, BSDL::DSM> matrix(-50, -10, simMat);
RadsQueryResult results;
results.queryDA.emplace_back("PF00005",4,20,0);
db.search(matrix, false, false, 0, results);
BOOST_CHECK_EQUAL(results.targets.size(),1);
auto &sequences = results.targets[0].targetSequences;
BOOST_CHECK_EQUAL(sequences.size(), 2);
BOOST_CHECK_EQUAL(sequences[0].targetName, "A0A001");
BOOST_CHECK_EQUAL(sequences[1].targetName, "A0A002");
auto &da = sequences[1].da;
BOOST_CHECK_EQUAL(da.size(), 2);
BOOST_CHECK_EQUAL(da[0].accession(), "PF00664");
BOOST_CHECK_EQUAL(da[0].start(), 18);
BOOST_CHECK_EQUAL(da[0].end(), 278);
BOOST_CHECK_CLOSE(da[0].evalue(), 2.9e-29, 0.001);
BOOST_CHECK_EQUAL(da[1].accession(), "PF00005");
BOOST_CHECK_EQUAL(da[1].start(), 339);
BOOST_CHECK_EQUAL(da[1].end(), 488);
BOOST_CHECK_CLOSE(da[1].evalue(), 1.6e-26, 0.001);
// A0A001 591 19 275 3.7e-07 360 503 1.9e-22
//A0A002 564 18 278 2.9e-29 339 488 1.6e-26
/*
BOOST_AUTO_TEST_CASE( domainIDSearch_Test)
{
SQLiteDB db;
db.open("../tests/test.db");
std::map<std::string, std::string> acc2ids;
std::set<std::string> accs = {"PF00155"};
getDomIDs(db, accs, acc2ids);
BOOST_CHECK_EQUAL(acc2ids.size(), 1);
BOOST_CHECK_EQUAL(acc2ids.begin()->first, "PF00155");
BOOST_CHECK_EQUAL(acc2ids.begin()->second, "1");
std::map<std::string, std::string> acc2ids2;
std::set<std::string> accs2 = {"PF00155", "SSF53756"};
getDomIDs(db, accs2, acc2ids2);
BOOST_CHECK_EQUAL(acc2ids2.size(), 2);
BOOST_CHECK_EQUAL(acc2ids2.begin()->first, "PF00155");
BOOST_CHECK_EQUAL(acc2ids2.begin()->second, "1");
BOOST_CHECK_EQUAL(acc2ids2.rbegin()->first, "SSF53756");
BOOST_CHECK_EQUAL(acc2ids2.rbegin()->second, "16");
}
BOOST_AUTO_TEST_CASE( search_Test)
{
SQLiteDB db;
db.open("../tests/test.db");
std::set<std::string> da = {"PF13537", "SSF52402"};
BSDL::DomainArrangementSet<BSDL::Domain> result;
runDASearch(db, da, result, 1, false);
BOOST_CHECK_EQUAL(result.size(), 2);
BSDL::DomainArrangementSet<BSDL::Domain> result2;
runDASearch(db, da, result2, 2);
BOOST_CHECK_EQUAL(result2.size(), 1);
}*/
BOOST_AUTO_TEST_SUITE_END()
......@@ -111,4 +78,4 @@ BOOST_AUTO_TEST_SUITE_END()
#endif /* TWOVALUES_TEST_HPP_ */
#endif /* DBACCESS_TEST_HPP_ */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment