Commit ccb16af0 authored by Carsten Kemena

Merge branch 'develop-ck' into 'master'

hmmscan read fix and more tests

See merge request !12
parents 7572866f f75f5326
Pipeline #620 passed with stages
in 3 minutes and 19 seconds
......@@ -108,7 +108,7 @@ template<>
// Specialization for the plain Domain type: appends to `da` domain entries built
// from `acc` and the fields in `tokens` (presumably the columns of one HMMscan
// domain-table line — confirm against the caller).
//   acc    - accession handed to the Domain constructor
//   tokens - line fields; indices 11, 12, 17 and 18 are read here
//   da     - arrangement that receives the new entries
void
DomainArrangementSet<Domain>::hmmTokens2Domain_(std::string &&acc, const std::vector<std::string> &tokens, DomainArrangement<Domain> &da)
{
// NOTE(review): this listing comes from a commit view (hunk -108,7 +108,7), so the
// tokens[11] line below is presumably the version the commit removed and the
// tokens[12] line its replacement — confirm against the repository.
da.emplace_back(acc, stoul(tokens[17])-1, stoul(tokens[18])-1, stod(tokens[11]));
// tokens[17] / tokens[18] are parsed as unsigned integers and decremented by one
// before being stored; tokens[12] is parsed as a double. In the BB20012.hmmScan
// sample line these are "6", "198" and "8.1e-44", which the unit test expects back
// as start 5, end 197 and evalue 8.1e-44.
da.emplace_back(acc, stoul(tokens[17])-1, stoul(tokens[18])-1, stod(tokens[12]));
}
template<>
......@@ -116,7 +116,7 @@ void
DomainArrangementSet<DomainExt>::hmmTokens2Domain_(std::string &&acc, const std::vector<std::string> &tokens, DomainArrangement<DomainExt> &da)
{
// Specialization for DomainExt: appends to `da` entries built from `acc` and the
// fields in `tokens` (presumably the columns of one HMMscan domain-table line).
// Constructor argument order: accession, name, seqStart, seqEnd, envStart, envEnd,
// hmmStart, hmmEnd, hmm_length, bit_score, evalue.
// All start/end fields are parsed as unsigned integers and decremented by one;
// hmm_length (tokens[2]) is stored as parsed.
// NOTE(review): this listing comes from a commit view (hunk -116,7 +116,7), so the
// first call (bit_score from tokens[7], evalue from tokens[6]) is presumably the
// version the commit removed and the second call (tokens[13], tokens[12]) its
// replacement — confirm against the repository.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[7]), std::stod(tokens[6]));
// In the BB20012.hmmScan sample line tokens[13] is "149.3" and tokens[12] is
// "8.1e-44", the bitscore and evalue the unit test expects.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[13]), std::stod(tokens[12]));
}
template<>
......@@ -124,7 +124,7 @@ void
DomainArrangementSet<PfamDomain>::hmmTokens2Domain_(std::string &&acc, const std::vector<std::string> &tokens, DomainArrangement<PfamDomain> &da)
{
// Specialization for PfamDomain: appends to `da` entries built from `acc` and the
// fields in `tokens` (presumably the columns of one HMMscan domain-table line).
// Constructor signature being filled:
//   std::string accession, std::string name, size_t seqStart, size_t seqEnd,
//   size_t envStart, size_t envEnd, size_t hmmStart, size_t hmmEnd, size_t hmm_length,
//   double bit_score, double evalue, double significance, std::string clan, std::string type
// significance, clan and type are not taken from `tokens`; they are passed as 0, "" and "".
// All start/end fields are parsed as unsigned integers and decremented by one.
// NOTE(review): this listing comes from a commit view (hunk -124,7 +124,7), so the
// first call (bit_score from tokens[7], evalue from tokens[6]) is presumably the
// version the commit removed and the second call (tokens[13], tokens[12]) its
// replacement — confirm against the repository.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[7]), std::stod(tokens[6]), 0, "", "");
// In the BB20012.hmmScan sample line tokens[13] is "149.3" and tokens[12] is
// "8.1e-44", the bitscore and evalue the unit test expects.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[13]), std::stod(tokens[12]), 0, "", "");
}
......@@ -132,7 +132,7 @@ template<>
// Specialization for SFDomain: appends to `da` entries built from `acc` and the
// fields in `tokens` (presumably the columns of one HMMscan domain-table line).
// The leading arguments follow the same token layout as the DomainExt and
// PfamDomain specializations; the final argument is always an empty string
// (its meaning is not visible here — TODO confirm against the SFDomain constructor).
void
DomainArrangementSet<SFDomain>::hmmTokens2Domain_(std::string &&acc, const std::vector<std::string> &tokens, DomainArrangement<SFDomain> &da)
{
// NOTE(review): this listing comes from a commit view (hunk -132,7 +132,7), so the
// first call (tokens[7], tokens[6]) is presumably the version the commit removed
// and the second call (tokens[13], tokens[12]) its replacement — confirm against
// the repository.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[7]), std::stod(tokens[6]), "");
// Start/end fields are parsed as unsigned integers and decremented by one;
// tokens[2] is stored as parsed; tokens[13] and tokens[12] are parsed as doubles.
da.emplace_back(acc, tokens[0], std::stoul(tokens[17])-1, std::stoul(tokens[18])-1, std::stoul(tokens[19])-1, std::stoul(tokens[20])-1, std::stoul(tokens[15])-1, std::stoul(tokens[16])-1, std::stoul(tokens[2]), std::stod(tokens[13]), std::stod(tokens[12]), "");
}
......
......@@ -27,7 +27,6 @@ BOOST_AUTO_TEST_CASE( DomainArrangement_readHmmScan_Test )
arrangementSet.read("../tests/domain/data/BB20012.hmmScan");
BOOST_CHECK_EQUAL(arrangementSet.size(), 27);
//GTP_EFTU PF00009.22 188 IF2G_METTH - 408 5.3e-44 149.9 0.2 1 1 7.1e-47 8.1e-44 149.3 0.2 3 187 6 198 4 199 0.95 Elongation factor Tu GTP binding domain
const BioSeqDataLib::DomainArrangement<BioSeqDataLib::PfamDomain> &set = arrangementSet["IF2G_METTH"];
BOOST_CHECK_EQUAL(set.size(), 17);
......@@ -41,10 +40,43 @@ BOOST_AUTO_TEST_CASE( DomainArrangement_readHmmScan_Test )
BOOST_CHECK_EQUAL(dom.hmmStart(), 2);
BOOST_CHECK_EQUAL(dom.hmmEnd(), 186);
BOOST_CHECK_EQUAL(dom.hmmLength(), 188);
BOOST_CHECK_EQUAL(dom.evalue(), 5.3e-44);
BOOST_CHECK_CLOSE(dom.bitscore(), 149.9, 0.01);
BOOST_CHECK_EQUAL(dom.evalue(), 8.1e-44);
BOOST_CHECK_CLOSE(dom.bitscore(), 149.3, 0.01);
BOOST_CHECK_EQUAL(dom.type(), "");
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::DomainExt> arrangementSet2;
arrangementSet2.read("../tests/domain/data/BB20012.hmmScan");
BOOST_CHECK_EQUAL(arrangementSet2.size(), 27);
const BioSeqDataLib::DomainArrangement<BioSeqDataLib::DomainExt> &set2 = arrangementSet2["IF2G_METTH"];
BOOST_CHECK_EQUAL(set2.size(), 17);
const BioSeqDataLib::DomainExt &dom2 = set2[1];
BOOST_CHECK_EQUAL(dom2.name(), "GTP_EFTU");
BOOST_CHECK_EQUAL(dom2.accession(), "PF00009");
BOOST_CHECK_EQUAL(dom2.start(), 5);
BOOST_CHECK_EQUAL(dom2.end(), 197);
BOOST_CHECK_EQUAL(dom2.envStart(), 3);
BOOST_CHECK_EQUAL(dom2.envEnd(), 198);
BOOST_CHECK_EQUAL(dom2.hmmStart(), 2);
BOOST_CHECK_EQUAL(dom2.hmmEnd(), 186);
BOOST_CHECK_EQUAL(dom2.hmmLength(), 188);
BOOST_CHECK_EQUAL(dom2.evalue(), 8.1e-44);
BOOST_CHECK_CLOSE(dom2.bitscore(), 149.3, 0.01);
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::Domain> arrangementSet3;
arrangementSet3.read("../tests/domain/data/BB20012.hmmScan");
BOOST_CHECK_EQUAL(arrangementSet3.size(), 27);
const BioSeqDataLib::DomainArrangement<BioSeqDataLib::Domain> &set3 = arrangementSet3["IF2G_METTH"];
BOOST_CHECK_EQUAL(set3.size(), 17);
const BioSeqDataLib::Domain &dom3 = set3[1];
BOOST_CHECK_EQUAL(dom3.accession(), "PF00009");
BOOST_CHECK_EQUAL(dom3.start(), 5);
BOOST_CHECK_EQUAL(dom3.end(), 197);
BOOST_CHECK_EQUAL(dom3.evalue(), 8.1e-44);
BioSeqDataLib::SequenceSet<BioSeqDataLib::Sequence<BioSeqDataLib::DomainArrangementWrapper<BioSeqDataLib::PfamDomain> > > seqSet;
seqSet.read("../tests/domain/data/BB20012.fa");
arrangementSet.moveTo(seqSet);
......@@ -82,6 +114,36 @@ BOOST_AUTO_TEST_CASE( DomainArrangement_readASS_Test )
BOOST_CHECK_EQUAL(dom.scopID(), "39423");
//gnl|Cobs_1.4|Cobs_00021-mRNA-1 0045850 1131-1229 2.35e-15 2 QSRDKVGVMFASVPNFTEFYSED--VNKGMECIRLLNEIIADFDELLDETAFHCIEKIKTVGATYMAASGLNPSQTDNNGDDM----EHLCKLVDYAVAMRHRLE 9.09e-05 39423 55074
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::DomainExt> arrangementSet2;
arrangementSet2.read("../tests/domain/data/test.ass");
BOOST_CHECK_EQUAL(arrangementSet2.size(), 2);
const BioSeqDataLib::DomainArrangement<BioSeqDataLib::DomainExt> &set2 = arrangementSet2["gnl|Cobs_1.4|Cobs_00021-mRNA-1"];
BOOST_CHECK_EQUAL(set2.size(), 2);
const BioSeqDataLib::DomainExt &dom2 = set2[1];
BOOST_CHECK_EQUAL(dom2.name(), "0045850");
BOOST_CHECK_EQUAL(dom2.accession(), "55074");
BOOST_CHECK_EQUAL(dom2.start(), 1130);
BOOST_CHECK_EQUAL(dom2.end(), 1228);
BOOST_CHECK_EQUAL(dom2.envEnd(), 1228);
BOOST_CHECK_EQUAL(dom2.envStart(), 1130);
BOOST_CHECK_EQUAL(dom2.hmmStart(), 1);
BOOST_CHECK_EQUAL(dom2.hmmEnd(), 0);
BOOST_CHECK_EQUAL(dom2.hmmLength(), 0);
BOOST_CHECK_EQUAL(dom2.evalue(), 2.35e-15);
BOOST_CHECK_CLOSE(dom2.bitscore(), -1, 0.01);
//gnl|Cobs_1.4|Cobs_00021-mRNA-1 0045850 1131-1229 2.35e-15 2 QSRDKVGVMFASVPNFTEFYSED--VNKGMECIRLLNEIIADFDELLDETAFHCIEKIKTVGATYMAASGLNPSQTDNNGDDM----EHLCKLVDYAVAMRHRLE 9.09e-05 39423 55074
// Read the same SUPERFAMILY assignment file into plain Domain objects and verify
// the basic fields of the second domain of the Cobs_00021 arrangement.
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::Domain> arrangementSet3;
arrangementSet3.read("../tests/domain/data/test.ass");
BOOST_CHECK_EQUAL(arrangementSet3.size(), 2);
const BioSeqDataLib::DomainArrangement<BioSeqDataLib::Domain> &set3 = arrangementSet3["gnl|Cobs_1.4|Cobs_00021-mRNA-1"];
BOOST_CHECK_EQUAL(set3.size(), 2);
// Take the domain from set3 (not set2): the checks below must exercise the Domain
// reader rather than re-test the DomainExt arrangement read earlier in this test.
const BioSeqDataLib::Domain &dom3 = set3[1];
BOOST_CHECK_EQUAL(dom3.accession(), "55074");
BOOST_CHECK_EQUAL(dom3.start(), 1130);
BOOST_CHECK_EQUAL(dom3.end(), 1228);
BOOST_CHECK_EQUAL(dom3.evalue(), 2.35e-15);
}
......@@ -428,7 +490,17 @@ BOOST_AUTO_TEST_CASE( Domain_count_Test )
}
// Checks the human-readable label that getFormatString returns for every
// DomainFileFormat value exercised here.
BOOST_AUTO_TEST_CASE( format_string_Test )
{
    using BioSeqDataLib::DomainFileFormat;
    using BioSeqDataLib::getFormatString;

    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::unknown), "unknown");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::pfam), "Pfam");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::hmmscan_domtbl), "HMMscan domtblout");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::xdom), "XDOM");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::ass), "SUPERFAMILY");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::interpro_tsv), "InterPro TSV");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::dama), "DAMA");
    BOOST_CHECK_EQUAL(getFormatString(DomainFileFormat::radiant), "RADIANT");
}
BOOST_AUTO_TEST_SUITE_END()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment