Commit 45c6d419 authored by Carsten Kemena

fixed collapse option

- query is now collapsed as well
- fixed calculation of normalized score when using collapse option
parent 3d7b4389
v. 2.3.0
- added option to display computed alignments
- added option to collapse domain repeats
- updated manual and changed to sphinx
v. 2.2.0
......
......@@ -57,6 +57,7 @@ DBAccess::search(BSDL::AlignmentMatrix<int, BSDL::DSM> &matrix, bool all, bool c
RadsQueryResult &results)
{
auto &queryDA = results.queryDA;
queryDA.collapse(true);
string query = "Select position from domain where accession in ('" + queryDA[0].accession() + "'";
size_t nDomains = queryDA.size();
for (size_t i=1; i<nDomains; ++i)
......@@ -104,11 +105,11 @@ DBAccess::search(BSDL::AlignmentMatrix<int, BSDL::DSM> &matrix, bool all, bool c
}
// calculate score for the domain arrangement
size_t targetLength = targetDA.size();
int order = targetDA.size();
if (collapse)
targetDA.collapse(true);
order -= targetDA.size();
size_t targetLength = targetDA.size();
order -= targetLength;
matrix.gotoh(queryDA, targetDA);
int score = matrix.score();
if (score < scoreThres)
......@@ -152,6 +153,7 @@ DBAccess::search(BSDL::AlignmentMatrix<int, BSDL::DSM> &matrix, bool all, bool c
{
return a.order < b.order;
});
queryDA.reconstruct();
}
results.sort();
}
......@@ -234,7 +234,15 @@ DBCreator::readAnnotationFile(const fs::path &annotationFile, const fs::path &se
// read each sequence separately
for (auto pair : domSet)
{
string seqLength = sequenceFile.empty() ? "0" : to_string(seqLengths[pair.first]);
string seqLength;
if (sequenceFile.empty())
seqLength="0";
else
{
seqLength = to_string(seqLengths[pair.first]);
if (seqLength == "0")
cerr << "WARNING! " << pair.first << " not in sequence file. Protein length will be set to 0.\n";
}
this->addDomainArrangement_(pair.second, pair.first, seqLength);
}
}
......
......@@ -121,7 +121,10 @@ main(int argc, char *argv[])
if (!seqFiles.empty())
db.readAnnotationFile(daFiles[i], seqFiles[i]);
else
{
cerr << "WARNING! No sequence file provided, protein lengths will be set to 0.\n";
db.readAnnotationFile(daFiles[i], fs::path(""));
}
}
if (!interProFile.empty())
......
......@@ -65,6 +65,16 @@ ACGDAGHLTGAGLYALAQVAGVKPEPFSVYRNGGGEARAAVLEAVEGAGLRAVPYDRSAV
AGVLAGGGVVALTQGAAELGPRALGHRSLLGSPAVPGMRERMSEKLKRREWFRPLGAVMR
DERFAGLYPGRAPSPYMLFEYRLPDGIAPEARHVNGTCRIQTLGPEEDRLYGLLAEFEEL
SGVPALINTSLNGPGKPIAHTARDVLDDFARTDVDLFVFDDLMVRGAAAR
>A0A010
MKVLSLHSAGHDTGVAYFEDGRLVFAVETERLTRVKHDHRSDVALRHVLEQECVDTDGID
LVAVSTPVRSGLLRIPDLDRAMERIGAGALHHRTVCEMLGRRVECVVVTHEVSHAALAAH
YADWEEGTVVLVNEGRGQLTRSSLFRVTGGALEWVDKDPLPWYGNGFGWTAIGYLLGFGP
SPSVAGKVMAMGGYGQPDPRIREQLLSVDPEVMNDRELAERVRADLAGRPEFAPGFETAS
QVVATFQEMFTEAVRAVLDRHVTRTDAGVGPIALGGGCALNIVANSALREEYGRDVAIPP
ACGDAGHLTGAGLYALAQVAGVKPEPFSVYRNGGGEARAAVLEAVEGAGLRAVPYDRSAV
AGVLAGGGVVALTQGAAELGPRALGHRSLLGSPAVPGMRERMSEKLKRREWFRPLGAVMR
DERFAGLYPGRAPSPYMLFEYRLPDGIAPEARHVNGTCRIQTLGPEEDRLYGLLAEFEEL
SGVPALINTSLNGPGKPIAHTARDVLDDFARTDVDLFVFDDLMVRGAAAR
>A0A009DWE1
EQNIQVAAGTIGASPSNSPLQLSVNAQGRLTTEQEFADIILKTAPDGAVTRLGDVARVEL
AASQYGLRSLLDNKQAVAIPIFQAPGANALQVSDQVRSTMKELSKDFPSSIKYDIVYDPT
......
# RADS version 2.3.0
# RADS Output v1
# run at Thu Jun 28 11:22:17 2018
# run at Thu Jun 28 17:15:52 2018
#
# query file: -
# database: /local/home/ckeme_01/projects/domainWorld/RADS/tests/integrationTests/annotation
......@@ -17,8 +17,8 @@ Domain arrangement: PF02543 PF16861
# score | normalized | SeqID | sequence length | domain arrangement | aln
# -------------------------------------------------------------------
200 0.71 A0A009 530 PF02543 9 62 PF02543 103 311 PF16861 361 523 1
200 1.00 A0A010 0 PF02543 9 62 PF16861 361 523 2
200 1.00 A0A009 530 PF02543 9 62 PF02543 103 311 PF16861 361 523 1
200 1.00 A0A010 530 PF02543 9 62 PF16861 361 523 2
# -------------------------------------------------------------------
......
# RADS version 2.3.0
# RADS Output v1
# run at Thu Jun 28 17:44:56 2018
#
# query file: -
# database: /local/home/ckeme_01/projects/domainWorld/RADS/tests/integrationTests/ip_order
# gap open penalty -50
# gap extension penalty -10
# matrix: /local/home/ckeme_01/.domainWorld/dsm/pfam-31.dsm
# all: false
# collapse: true
# ******************************************************************
# -------------------------------------------------------------------
Results for: manual entered query
Domain arrangement: PF00001 PF00001
# score | normalized | SeqID | sequence length | domain arrangement | aln
# -------------------------------------------------------------------
80 0.35 A0A000 394 PF00001 41 60 PF00002 80 100 PF00003 125 250 1
80 0.35 A0A001 394 PF00001 41 60 PF00002 80 100 PF00003 120 250 1
# -------------------------------------------------------------------
List of alignments:
# -------------------------------------------------------------------
1)
Query DA: PF00001 ******* *******
Target DA: PF00001 PF00002 PF00003
......@@ -103,4 +103,21 @@
[ $status == 0 ]
rm ip_order.db ip_order.da test-order.txt
}
\ No newline at end of file
}
# Integration test: builds a RADS database with makeRadsDB, runs a rads query
# with a repeated domain (PF00001 PF00001), and compares the written result
# file with a stored reference file.
@test "rads - query collapse" {
# database based on pfam annotation files
run ../../build/makeRadsDB -I ../data/sort-test.xml -s ../data/db_seqs.fa -o ip_order -d PFAM
[ $status == 0 ]
echo $output
# the database build must report exactly this summary
[ "$output" == $'Number of sequences included: 3\nNumber of distinct arrangements 2' ]
# presumably -c enables the collapse option and -l lists the alignments -- confirm against the rads usage text
# NOTE(review): the exit status of this rads run is not checked; the following `run` overwrites $status
run ../../build/rads -D PF00001 PF00001 -m pfam-31.dsm -d ip_order -o test-collapse.txt -c -l
# compare against the reference, dropping every line that contains '#' on both sides
run diff <(grep -v '#' test-collapse.txt) <(grep -v '#' results/test-collapse.txt)
[ $status == 0 ]
# remove the files created by this test
rm ip_order.db ip_order.da test-collapse.txt
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment