Commit da247ecf authored by Carsten Kemena's avatar Carsten Kemena

[PairwiseAligner] removed raspodom_nw, fixed calculateScore

parent e4daf46a
......@@ -261,24 +261,6 @@ public:
void
sw(const SeqType &seq1, const SeqType &seq2);
/*
* \brief Run the RASPDOM algorithm using Needleman-Wunsch
* @param seq1 First sequence
* @param seq2 Second sequence
*/
template<typename SeqType>
void
raspodom_nw(const SeqType &seq1, const SeqType &seq2)
{
algorithm_ = Algorithm::Raspodom_NW;
prepareMatrix_(seq1, seq2);
scoringScheme_.end_gop(0);
scoringScheme_.end_gep(0);
init_borders_();//(0, 0);
general_nw_();
}
/**
* \brief Run the Gotoh algorithm
......@@ -307,10 +289,23 @@ public:
{
algorithm_ = Algorithm::Raspodom_Gotoh;
prepareMatrix_(seq1, seq2);
auto egop = scoringScheme_.end_gop();
auto egep = scoringScheme_.end_gep();
scoringScheme_.end_gop(0);
scoringScheme_.end_gep(0);
init_borders_();//0, 0);
init_borders_();
scoringScheme_.end_gop(scoringScheme_.gop());
scoringScheme_.end_gep(scoringScheme_.gep());
general_gotoh_();
traceback_raspodom();
scoringScheme_.end_gop(egop);
scoringScheme_.end_gep(egep);
if (isCP_)
{
find_correct_rotation_();
}
else
score_ = 0;
}
......@@ -424,7 +419,7 @@ PairwiseAligner<ScoringScheme>::prepareMatrix_(const SeqType &seq1, const SeqTyp
seq1_length_ = seq1.size();
seq2_length_ = seq2.size();
if ((this->algorithm_ == Algorithm::Raspodom_Gotoh) || (this->algorithm_ == Algorithm::Raspodom_NW))
if (this->algorithm_ == Algorithm::Raspodom_Gotoh)
{
dim1_ = seq1.size()*2;
dim2_ = seq2.size()*2;
......@@ -570,21 +565,6 @@ PairwiseAligner<ScoringScheme>::general_nw_() noexcept
}
}
score_ = matrix[dim1_][dim2_].first;
if (this->algorithm_ == Algorithm::Raspodom_NW)
{
this->traceback_raspodom();
// calculate score
score_ = 0;
for (size_t i=0; i<this->editString_.size(); ++i)
{
if ((this->editString_[i].first == -1) || (this->editString_[i].second == -1))
score_ += gep;
else
score_ += this->scoresMat_[this->editString_[i].first+1][this->editString_[i].second+1];
}
}
}
......@@ -765,134 +745,12 @@ PairwiseAligner<ScoringScheme>::determine_quadrant(size_t i, size_t j) const
return 4;
}
template<typename ScoringScheme>
typename PairwiseAligner<ScoringScheme>::TracebackResult
PairwiseAligner<ScoringScheme>::traceback_raspodom_nw_optimal_path_(bool recordTrace) const
{
auto &matrix = matrices_[0];
bool go_further = true;
auto i = best_score_x_;
auto j = best_score_y_;
bool check = false;
// cell corresponding to tl quadrant of best cell in br
auto ctl_x = best_score_x_ - seq1_length_;
auto ctl_y = best_score_y_ - seq2_length_;
if ((best_score_x_ <= seq1_length_) || (best_score_y_ <= seq2_length_))
return TracebackResult::No_Path;
// editString preparations
std::vector<size_t> quadrants(5,0);
while(go_further)
{
if(matrix[i][j].second & VISITED)
return TracebackResult::Visited_Path;
matrix[i][j].second |= VISITED;
++quadrants[determine_quadrant(i, j)];
/* the path of the righteous CP shall pass through the same cell in the
* BR and TL quadrant */
if (i == ctl_x && j == ctl_y)
check=true;
/* exit immediatly once the path is found to be OK, that is, it passes
* through at least three quadrants */
if( ((quadrants[0] >= 1) && (quadrants[3] >= 1) ) && check && ((quadrants[1] >= 2) || (quadrants[2] >= 2) ))
{
return (quadrants[1] >= 2) ? TracebackResult::CP_Path_TR : TracebackResult::CP_Path_BL;
}
// The order of the traceback is (of course only if several are available):
// D if match score is positive
// J if match score is positive
// I if match score is positive
// D
// J
// I
if ((matrix[i][j].second & D) && (scoresMat_[i-1][j-1] > 0))
{
--i;
--j;
if (recordTrace)
{
editString_.addMatch(i % seq1_length_, j % seq2_length_);
}
}
else
{
if ((matrix[i][j].second & J) && (scoresMat_[i][j-1] > 0))
{
--j;
if (recordTrace)
{
editString_.addMatch(-1, j % seq2_length_);
}
}
else
{
if ((matrix[i][j].second & I) && (scoresMat_[i-1][j] > 0))
{
--i;
if (recordTrace)
{
editString_.addMatch(i % seq1_length_, -1);
}
}
else
{
if (matrix[i][j].second & D)
{
--i;
--j;
if (recordTrace)
{
editString_.addMatch(i % seq1_length_, j % seq2_length_);
}
}
else
{
if (matrix[i][j].second & J)
{
--j;
if (recordTrace)
{
editString_.addMatch(-1, j % seq2_length_);
}
}
else
{
--i;
if (recordTrace)
{
editString_.addMatch(i % seq1_length_, -1);
}
}
}
}
}
}
if ((i == 0) || (j == 0))
go_further = false;
}
if ( (quadrants[0] >= 1) && (quadrants[3] >= 1) )
return TracebackResult::Normal_Path;
return TracebackResult::No_Path;
}
template<typename ScoringScheme>
void
PairwiseAligner<ScoringScheme>::traceback_raspodom()
{
std::function<TracebackResult(bool)> optimal_path;
if (algorithm_ == Algorithm::Raspodom_Gotoh)
optimal_path = std::bind(&PairwiseAligner<ScoringScheme>::traceback_raspodom_gotoh_optimal_path_, this, std::placeholders::_1);
else
optimal_path = std::bind(&PairwiseAligner<ScoringScheme>::traceback_raspodom_nw_optimal_path_, this, std::placeholders::_1);
bool score_found = find_max_raspodom_score_();
isCP_ = 0;
int n_paths = 0;
......@@ -906,7 +764,7 @@ PairwiseAligner<ScoringScheme>::traceback_raspodom()
while (test)
{
TracebackResult traceback_result = optimal_path(recordTrace);
TracebackResult traceback_result = traceback_raspodom_gotoh_optimal_path_(recordTrace);
/*switch (traceback_result)
{
case TracebackResult::No_Path:
......@@ -1074,19 +932,8 @@ PairwiseAligner<ScoringScheme>::general_gotoh_() noexcept
}
}
}
score_ = matrices_[0][dim1_][dim2_].first;
if (algorithm_ == Algorithm::Raspodom_Gotoh) // when RASPODOM is used traceback has to be done immidiately to have a correct score
{
traceback_raspodom();
if (isCP_)
{
find_correct_rotation_();
}
else
score_ = 0;
}
/*
const DataType MINIMUM = (-1)*(std::numeric_limits<DataType>::max()-1000);
for (size_t x = 0; x<3; ++x)
......@@ -1133,23 +980,35 @@ PairwiseAligner<ScoringScheme>::calculateScore_() const
{
return;
}
size_t start = 0;
while (editString_[start].first == -1) ++start;
if (start == 0)
long int start = -1;
while (editString_[start+1].first == -1)
++start;
if (start == -1)
{
while (editString_[start].second == -1) ++start;
while (editString_[start+1].second == -1)
++start;
}
size_t end = editString_.size()-1;
while (editString_[end].first == -1) --end;
if (end == editString_.size()-1)
if (start != -1)
{
score_ += scoringScheme_.end_gop() + start * scoringScheme_.end_gep();
}
// std::cout << "start_score: " << score_ << std::endl;
size_t end = editString_.size();
while (editString_[end-1].first == -1) --end;
if (end == editString_.size())
{
while (editString_[end-1].second == -1) --end;
}
if (end != editString_.size())
{
while (editString_[end].second == -1) --end;
score_ += scoringScheme_.end_gop() + (editString_.size()-end-1) * scoringScheme_.end_gep();
}
//std::cout << "end_score: " << score_ << std::endl;
score_ = 0;
int last_gap = 0;
for (size_t i=start; i<=end; ++i)
for (size_t i=start+1; i<=end-1; ++i)
{
if (this->editString_[i].first == -1)
{
......@@ -1180,6 +1039,7 @@ PairwiseAligner<ScoringScheme>::calculateScore_() const
last_gap = 0;
}
}
//std::cout << "final_score: " << score_ << std::endl;
}
template<typename ScoringScheme>
......
......@@ -245,26 +245,6 @@ BOOST_AUTO_TEST_CASE(sw_align_test )
}
BOOST_AUTO_TEST_CASE(raspodom_align_sequence_test )
{
BioSeqDataLib::Sequence<> seq1("seq1", "MGHFTEEDKATITSLWGKVNVED", "", "test sequence");
BioSeqDataLib::Sequence<> seq2("seq2", "TSLWGKVNVEDMGHFTEEDKATI", "", "test sequence");
BioSeqDataLib::SimilarityMatrix<float> simMat("../tests/align/data/BLOSUM62.txt");
BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float> > scoring(std::move(simMat), -3);
BioSeqDataLib::PairwiseAligner<BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float> >> mat(scoring);
mat.raspodom_nw(seq1, seq2);
std::vector<long int> expected1 { 12, 13, 14, 15, 16, 17 ,18, 19, 20, 21, 22, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
std::vector<long int> expected2 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ,18, 19, 20, 21, 22};
auto result = mat.result();
for (size_t i=0; i<result.size(); ++i)
{
BOOST_CHECK_EQUAL(result[i].first, expected1[i]);
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
}
BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
{
BioSeqDataLib::Settings settings;
......@@ -277,9 +257,9 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 19, 20, 0.0}};
seq2 = {{"PF00003", 0, 1, 0.0}, {"PF00004", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}, {"PF00002", 19, 20, 0.0}};
BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM> scoring(std::move(simMat), -10);
BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM> scoring(std::move(simMat), -10, -1, -1, -1);
BioSeqDataLib::PairwiseAligner<BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM>> mat(scoring);
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
std::vector<long int> expected2 { 2, 3, 0, 1};
std::vector<long int> expected1 { 0, 1, 2, 3};
auto result = mat.result();
......@@ -295,9 +275,9 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}, {"PF00005", 25, 26, 0.0}};
seq2 = {{"PF00003", 0, 1, 0.0}, {"PF00004", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}, {"PF00002", 23, 26, 0.0}};
mat.raspodom_nw(seq1, seq2);
expected1 = { 4, 0, 1, 2, 3};
expected2 = { -1, 2, 3, 0, 1};
mat.raspodom_gotoh(seq1, seq2);
expected1 = { 0, 1, 2, 3, 4};
expected2 = { 2, 3, 0, 1, -1};
result = mat.result();
for (size_t i=0; i<result.size(); ++i)
{
......@@ -305,12 +285,12 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
BOOST_CHECK_EQUAL(mat.score(), 390);
BOOST_CHECK_EQUAL(mat.score(), 399);
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
seq2 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -318,7 +298,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}};
seq2 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -326,7 +306,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
seq2 = {{"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -334,7 +314,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
seq2 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -342,9 +322,9 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00055", 15, 16, 0.0}};
seq2 = {{"PF00003", 10, 11, 0.0}, {"PF00055", 15, 16, 0.0}, {"PF00004", 17, 18, 0.0}, {"PF00001", 19, 26, 0.0}, {"PF00002", 29, 296, 0.0}};
mat.raspodom_nw(seq1, seq2);
expected1 = { -1, 0, 1, 2, 3};
expected2 = { 2, 3, 4, 0, 1};
mat.raspodom_gotoh(seq1, seq2);
expected1 = { 0, 1, 2, 3, -1};
expected2 = { 3, 4, 0, 1, 2};
result = mat.result();
BOOST_CHECK_EQUAL(result.size(), 5);
for (size_t i=0; i<result.size(); ++i)
......@@ -353,12 +333,12 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
BOOST_CHECK_EQUAL(mat.score(), 390);
BOOST_CHECK_EQUAL(mat.score(), 399);
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}};
seq2 = {{"PF00002", 0, 1, 0.0}, {"PF00003", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}, {"PF00002", 19, 26, 0.0}, {"PF00003", 29, 36, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -366,7 +346,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq2 = {{"PF00001", 0, 1, 0.0}, {"PF00001", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}, {"PF00001", 19, 26, 0.0}};
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00001", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}};
mat.raspodom_nw(seq1, seq2);
mat.raspodom_gotoh(seq1, seq2);
result = mat.result();
BOOST_CHECK_EQUAL(mat.isCP(), 0);
BOOST_CHECK_EQUAL(result.size(), 0);
......@@ -419,7 +399,7 @@ BOOST_AUTO_TEST_CASE(raspodom_gotoh_align_domain_test )
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
BOOST_CHECK_EQUAL(mat.score(), 40);
BOOST_CHECK_EQUAL(mat.score(), 35);
seq1 = {{"PF00001", 0, 1, 0.0}, {"PF00002", 10, 11, 0.0}, {"PF00003", 15, 16, 0.0}, {"PF00004", 15, 16, 0.0}, {"PF00005", 25, 26, 0.0}};
seq2 = {{"PF00003", 0, 1, 0.0}, {"PF00004", 10, 11, 0.0}, {"PF00001", 15, 16, 0.0}, {"PF00002", 23, 26, 0.0}};
......@@ -433,7 +413,7 @@ BOOST_AUTO_TEST_CASE(raspodom_gotoh_align_domain_test )
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
BOOST_CHECK_EQUAL(mat.score(), 40);
BOOST_CHECK_EQUAL(mat.score(), 35);
seq1 = {{"PF13894", 0, 1, 0.0}, {"PF00096", 10, 11, 0.0}, {"PF00096", 15, 16, 0.0}, {"PF13912", 15, 16, 0.0}, {"PF13912", 25, 26, 0.0}};
seq2 = {{"PF13912", 0, 1, 0.0}, {"PF13912", 10, 11, 0.0}, {"PF00096", 15, 16, 0.0}, {"PF00096", 23, 26, 0.0}};
......@@ -447,7 +427,7 @@ BOOST_AUTO_TEST_CASE(raspodom_gotoh_align_domain_test )
BOOST_CHECK_EQUAL(result[i].second, expected2[i]);
}
BOOST_CHECK_EQUAL(mat.isCP(), 1);
BOOST_CHECK_EQUAL(mat.score(), 40);
BOOST_CHECK_EQUAL(mat.score(), 35);
seq1 = {{"PF13912", 0, 1, 0.0}, {"PF00096", 10, 11, 0.0}, {"PF00096", 12, 13, 0.0}, {"PF00096", 14, 15, 0.0}, {"PF00096", 16, 17, 0.0}};
seq2 = {{"PF07776", 0, 1, 0.0}, {"PF00096", 10, 11, 0.0}, {"PF00096", 13, 14, 0.0}, {"PF00096", 15, 16, 0.0}, {"PF00096", 17, 18, 0.0}, {"PF13912", 19, 20, 0.0}, {"PF13912", 21, 22, 0.0}};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment