Commit 858df53d authored by Carsten Kemena's avatar Carsten Kemena

[PairwiseAligner] implemented terminal gap costs

parent 09497c0b
......@@ -152,7 +152,7 @@ private:
find_max_raspodom_score_() const;
void
init_borders_(DataType gop, DataType gep);
init_borders_();
size_t
determine_quadrant(size_t i, size_t j) const;
......@@ -244,7 +244,7 @@ public:
{
algorithm_ = Algorithm::NW;
prepareMatrix_(seq1, seq2);
init_borders_(0, scoringScheme_.gep());
init_borders_();//0, scoringScheme_.gep());
general_nw_();
}
......@@ -268,7 +268,9 @@ public:
{
algorithm_ = Algorithm::Raspodom_NW;
prepareMatrix_(seq1, seq2);
init_borders_(0, 0);
scoringScheme_.end_gop(0);
scoringScheme_.end_gep(0);
init_borders_();//(0, 0);
general_nw_();
}
......@@ -285,7 +287,7 @@ public:
{
algorithm_ = Algorithm::Gotoh;
prepareMatrix_(seq1, seq2);
init_borders_(0, scoringScheme_.gep());
init_borders_();//0, scoringScheme_.gep());
general_gotoh_();
}
......@@ -301,7 +303,9 @@ public:
{
algorithm_ = Algorithm::Raspodom_Gotoh;
prepareMatrix_(seq1, seq2);
init_borders_(0, 0);
scoringScheme_.end_gop(0);
scoringScheme_.end_gep(0);
init_borders_();//0, 0);
general_gotoh_();
}
......@@ -450,11 +454,11 @@ PairwiseAligner<ScoringScheme>::prepareMatrix_(const SeqType &seq1, const SeqTyp
template<typename ScoringScheme>
void
PairwiseAligner<ScoringScheme>::init_borders_(DataType gop, DataType gep)
PairwiseAligner<ScoringScheme>::init_borders_()
{
if ((this->algorithm_ == Algorithm::Gotoh) || (this->algorithm_ == Algorithm::Raspodom_Gotoh))
{
const DataType MINIMUM = (-1)*(std::numeric_limits<DataType>::max()-1000);
const DataType MINIMUM = (-1)*(std::numeric_limits<DataType>::max()-100000);
for (size_t k=1; k<3; ++k)
{
......@@ -473,27 +477,26 @@ PairwiseAligner<ScoringScheme>::init_borders_(DataType gop, DataType gep)
mat[i][0].second = O;
}
}
gep = 0;
}
auto &mat = this->matrices_[0];
mat[0][0].first = 0;
mat[0][0].second = O;
if (this->algorithm_ != Algorithm::SW)
{
//if (this->algorithm_ != Algorithm::SW)
//{
for (size_t j=1; j<=dim2_; ++j)
{
mat[0][j].first = gop + j * gep;
mat[0][j].first = scoringScheme_.end_gop() + (j-1) * scoringScheme_.end_gep();
mat[0][j].second = J;
}
for (size_t i=1; i<=dim1_; ++i)
{
mat[i][0].first = gop + i * gep;
mat[i][0].first = scoringScheme_.end_gop() + (i-1) * scoringScheme_.end_gep();
mat[i][0].second = I;
}
}
else
// }
/* else
{
for (size_t j=1; j<=dim2_; ++j)
......@@ -507,7 +510,7 @@ PairwiseAligner<ScoringScheme>::init_borders_(DataType gop, DataType gep)
mat[i][0].first = 0;
mat[i][0].second = O;
}
}
}*/
}
......@@ -980,17 +983,17 @@ PairwiseAligner<ScoringScheme>::general_gotoh_() noexcept
{
auto gep = scoringScheme_.gep();
auto gop = scoringScheme_.gop();
DataType end_gop = 0;
auto end_gop = scoringScheme_.end_gop();
auto end_gep = scoringScheme_.end_gep();
Matrix<std::pair<DataType, int> > &matM = matrices_[0];
Matrix<std::pair<DataType, int> > &matI = matrices_[1];
Matrix<std::pair<DataType, int> > &matJ = matrices_[2];
DataType use_gop;
DataType use_gop, use_gep;
DataType match_score;
if (algorithm_ == Algorithm::Raspodom_Gotoh)
end_gop = gop;
//if (algorithm_ == Algorithm::Raspodom_Gotoh)
// end_gop = gop;
// calculate matrix values
for (size_t i=1; i<=dim1_; ++i)
{
......@@ -1008,32 +1011,34 @@ PairwiseAligner<ScoringScheme>::general_gotoh_() noexcept
// calculate value of cell I
use_gop = (j==(dim2_)) ? end_gop : gop;
if ((vecPrevI[j].first + gep) > (vecPrevM[j].first + use_gop))
use_gep = (j==(dim2_)) ? end_gep : gep;
if ((vecPrevI[j].first + use_gep) > (vecPrevM[j].first + use_gop))
{
currentCellI.second = I;
currentCellI.first = vecPrevI[j].first + gep;
currentCellI.first = vecPrevI[j].first + use_gep;
}
else
{
currentCellI.second = D;
currentCellI.first = vecPrevM[j].first + use_gop;
}
if ((vecPrevI[j].first + gep) == (vecPrevM[j].first + use_gop))
if ((vecPrevI[j].first + use_gep) == (vecPrevM[j].first + use_gop))
currentCellI.second = I|D;
// calculate value of cell J
use_gop = (i==(dim1_)) ? end_gop : gop;
if ((vecJ[j-1].first + gep) > (vecM[j-1].first + use_gop))
use_gep = (i==(dim1_)) ? end_gep : gep;
if ((vecJ[j-1].first + use_gep) > (vecM[j-1].first + use_gop))
{
currentCellJ.second = J;
currentCellJ.first = vecJ[j-1].first + gep;
currentCellJ.first = vecJ[j-1].first + use_gep;
}
else
{
currentCellJ.second = D;
currentCellJ.first = vecM[j-1].first + use_gop;
}
if ((vecJ[j-1].first + gep) == (vecM[j-1].first + use_gop))
if ((vecJ[j-1].first + use_gep) == (vecM[j-1].first + use_gop))
currentCellJ.second = J|D;
// calculate value of cell M
......@@ -1343,7 +1348,9 @@ PairwiseAligner<ScoringScheme>::sw(const SeqType &seq1, const SeqType &seq2)
{
algorithm_ = Algorithm::SW;
prepareMatrix_(seq1, seq2);
init_borders_(0, 0);
scoringScheme_.end_gop(0);
scoringScheme_.end_gep(0);
init_borders_();
auto &matrix = matrices_[0];
for (size_t i=1; i <= dim1_; ++i)
......
......@@ -18,20 +18,28 @@ class ScoringScheme {
SimMat simMat_;
DataType gop_;
DataType gep_;
DataType end_gop_;
DataType end_gep_;
public:
using value_type = DataType;
ScoringScheme(SimMat &&simMat, DataType gep) : ScoringScheme(std::move(simMat), 0, gep)
ScoringScheme(SimMat &&simMat, DataType gep) : ScoringScheme(std::move(simMat), 0, gep, 0, gep)
{}
ScoringScheme(const SimMat &simMat, DataType gep) : ScoringScheme(simMat, 0, gep)
ScoringScheme(const SimMat &simMat, DataType gep) : ScoringScheme(simMat, 0, gep, 0, gep)
{}
ScoringScheme(SimMat &&simMat, DataType gop, DataType gep) : simMat_(simMat), gop_(gop), gep_(gep)
ScoringScheme(SimMat &&simMat, DataType gop, DataType gep) : simMat_(simMat), gop_(gop), gep_(gep), end_gop_(gop), end_gep_(gep)
{}
ScoringScheme(const SimMat &simMat, DataType gop, DataType gep) : simMat_(simMat), gop_(gop), gep_(gep)
ScoringScheme(const SimMat &simMat, DataType gop, DataType gep) : simMat_(simMat), gop_(gop), gep_(gep), end_gop_(gop), end_gep_(gep)
{}
ScoringScheme(SimMat &&simMat, DataType gop, DataType gep, DataType end_gop, DataType end_gep) : simMat_(simMat), gop_(gop), gep_(gep), end_gop_(end_gop), end_gep_(end_gep)
{}
ScoringScheme(const SimMat &simMat, DataType gop, DataType gep, DataType end_gop, DataType end_gep) : simMat_(simMat), gop_(gop), gep_(gep), end_gop_(end_gop), end_gep_(end_gep)
{}
ScoringScheme() = default;
......@@ -59,19 +67,43 @@ public:
}
DataType
gep() const
end_gop()
{
return end_gop_;
}
DataType
end_gep()
{
return end_gep_;
}
DataType
gep() const
{
return gep_;
}
void
end_gep(DataType end_gep)
{
end_gep_ = end_gep;
}
void
end_gop(DataType end_gop)
{
end_gep_ = end_gop;
}
void
gep(DataType gep)
{
gep_ = gep;
}
};
......
......@@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE( Gotoh_alignDomain_Test2 )
seq1.push_back(dom);
seq2.push_back(dom);
BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM> scoring(std::move(simMat), -50,-10);
BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM> scoring(std::move(simMat), -50,-10, 0, 0);
BioSeqDataLib::PairwiseAligner<BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::DSM>> mat(scoring);
mat.gotoh(seq1, seq2);
......@@ -456,7 +456,6 @@ BOOST_AUTO_TEST_CASE(raspodom_gotoh_align_domain_test )
seq1 = {{"A", 0, 1, 0.0}, {"B", 10, 11, 0.0}, {"B", 12, 13, 0.0}, {"B", 14, 15, 0.0}, {"B", 16, 17, 0.0}, {"C", 14, 15, 0.0}, {"C", 16, 17, 0.0}};
seq2 = {{"A", 0, 1, 0.0}, {"C", 10, 11, 0.0}, {"C", 13, 14, 0.0}, {"B", 15, 16, 0.0}, {"B", 17, 18, 0.0}, {"B", 19, 20, 0.0}, {"B", 21, 22, 0.0}};
//scoring.gop(0);
mat.raspodom_gotoh(seq1, seq2);
BOOST_CHECK_EQUAL(mat.isCP(), 1);
......
......@@ -26,7 +26,7 @@
struct MatrixFixture
{
MatrixFixture() : mat(BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::IdentityMatrix>(BioSeqDataLib::IdentityMatrix(10, -1), -5, -5))
MatrixFixture() : mat(BioSeqDataLib::ScoringScheme<int, BioSeqDataLib::IdentityMatrix>(BioSeqDataLib::IdentityMatrix(10, -1), -5, -5, -1, -1))
{
BOOST_TEST_MESSAGE("setup fixture");
}
......@@ -98,11 +98,29 @@ struct MatrixFixture
BOOST_FIXTURE_TEST_SUITE(s, MatrixFixture)
BOOST_AUTO_TEST_CASE( gotoh_protein_sequence_Test1 )
{
BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float>> scoring2(BioSeqDataLib::SimilarityMatrix<float>
("../tests/align/data/BLOSUM62.txt"), -10, -0.5, -1.0, -0.5);
//end-gap free alignment. score schecked with EMBOSS needle
BioSeqDataLib::Sequence<> seq1("seq1", "LE", "", "test sequence");
BioSeqDataLib::Sequence<> seq2("seq2", "LEVTGKEPLP", "", "test sequence");
BioSeqDataLib::PairwiseAligner<BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float>>> mat2(scoring2);
mat2.gotoh(seq1, seq2);
std::vector<long int> expected1 {0,1,-1,-1,-1,-1,-1,-1,-1,-1};
std::vector<long int> expected2 {0,1, 2, 3, 4, 5, 6, 7, 8, 9};
auto result = mat2.result();
checkAlnOutput(mat2, expected1, expected2, 4.5, 0);
}
BOOST_AUTO_TEST_CASE( gotoh_protein_sequence_Test )
{
BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float>> scoring2(BioSeqDataLib::SimilarityMatrix<float>("../tests/align/data/BLOSUM62.txt"), -10, -0.5);
BioSeqDataLib::ScoringScheme<float, BioSeqDataLib::SimilarityMatrix<float>> scoring2(BioSeqDataLib::SimilarityMatrix<float>
("../tests/align/data/BLOSUM62.txt"), -10, -0.5, -1.0, -0.5);
//end-gap free alignment. score schecked with EMBOSS needle
BioSeqDataLib::Sequence<> seq1("seq1", "LMLDSGSEPKLIAEPLPPQGPYELSDETLQAPVLNDEGTEAVFELLSNAVEV", "", "test sequence");
BioSeqDataLib::Sequence<> seq2("seq2", "LLDSKLIAEPLPPQGPYELSDETLQAPVLNDEGTEAVFELLSNAVEVTGKEPLP", "", "test sequence");
......@@ -115,7 +133,7 @@ BOOST_AUTO_TEST_CASE( gotoh_protein_sequence_Test )
31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53};
auto result = mat2.result();
checkAlnOutput(mat2, expected1, expected2, 220.5, 0);
checkAlnOutput(mat2, expected1, expected2, 215.5, 0);
}
......@@ -134,8 +152,7 @@ BOOST_AUTO_TEST_CASE(simple_gotoh)
mat.gotoh(seq1, seq2);
std::vector<long int> expected1 { 0, 1, 2, 3, -1, -1};
std::vector<long int> expected2 { -1, -1, 0, 1, 2, 3};
checkAlnOutput(mat, expected1, expected2, 20, 0);
checkAlnOutput(mat, expected1, expected2, 16, 0);
}
BOOST_AUTO_TEST_CASE(simple_CP)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment