Commit 9e07b28b authored by Carsten Kemena's avatar Carsten Kemena

Merge branch 'develop-ck' into 'master'

added functionalities

See merge request !18
parents cc31689a 94c5cc62
Pipeline #645 passed with stages
in 3 minutes and 45 seconds
......@@ -38,52 +38,11 @@
#include "../utility/MatrixStack.hpp"
#include "../utility/SimilarityMatrix.hpp"
#include "../utility/LineMatrix.hpp"
#include "EditSequence.hpp"
namespace BioSeqDataLib
{
/**
* \class EditSequence
* \brief Simple class to store alignment results.
*
* The edit sequence eS1 and eS2 contain the alignment positions. Gaps are denoted as one, else the number represents the index in the sequence.
*
* 0 -1 1 2 (sequence ABC) results in A-BC
*/
class EditSequence
{
public:
std::vector<long int> eS1; //!< Stores the edit string of sequence 1
std::vector<long int> eS2; //!< Stores the edit string of sequence 2
size_t start1; //!< Start of the alignment in sequence 1
size_t end1; //!< End of the alignment in sequence 1
size_t start2; //!< Start of the alignment in sequence 2
size_t end2; //!< End of the alignment in sequence 2
/**
* @brief Clears the EditSequence
*/
void
clear()
{
eS1.clear();
eS2.clear();
start1=end1=start2=end2=0;
}
/**
* @brief Returns the size of the alignment.
*
* @return size_t The size of the alignment.
*/
size_t
size() const
{
return eS1.size();
}
};
/**
* \class AlignmentMatrix
* \brief A class that handled the computation of pairwise alignments
......@@ -95,7 +54,7 @@ class EditSequence
template<typename DataType, typename SimMat>
class AlignmentMatrix {
private:
enum class Algorithm {NW, SW, Gotoh, Raspodom, Unknown}; // Class to keep track of the algorithm used
enum class Algorithm {NW, SW, Gotoh, Raspodom_NW, Unknown}; // Class to keep track of the algorithm used
DataType gop_; // gap opening penalty
DataType gep_; // gap extension penalty
......@@ -146,9 +105,9 @@ private:
/*
* \brief Calculates the traceback for the Raspodom alignment
*
void
traceback_raspodom_() const;*/
*/
// void
// traceback_raspodom_nw_() const;
/**
* \brief Calculates the traceback for the Smith-Waterman alignment
......@@ -157,13 +116,13 @@ private:
traceback_sw_() const;
using MatchTracker = std::set<std::pair<size_t, size_t> >;
/*
bool
traceback_raspodom_optimal_path_(MatchTracker &matches_tl, MatchTracker &matches_tr,
/* bool
traceback_raspodom_nw_optimal_path_(MatchTracker &matches_tl, MatchTracker &matches_tr,
MatchTracker &matches_bl, MatchTracker &matches_br) const;
std::pair<bool, bool>
traceback_raspodom_suboptimal_path_(size_t x, size_t y, MatchTracker &matches_tl, MatchTracker &matches_tr,
traceback_raspodom_nw_suboptimal_path_(size_t x, size_t y, MatchTracker &matches_tl, MatchTracker &matches_tr,
MatchTracker &matches_bl, MatchTracker &matches_br) const;
*/
......@@ -180,9 +139,9 @@ private:
case Algorithm::Gotoh:
traceback_gotoh_();
break;
//case Algorithm::Raspodom:
// traceback_raspodom_();
// break;
/*case Algorithm::Raspodom_NW:
traceback_raspodom_nw_();
break;*/
case Algorithm::SW:
traceback_sw_();
break;
......@@ -260,9 +219,9 @@ public:
*
template<typename SeqType>
void
raspodom(const SeqType &seq1, const SeqType &seq2);
raspodom_nw(const SeqType &seq1, const SeqType &seq2);
*/
*/
/**
* \brief Sets the gap extension penalites, used as well as homogenous gap costs.
* @param penalty Value of the gap costs.
......@@ -276,7 +235,7 @@ public:
/**
* @brief Returns the current gap extension penalites
*
* @return The used gap extension pentalties.
* @return The used gap extension pentalties.traceback_raspodom_
*/
DataType
gep() const
......@@ -323,8 +282,8 @@ public:
DataType
score() const
{
if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom))
traceback_();
//if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom_NW))
// traceback_();
return score_;
}
......@@ -336,18 +295,19 @@ public:
return editString_;
}
/**
* \brief Returns whether the RASPDOM algorithm has determined the two sequences to be a circular permutation.
* @return true if a circular permutation was detected, otherwise false
*/
*
bool
isCP() const
{
if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom))
if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom_NW))
traceback_();
return isCP_;
}
}*/
};
......@@ -676,16 +636,16 @@ AlignmentMatrix<DataType, SimMat>::traceback_gotoh_() const
/***************************************************
* RASPDOM - Algorithm *
* RASPODOM - Algorithm *
***************************************************/
/*
template<typename DataType, typename SimMat>
template<typename SeqType>
void
AlignmentMatrix<DataType, SimMat>::raspodom(const SeqType &seq1, const SeqType &seq2)
AlignmentMatrix<DataType, SimMat>::raspodom_nw(const SeqType &seq1, const SeqType &seq2)
{
editString_.clear();
algorithm_ = Algorithm::Raspodom;
algorithm_ = Algorithm::Raspodom_NW;
dim1_= 2 * seq1.size();
dim2_= 2 * seq2.size();
seq1_length_ = seq1.size();
......@@ -763,7 +723,7 @@ AlignmentMatrix<DataType, SimMat>::raspodom(const SeqType &seq1, const SeqType &
template<typename DataType, typename SimMat>
bool
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_optimal_path_(MatchTracker &matches_tl, MatchTracker &matches_tr,
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_nw_optimal_path_(MatchTracker &matches_tl, MatchTracker &matches_tr,
MatchTracker &matches_bl, MatchTracker &matches_br) const
{
editString_.start1 = 0;
......@@ -857,7 +817,7 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_optimal_path_(MatchTracker
template<typename DataType, typename SimMat>
std::pair<bool, bool>
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_suboptimal_path_(size_t x, size_t y, MatchTracker &matches_tl, MatchTracker &matches_tr,
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_nw_suboptimal_path_(size_t x, size_t y, MatchTracker &matches_tl, MatchTracker &matches_tr,
MatchTracker &matches_bl, MatchTracker &matches_br) const
{
matches_tl.clear();
......@@ -935,14 +895,14 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_suboptimal_path_(size_t x,
template<typename DataType, typename SimMat>
void
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_() const
AlignmentMatrix<DataType, SimMat>::traceback_raspodom_nw_() const
{
MatchTracker matches_tl, matches_tr, matches_bl, matches_br;
isCP_ = false;
short path = 0;
// check optimal path
bool potential_CP = true;
bool match = traceback_raspodom_optimal_path_(matches_tl, matches_tr, matches_bl, matches_br);
bool match = traceback_raspodom_nw_optimal_path_(matches_tl, matches_tr, matches_bl, matches_br);
// check if three quadrants are touched
std::cout << "OPTIMAL: "<< matches_br.size() << " " << matches_bl.size() << " " << matches_tl.size() << " " << matches_tr.size() <<"\n";
if ((matches_tl.size() != 0) && (matches_br.size() != 0) && ((matches_bl.size() != 0) || (matches_tr.size() != 0)))
......@@ -975,7 +935,7 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_() const
for (const auto &elem : scores)
{
potential_CP = false;
auto success = traceback_raspodom_suboptimal_path_(std::get<1>(elem), std::get<2>(elem), matches_tl, matches_tr, matches_bl, matches_br);
auto success = traceback_raspodom_nw_suboptimal_path_(std::get<1>(elem), std::get<2>(elem), matches_tl, matches_tr, matches_bl, matches_br);
std::cout << success.first << " " << success.second << std::endl;
if (success.second)
{
......@@ -999,8 +959,8 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_() const
if (isCP_ && (path_counter == 1))
isCP_ = true;
}
*/
/***************************************************
* SW - Algorithm *
***************************************************/
......
#ifndef EDITSEQUENCE_HPP
#define EDITSEQUENCE_HPP
#include <string>
#include <vector>
#include "../domain/DomainArrangement.hpp"
#include "../sequence/Sequence.hpp"
namespace BioSeqDataLib
{
/**
* \class EditSequence
* \brief Simple class to store alignment results.
*
* The edit sequence eS1 and eS2 contain the alignment positions. Gaps are denoted as one, else the number represents the index in the sequence.
*
* 0 -1 1 2 (sequence ABC) results in A-BC
*/
class EditSequence
{
public:
std::vector<long int> eS1; //!< Stores the edit string of sequence 1
std::vector<long int> eS2; //!< Stores the edit string of sequence 2
size_t start1; //!< Start of the alignment in sequence 1
size_t end1; //!< End of the alignment in sequence 1
size_t start2; //!< Start of the alignment in sequence 2
size_t end2; //!< End of the alignment in sequence 2
/**
* @brief Clears the EditSequence
*/
void
clear()
{
eS1.clear();
eS2.clear();
start1=end1=start2=end2=0;
}
/**
* @brief Returns the size of the alignment.
*
* @return size_t The size of the alignment.
*/
size_t
size() const
{
return eS1.size();
}
/**
* @brief Returns the the actual alignment of the two given domain arrangement
*
* @return std::pair<std::string, std::string> A tuple containing the two alignment strings
*/
template<typename D>
std::pair<std::string, std::string>
alnStrings(DomainArrangement<D> &da1, DomainArrangement<D> &da2)
{
std::string alnString1, alnString2;
for (size_t i = 0; i<eS1.size(); ++i)
{
if (i>0)
{
alnString1 += " ";
alnString2 += " ";
}
if (eS1[i] == -1)
{
alnString1 += std::string(da2[eS2[i]].accession().size(), '*');
alnString2 += da2[eS2[i]].accession();
}
else
{
if (eS2[i] == -1)
{
alnString1 += da1[eS1[i]].accession();
alnString2 += std::string(da1[eS1[i]].accession().size(), '*');
}
else
{
alnString1 += da1[eS1[i]].accession();
alnString2 += da2[eS2[i]].accession();
}
}
}
return make_pair(std::move(alnString1), std::move(alnString2));
}
};
} // BSDL namespace
#endif /* EDITSEQUENCE */
\ No newline at end of file
......@@ -31,6 +31,7 @@
#include <memory>
#include <string>
#include <algorithm>
#include "../utility/stringHelpers.hpp"
......@@ -252,6 +253,19 @@ public:
}
/**
* \brief Calculates the distance (gap) between two domains
* return if positive the length of the gap between two domains if negative the number of overlapping positions.
*/
int
distance_overlap(const Domain &dom)
{
auto start = std::max(dom.start(), this->start());
auto end = std::min(dom.end(), this->end());
int value = end - start + 1;
return value *-1;
}
};
......
......@@ -107,6 +107,12 @@ BOOST_AUTO_TEST_CASE( nw_domain_test)
BOOST_CHECK_EQUAL(mat.score(), 100);
BOOST_CHECK_EQUAL_COLLECTIONS(result.eS1.begin(), result.eS1.end(), expected1.begin(), expected1.end());
BOOST_CHECK_EQUAL_COLLECTIONS(result.eS2.begin(), result.eS2.end(), expected2.begin(), expected2.end());
auto alnStrings = result.alnStrings(seq1, seq2);
std::string alnString1 = "PF02458 ******* PF00545";
std::string alnString2 = "PF02458 PF00001 PF00545";
BOOST_CHECK_EQUAL(alnStrings.first, alnString1);
BOOST_CHECK_EQUAL(alnStrings.second, alnString2);
BioSeqDataLib::AlignmentMatrix<int, BioSeqDataLib::DSM> mat2(-100, simMat);
mat2.nw(seq1, seq2);
......@@ -282,7 +288,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
seq2.push_back(dom);
BioSeqDataLib::AlignmentMatrix<int, BioSeqDataLib::DSM> mat(-10, simMat);
mat.raspodom(seq1, seq2);
mat.raspodom_nw(seq1, seq2);
std::vector<long int> expected1 { 0, 1, 2};
std::vector<long int> expected2 { 2, 0, 1};
auto result = mat.result();
......@@ -307,7 +313,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
dom.accession("PF00001");
seq2.push_back(dom);
mat.raspodom(seq1, seq2);
mat.raspodom_nw(seq1, seq2);
expected1 = { 3, 0, 1, 2};
expected2 = { -1, 2, 0, 1};
result = mat.result();
......@@ -332,7 +338,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
dom.accession("PF00001");
seq2.push_back(dom);
mat.raspodom(seq1, seq2);
mat.raspodom_nw(seq1, seq2);
expected1 = { -1, 0, 1, 2};
expected2 = { 2, 3, 0, 1};
result = mat.result();
......@@ -359,7 +365,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
dom.accession("PF00003");
seq2.push_back(dom);
mat.raspodom(seq1, seq2);
mat.raspodom_nw(seq1, seq2);
expected1 = { -1, -1, 0, 1, 2};
expected2 = { 0, 1, 2, 3, 4};
result = mat.result();
......@@ -367,8 +373,8 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
BOOST_CHECK_EQUAL_COLLECTIONS(result.eS2.begin(), result.eS2.end(), expected2.begin(), expected2.end());
BOOST_CHECK_EQUAL(mat.isCP(), false);
}
*/
}*/
BOOST_AUTO_TEST_SUITE_END()
#endif /* ALIGNMENTMATRIX_TEST_HPP_ */
......@@ -314,6 +314,32 @@ BOOST_AUTO_TEST_CASE( DomainArrangement_Method_Test )
BOOST_CHECK_EQUAL(arrangementSet.size(), 0);
}
BOOST_AUTO_TEST_CASE( Function_Test )
{
BioSeqDataLib::Domain dom("A", 2, 30, 0.5);
BioSeqDataLib::Domain dom2("B", 2, 30, 0.5);
BOOST_CHECK_EQUAL(dom.distance_overlap(dom2), -29);
BioSeqDataLib::Domain dom3("A", 10, 30, 0.5);
BioSeqDataLib::Domain dom4("B", 35, 50, 0.5);
BOOST_CHECK_EQUAL(dom3.distance_overlap(dom4), 4);
BOOST_CHECK_EQUAL(dom4.distance_overlap(dom3), 4);
BioSeqDataLib::Domain dom5("A", 10, 30, 0.5);
BioSeqDataLib::Domain dom6("B", 25, 50, 0.5);
BOOST_CHECK_EQUAL(dom5.distance_overlap(dom6), -6);
BOOST_CHECK_EQUAL(dom6.distance_overlap(dom5), -6);
BioSeqDataLib::Domain dom7("A", 10, 30, 0.5);
BioSeqDataLib::Domain dom8("B", 31, 50, 0.5);
BOOST_CHECK_EQUAL(dom7.distance_overlap(dom8), 0);
BOOST_CHECK_EQUAL(dom8.distance_overlap(dom7), 0);
}
BOOST_AUTO_TEST_SUITE_END()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment