Commit 6f2b58a5 authored by Carsten Kemena's avatar Carsten Kemena

added documentation/changed to usage of AlignmentMatrix

parent dddddae3
......@@ -46,17 +46,24 @@ namespace BioSeqDataLib
/**
* \class EditSequence
* \brief Simple class to store alignment results.
*
* The edit sequence eS1 and eS2 contain the alignment positions. Gaps are denoted as one, else the number represents the index in the sequence.
*
* 0 -1 1 2 (sequence ABC) results in A-BC
*/
class EditSequence
{
public:
std::vector<long int> eS1; // stores the edit string of sequence 1
std::vector<long int> eS2; // stores the edit string of sequence 2
size_t start1; // Start of the alignment in sequence 1
size_t end1; // End of the alignment in sequence 1
size_t start2; // Start of the alignment in sequence 2
size_t end2; // End of the alignment in sequence 2
std::vector<long int> eS1; //!< Stores the edit string of sequence 1
std::vector<long int> eS2; //!< Stores the edit string of sequence 2
size_t start1; //!< Start of the alignment in sequence 1
size_t end1; //!< End of the alignment in sequence 1
size_t start2; //!< Start of the alignment in sequence 2
size_t end2; //!< End of the alignment in sequence 2
/**
* @brief Clears the EditSequence
*/
void
clear()
{
......@@ -65,8 +72,13 @@ class EditSequence
start1=end1=start2=end2=0;
}
/**
* @brief Returns the size of the alignment.
*
* @return size_t The size of the alignment.
*/
size_t
length() const
size() const
{
return eS1.size();
}
......@@ -83,7 +95,7 @@ class EditSequence
template<typename DataType, typename SimMat>
class AlignmentMatrix {
private:
enum class Algorithm {NW, SW, Gotoh, Raspodom, Unknown};
enum class Algorithm {NW, SW, Gotoh, Raspodom, Unknown}; // Class to keep track of the algorithm used
DataType gop_; // gap opening penalty
DataType gep_; // gap extension penalty
......@@ -102,6 +114,7 @@ private:
size_t seq1_length_; // length of the first sequence
size_t seq2_length_; // length of the second sequence
//**********************************************************
//* Result Variables *
//**********************************************************
......@@ -287,13 +300,9 @@ public:
DataType
score() const
{
if ((!editString_.eS1.empty()) || (algorithm_ != Algorithm::Raspodom))
return score_;
else
{
if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom))
traceback_();
return score_;
}
return score_;
}
EditSequence&
......@@ -312,7 +321,7 @@ public:
bool
isCP() const
{
if (editString_.eS1.empty())
if ((editString_.eS1.empty()) && (algorithm_ == Algorithm::Raspodom))
traceback_();
return isCP_;
}
......@@ -329,6 +338,7 @@ template<typename SeqType>
void
AlignmentMatrix<DataType, SimMat>::nw(const SeqType &seq1, const SeqType &seq2)
{
isCP_ = false;
editString_.clear();
editString_.start1 = 0;
editString_.end1 = seq1.size()-1;
......@@ -467,6 +477,7 @@ template<typename SeqType>
void
AlignmentMatrix<DataType, SimMat>::gotoh(const SeqType &seq1, const SeqType &seq2)
{
isCP_ = false;
editString_.clear();
algorithm_ = Algorithm::Gotoh;
dim1_ = seq1.size();
......@@ -574,6 +585,7 @@ template<typename DataType, typename SimMat>
void
AlignmentMatrix<DataType, SimMat>::traceback_gotoh_() const
{
isCP_ = false;
size_t i = dim1_;
size_t j = dim2_;
editString_.start1 = 0;
......@@ -745,7 +757,6 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_optimal_path_(MatchTracker
size_t stop2 = best_score_y_ % seq2_length_;
if (stop2 == 0)
stop2 = seq2_length_;
std::cout << stop1 << " " << stop2 << " -stops\n";
while ((dim1 > stop1) || (dim2 > stop2))
{
std::cout << dim1 << " " << dim2 << std::endl;
......@@ -789,7 +800,6 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_optimal_path_(MatchTracker
}
}
}
std::cout << dim1 << " " << dim2 << " - arg\n";
// reached first position of a sequence, fill remaining length with gaps
editString1.reserve(editString1.size() + dim1);
editString2.reserve(editString2.size() + dim1);
......@@ -812,9 +822,8 @@ AlignmentMatrix<DataType, SimMat>::traceback_raspodom_optimal_path_(MatchTracker
std::reverse(editString1.begin(), editString1.end());
std::reverse(editString2.begin(), editString2.end());
// std::cout << dim1 << " " << dim2 << " - arg\n";
// std::cout << matrix_[dim1][dim2].second << "\n";
if (matrix_[dim1][dim2].second == 'm')// && (matches_tl.find(std::make_pair(dim1, dim2))!= matches_tl.end()))
if (matrix_[dim1][dim2].second == 'm')
{
matches_tl.emplace(dim1, dim2);
return true;
......@@ -978,6 +987,7 @@ template<typename SeqType>
void
AlignmentMatrix<DataType, SimMat>::sw(const SeqType &seq1, const SeqType &seq2)
{
isCP_ = false;
editString_.clear();
algorithm_ = Algorithm::SW;
dim1_ = seq1.size();
......
......@@ -48,7 +48,7 @@ template <typename D>
void
insertGaps(DomainArrangementSet<D> &domSet, const std::string &editString)
{
D emptyDomain;;
D emptyDomain;
for (auto &pair : domSet)
{
auto &arrangement = pair.second;
......
......@@ -40,8 +40,30 @@ namespace BioSeqDataLib
*
* \mainpage
*
* The BioSeqDataLib has been developed to provide an easy access to biological data. Its main focus is on domain data however it contains several classes to deal with other (mainly sequence) data.
* The BioSeqDataLib (BSDL) has been developed to provide an easy access to biological data. Its main focus is on domain data however it
* contains several classes to deal with other (mainly sequence) data.
*
* ## Modules
* The BSDL currently consists of three main modules taking care of three areas of Bioinformatics.
*
* ### Sequence module
*
* The sequence module contains mainly classes and functions that are related to reading, writing and analysing sequence data. A range of
* different Input formats are currently supported.
*
* ### Domain module
*
* The domain module contains classes and functions to reading and writing of domain data. A range of
* different Input formats are currently supported.
*
* ### Phylogeny module
*
* The phylogeny module provides a class to read a phylogenetic tree and allows to traverse it in different ways.
*/
/*
# test
*/
......
......@@ -37,7 +37,7 @@
#include <utility>
// BioSeqDataLib header
#include "../align/nw_gotoh.hpp"
#include "../align/AlignmentMatrix.hpp"
#ifndef SEQSETFUNCTIONS_HPP_
#define SEQSETFUNCTIONS_HPP_
......@@ -123,12 +123,13 @@ avgId(const SequenceSetType &set, const SimilarityMatrix<float> &simMat)
{
float id=0;
size_t nSeqs = set.size();
MatrixStack<3, std::pair<float, char> > dynMat(1,1);
//MatrixStack<3, std::pair<float, char> > dynMat(1,1);
std::string editString1, editString2;
std::string::size_type len;
size_t j,k;
size_t pos1, pos2;
size_t same, total;
AlignmentMatrix<float, SimilarityMatrix<float> > dynMat(-11,-1, simMat);
for (size_t i=0; i<nSeqs; ++i)
{
const typename SequenceSetType::value_type &seq1 = set[i];
......@@ -137,27 +138,31 @@ avgId(const SequenceSetType &set, const SimilarityMatrix<float> &simMat)
editString1.clear();
editString2.clear();
const typename SequenceSetType::value_type &seq2 = set[j];
initGotohMatrix(seq1, seq2, simMat, dynMat);
runGotoh(dynMat, seq1.size(), seq2.size(), -11, -1);
gotohTraceback(dynMat, seq1.size(), seq2.size(), editString1, editString2);
len=editString1.size();
pos1=seq1.size();
pos2=seq2.size();
dynMat.gotoh(seq1, seq2);
//initGotohMatrix(seq1, seq2, simMat, dynMat);
//runGotoh(dynMat, seq1.size(), seq2.size(), -11, -1);
//gotohTraceback(dynMat, seq1.size(), seq2.size(), editString1, editString2);
const auto &result = dynMat.result();
len=result.size();
//pos1=seq1.size();
//pos2=seq2.size();
same=0;
total=0;
const auto &editString1 = result.eS1;
const auto &editString2 = result.eS2;
for (k=0; k<len; ++k)
{
if ((editString1[k]=='m') && (editString2[k]=='m'))
if ((editString1[k] != -1) && (editString2[k] != -1))
{
if (std::tolower(seq1[--pos1]) == std::tolower(seq2[--pos2]))
if (std::tolower(seq1[editString1[k]]) == std::tolower(seq2[editString2[k]]))
++same;
++total;
}
else
{
if (editString1[k]=='m')
if (editString1[k] != -1)
--pos1;
if (editString2[k]=='m')
if (editString2[k] != -1)
--pos2;
}
}
......
......@@ -57,7 +57,7 @@ BOOST_AUTO_TEST_CASE( nw_sequence_test)
std::vector<long int> expected2 {0,1,2,3,4,-1,-1,-1,-1,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54};
auto result = mat.result();
BOOST_CHECK_EQUAL(result.length(), 59);
BOOST_CHECK_EQUAL(result.size(), 59);
BOOST_CHECK_EQUAL(result.start1, 0);
BOOST_CHECK_EQUAL(result.start2, 0);
BOOST_CHECK_EQUAL(result.end1, 51);
......@@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE( gotoh_align_Test )
31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53};
auto result = mat.result();
BOOST_CHECK_EQUAL(result.length(), 59);
BOOST_CHECK_EQUAL(result.size(), 59);
BOOST_CHECK_EQUAL(result.start1, 0);
BOOST_CHECK_EQUAL(result.start2, 0);
BOOST_CHECK_EQUAL(result.end1, 51);
......@@ -206,7 +206,7 @@ BOOST_AUTO_TEST_CASE( Gotoh_alignDomain_Test2 )
expected1 = { 0, -1, 1};
expected2 = { 0, 1, 2};
result = mat.result();
BOOST_CHECK_EQUAL(result.length(), 3);
BOOST_CHECK_EQUAL(result.size(), 3);
BOOST_CHECK_EQUAL(result.start1, 0);
BOOST_CHECK_EQUAL(result.start2, 0);
BOOST_CHECK_EQUAL(result.end1, 1);
......@@ -237,7 +237,7 @@ BOOST_AUTO_TEST_CASE(sw_align_test )
std::vector<long int> expected1 { 10, 11, 12, 13, 14, 15, 16, 17, -1, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41};
std::vector<long int> expected2 { 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, -1, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
auto result = mat.result();
BOOST_CHECK_EQUAL(result.length(), 33);
BOOST_CHECK_EQUAL(result.size(), 33);
BOOST_CHECK_EQUAL(result.start1, 10);
BOOST_CHECK_EQUAL(result.start2, 11);
BOOST_CHECK_EQUAL(result.end1, 41);
......@@ -278,7 +278,7 @@ BOOST_AUTO_TEST_CASE(raspodom_align_domain_test )
BOOST_CHECK_EQUAL_COLLECTIONS(result.eS2.begin(), result.eS2.end(), expected2.begin(), expected2.end());
BOOST_CHECK_EQUAL(mat.isCP(), true);
BOOST_CHECK_EQUAL(mat.score(), 500);
BOOST_CHECK_EQUAL(result.length(), 3);
BOOST_CHECK_EQUAL(result.size(), 3);
seq1.clear();
seq2.clear();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment