Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
BioSeqDataLib
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
1
Issues
1
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
domainWorld
BioSeqDataLib
Commits
858df53d
Commit
858df53d
authored
Jul 23, 2019
by
Carsten Kemena
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
[PairwiseAligner] implemented terminal gap costs
parent
09497c0b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
95 additions
and
40 deletions
+95
-40
src/align/PairwiseAligner.hpp
src/align/PairwiseAligner.hpp
+34
-27
src/utility/ScoringScheme.hpp
src/utility/ScoringScheme.hpp
+38
-6
tests/align/PairwiseAligner_Test.hpp
tests/align/PairwiseAligner_Test.hpp
+1
-2
tests/align/PairwiseAligner_TestGotoh.hpp
tests/align/PairwiseAligner_TestGotoh.hpp
+22
-5
No files found.
src/align/PairwiseAligner.hpp
View file @
858df53d
...
...
@@ -152,7 +152,7 @@ private:
find_max_raspodom_score_
()
const
;
void
init_borders_
(
DataType
gop
,
DataType
gep
);
init_borders_
();
size_t
determine_quadrant
(
size_t
i
,
size_t
j
)
const
;
...
...
@@ -244,7 +244,7 @@ public:
{
algorithm_
=
Algorithm
::
NW
;
prepareMatrix_
(
seq1
,
seq2
);
init_borders_
(
0
,
scoringScheme_
.
gep
());
init_borders_
(
);
//
0, scoringScheme_.gep());
general_nw_
();
}
...
...
@@ -268,7 +268,9 @@ public:
{
algorithm_
=
Algorithm
::
Raspodom_NW
;
prepareMatrix_
(
seq1
,
seq2
);
init_borders_
(
0
,
0
);
scoringScheme_
.
end_gop
(
0
);
scoringScheme_
.
end_gep
(
0
);
init_borders_
();
//(0, 0);
general_nw_
();
}
...
...
@@ -285,7 +287,7 @@ public:
{
algorithm_
=
Algorithm
::
Gotoh
;
prepareMatrix_
(
seq1
,
seq2
);
init_borders_
(
0
,
scoringScheme_
.
gep
());
init_borders_
(
);
//
0, scoringScheme_.gep());
general_gotoh_
();
}
...
...
@@ -301,7 +303,9 @@ public:
{
algorithm_
=
Algorithm
::
Raspodom_Gotoh
;
prepareMatrix_
(
seq1
,
seq2
);
init_borders_
(
0
,
0
);
scoringScheme_
.
end_gop
(
0
);
scoringScheme_
.
end_gep
(
0
);
init_borders_
();
//0, 0);
general_gotoh_
();
}
...
...
@@ -450,11 +454,11 @@ PairwiseAligner<ScoringScheme>::prepareMatrix_(const SeqType &seq1, const SeqTyp
template
<
typename
ScoringScheme
>
void
PairwiseAligner
<
ScoringScheme
>::
init_borders_
(
DataType
gop
,
DataType
gep
)
PairwiseAligner
<
ScoringScheme
>::
init_borders_
()
{
if
((
this
->
algorithm_
==
Algorithm
::
Gotoh
)
||
(
this
->
algorithm_
==
Algorithm
::
Raspodom_Gotoh
))
{
const
DataType
MINIMUM
=
(
-
1
)
*
(
std
::
numeric_limits
<
DataType
>::
max
()
-
1000
);
const
DataType
MINIMUM
=
(
-
1
)
*
(
std
::
numeric_limits
<
DataType
>::
max
()
-
1000
00
);
for
(
size_t
k
=
1
;
k
<
3
;
++
k
)
{
...
...
@@ -473,27 +477,26 @@ PairwiseAligner<ScoringScheme>::init_borders_(DataType gop, DataType gep)
mat
[
i
][
0
].
second
=
O
;
}
}
gep
=
0
;
}
auto
&
mat
=
this
->
matrices_
[
0
];
mat
[
0
][
0
].
first
=
0
;
mat
[
0
][
0
].
second
=
O
;
if
(
this
->
algorithm_
!=
Algorithm
::
SW
)
{
//
if (this->algorithm_ != Algorithm::SW)
//
{
for
(
size_t
j
=
1
;
j
<=
dim2_
;
++
j
)
{
mat
[
0
][
j
].
first
=
gop
+
j
*
gep
;
mat
[
0
][
j
].
first
=
scoringScheme_
.
end_gop
()
+
(
j
-
1
)
*
scoringScheme_
.
end_gep
()
;
mat
[
0
][
j
].
second
=
J
;
}
for
(
size_t
i
=
1
;
i
<=
dim1_
;
++
i
)
{
mat
[
i
][
0
].
first
=
gop
+
i
*
gep
;
mat
[
i
][
0
].
first
=
scoringScheme_
.
end_gop
()
+
(
i
-
1
)
*
scoringScheme_
.
end_gep
()
;
mat
[
i
][
0
].
second
=
I
;
}
}
else
//
}
/*
else
{
for (size_t j=1; j<=dim2_; ++j)
...
...
@@ -507,7 +510,7 @@ PairwiseAligner<ScoringScheme>::init_borders_(DataType gop, DataType gep)
mat[i][0].first = 0;
mat[i][0].second = O;
}
}
}
*/
}
...
...
@@ -980,17 +983,17 @@ PairwiseAligner<ScoringScheme>::general_gotoh_() noexcept
{
auto
gep
=
scoringScheme_
.
gep
();
auto
gop
=
scoringScheme_
.
gop
();
DataType
end_gop
=
0
;
auto
end_gop
=
scoringScheme_
.
end_gop
()
;
auto
end_gep
=
scoringScheme_
.
end_gep
();
Matrix
<
std
::
pair
<
DataType
,
int
>
>
&
matM
=
matrices_
[
0
];
Matrix
<
std
::
pair
<
DataType
,
int
>
>
&
matI
=
matrices_
[
1
];
Matrix
<
std
::
pair
<
DataType
,
int
>
>
&
matJ
=
matrices_
[
2
];
DataType
use_gop
;
DataType
use_gop
,
use_gep
;
DataType
match_score
;
if
(
algorithm_
==
Algorithm
::
Raspodom_Gotoh
)
end_gop
=
gop
;
//
if (algorithm_ == Algorithm::Raspodom_Gotoh)
//
end_gop = gop;
// calculate matrix values
for
(
size_t
i
=
1
;
i
<=
dim1_
;
++
i
)
{
...
...
@@ -1008,32 +1011,34 @@ PairwiseAligner<ScoringScheme>::general_gotoh_() noexcept
// calculate value of cell I
use_gop
=
(
j
==
(
dim2_
))
?
end_gop
:
gop
;
if
((
vecPrevI
[
j
].
first
+
gep
)
>
(
vecPrevM
[
j
].
first
+
use_gop
))
use_gep
=
(
j
==
(
dim2_
))
?
end_gep
:
gep
;
if
((
vecPrevI
[
j
].
first
+
use_gep
)
>
(
vecPrevM
[
j
].
first
+
use_gop
))
{
currentCellI
.
second
=
I
;
currentCellI
.
first
=
vecPrevI
[
j
].
first
+
gep
;
currentCellI
.
first
=
vecPrevI
[
j
].
first
+
use_
gep
;
}
else
{
currentCellI
.
second
=
D
;
currentCellI
.
first
=
vecPrevM
[
j
].
first
+
use_gop
;
}
if
((
vecPrevI
[
j
].
first
+
gep
)
==
(
vecPrevM
[
j
].
first
+
use_gop
))
if
((
vecPrevI
[
j
].
first
+
use_
gep
)
==
(
vecPrevM
[
j
].
first
+
use_gop
))
currentCellI
.
second
=
I
|
D
;
// calculate value of cell J
use_gop
=
(
i
==
(
dim1_
))
?
end_gop
:
gop
;
if
((
vecJ
[
j
-
1
].
first
+
gep
)
>
(
vecM
[
j
-
1
].
first
+
use_gop
))
use_gep
=
(
i
==
(
dim1_
))
?
end_gep
:
gep
;
if
((
vecJ
[
j
-
1
].
first
+
use_gep
)
>
(
vecM
[
j
-
1
].
first
+
use_gop
))
{
currentCellJ
.
second
=
J
;
currentCellJ
.
first
=
vecJ
[
j
-
1
].
first
+
gep
;
currentCellJ
.
first
=
vecJ
[
j
-
1
].
first
+
use_
gep
;
}
else
{
currentCellJ
.
second
=
D
;
currentCellJ
.
first
=
vecM
[
j
-
1
].
first
+
use_gop
;
}
if
((
vecJ
[
j
-
1
].
first
+
gep
)
==
(
vecM
[
j
-
1
].
first
+
use_gop
))
if
((
vecJ
[
j
-
1
].
first
+
use_
gep
)
==
(
vecM
[
j
-
1
].
first
+
use_gop
))
currentCellJ
.
second
=
J
|
D
;
// calculate value of cell M
...
...
@@ -1343,7 +1348,9 @@ PairwiseAligner<ScoringScheme>::sw(const SeqType &seq1, const SeqType &seq2)
{
algorithm_
=
Algorithm
::
SW
;
prepareMatrix_
(
seq1
,
seq2
);
init_borders_
(
0
,
0
);
scoringScheme_
.
end_gop
(
0
);
scoringScheme_
.
end_gep
(
0
);
init_borders_
();
auto
&
matrix
=
matrices_
[
0
];
for
(
size_t
i
=
1
;
i
<=
dim1_
;
++
i
)
...
...
src/utility/ScoringScheme.hpp
View file @
858df53d
...
...
@@ -18,20 +18,28 @@ class ScoringScheme {
SimMat
simMat_
;
DataType
gop_
;
DataType
gep_
;
DataType
end_gop_
;
DataType
end_gep_
;
public:
using
value_type
=
DataType
;
ScoringScheme
(
SimMat
&&
simMat
,
DataType
gep
)
:
ScoringScheme
(
std
::
move
(
simMat
),
0
,
gep
)
ScoringScheme
(
SimMat
&&
simMat
,
DataType
gep
)
:
ScoringScheme
(
std
::
move
(
simMat
),
0
,
gep
,
0
,
gep
)
{}
ScoringScheme
(
const
SimMat
&
simMat
,
DataType
gep
)
:
ScoringScheme
(
simMat
,
0
,
gep
)
ScoringScheme
(
const
SimMat
&
simMat
,
DataType
gep
)
:
ScoringScheme
(
simMat
,
0
,
gep
,
0
,
gep
)
{}
ScoringScheme
(
SimMat
&&
simMat
,
DataType
gop
,
DataType
gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
)
ScoringScheme
(
SimMat
&&
simMat
,
DataType
gop
,
DataType
gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
)
,
end_gop_
(
gop
),
end_gep_
(
gep
)
{}
ScoringScheme
(
const
SimMat
&
simMat
,
DataType
gop
,
DataType
gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
)
ScoringScheme
(
const
SimMat
&
simMat
,
DataType
gop
,
DataType
gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
),
end_gop_
(
gop
),
end_gep_
(
gep
)
{}
ScoringScheme
(
SimMat
&&
simMat
,
DataType
gop
,
DataType
gep
,
DataType
end_gop
,
DataType
end_gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
),
end_gop_
(
end_gop
),
end_gep_
(
end_gep
)
{}
ScoringScheme
(
const
SimMat
&
simMat
,
DataType
gop
,
DataType
gep
,
DataType
end_gop
,
DataType
end_gep
)
:
simMat_
(
simMat
),
gop_
(
gop
),
gep_
(
gep
),
end_gop_
(
end_gop
),
end_gep_
(
end_gep
)
{}
ScoringScheme
()
=
default
;
...
...
@@ -59,19 +67,43 @@ public:
}
DataType
gep
()
const
end_gop
()
{
return
end_gop_
;
}
DataType
end_gep
()
{
return
end_gep_
;
}
DataType
gep
()
const
{
return
gep_
;
}
void
end_gep
(
DataType
end_gep
)
{
end_gep_
=
end_gep
;
}
void
end_gop
(
DataType
end_gop
)
{
end_gep_
=
end_gop
;
}
void
gep
(
DataType
gep
)
{
gep_
=
gep
;
}
};
...
...
tests/align/PairwiseAligner_Test.hpp
View file @
858df53d
...
...
@@ -140,7 +140,7 @@ BOOST_AUTO_TEST_CASE( Gotoh_alignDomain_Test2 )
seq1
.
push_back
(
dom
);
seq2
.
push_back
(
dom
);
BioSeqDataLib
::
ScoringScheme
<
int
,
BioSeqDataLib
::
DSM
>
scoring
(
std
::
move
(
simMat
),
-
50
,
-
10
);
BioSeqDataLib
::
ScoringScheme
<
int
,
BioSeqDataLib
::
DSM
>
scoring
(
std
::
move
(
simMat
),
-
50
,
-
10
,
0
,
0
);
BioSeqDataLib
::
PairwiseAligner
<
BioSeqDataLib
::
ScoringScheme
<
int
,
BioSeqDataLib
::
DSM
>>
mat
(
scoring
);
mat
.
gotoh
(
seq1
,
seq2
);
...
...
@@ -456,7 +456,6 @@ BOOST_AUTO_TEST_CASE(raspodom_gotoh_align_domain_test )
seq1
=
{{
"A"
,
0
,
1
,
0.0
},
{
"B"
,
10
,
11
,
0.0
},
{
"B"
,
12
,
13
,
0.0
},
{
"B"
,
14
,
15
,
0.0
},
{
"B"
,
16
,
17
,
0.0
},
{
"C"
,
14
,
15
,
0.0
},
{
"C"
,
16
,
17
,
0.0
}};
seq2
=
{{
"A"
,
0
,
1
,
0.0
},
{
"C"
,
10
,
11
,
0.0
},
{
"C"
,
13
,
14
,
0.0
},
{
"B"
,
15
,
16
,
0.0
},
{
"B"
,
17
,
18
,
0.0
},
{
"B"
,
19
,
20
,
0.0
},
{
"B"
,
21
,
22
,
0.0
}};
//scoring.gop(0);
mat
.
raspodom_gotoh
(
seq1
,
seq2
);
BOOST_CHECK_EQUAL
(
mat
.
isCP
(),
1
);
...
...
tests/align/PairwiseAligner_TestGotoh.hpp
View file @
858df53d
...
...
@@ -26,7 +26,7 @@
struct
MatrixFixture
{
MatrixFixture
()
:
mat
(
BioSeqDataLib
::
ScoringScheme
<
int
,
BioSeqDataLib
::
IdentityMatrix
>
(
BioSeqDataLib
::
IdentityMatrix
(
10
,
-
1
),
-
5
,
-
5
))
MatrixFixture
()
:
mat
(
BioSeqDataLib
::
ScoringScheme
<
int
,
BioSeqDataLib
::
IdentityMatrix
>
(
BioSeqDataLib
::
IdentityMatrix
(
10
,
-
1
),
-
5
,
-
5
,
-
1
,
-
1
))
{
BOOST_TEST_MESSAGE
(
"setup fixture"
);
}
...
...
@@ -98,11 +98,29 @@ struct MatrixFixture
BOOST_FIXTURE_TEST_SUITE
(
s
,
MatrixFixture
)
BOOST_AUTO_TEST_CASE
(
gotoh_protein_sequence_Test1
)
{
BioSeqDataLib
::
ScoringScheme
<
float
,
BioSeqDataLib
::
SimilarityMatrix
<
float
>>
scoring2
(
BioSeqDataLib
::
SimilarityMatrix
<
float
>
(
"../tests/align/data/BLOSUM62.txt"
),
-
10
,
-
0.5
,
-
1.0
,
-
0.5
);
//end-gap free alignment. score schecked with EMBOSS needle
BioSeqDataLib
::
Sequence
<>
seq1
(
"seq1"
,
"LE"
,
""
,
"test sequence"
);
BioSeqDataLib
::
Sequence
<>
seq2
(
"seq2"
,
"LEVTGKEPLP"
,
""
,
"test sequence"
);
BioSeqDataLib
::
PairwiseAligner
<
BioSeqDataLib
::
ScoringScheme
<
float
,
BioSeqDataLib
::
SimilarityMatrix
<
float
>>>
mat2
(
scoring2
);
mat2
.
gotoh
(
seq1
,
seq2
);
std
::
vector
<
long
int
>
expected1
{
0
,
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
};
std
::
vector
<
long
int
>
expected2
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
};
auto
result
=
mat2
.
result
();
checkAlnOutput
(
mat2
,
expected1
,
expected2
,
4.5
,
0
);
}
BOOST_AUTO_TEST_CASE
(
gotoh_protein_sequence_Test
)
{
BioSeqDataLib
::
ScoringScheme
<
float
,
BioSeqDataLib
::
SimilarityMatrix
<
float
>>
scoring2
(
BioSeqDataLib
::
SimilarityMatrix
<
float
>
(
"../tests/align/data/BLOSUM62.txt"
),
-
10
,
-
0.5
);
BioSeqDataLib
::
ScoringScheme
<
float
,
BioSeqDataLib
::
SimilarityMatrix
<
float
>>
scoring2
(
BioSeqDataLib
::
SimilarityMatrix
<
float
>
(
"../tests/align/data/BLOSUM62.txt"
),
-
10
,
-
0.5
,
-
1.0
,
-
0.5
);
//end-gap free alignment. score schecked with EMBOSS needle
BioSeqDataLib
::
Sequence
<>
seq1
(
"seq1"
,
"LMLDSGSEPKLIAEPLPPQGPYELSDETLQAPVLNDEGTEAVFELLSNAVEV"
,
""
,
"test sequence"
);
BioSeqDataLib
::
Sequence
<>
seq2
(
"seq2"
,
"LLDSKLIAEPLPPQGPYELSDETLQAPVLNDEGTEAVFELLSNAVEVTGKEPLP"
,
""
,
"test sequence"
);
...
...
@@ -115,7 +133,7 @@ BOOST_AUTO_TEST_CASE( gotoh_protein_sequence_Test )
31
,
32
,
33
,
34
,
35
,
36
,
37
,
38
,
39
,
40
,
41
,
42
,
43
,
44
,
45
,
46
,
47
,
48
,
49
,
50
,
51
,
52
,
53
};
auto
result
=
mat2
.
result
();
checkAlnOutput
(
mat2
,
expected1
,
expected2
,
2
20
.5
,
0
);
checkAlnOutput
(
mat2
,
expected1
,
expected2
,
2
15
.5
,
0
);
}
...
...
@@ -134,8 +152,7 @@ BOOST_AUTO_TEST_CASE(simple_gotoh)
mat
.
gotoh
(
seq1
,
seq2
);
std
::
vector
<
long
int
>
expected1
{
0
,
1
,
2
,
3
,
-
1
,
-
1
};
std
::
vector
<
long
int
>
expected2
{
-
1
,
-
1
,
0
,
1
,
2
,
3
};
checkAlnOutput
(
mat
,
expected1
,
expected2
,
20
,
0
);
checkAlnOutput
(
mat
,
expected1
,
expected2
,
16
,
0
);
}
BOOST_AUTO_TEST_CASE
(
simple_CP
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment