Commit cef2dfd3 authored by Carsten Kemena's avatar Carsten Kemena

improved abstract representation

parent f4e3e912
#include "SeqFunctions.hpp"
namespace BioSeqDataLib
{
/**
 * @brief Computes a sliding-window average over the translated sequence.
 *
 * For every position i the window covers the indices [i-8, i+8], clipped to
 * the bounds of the sequence, so it holds at most 17 values and is smaller
 * near both ends. The result at position i is the sum of the values inside
 * the window divided by the number of positions in the window.
 *
 * @param translated Per-position values that are summed inside the window
 *                   (presumably 1 for hydrophobic, 0 otherwise -- confirm
 *                   against the code producing this vector).
 * @return One value per input position; empty if the input is empty.
 */
std::vector<float>
calc_HCA_percentages(const std::vector<short> &translated)
{
    std::vector<float> percentages;
    const auto seq_length = translated.size();
    if (seq_length == 0)
    {
        // An empty sequence has no window; without this guard 0/0 would be
        // stored and a one-element result returned.
        return percentages;
    }
    percentages.reserve(seq_length);

    // Number of neighbours considered on each side of a position.
    constexpr size_t half_window = 8;
    const bool longer_than_initial_window = seq_length > half_window + 1;

    // Window of position 0: indices [0, half_window], clipped to the sequence.
    const size_t initial_width = longer_than_initial_window ? half_window + 1 : seq_length;
    int n_hydrophobic = 0;
    int n_total = 0;
    for (size_t i = 0; i < initial_width; ++i)
    {
        n_hydrophobic += translated[i];
        ++n_total;
    }
    percentages.push_back(n_hydrophobic * 1.0 / n_total);

    // Positions below this bound still have an element at i + half_window
    // that has to enter the window.
    const size_t grow_until = longer_than_initial_window ? seq_length - half_window : 0;
    for (size_t i = 1; i < seq_length; ++i)
    {
        if (i > half_window)
        {
            // The element left of the window start drops out.
            n_hydrophobic -= translated[i - half_window - 1];
            --n_total;
        }
        if (i < grow_until)
        {
            // The element at the new right edge enters the window.
            n_hydrophobic += translated[i + half_window];
            ++n_total;
        }
        percentages.push_back(n_hydrophobic * 1.0 / n_total);
    }
    return percentages;
}
} // namespace BioSeqDataLib
\ No newline at end of file
#include "algorithm.hpp"
#include <utility>
#include <algorithm>
#include <iostream>
namespace BioSeqDataLib
{
/**
 * @brief Finds the closest earlier interval that does not conflict with interval i.
 *
 * Walks backwards from index i-1 and returns the first index whose interval
 * ends strictly before interval i starts.
 *
 * @param intervals The intervals to search.
 * @param i Index of the interval a predecessor is looked up for.
 * @return Index of the found interval, or -1 if no earlier interval qualifies.
 */
long int
previous_non_conflict(const std::vector<Interval> &intervals, size_t i)
{
    size_t candidate = i;
    while (candidate > 0)
    {
        --candidate;
        const bool ends_before_start = intervals[candidate].end < intervals[i].start;
        if (ends_before_start)
        {
            return candidate;
        }
    }
    return -1;
}
std::vector<size_t>
max_weight_subset(const std::vector<Interval> &intervals)
{
std::vector<size_t> result;
if (intervals.empty())
{
return result;
}
// first: score, second: trace_back
std::vector<std::pair<long int, long int>> opt;
opt.resize(intervals.size());
opt[0] = std::make_pair(0,-2);
for (size_t i=1; i<intervals.size(); ++i)
{
auto p = previous_non_conflict(intervals, i);
if (p != -1)
{
if (intervals[i].weight + opt[p].first > opt[i-1].first)
{
opt[i].first = intervals[i].weight + opt[p].first;
opt[i].second = p;
}
else
{
opt[i].first = opt[i-1].first;
opt[i].second = -1;
}
}
else
{
if (intervals[i].weight > opt[i-1].first)
{
opt[i].first = intervals[i].weight;
opt[i].second = -2;
}
else
{
opt[i].first = opt[i-1].first;
opt[i].second = -1;
}
}
}
/*for (auto &elem : opt)
{
std::cout << elem.first << " " << elem.second << "\n";
}
std::cout << "\n";*/
size_t i = opt.size()-1;
while (opt[i].second != -2)
{
if (opt[i].second == -1)
--i;
else
{
result.emplace_back(i);
i = opt[i].second;
}
}
result.emplace_back(i);
std::reverse(result.begin(), result.end());
return result;
}
} // namespace BioSeqDataLib
\ No newline at end of file
#include <vector>
#ifndef SRC_UTILITY_ALGORITHM_HPP_
#define SRC_UTILITY_ALGORITHM_HPP_
namespace BioSeqDataLib
{
/**
 * @brief An interval with a start, an end and an integer weight.
 */
struct Interval
{
    size_t start;  ///< Start position of the interval.
    size_t end;    ///< End position of the interval.
    int weight;    ///< Weight of the interval.

    /**
     * @brief Creates an interval.
     *
     * @param s Start position.
     * @param e End position.
     * @param w Weight; received as size_t and converted to int for storage.
     */
    Interval(size_t s, size_t e, size_t w)
        : start(s),
          end(e),
          weight(static_cast<int>(w))
    {
    }
};
/**
* @brief Calculates the maximum weight subset of non overlapping intervals.
*
* @param intervals A set of intervals sorted by their end point.
* @return std::vector<size_t> The indices (positions in @p intervals) of the
*         intervals that form the maximum weight subset.
*/
std::vector<size_t>
max_weight_subset(const std::vector<Interval> &intervals);
} // namespace BioSeqDataLib
#endif
\ No newline at end of file
#ifndef ALGORITHM_TEST_HPP_
#define ALGORITHM_TEST_HPP_
#include <boost/test/unit_test.hpp>
#include "../../src/utility/algorithm.hpp"
// NOTE(review): the suite name looks copied from the BitMask tests; it is kept
// unchanged here so existing test paths/filters keep working.
BOOST_AUTO_TEST_SUITE(BitMask_Test)

/**
 * Checks max_weight_subset on three small inputs. The size of each result is
 * verified with BOOST_REQUIRE_EQUAL so that the element checks that follow
 * never index past the end of the result when the size is wrong.
 */
BOOST_AUTO_TEST_CASE( interval_test)
{
    std::vector<BioSeqDataLib::Interval> intervals;

    // Several overlapping candidates: indices 1 and 4 are expected.
    intervals.emplace_back(0,1,2);
    intervals.emplace_back(0,2,3);
    intervals.emplace_back(1,2,2);
    intervals.emplace_back(2,5,4);
    intervals.emplace_back(3,5,4);
    auto result = BioSeqDataLib::max_weight_subset(intervals);
    BOOST_REQUIRE_EQUAL(result.size(), 2u);
    BOOST_CHECK_EQUAL(result[0], 1u);
    BOOST_CHECK_EQUAL(result[1], 4u);

    // Two disjoint intervals: both are expected.
    intervals.clear();
    intervals.emplace_back(0,2,3);
    intervals.emplace_back(3,5,4);
    result = BioSeqDataLib::max_weight_subset(intervals);
    BOOST_REQUIRE_EQUAL(result.size(), 2u);
    BOOST_CHECK_EQUAL(result[0], 0u);
    BOOST_CHECK_EQUAL(result[1], 1u);

    // The intervals share position 3, so only the heavier one is expected.
    intervals.clear();
    intervals.emplace_back(0,3,3);
    intervals.emplace_back(3,5,4);
    result = BioSeqDataLib::max_weight_subset(intervals);
    BOOST_REQUIRE_EQUAL(result.size(), 1u);
    BOOST_CHECK_EQUAL(result[0], 1u);
}

BOOST_AUTO_TEST_SUITE_END()
#endif /* ALGORITHM_TEST_HPP_ */
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment