Commit 96a1f140 authored by Elias Dohmen's avatar Elias Dohmen 🎓

canonical string function added

parent d9bc3c66
Pipeline #752 failed with stages
in 4 seconds
......@@ -66,9 +66,9 @@ ENDFUNCTION(PREPEND)
# To run in debug mode -DCMAKE_BUILD_TYPE=Debug
# Locate the compiled Boost libraries this project links against.
# Boost.Algorithm (boost/algorithm/...) is header-only: there is no boost_algorithm
# library, so it must NOT be listed under COMPONENTS - a REQUIRED lookup for it
# fails at configure time. Its headers are covered by the Boost include directory.
if (WITH_UNIT_TEST)
    FIND_PACKAGE(Boost 1.49 COMPONENTS system filesystem iostreams unit_test_framework REQUIRED)
else (WITH_UNIT_TEST)
    FIND_PACKAGE(Boost 1.49 COMPONENTS system filesystem iostreams REQUIRED)
endif(WITH_UNIT_TEST)
# SYSTEM keeps compiler warnings originating in Boost headers out of the build output.
INCLUDE_DIRECTORIES(SYSTEM ${Boost_INCLUDE_DIR})
link_directories(${Boost_LIBRARY_DIRS})
......
......@@ -11,7 +11,7 @@ Requirements
We try to keep the number of dependencies as small as possible. Current dependencies are:
- cmake (https://cmake.org)
- compiler supporting c++11 - e.g. g++ 4.8 or higher
- boost modules: system filesystem (http://www.boost.org/)
- boost modules: system filesystem iostreams algorithm (http://www.boost.org/)
Optional:
- boost modules: unit_test_framework - only needed when the unit tests are compiled
......
......@@ -36,6 +36,7 @@
#include "../utility/stringHelpers.hpp"
#include "../external/Input.hpp"
#include <boost/algorithm/string/join.hpp>
namespace BioSeqDataLib
{
......@@ -101,6 +102,12 @@ public:
*/
std::string str();
/**
* \brief Turns the tree into a Newick string in a canonical way, so that rotation variants of a tree lead to the same string.
*
* The strings of all children of an inner node are sorted lexicographically before they are
* joined with ',' and enclosed in parentheses, so the order in which children are stored does
* not influence the result. Every node except the root is written as "<name>:<edge length>",
* with the edge length formatted by std::to_string; the root is written without an edge length
* and the string is terminated by ';'.
* @return The tree in canonical string format
*/
std::string canonical_str();
/**
* \brief Reads a file in newick Format.
* @param inFile The file to read.
......@@ -364,6 +371,64 @@ PhylogeneticTree<DataType>::str()
return treeString;
}
/**
 * \brief Builds a canonical Newick representation of the tree.
 *
 * The tree is walked iteratively (no recursion). For every inner node the strings of its
 * children are collected, sorted lexicographically and joined with ',', so that trees which
 * differ only in the order of their children produce the same string. Every node except the
 * root is written as "<name>:<edge length>" (edge length formatted by std::to_string); the
 * root is written without an edge length and the result is terminated by ';'.
 *
 * @return The canonical Newick string. An empty string is returned for a tree without a root
 *         node; a tree consisting of a single leaf yields "<name>;".
 */
template<typename DataType>
std::string
PhylogeneticTree<DataType>::canonical_str()
{
    std::string treeString;

    TreeNodePhylo<DataType> *rootNode = this->root_.get();
    if (rootNode == nullptr)
    {
        // Empty tree: nothing to serialise.
        return treeString;
    }
    if (rootNode->isLeaf())
    {
        // A lone leaf has no child list to sort; write it like any other root (no edge length).
        treeString.append(rootNode->name + ";");
        return treeString;
    }

    // Inner nodes that are currently open, each with the index of the next child to handle.
    std::stack<std::pair<TreeNodePhylo<DataType>*, unsigned int> > toDo;
    // One vector per open inner node (same nesting as toDo) holding the finished child strings.
    std::stack<std::vector<std::string> > childStrings;

    toDo.push(std::make_pair(rootNode, 0u));
    childStrings.push(std::vector<std::string>());

    while (!toDo.empty())
    {
        TreeNodePhylo<DataType> *currentNode = toDo.top().first;
        const unsigned int child_id = toDo.top().second;

        if (child_id < currentNode->nChildren())
        {
            // Advance the counter first so this child is not visited again when we come back.
            ++toDo.top().second;
            TreeNodePhylo<DataType> *childNode = currentNode->child(child_id);
            if (childNode->isLeaf())
            {
                // Leaves are complete immediately.
                childStrings.top().push_back(childNode->name + ":" + std::to_string(childNode->edgeLength));
            }
            else
            {
                // Descend: open a new inner node with its own, still empty, child list.
                toDo.push(std::make_pair(childNode, 0u));
                childStrings.push(std::vector<std::string>());
            }
            continue;
        }

        // All children of currentNode are done: close it.
        toDo.pop();
        // Sorting is what makes the representation independent of the child order.
        std::sort(childStrings.top().begin(), childStrings.top().end());
        const std::string subtree = "(" + boost::algorithm::join(childStrings.top(), ",") + ")" + currentNode->name;
        childStrings.pop();

        if (toDo.empty())
        {
            // The root closes the whole tree and carries no edge length.
            treeString.append(subtree + ";");
        }
        else
        {
            // Hand the finished subtree to the parent, which is now on top of both stacks.
            childStrings.top().push_back(subtree + ":" + std::to_string(currentNode->edgeLength));
        }
    }
    return treeString;
}
template<typename DataType>
void
PhylogeneticTree<DataType>::read(const std::string &inFile)
......@@ -481,8 +546,6 @@ PhylogeneticTree<DataType>::str2tree(const std::string &treeLine)
}
/** @} */ // PhyloGroup
} /* namespace BioSeqDataLib */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment