Commit 93521002 authored by Carsten Kemena

Implementation of the new DASetWriter

parent cef2dfd3
#ifndef BSDL_DOMAIN_DASETOUTPUTSTRATEGY_HPP
#define BSDL_DOMAIN_DASETOUTPUTSTRATEGY_HPP
#include "DomainArrangementSet.hpp"
#include "DomainOutStrategy.hpp"
#include "../external/Output.hpp"
namespace BioSeqDataLib
{
/**
 * \brief Writes a complete domain arrangement set in XDom layout.
 *
 * Every arrangement contributes one header line (">" followed by its key)
 * and then one line per domain, produced by the supplied callback.
 *
 * @param daSet  The arrangement set to write.
 * @param out    The output object receiving the text.
 * @param domOut Callback that renders a single domain as one line (without the trailing newline).
 */
template<class DomainType>
void
daSet2XDom(const DomainArrangementSet<DomainType> &daSet, AlgorithmPack::Output &out, std::function<std::string(DomainType)> domOut)
{
    for (const auto &arrangement : daSet)
    {
        // Header line: the key of the arrangement.
        const auto &seqId = arrangement.first;
        out << ">" << seqId << "\n";

        // One line per domain of this arrangement.
        for (const auto &dom : arrangement.second)
            out << domOut(dom) << "\n";
    }
}
/**
 * \brief Writes a complete domain arrangement set in pfam_scan.pl layout.
 *
 * Emits the two pfam_scan header lines followed by one row per domain. Each row
 * starts with the arrangement key, left-aligned and padded to the length of the
 * longest key in the set, followed by a blank and the text produced by domOut.
 *
 * @param daSet  The arrangement set to write.
 * @param out    The output object receiving the text.
 * @param domOut Callback that renders the domain columns of a single row (without the trailing newline).
 */
template<class DomainType>
void
daSet2Pfam(const DomainArrangementSet<DomainType> &daSet, AlgorithmPack::Output &out, std::function<std::string(DomainType)> domOut)
{
    out << "# pfam_scan.pl\n";
    out << "# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan>\n\n";

    // Width of the id column: the longest key in the set.
    size_t len = 0;
    for (const auto &da : daSet)
    {
        if (len < da.first.length())
            len = da.first.length();
    }

    out.precision(1);
    for (const auto &da : daSet)
    {
        for (const auto &domain : da.second)
        {
            // The stream width is reset after every formatted insertion, so it has to be
            // set again for every row; setting it once per arrangement would only pad
            // the first domain row of each arrangement.
            out.width(static_cast<std::streamsize>(len));
            out << std::left << da.first << " ";
            out << domOut(domain) << "\n";
        }
    }
}
} // BioSeqDataLib
#endif // BSDL_DOMAIN_DASETOUTPUTSTRATEGY_HPP
\ No newline at end of file
#ifndef BSDL_DOMAIN_DASETWRITER_HPP
#define BSDL_DOMAIN_DASETWRITER_HPP
#include <functional>
#include <map>
#include <memory>
#include <string>
#include "DomainArrangementSet.hpp"
#include "DASetOutputStrategy.hpp"
#include <boost/filesystem.hpp>
#include "../external/Output.hpp"
namespace BioSeqDataLib
{
/**
 * \brief Writes a DomainArrangementSet to a file using output strategies registered by name.
 *
 * A strategy is any callable taking the arrangement set and an AlgorithmPack::Output.
 * Strategies are registered with addStrategy() and selected by their name in write().
 */
template<class DomainType>
class DASetWriter
{
private:
    /// Registered output strategies, keyed by format name.
    std::map<std::string, std::function<void(const DomainArrangementSet<DomainType> &, AlgorithmPack::Output &)> > strategies;

public:
    /**
     * \brief Standard constructor; no strategy is registered initially.
     */
    DASetWriter()
    {}

    /**
     * \brief Standard destructor.
     */
    virtual
    ~DASetWriter()
    {}

    /**
     * \brief Writes the arrangement set using the strategy registered under the given format name.
     * @param daSet  The arrangement set to write.
     * @param format Name of a strategy previously registered with addStrategy().
     * @param out    Path handed to AlgorithmPack::Output.
     * @throws std::out_of_range if no strategy is registered under \p format.
     */
    void
    write(const DomainArrangementSet<DomainType> &daSet, const std::string &format, const fs::path &out)
    {
        // Resolve the strategy before constructing the output object, so that an unknown
        // format name is reported without touching the target path first.
        // NOTE(review): assumes the Output constructor opens (and thereby creates/truncates) the file - confirm.
        const auto &strategy = strategies.at(format);
        AlgorithmPack::Output outF(out);
        strategy(daSet, outF);
    }

    /**
     * \brief Registers an output strategy under a name.
     *
     * If a strategy with the same name is already registered, the existing one is kept.
     *
     * @param name     The format name used to select the strategy in write().
     * @param strategy The callable performing the actual output.
     */
    void
    addStrategy(const std::string &name, std::function<void(const DomainArrangementSet<DomainType> &, AlgorithmPack::Output &)> strategy)
    {
        strategies.emplace(name, strategy);
    }
};
}
#endif // BSDL_DOMAIN_DASETWRITER_HPP
......@@ -63,8 +63,10 @@ private:
public:
static Domain createFromPfamScanLine(const std::string &line);
//static Domain createFromPfamScanLine(const std::string &line);
//static Domain createFromHMMScanLine(const std::string &line);
//static Domain createFromHMMScanLine(const std::string &line);
/**
* \brief Standard constructor.
*/
......
......@@ -173,58 +173,6 @@ DomainArrangementSet<Domain>::interProTSVTokens2Domain_(const std::vector<std::s
}
/**
 * \brief Writes all arrangements of a PfamDomain set in pfam_scan.pl layout.
 *
 * Emits the two pfam_scan header lines followed by one row per domain. The
 * statement order matters: width applies to the next insertion only, while the
 * alignment and float-format flags stay active until they are changed.
 *
 * @param outFile The stream to write to.
 */
template<>
void
DomainArrangementSet<PfamDomain>::_writePfamScanOutput(std::ofstream &outFile)
{
outFile << "# pfam_scan.pl\n";
outFile << "# <seq id> <alignment start> <alignment end> <envelope start> <envelope end> <hmm acc> <hmm name> <type> <hmm start> <hmm end> <hmm length> <bit score> <E-value> <significance> <clan>\n\n";
// Determine the width of the id column: the longest key in arrangements_.
size_t len = 0;
for (const auto &pair : arrangements_)
{
if (len < pair.first.length())
len = pair.first.length();
}
// One decimal digit for the fixed bit score and the scientific E-value below.
outFile.precision(1);
for (const auto &pair : arrangements_)
{
for (const auto &domain : pair.second)
{
// Id column, left-aligned and padded to the longest key.
outFile.width(static_cast<std::streamsize>(len));
outFile << std::left << pair.first;
// Four right-aligned coordinate columns of width 7, each written as stored value + 1
// (presumably converting 0-based positions to 1-based output - confirm).
outFile.width(7);
outFile << std::right << domain.start()+1;
outFile.width(7);
outFile << domain.end()+1;
outFile.width(7);
outFile << domain.envStart()+1;
outFile.width(7);
outFile << domain.envEnd()+1;
outFile << " " << domain.accession() << " ";// << "." << std::setw(4) << std::left << domain.version();
// Name is left-aligned in 18 columns; the type follows without any padding.
outFile << std::setw(18) << std::left << domain.name() << domain.type();
outFile << std::setw(6) << std::right << domain.hmmStart()+1 << std::setw(6) << std::right << domain.hmmEnd()+1 << std::setw(6) << std::right << domain.hmmLength();
outFile << std::setw(9) << std::fixed << std::right << domain.bitscore();
outFile << std::setw(10) << std::scientific << std::right << domain.evalue();
// Clear fixed/scientific so the following columns use the default float format again.
outFile.unsetf ( std::ios::floatfield);
outFile << std::setw(4) << std::right << domain.significance();
// Domains without a clan get the placeholder "No_Clan".
if (domain.clan().empty())
outFile << " No_Clan";
else
outFile << " " << domain.clan();
outFile << "\n";
}
}
}
/**
*
* \brief Counts the occurrence of each clan in a domainArrangmentSet.
......
......@@ -185,21 +185,6 @@ private:
_readASSFile(AP::Input &inFile);
// Intentionally empty generic version: it only exists so that an explicit
// specialization can be provided for the domain types where this output makes sense.
// The stream parameter is deliberately unused here.
void
_writePfamScanOutput(std::ofstream &outFile __attribute__((unused)))
{}
template<typename SeqSet>
void
_writeXDomFormat(std::ofstream &outFile, const SeqSet &seqSet) const;
void
_writeXDomFormat(std::ofstream &outFile) const;
public:
using iterator = typename DomainArrangement_t::iterator;
using const_iterator = typename DomainArrangement_t::const_iterator;
......@@ -267,20 +252,6 @@ public:
void
read(const fs::path &in_f);
/**
* \brief Writes arrangement set to a file.
* @param outFileName The output file
* @param format The format to use.
*/
template<typename SeqSet>
void
write(const std::string &outFileName, const std::string &format, const SeqSet &seqSet);
void
write(const std::string &outFileName, const std::string &format);
/**
* \brief Deletes elements from the DomainArrangementSet
......@@ -478,23 +449,6 @@ public:
};
/* Template overloads for specific domain types */
/*
template<>
void
DomainArrangementSet<PfamDomain>::_writePfamScanOutput(std::ofstream &outFile);
template<>
void
DomainArrangementSet<PfamDomain>::_readPfamScanOutput(AP::Input &inFile);
template<>
void
DomainArrangementSet<SFDomain>::_readASSFile(AP::Input &inFile);
*/
/**
 * \brief Standard constructor; value-initializes arrangements_ and uniqueIds_.
 */
template<typename DomainType>
DomainArrangementSet<DomainType>::DomainArrangementSet() : arrangements_(), uniqueIds_()
{}
......@@ -564,101 +518,6 @@ DomainArrangementSet<DomainType>::read(const fs::path &inFileName)
}
template<>
void
DomainArrangementSet<PfamDomain>::_writePfamScanOutput(std::ofstream &outFile);
/**
 * \brief Writes the arrangement set to a file, using a sequence set for the xdom format.
 *
 * Supported formats are "pfam" and "xdom". Any std::exception raised while opening
 * or writing (including the one for an unknown format, if FormatException derives
 * from std::exception) is reported as a FormatException naming the file and format.
 *
 * @param outFileName The output file.
 * @param format      The format to use.
 * @param seqSet      The sequence set passed on to the xdom writer.
 */
template<typename DomainType>
template<typename SeqSet>
void
DomainArrangementSet<DomainType>::write(const std::string &outFileName, const std::string &format, const SeqSet &seqSet)
{
    try
    {
        std::ofstream target;
        // Throw on a failed open; afterwards only badbit raises exceptions.
        target.exceptions(std::ofstream::failbit | std::ofstream::badbit);
        target.open(outFileName);
        target.exceptions(std::ifstream::badbit);

        if (format == "pfam")
        {
            _writePfamScanOutput(target);
        }
        else if (format == "xdom")
        {
            _writeXDomFormat(target, seqSet);
        }
        else
        {
            throw FormatException("Unknown format " + format);
        }
    }
    catch (const std::exception&)
    {
        throw FormatException("Failed to write file '" + outFileName + "' of format " + format);
    }
}
/**
 * \brief Writes the arrangement set to a file.
 *
 * Supported formats are "pfam" and "xdom". Any std::exception raised while opening
 * or writing (including the one for an unknown format, if FormatException derives
 * from std::exception) is reported as a FormatException naming the file and format.
 *
 * @param outFileName The output file.
 * @param format      The format to use.
 */
template<typename DomainType>
void
DomainArrangementSet<DomainType>::write(const std::string &outFileName, const std::string &format)
{
    try
    {
        std::ofstream target;
        // Throw on a failed open; afterwards only badbit raises exceptions.
        target.exceptions(std::ofstream::failbit | std::ofstream::badbit);
        target.open(outFileName);
        target.exceptions(std::ifstream::badbit);

        if (format == "pfam")
        {
            _writePfamScanOutput(target);
        }
        else if (format == "xdom")
        {
            _writeXDomFormat(target);
        }
        else
        {
            throw FormatException("Unknown format " + format);
        }
    }
    catch (const std::exception&)
    {
        throw FormatException("Failed to write file '" + outFileName + "' of format " + format);
    }
}
/**
 * \brief Writes all arrangements in XDom format, including the sequence length in the header.
 *
 * The header line of each arrangement consists of ">", its key and the length of
 * the entry found under that key in seqSet. Each domain follows on its own line
 * as start+1, end+1, accession and E-value.
 *
 * @param outFile The stream to write to.
 * @param seqSet  The sequence set, indexed by the arrangement key.
 */
template<typename DomainType>
template<typename SeqSet>
void
DomainArrangementSet<DomainType>::_writeXDomFormat(std::ofstream &outFile, const SeqSet &seqSet) const
{
    for (const auto &entry : arrangements_)
    {
        const auto &seqId = entry.first;
        outFile << ">" << seqId << " " << seqSet[seqId].length() << "\n";

        for (const auto &domain : entry.second)
        {
            outFile << domain.start()+1 << " " << domain.end()+1 << " ";
            outFile << domain.accession() << " " << domain.evalue() << "\n";
        }
    }
}
/**
 * \brief Writes all arrangements in XDom format.
 *
 * The header line of each arrangement consists of ">" and its key. Each domain
 * follows on its own line as start+1, end+1, accession and E-value.
 *
 * @param outFile The stream to write to.
 */
template<typename DomainType>
void
DomainArrangementSet<DomainType>::_writeXDomFormat(std::ofstream &outFile) const
{
    for (const auto &entry : arrangements_)
    {
        outFile << ">" << entry.first << "\n";

        for (const auto &domain : entry.second)
        {
            outFile << domain.start()+1 << " " << domain.end()+1 << " ";
            outFile << domain.accession() << " " << domain.evalue() << "\n";
        }
    }
}
template<typename DomainType>
DomainFileFormat
DomainArrangementSet<DomainType>::_identifyFormat(AP::Input &inFile)
......
#ifndef BSDL_DOMAIN_DOMAINOUTSTRATEGY_HPP
#define BSDL_DOMAIN_DOMAINOUTSTRATEGY_HPP
#include <string>
#include <sstream>
#include <ios>
#include <iomanip>
namespace BioSeqDataLib
{
/**
 * \brief Renders a single domain as one XDom line (without trailing newline).
 *
 * The line consists of start+1, end+1, accession and E-value, separated by blanks.
 *
 * @param dom The domain to render.
 * @return The formatted line.
 */
template<class DomainType>
std::string
dom2XDom(const DomainType &dom)
{
    // Use stream insertion rather than std::to_string: to_string formats doubles like
    // "%f" (six fixed decimals), which turns small E-values such as 1e-10 into "0.000000".
    std::ostringstream line;
    line << dom.start()+1 << " " << dom.end()+1 << " " << dom.accession() << " " << dom.evalue();
    return line.str();
}
/**
 * \brief Renders the domain columns of one pfam_scan.pl row (without sequence id and trailing newline).
 *
 * Columns, in order: start+1, end+1, envStart+1, envEnd+1 (each right-aligned, width 7),
 * accession, name (left-aligned, width 18), type, hmmStart+1, hmmEnd+1, hmmLength
 * (width 6 each), bit score (fixed, width 9), E-value (scientific, width 10),
 * significance (width 4) and the clan, or "No_Clan" if the clan is empty.
 *
 * @param dom The domain to render.
 * @return The formatted columns.
 */
template<class DomainType>
std::string
dom2Pfam(const DomainType &dom)
{
    std::stringstream outFile;
    // The floats are formatted in this local stream, so the precision has to be set
    // here; a precision set on the stream that later receives the string has no effect.
    // One decimal digit for the fixed bit score and the scientific E-value.
    outFile.precision(1);
    outFile.width(7);
    outFile << std::right << dom.start()+1;
    outFile.width(7);
    outFile << dom.end()+1;
    outFile.width(7);
    outFile << dom.envStart()+1;
    outFile.width(7);
    outFile << dom.envEnd()+1;
    outFile << " " << dom.accession() << " ";// << "." << std::setw(4) << std::left << dom.version();
    outFile << std::setw(18) << std::left << dom.name() << dom.type();
    outFile << std::setw(6) << std::right << dom.hmmStart()+1 << std::setw(6) << std::right << dom.hmmEnd()+1 << std::setw(6) << std::right << dom.hmmLength();
    outFile << std::setw(9) << std::fixed << std::right << dom.bitscore();
    outFile << std::setw(10) << std::scientific << std::right << dom.evalue();
    // Clear fixed/scientific so the following columns use the default float format again.
    outFile.unsetf ( std::ios::floatfield);
    outFile << std::setw(4) << std::right << dom.significance();
    if (dom.clan().empty())
        outFile << " No_Clan";
    else
        outFile << " " << dom.clan();
    return outFile.str();
}
} // BioSeqDataLib
#endif // BSDL_DOMAIN_DOMAINOUTSTRATEGY_HPP
......@@ -138,6 +138,12 @@ public:
oS_->precision(p);
}
/**
 * \brief Sets the field width of the wrapped output stream.
 * @param w The width, forwarded unchanged to oS_->width().
 */
void
width(std::streamsize w)
{
oS_->width(w);
}
};
template <typename T>
......
#ifndef DASETWRITER_TEST_HPP_
#define DASETWRITER_TEST_HPP_
#include <boost/test/unit_test.hpp>
#include "../../src/domain/DASetWriter.hpp"
#include "../../src/domain/DASetOutputStrategy.hpp"
#include "../../src/domain/DomainOutStrategy.hpp"
BOOST_AUTO_TEST_SUITE(DomainArrangementSet_Test)
namespace BSDL = BioSeqDataLib;
/*
 * Smoke test for DASetWriter: registers the xdom and pfam strategies and writes a
 * small arrangement set with each of them. The test only checks that registering
 * and writing run without throwing; the file contents are not inspected.
 */
BOOST_AUTO_TEST_CASE( write_xdom )
{
    using PfamDom = BSDL::PfamDomain;
    namespace ph = std::placeholders;

    // One arrangement "arg" holding two domains.
    BSDL::DomainArrangementSet<PfamDom> arrangementSet;
    BSDL::DomainArrangement<PfamDom> arrangement;
    arrangement.emplace_back("A", 0, 2, 0.1);
    arrangement.emplace_back("bbbb", 5, 10, 0.5);
    arrangementSet.emplace("arg", arrangement);

    // Each strategy is a set-level writer bound to its per-domain formatter.
    BSDL::DASetWriter<PfamDom> writer;
    auto xdomStrategy = std::bind(BSDL::daSet2XDom<PfamDom>, ph::_1, ph::_2, BSDL::dom2XDom<PfamDom>);
    writer.addStrategy("xdom", xdomStrategy);
    auto pfamStrategy = std::bind(BSDL::daSet2Pfam<PfamDom>, ph::_1, ph::_2, BSDL::dom2Pfam<PfamDom>);
    writer.addStrategy("pfam", pfamStrategy);

    writer.write(arrangementSet, "xdom", "test53.txt");
    writer.write(arrangementSet, "pfam", "test56.txt");
}
BOOST_AUTO_TEST_SUITE_END()
#endif /* DASETWRITER_TEST_HPP_ */
......@@ -294,6 +294,7 @@ BOOST_AUTO_TEST_CASE(ERROR_WHILE_READING_TEST)
* WRITE FUNCTION TESTS *
**********************************************************/
/*
BOOST_AUTO_TEST_CASE(write_pfam_Test)
{
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::PfamDomain> arrangementSet;
......@@ -343,6 +344,7 @@ BOOST_AUTO_TEST_CASE(write_xdom_Test)
}
BOOST_AUTO_TEST_CASE(write_exception_Test)
{
BioSeqDataLib::DomainArrangementSet<BioSeqDataLib::PfamDomain> arrangementSet;
......@@ -354,6 +356,7 @@ BOOST_AUTO_TEST_CASE(write_exception_Test)
BOOST_CHECK_THROW(arrangementSet.write("test.txt", "not_a_format", seqSet), BioSeqDataLib::FormatException);
}
*/
/**********************************************************
......
......@@ -31,3 +31,4 @@
#include "DomainTest.hpp"
#include "DomainArrangementTest.hpp"
#include "DomainArrangementSetTest.hpp"
#include "DASetWriter_Test.hpp"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment