Commit a83c8324 authored by Carsten Kemena's avatar Carsten Kemena

Merge branch 'develop' into 'master'

Develop

See merge request !1
parents 44e4d60a 3e6e2dd1
Pipeline #649 passed with stage
in 4 minutes and 7 seconds
......@@ -47,6 +47,10 @@ doc/html
.project
.settings
# Visual studio code #
######################
.vscode
# Packages #
############
# it's better to unpack these files and commit the raw source
......
v. 2.3.0
- added option to display computed alignments
- added option to collapse domain repeats
- updated manual and changed to sphinx
- fixed 1 shift in databases build on domain annotation files. This fix makes it necessary to update the databases.
v. 2.2.0
- internal code improvements
v. 2.1.3
- added gop/gep information to output file
- added commandline and rads version to database file in additional table
......
......@@ -3,8 +3,8 @@ cmake_minimum_required(VERSION 2.6)
project (RADS C CXX)
SET(MAJOR_VERSION 2)
SET(MINOR_VERSION 1)
SET(PATCH_VERSION 3)
SET(MINOR_VERSION 3)
SET(PATCH_VERSION 0)
SET(CMAKE_CXX_FLAGS_COVERAGE
......@@ -97,14 +97,14 @@ ${BSDL_PATH}utility/DSM.cpp ${BSDL_PATH}utility/stringHelpers.cpp
)
SET(rads_src ${PROJECT_SOURCE_DIR}/src/rads.cpp ${PROJECT_SOURCE_DIR}/src/external/SQLiteDB.cpp ${PROJECT_SOURCE_DIR}/src/db.cpp ${BSDL_src} ${BSDL_PATH}/external_interfaces/domainProgs.cpp)
SET(rads_src ${PROJECT_SOURCE_DIR}/src/rads.cpp ${PROJECT_SOURCE_DIR}/src/DBAccess.cpp ${PROJECT_SOURCE_DIR}/src/external/SQLiteDB.cpp ${BSDL_src} ${BSDL_PATH}/external_interfaces/domainProgs.cpp)
SET(rads_exe rads )
ADD_EXECUTABLE(${rads_exe} ${rads_src})
target_link_libraries(${rads_exe}
${Boost_LIBRARIES} ${SQLITE3_LIBRARY} ${ZLIB_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT}
)
SET(makeDB_src ${PROJECT_SOURCE_DIR}/src/makeRadsDB.cpp ${PROJECT_SOURCE_DIR}/src/external/SQLiteDB.cpp ${PROJECT_SOURCE_DIR}/src/db.cpp ${BSDL_src})
SET(makeDB_src ${PROJECT_SOURCE_DIR}/src/makeRadsDB.cpp ${PROJECT_SOURCE_DIR}/src/external/SQLiteDB.cpp ${PROJECT_SOURCE_DIR}/src/DBCreator.cpp ${BSDL_src})
SET(makeDB_exe makeRadsDB)
ADD_EXECUTABLE(${makeDB_exe} ${makeDB_src})
target_link_libraries(${makeDB_exe}
......
RADS 2.1.2 (beta)
RADS 2.3.0
====
This program can perform a domain arrangement similarity search on databases.
......@@ -14,9 +14,21 @@ We try to keep the dependencies as little as possible. Current dependencies are:
- compiler with c++11 and OpenMP support
Download
--------
```bash
git clone https://ebbgit.uni-muenster.de/domainWorld/RADS.git
cd RADS
git submodule init
git submodule update
```
Installation
------------
Change into the RADS directory and run the following commands:
```bash
......@@ -30,7 +42,8 @@ make
Usage
-----
Please take a look at the wiki page (https://ebbgit.uni-muenster.de/domainWorld/RADS/wikis/home) for a detailed description
Please take a look at the file UserManual.pdf included in this program to get a detailed overview on how to install and run the program.
Problems, Bugs & Suggestions
----------------------------
......
File added
# Minimal makefile for Sphinx documentation
#
# NOTE: recipe lines (the ones starting with "@$(SPHINXBUILD)") must begin
# with a literal TAB character, otherwise make aborts with
# "missing separator".

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
SPHINXPROJ    = RADS
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
\ No newline at end of file
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = 'RADS'
copyright = '2018, Carsten Kemena'
author = 'Carsten Kemena'

# The full version, including alpha/beta/rc tags
release = '2.3.0'
# The short X.Y version. Derived from ``release`` so the two values cannot
# drift apart (an empty string here makes the |version| substitution render
# as nothing).
version = '.'.join(release.split('.')[:2])


# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): newer Sphinx releases (5.0 and later, per the Sphinx
# changelog) reportedly warn about ``None`` and fall back to 'en' -- confirm
# against the Sphinx version used to build the manual before changing this.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path .
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = 'alabaster'

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_logo = '_static/logo.png'

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}


# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'RADSdoc'


# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # Background colour and frame setting for verbatim (code) blocks.
    'sphinxsetup': 'VerbatimColor={rgb}{0.87,0.87,0.87},verbatimwithframe=false',
    # Extra LaTeX document class option appended to the defaults.
    'classoptions': ',openany',

    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'RADS.tex', 'RADS Documentation',
     'Carsten Kemena', 'manual'),
]
latex_logo = '_static/logo.png'


# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'rads', 'RADS Documentation',
     [author], 1)
]


# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'RADS', 'RADS Documentation',
     author, 'RADS', 'One line description of project.',
     'Miscellaneous'),
]
.. _general:
************
General
************
RADS 2.0 and newer is a complete new implementation based on the ideas in the original publication (see below). However,
the scoring scheme, options etc. have been changed. The next chapters will show you how to install and use this version.
===============
Contact
===============
If you have any problems, questions or suggestions concerning this program please contact us: domainWorld@uni-muenster.de
===============
Citation
===============
If you find RADS useful in your research, please cite:
Terrapon, Nicolas, Weiner, January, Grath, Sonja, Moore, Andrew D, Bornberg-Bauer, Erich: Rapid similarity search of proteins using alignments of domain arrangements., Bioinformatics (2014) 30 (2): 274-281. doi: 10.1093/bioinformatics/btt379
http://bioinformatics.oxfordjournals.org/content/30/2/274.long
.. _installation:
************
Installation
************
------------
Requirements
------------
We try to keep the dependencies as few as possible. Current dependencies are:
* BioSeqDataLib (https://ebbgit.uni-muenster.de/domainWorld/BioSeqDataLib) (can be added via git submodule)
* boost (http://www.boost.org)
* SQLite (https://www.sqlite.org)
* compiler with c++11 and OpenMP support
--------
Download
--------
You can currently choose between two different download options: ``git`` and a manual download. Using ``git`` is the recommended way, but if that is
not possible you can use the manual way instead. Both ways are described in the next sections.
Git
^^^
The easiest way to download the most current version of RADS is to use ``git``:
.. code-block:: bash
git clone https://ebbgit.uni-muenster.de/domainWorld/RADS.git
cd RADS
git submodule init
git submodule update
If you want to update to a newer version you can simply run the following command:
.. code-block:: bash
git pull
git submodule update
Do not forget to recompile the program after this step.
Manual download
^^^^^^^^^^^^^^^
If you don't want to use git, you can download the source code from here: https://ebbgit.uni-muenster.de/domainWorld/RADS/-/archive/master/RADS-master.tar.gz.
Below you find the commands needed to put everything necessary in its correct place. You can replace the ``wget`` command with manual downloads and copying the
file to the correct position if you do not have an internet connection.
.. code-block:: bash
wget https://ebbgit.uni-muenster.de/domainWorld/RADS/-/archive/master/RADS-master.tar.gz
tar xfz RADS-master.tar.gz
# The BioSeqDataLib now needs to be added manually
cd RADS-master/libs
rmdir BioSeqDataLib
wget https://ebbgit.uni-muenster.de/domainWorld/BioSeqDataLib/-/archive/master/BioSeqDataLib-master.tar.gz
tar xfz BioSeqDataLib-master.tar.gz
mv BioSeqDataLib-master BioSeqDataLib
-----------
Compilation
-----------
Change into the RADS directory and run the following commands:
.. code-block:: bash
mkdir build
cd build
cmake ..
make
.. _makeradsdb:
****************
makeRadsDB Usage
****************
``makeRadsDB`` is a program to compute a database that can be used by RADS. A database consists of two files: an index file
(SQLite database) and a domain arrangement file (simple text file). Therefore, if the name of the database is MyDB, the
files needed are MyDB.db and MyDB.da.
===============
Program options
===============
---------------
General options
---------------
The basic options
.. program:: makeRadsDB
.. option:: -h, --help
Produces this help message
.. option:: -i <FILE>, --input <FILE>
Domain arrangement file(s) that should be turned into a database.
.. option:: -I <FILE>, --InterPro <FILE>
Used to turn the InterPro annotation file (match\_complete.xml.gz) found on https://www.ebi.ac.uk/interpro/download.html into a RADS database.
This option is used to compute the precomputed InterPro databases. Use the :option:`--database` option to extract the domain arrangements
of a single database.
.. option:: -s <FILE>, --seqs <FILE>
Sequence files, used in combination with the domain arrangement files. If none is given, all sequence lengths are set to 0. If you provide
sequence files, you need to provide as many of them as you provide domain annotation files. It is necessary that the order of the sequence files is
the same as for the domain files: seqFile1 for domFile1, seqFile2 for domFile2, ...
.. option:: -o <FILE>, --out <FILE>
The output prefix used to produce two files in the format prefix.db and prefix.da. Be aware that we currently do not support adding data to an existing database.
---------------
Filter options
---------------
Some options to influence the data base construction.
.. program:: makeRadsDB
.. option:: -d, --database
This option is used together with the option: :option:`--InterPro`. It determines which of the supported databases to include in the RADS database.
===============
Examples
===============
.. code-block:: bash
# running makeRadsDB providing pfam annotations and sequences
makeRadsDB -i domains1.pfam domains2.pfam -s seqs1.fa seqs2.fa -o myDB
.. _rads:
**********
RADS Usage
**********
============
Simple Usage
============
This section assumes that you have installed RADS as described in :ref:`installation` and setup RADS as described in :ref:`setup`.
Three parameters are required: a query, the database to search in, and a scoring matrix. There are three different ways to provide a query: as a simple list of domain IDs,
as a protein sequence that will be automatically annotated, or as an already existing domain annotation file (e.g. the result of a run of ``pfam_scan.pl``).
.. code-block:: bash
# running RADS providing a manual list of domains as query
rads -D PF02758 PF05729 --db InterPro60-pfam -m pfam30.dsm
# running RADS providing a sequence as query
rads -Q seq.fasta --db InterPro60-pfam -m pfam30.dsm
# running RADS providing a domain annotation as query
rads -q seq.dom --db InterPro60-pfam -m pfam30.dsm
===============
Program Options
===============
---------------
General options
---------------
The general options influence the general behaviour of RADS:
.. program:: rads
.. option:: -h, --help
Prints a simple help message with a small description of all the available options.
.. option:: -d <FILE>, --db <FILE>
Prefix of the database. Can be either one of the precomputed ones downloaded from the website (see :ref:`setup`) or self-computed (see :ref:`makeRadsDB`).
.. option:: -o <FILE>, --out <FILE>
The output file.
.. option:: -l, --list-alignments
Report the alignments computed for the different domain arrangements.
.. option:: -n <INT>, --threads <INT>
The number of threads to be used by the program. Currently with this option several queries can be processed in parallel. If only one query is given, this program will still use only a single core. *Default: 1*
--------------
Query options
--------------
The query options define the different ways a query can be provided.
.. program:: rads
.. option:: -q <FILE>, --query-dom <FILE>
The domain annotation file to be used as query. This is a simple domain annotation file in one of the supported formats (e.g. the output of ``pfam_scan.pl``).
.. option:: -Q <FILE>, --query-seq <FILE>
File containing sequences to be used as queries. The file has to be in FASTA format.
.. option:: --domain-db <FILE>
The domain database to use for automated annotation.
.. option:: -D <IDs>, --domains <IDs>
Provide a domain arrangement manually in form of space separated domain accession numbers (e.g. PF00001 PF00002).
---------------
Scoring options
---------------
These parameters influence the alignment scoring similar to the same values in a standard alignment.
.. program:: rads
.. option:: -m <FILE>, --matrix <FILE>
The domain similarity matrix. This one needs to fit the data in the database, meaning that if you work with a database that contains Pfam domains, you should use the corresponding Pfam similarity matrix.
.. option:: --gop <INT>
Gap opening penalty. These costs are applied once for each consecutive set of gaps in a domain arrangement. They are not applied to gaps at the ends of the alignment. *Default: -50*
.. option:: --gep <INT>
Gap extension penalty. These costs are applied to each single gap character in the alignment. *Default: -10*
.. option:: -c, --collapse
Collapse consecutive identical domains. It is **recommended to use** this option. The reason why this is not automatically done is that it actually changes the domain arrangements. However, domains can often duplicate, and several repeats of the same domain in a row are not uncommon, usually without affecting the function of a protein. *Default: false*
------------------------------
Result filtering options
------------------------------
These options can be used to filter the hits that are reported.
.. program:: rads
.. option:: -a, --all
All of the domain IDs in the query have to appear in the target sequences as well. *Default: false*
.. option:: -M <INT>, --min-score <INT>
Only alignments with a score larger or equal to this value are reported. *Default: 0*
===============
Output format
===============
The output is in a simple text file format and contains two parts. The first part is a summary of the process containing the date of execution, the version
of RADS and the parameters used. The second part of the file contains the result. The hits are listed in a table of five `tab` separated columns. The first column contains the alignment score and the second the normalized version. The third column contains the target id, followed by the sequence length in the fourth column. The fifth column lists the domain arrangement of the target.
The table is sorted according to the first column.
.. code-block:: text
# RADS version 2.2.0
# RADS Output v1
# run at Fri Apr 20 14:19:09 2018
#
# query file: -
# database: interPro-test
# gap open penalty -50
# gap extension penalty -10
# matrix: pfam-31.dsm
# all: false
# collapse: true
# ******************************************************************
# -------------------------------------------------------------------
Results for: manual entered query
Domain arrangement: PF00001 PF00002 PF00003
# score | normalized | SeqID | sequence length | domain arrangement
# -------------------------------------------------------------------
300 1.00 test-seq1 530 PF00001 10 63 PF00002 104 312 PF00003 362 524
300 1.00 test-seq2 530 PF00001 10 63 PF00002 104 312 PF00003 362 524
190 0.69 test-seq3 530 PF00002 104 312 PF00003 362 524
190 0.69 test-seq5 530 PF00001 10 63 PF00002 104 312 PF00002 362 524
If you used the :option:`--list-alignments` option you will find additional output. An additional column denotes the alignment ID. The alignments can then be found at the end of the table.
.. note::
Be aware that if you additionally use the :option:`--collapse` option, the table will still show the original domain arrangement; the alignment, however, will use the collapsed version. See the example below.
.. code-block:: text
# RADS version 2.3.0
# RADS Output v1
# run at Wed Jun 27 15:09:15 2018
#
# query file: -
# database: /local/home/ckeme_01/projects/domainWorld/RADS/tests/integrationTests/interPro-test
# gap open penalty -50
# gap extension penalty -10
# matrix: /local/home/ckeme_01/.domainWorld/dsm/pfam-31.dsm
# all: false
# collapse: true
# ******************************************************************
# -------------------------------------------------------------------
Results for: manual entered query
Domain arrangement: PF00001 PF00002 PF00003
# score | normalized | SeqID | sequence length | domain arrangement | aln
# -------------------------------------------------------------------
300 1.00 test-seq1 530 PF00001 10 63 PF00002 104 312 PF00003 362 524 1
300 1.00 test-seq2 530 PF00001 10 63 PF00002 104 312 PF00003 362 524 1
190 0.69 test-seq5 530 PF00001 10 63 PF00002 104 312 PF00002 362 524 2
190 0.69 test-seq3 530 PF00002 104 312 PF00003 362 524 3
# -------------------------------------------------------------------
List of alignments:
# -------------------------------------------------------------------
1)
Query DA: PF00001 PF00002 PF00003
Target DA: PF00001 PF00002 PF00003
2)
Query DA: PF00001 PF00002 PF00003
Target DA: PF00001 PF00002 *******
3)
Query DA: PF00001 PF00002 PF00003
Target DA: ******* PF00002 PF00003
.. _setup:
***************
Setting up RADS
***************
This chapter describes how to set up RADS so it can access all the data it needs. In addition to your query, you will also need a RADS database and a similarity matrix.
=======================
Setting up the database
=======================
You need a database to search in. You can use one of the databases we precomputed based on InterPro annotations available here: http://domainworld.uni-muenster.de/programs/rads/ or you can compute your own one using the ``makeRadsDB`` program described in :ref:`makeRadsDB`.
=============================================
Setting up the domain similarity matrix (DSM)
=============================================
These precomputed similarity matrices should fit the domain database used, e.g. if your database contains PFAM domains, use the DSM containing the PFAM match scores. You can download DSMs for PFAM and SUPERFAMILY from: http://domainworld.uni-muenster.de/data/dsm/.
.. RADS documentation master file, created by
sphinx-quickstart on Tue Jun 26 16:47:50 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to RADS's documentation!
================================
.. toctree::
:maxdepth: 2
:caption: Contents:
content/general.rst
content/installation.rst
content/setup.rst
content/rads_usage.rst
content/makedb_usage.rst
.. only:: html
Indices and tables
==================
* :ref:`genindex`
* :ref:`search`
Subproject commit cada6232b356db742926226965626bb7a567eb37
Subproject commit 9e07b28bc1fa57600877d964f7aea8bc740aedac
\documentclass{scrartcl}
\usepackage{hyperref}
\usepackage{xcolor}
\usepackage{listings}
\usepackage{float}
\begin{document}
\title{RADS manual}
\subtitle{2.1.2 (beta)}
\author{Carsten Kemena}
\maketitle
\tableofcontents
\section{RADS}
\subsection{Introduction}
RADS is a program to search for domain arrangements in a given database.
\subsection{Program Options}
\subsubsection*{General options}
The general options influence the general behaviour of RADS:
\begin{tabular}{llp{9.5cm}}
\hline
parameter & default & description\\\hline
-h, --help & - & Produces this help message \\
-d, --db & - & Prefix to the database. Can be either one of the precomputed ones downloaded from the website or self-computed. \\
-a, --all & - & All domain types need to occur\\