Commit 969e2931 by Elias Dohmen

v3.1 - Error handling improved and parameters in outfile printed

parent f8caf8ce
......@@ -39,14 +39,15 @@ from argparse import ArgumentParser
from argparse import RawTextHelpFormatter as HelpFormat
from hashlib import md5
import warnings
import traceback
import gzip
# no external python libraries are required.
annotype = 'rad'
conversion_dictionary = None
def main():
DOGMA_version = '3.1'
try:
# top level argument parsing
......@@ -61,7 +62,7 @@ def main():
'python dogma.py transcriptome --help\n')
subparsers = parser.add_subparsers(help='Program mode DOGMA should run (proteome or transcriptome analysis mode)',
dest='mode')
parser.add_argument("-v", "--version", action="version", version="DOGMA version 3.0")
parser.add_argument("-v", "--version", action="version", version=DOGMA_version)
# proteome mode argument parsing
parser_proteome = subparsers.add_parser("proteome", help="Analyse proteome data.")
......@@ -161,13 +162,13 @@ def main():
if args.reference_proteomes is None:
args.reference_proteomes = 'eukaryotes'
print ('# running python dogma.py v3.0 proteome -a {} -r {} -c {} -s {} -o {} -m {}'.format(
args.annotation_file, args.reference_proteomes, args.CDA_count_cutoff,
print ('# running dogma.py v{} proteome -a {} -r {} -c {} -s {} -o {} -m {}'.format(
DOGMA_version, args.annotation_file, args.reference_proteomes, args.CDA_count_cutoff,
args.cda_size, args.outfile, args.pfam))
# starting the main method with the specified parameters
score_single_proteome(args.annotation_file, args.outfile, args.CDA_count_cutoff,
args.cda_size, args.reference_proteomes, args.mode, args.pfam)
args.cda_size, args.reference_proteomes, args.mode, args.pfam, args.initial_radiant_run, DOGMA_version)
elif args.mode == 'transcriptome':
if args.initial_radiant_run is not None:
......@@ -202,13 +203,13 @@ def main():
if args.reference_transcriptomes is None:
args.reference_transcriptomes = 'eukaryotes'
print ('# running python dogma.py v3.0 transcriptome -i {} -s {} -a {} -r {} -o {} -m {}'.format(
args.initial_radiant_run, args.cda_size, args.annotation_file,
print ('# running dogma.py v{} transcriptome -i {} -s {} -a {} -r {} -o {} -m {}'.format(
DOGMA_version, args.initial_radiant_run, args.cda_size, args.annotation_file,
args.reference_transcriptomes, args.outfile, args.pfam))
score_single_transcriptome(args.annotation_file, args.outfile, args.cda_size, args.reference_transcriptomes,
args.mode, args.pfam)
args.mode, args.pfam, args.initial_radiant_run, DOGMA_version)
except Exception, e:
print("Error! " + str(e), file=sys.stderr)
print("Error! " + 'Line ' + str(traceback.extract_tb(sys.exc_info()[2])[-1][1]) + " " + str(e), file=sys.stderr)
sys.exit(1)
class ConversionDictionary(dict):
......@@ -247,7 +248,7 @@ class ConversionDictionary(dict):
self['acc_to_dom'][match[0]] = match[1]
def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
hq_transcriptomes=None, mode='transcriptome', pfam='31'):
hq_transcriptomes=None, mode='transcriptome', pfam='31', initial=None, version='3.0'):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
The function parameters correspond to the argparse-arguments:
......@@ -309,7 +310,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
q.append(q_i) # for all domain tuples of the same length, the quality check is appended to q.
# generating a user readable summary
summary = Summary(q, basename(annotation_file), mode, hq_transcriptomes)
summary = Summary(q, basename(annotation_file), mode, hq_transcriptomes, version, initial, outfile, pfam)
# summary printed in console or saved in file
if outfile is not None:
......@@ -322,7 +323,7 @@ def score_single_transcriptome(annotation_file, outfile=None, max_dom_tup_len=3,
def score_single_proteome(annotation_file, outfile=None, cutoff=2,
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='31'):
max_dom_tup_len=3, hq_proteomes=None, mode='proteome', pfam='31', initial=None, version='3.0'):
"""
combines the functions and classes to score a sample proteome in terms of its domain completeness.
The function parameters correspond to the argparse-arguments:
......@@ -385,7 +386,7 @@ def score_single_proteome(annotation_file, outfile=None, cutoff=2,
q.append(q_i) # for all domain tuples of the same length, the quality check is appended to q.
# generating a user readable summary
summary = Summary(q, basename(annotation_file), mode, hq_proteomes)
summary = Summary(q, basename(annotation_file), mode, hq_proteomes, version, initial, outfile, pfam, cutoff)
# summary printed in console or saved in file
if outfile is not None:
......@@ -748,17 +749,23 @@ class Summary:
species_name_str: a string containing the name of the species (for printing)
"""
def __init__(self, quality_checkers, species_name, mode, corespecies):
def __init__(self, quality_checkers, species_name, mode, corespecies, version, initial, output, pfam, cutoff=None):
self.quality_checkers = quality_checkers
self.species_name = species_name
self.mode = mode
self.corespecies = corespecies
self.version = version
self.initial = initial
self.output = output
self.pfam = pfam
self.cutoff = cutoff
self.max_dom_tuple_len = len(quality_checkers)
self.cdas_hq_dict = {}
self.cdas_found_dict = {}
self.percentage_dict = {}
self.transcript_total_score = 0.00
global conversion_dictionary
if self.corespecies is None:
......@@ -788,14 +795,28 @@ class Summary:
Allows printing of the Summary class to show the summarized statistics of the completeness of the proteome.
Prints a human-readable summary of the domain completeness report.
"""
str1 = "\n".join(
["\nRunning python dogma.py v{} {} -i {} -s {} -a {} -r {} -o {} -m {}\n",
"Statistics of the completeness of the {} ",
"- {} -",
"based on {} single-domain CDAs and {} multiple-domain CDAs\n "
"({} was used as core set for this analysis):\n",
"CDAsize\t#Found\t#Expct\t%Completeness\n"]
).format(self.version, self.mode, self.initial, self.s, self.annotation_file, self.corespecies, ,self.mode, self.species_name, self.num_single_cdas, self.num_cda_tuples, self.corespecies)
if self.mode == 'proteome':
str1 = "\n".join(
["\nRunning dogma.py v{} {} -i {} -s {} -c {} -a {} -r {} -o {} -m {}\n",
"Statistics of the completeness of the {} ",
"- {} -",
"based on {} single-domain CDAs and {} multiple-domain CDAs\n "
"({} was used as core set for this analysis):\n",
"CDAsize\t#Found\t#Expct\t%Completeness\n"]
).format(self.version, self.mode, self.initial, self.max_dom_tuple_len, self.cutoff, self.species_name, self.corespecies, self.output, self.pfam, self.mode, self.species_name, self.num_single_cdas, self.num_cda_tuples, self.corespecies)
elif self.mode == 'transcriptome':
str1 = "\n".join(
["\nRunning dogma.py v{} {} -i {} -s {} -a {} -r {} -o {} -m {}\n",
"Statistics of the completeness of the {} ",
"- {} -",
"based on {} single-domain CDAs and {} multiple-domain CDAs\n "
"({} was used as core set for this analysis):\n",
"CDAsize\t#Found\t#Expct\t%Completeness\n"]
).format(self.version, self.mode, self.initial, self.max_dom_tuple_len, self.species_name,
self.corespecies, self.output, self.pfam, self.mode, self.species_name, self.num_single_cdas,
self.num_cda_tuples, self.corespecies)
else:
raise RuntimeError('Problem to detect if DOGMA runs in proteome or transcriptome mode. Error102')
str2_list = []
str4_list = []
......@@ -819,7 +840,7 @@ class Summary:
missing_cdas_string = "\n".join(
["{}".format(" ; ".join(line[2])) for line in self.quality_checkers[x].out])
else:
raise StandardError("Problem to detect if DOGMA runs in proteome or transcriptome mode. Error103")
raise RuntimeError("Problem to detect if DOGMA runs in proteome or transcriptome mode. Error103")
str4_list.append(header)
str4_list.append(missing_cdas_string)
......@@ -860,7 +881,7 @@ class Summary:
).format(self.transcript_total_score, basename(self.species_name),
basename(self.species_name))
else:
raise StandardError("Problem to detect if DOGMA runs in proteome or transcriptome mode. Error104")
raise RuntimeError("Problem to detect if DOGMA runs in proteome or transcriptome mode. Error104")
str4 = "\n".join(str4_list)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment