Source code for bsPlugins.QuantifyTable

from bsPlugins import *
from bbcflib.gfminer.stream import neighborhood, score_by_feature
from bbcflib.track import track
from bbcflib import genrep
import os

# Default promoter extent on each side of the gene start; used as the initial
# form values and as the fallback when no distance is supplied.
prom_up_def, prom_down_def = 1000, 100

# (value, label) pairs offered by the "Feature type" selector.
ftypes = list(enumerate(['gene bodies', 'gene promoters', 'exons', 'custom upload']))

# Names of the score operations offered by the "Score operation" selector.
funcs = 'mean sum median min max'.split()

# Plugin metadata, published through the plugin's info['meta'] entry.
meta = dict(
    version="1.0.0",
    author="BBCF",
    contact="webmaster-bbcf@epfl.ch",
)

# Declared plugin inputs, published through the plugin's info['in'] entry.
# NOTE(review): the form also offers a 'score_op' selector and quantify() reads
# kw['score_op'], but it is not declared here - confirm whether the framework
# needs every form field listed.
in_parameters = [
    {'id': 'signals', 'type': 'track', 'multiple': 'SigMulti', 'required': True},
    {'id': 'feature_type', 'type': 'list'},
    {'id': 'features', 'type': 'track'},
    {'id': 'format', 'type': 'text'},
    {'id': 'assembly', 'type': 'assembly'},
    {'id': 'upstream', 'type': 'int', 'required': True},
    {'id': 'downstream', 'type': 'int', 'required': True},
]

# Declared plugin outputs, published through the plugin's info['out'] entry.
out_parameters = [
    {'id': 'features_quantification', 'type': 'track'},
]

class QuantifyTableForm(BaseForm):
    """Input form of the QuantifyTable plugin.

    Collects the signal files, the score operation, the feature set to
    quantify (built-in or uploaded), the promoter extent, the assembly and
    the output format.
    """
    # Layout widget from the dynamic-forms module; presumably what lets the
    # 'feature_type' selector below show/hide dependent fields - TODO confirm.
    child = twd.HidingTableLayout()
    # Group of signal file inputs; referenced by name in in_parameters
    # ('multiple': 'SigMulti') and read back as kw['SigMulti']['signals'].
    class SigMulti(twb.BsMultiple):
        label='Signals: '
        signals = twb.BsFileField(label=' ',
                                  help_text='Select signal files (e.g. bedgraph)',
                                  validator=twb.BsFileFieldValidator(required=True))

    # Score operation, chosen among the names listed in `funcs`.
    score_op = twf.SingleSelectField(label='Score operation: ',
                                     options=funcs,
                                     prompt_text=None,
                                     help_text='Operation performed on scores within each feature')
    # Feature set selector. `mapping` ties an option value to dependent fields:
    # the last ftypes entry (custom upload) -> 'features', and value 1
    # (gene promoters) -> 'upstream'/'downstream'. Presumably those fields are
    # only displayed while the matching option is selected - TODO confirm.
    feature_type = twd.HidingSingleSelectField(label='Feature type: ',
                                               options=ftypes,
                                               prompt_text=None,
                                               mapping={ftypes[-1][0]: ['features'],
                                                        1: ['upstream', 'downstream']},
                                               help_text='Choose a feature set or upload your own')
    # Custom feature file (validator is not marked as required).
    features = twb.BsFileField(label='Custom feature set: ',
                               help_text='Select a feature file (e.g. bed)',
                               validator=twb.BsFileFieldValidator())
    # Promoter extent around the TSS, pre-filled with the module defaults.
    upstream = twf.TextField(label='Promoter upstream distance: ',
                             validator=twc.IntValidator(),
                             value=prom_up_def,
                             help_text='Size of promoter upstream of TSS')
    downstream = twf.TextField(label='Promoter downstream distance: ',
                               validator=twc.IntValidator(),
                               value=prom_down_def,
                               help_text='Size of promoter downstream of TSS')
    # NOTE: the option list is obtained by calling GenRep when this class body
    # is executed, i.e. once, at the time the module is imported.
    assembly = twf.SingleSelectField(label='Assembly: ',
                                     prompt_text=None,
                                     options=genrep.GenRep().assemblies_available(),
                                     help_text='Reference genome')
    # Output file format; a value is required.
    format = twf.SingleSelectField(label='Output format: ',
                                   prompt_text=None,
                                   options=["txt", "sql"],
                                   validator=twc.Validator(required=True),
                                   help_text='Format of the output file')
    submit = twf.SubmitButton(id="submit", value="Quantify")



class QuantifyTablePlugin(BasePlugin):
    """Quantify signal tracks on a set of regions.

    Given a set of signal tracks, and a bed-like track containing intervals (e.g. genes),
    builds a table of the score of each signal in each of the intervals.
    That is, each cell of the output table is the score given by one of the tracks
    to a specific interval.
    Scores can be the sum/mean/median/min/max of the tag count in the interval."""

    # NOTE: the class docstring above is reused verbatim as the plugin
    # description (info['description']), so it is written for end users.
    info = {
        'title': 'Quantify signals in regions',
        'description': __doc__,
        'path': ['Analysis', 'Quantify features'],
        'output': QuantifyTableForm,
        'in': in_parameters,
        'out': out_parameters,
        'meta': meta,
    }

    @staticmethod
    def _int_or_default(value, default):
        """Return `value` converted to int, or `default` if it is None or ''.

        Unlike `int(value or default)`, an explicit 0 is kept as 0.
        """
        if value is None or value == '':
            return default
        return int(value)

    def quantify(self, **kw):
        """Score every signal track over the selected features and write the table.

        Keys read from `kw`:
          - 'SigMulti': mapping whose 'signals' entry is one signal file or a
            list of signal files (required).
          - 'feature_type': an `ftypes` value (int or numeric string) or label;
            defaults to 0.
          - 'score_op': method name passed to `score_by_feature`; default 'mean'.
          - 'assembly': assembly identifier; required unless the feature type is
            the custom upload.
          - 'features': path of the custom feature file (custom upload only).
          - 'upstream' / 'downstream': promoter extent around gene starts (gene
            promoters only); None or '' selects `prom_up_def` / `prom_down_def`.
          - 'format': output format; defaults to 'txt'.

        Returns the path of the output file.

        Raises ValueError if no assembly is given for a non-custom feature type,
        if the custom feature file does not exist, or if `feature_type` is not
        one of `ftypes`.
        """
        feature_type = kw.get('feature_type', 0)
        # Numeric strings (e.g. coming from the form) are normalized to int so
        # that they match the integer values stored in `ftypes`.
        if str(feature_type) in [str(x[0]) for x in ftypes]:
            feature_type = int(feature_type)
        func = str(kw.get('score_op', 'mean'))
        assembly_id = kw.get('assembly')
        out_format = kw.get('format') or 'txt'

        chrmeta = "guess"
        if assembly_id:
            assembly = genrep.Assembly(assembly_id)
            chrmeta = assembly.chrmeta
            genes = assembly.gene_track
            exons = assembly.exon_track
        elif feature_type not in ftypes[3]:
            # Only the custom upload can work without an assembly.
            raise ValueError("Please specify an assembly")

        signals = kw['SigMulti']['signals']
        if not isinstance(signals, list):
            signals = [signals]
        signals = [track(sig, chrmeta=chrmeta) for sig in signals]

        # `features` is a callable taking a chromosome name and returning the
        # stream of intervals to quantify on that chromosome.
        # Membership in an `ftypes` entry accepts either its value or its label.
        if feature_type in ftypes[0]:
            features = genes
        elif feature_type in ftypes[1]:
            prom_pars = {
                'before_start': self._int_or_default(kw.get('upstream'), prom_up_def),
                'after_start': self._int_or_default(kw.get('downstream'), prom_down_def),
                'on_strand': True,
            }
            features = lambda c: neighborhood(genes(c), **prom_pars)
        elif feature_type in ftypes[2]:
            features = exons
        elif feature_type in ftypes[3]:
            # Explicit check rather than `assert`, which is removed under -O.
            if not os.path.exists(str(kw.get('features'))):
                raise ValueError("Features file not found: '%s'" % kw.get("features"))
            _t = track(kw['features'], chrmeta=chrmeta)
            # The chromosomes to iterate over are those of the uploaded features.
            chrmeta = _t.chrmeta
            features = _t.read
        else:
            raise ValueError("Take feature_type in %s." % ftypes)

        output = self.temporary_path(fname='quantification.' + out_format)
        # One score column per signal; a single signal gets a plain "score".
        if len(signals) > 1:
            score_fields = ["score%i" % i for i in range(len(signals))]
        else:
            score_fields = ["score"]
        tout = track(output, out_format,
                     fields=['chr', 'start', 'end', 'name'] + score_fields,
                     chrmeta=chrmeta, info={'datatype': 'qualitative'})
        header = ['#chr', 'start', 'end', 'name'] + [s.name for s in signals]
        tout.make_header("\t".join(header))
        for chrom in chrmeta:
            sread = [sig.read(chrom) for sig in signals]
            tout.write(score_by_feature(sread, features(chrom), method=func),
                       chrom=chrom, clip=True, mode="append")
        # Release the output track once every chromosome has been written.
        tout.close()
        return output

    def __call__(self, **kw):
        """Run `quantify` and register its result as 'features_quantification'.

        Returns the value of `self.display_time()`.
        """
        output = self.quantify(**kw)
        self.new_file(output, 'features_quantification')
        return self.display_time()

Other BBCF projects