Source code for bsPlugins.Bam2Density

from bsPlugins import *
from bein import execution
from bbcflib.track import track, convert
from bbcflib.mapseq import bam_to_density
from bbcflib.gfminer.stream import merge_scores
import os

__requires__ = ["pysam"]


meta = {'version': "1.0.0",
        'author': "BBCF",
        'contact': "webmaster-bbcf@epfl.ch"}

in_parameters = [{'id': 'sample', 'type': 'bam', 'required': True, 'multiple': 'BamMulti'},
                 {'id': 'control', 'type': 'bam'},
                 {'id': 'format', 'type': 'text'},
                 {'id': 'normalization', 'type': 'int'},
                 {'id': 'merge_strands', 'type': 'int'},
                 {'id': 'read_extension', 'type': 'int'}]
out_parameters = [{'id': 'density_merged', 'type': 'track'},
                  {'id': 'density_fwd', 'type': 'track'},
                  {'id': 'density_rev', 'type': 'track'}]


class Bam2DensityForm(BaseForm):
    class BamMulti(twb.BsMultiple):
        label='Test BAMs: '
        sample = twb.BsFileField(label=' ',
                                 help_text='Select main bam file(s)',
                                 validator=twb.BsFileFieldValidator(required=True))
    control = twb.BsFileField(label='Control BAM: ',
                              help_text='Select control bam file to compute enrichment',
                              validator=twb.BsFileFieldValidator(required=False))
    format = twf.SingleSelectField(label='Output format: ',
                                   options=["sql", "bedGraph", "bigWig"],
                                   prompt_text=None,
                                   help_text='Format of the output file')
    normalization = twf.TextField(label='Normalization: ',
                                  validator=twc.IntValidator(),
                                  help_text='Normalization factor, default is total number of reads')
    merge_strands = twf.TextField(label='Shift and merge strands: ',
                                  validator=twc.IntValidator(),
                                  value=-1,
                                  help_text='Shift value (in bp) if you want to merge strand-specific densities (will not merge if negative)')
    read_extension = twf.TextField(label='Read extension: ',
                                   validator=twc.IntValidator(),
                                   value=-1,
                                   help_text='Read extension (in bp) to be applied when constructing densities (will use read length if negative)')
    submit = twf.SubmitButton(id="submit", value='bam2density')


[docs]class Bam2DensityPlugin(BasePlugin): """From a BAM file, creates a track file of the read count/density along the whole genome, in the chosen format. Read counts are divided by 10^-7 times the normalization factor (which is total number of reads by default). Positive and negative strand densities are generated and optionally merged (averaged) if a shift value >=0 is given. The read extension is the number of basepairs a read will cover, starting from its most 5' position (e.g. with a read extension of 1, only the starting position of each alignment will be considered, default is read length). """ info = { 'title': 'Genome-wide reads density from BAM', 'description': __doc__, 'path': ['Files', 'Bam2density'], 'output': Bam2DensityForm, 'in': in_parameters, 'out': out_parameters, 'meta': meta, } def __call__(self, **kw): b2wargs = [] control = None samples = kw.get('BamMulti',{}).get('sample', []) if not isinstance(samples, list): samples = [samples] samples = [os.path.abspath(s) for s in samples if os.path.exists(s)] if kw.get('control'): control = kw['control'] b2wargs = ["-c", str(control)] assert os.path.exists(str(control)), "Control file not found: '%s'." % control control = os.path.abspath(control) if kw.get('normalization') is None: nreads = -1 else: nreads = int(kw['normalization']) bamfiles = [track(s, format='bam') for s in samples] if nreads < 0: if control is None: _nreads = [len(set((t[4] for t in b.read()))) for b in bamfiles] else: b2wargs += ["-r"] else: _nreads = [nreads for s in samples] if kw.get('merge_strands') is None: merge_strands = -1 else: merge_strands = int(kw['merge_strands']) if kw.get('read_extension') is None: read_extension = -1 else: read_extension = int(kw['read_extension']) output = [self.temporary_path(fname=b.name+'_density_') for b in bamfiles] format = kw.get("format", "sql") with execution(None) as ex: files = [bam_to_density( ex, s, output[n], nreads=_nreads[n], merge=merge_strands, read_extension=read_extension, sql=True, args=b2wargs ) for n,s in enumerate(samples)] info = {'datatype': 'quantitative', 'read_extension': read_extension} if merge_strands >= 0: suffixes = ["merged"] info['shift'] = merge_strands else: suffixes = ["fwd", "rev"] chrmeta = bamfiles[0].chrmeta for suf in suffixes: all_s_files = [x for y in files for x in y if x.endswith(suf+".sql")] if len(all_s_files) > 1: x = self.temporary_path(fname="Density_average_"+suf+".sql") tsql = track( x, fields=['start', 'end', 'score'], chrmeta=chrmeta, info={'datatype': 'quantitative'} ) insql = [] for f in all_s_files: t = track(f, format='sql', chrmeta=chrmeta) t.save() insql.append(t) for c in tsql.chrmeta: tsql.write(merge_scores([t.read(c) for t in insql]),chrom=c) else: x = all_s_files[0] tsql = track( x, format='sql', fields=['start', 'end', 'score'], chrmeta=chrmeta, info=info ) tsql.save() if format in [None,"sql"]: outname = x else: outname = os.path.splitext(x)[0]+"."+format convert(x, outname, mode="overwrite") self.new_file(outname, 'density_'+suf) return self.display_time()

Other BBCF projects