Source code for bsPlugins.Bam2Density

from bsPlugins import *
from bein import execution
from bbcflib.track import track, convert
from bbcflib.mapseq import bam_to_density
from bbcflib.gfminer.stream import merge_scores
import os

__requires__ = ["pysam"]


meta = {'version': "1.0.0",
        'author': "BBCF",
        'contact': "webmaster-bbcf@epfl.ch"}

in_parameters = [{'id': 'sample', 'type': 'bam', 'required': True, 'multiple': 'BamMulti'},
                 {'id': 'control', 'type': 'bam'},
                 {'id': 'format', 'type': 'text'},
                 {'id': 'normalization', 'type': 'int'},
                 {'id': 'merge_strands', 'type': 'int'},
                 {'id': 'read_extension', 'type': 'int'}]
out_parameters = [{'id': 'density_merged', 'type': 'track'},
                  {'id': 'density_fwd', 'type': 'track'},
                  {'id': 'density_rev', 'type': 'track'}]


class Bam2DensityForm(BaseForm):
    class BamMulti(twb.BsMultiple):
        label='Test BAMs: '
        sample = twb.BsFileField(label=' ',
                                 help_text='Select main bam file(s)',
                                 validator=twb.BsFileFieldValidator(required=True))
    control = twb.BsFileField(label='Control BAM: ',
                              help_text='Select control bam file to compute enrichment',
                              validator=twb.BsFileFieldValidator(required=False))
    format = twf.SingleSelectField(label='Output format: ',
                                   options=["sql", "bedGraph", "bigWig"],
                                   prompt_text=None,
                                   help_text='Format of the output file')
    normalization = twf.TextField(label='Normalization: ',
                                  validator=twc.IntValidator(),
                                  help_text='Normalization factor, default is total number of reads')
    merge_strands = twf.TextField(label='Shift and merge strands: ',
                                  validator=twc.IntValidator(),
                                  value=-1,
                                  help_text='Shift value (in bp) if you want to merge strand-specific densities (will not merge if negative)')
    read_extension = twf.TextField(label='Read extension: ',
                                   validator=twc.IntValidator(),
                                   value=-1,
                                   help_text='Read extension (in bp) to be applied when constructing densities (will use read length if negative)')
    submit = twf.SubmitButton(id="submit", value='bam2density')


[docs]class Bam2DensityPlugin(BasePlugin):
    """From a BAM file, creates a track file of the read count/density along the whole genome,
in the chosen format.

Read counts are divided by 10^-7 times the normalization factor (which is total number of reads by default).
Positive and negative strand densities are generated and optionally merged (averaged) if a
shift value >=0 is given. The read extension is the number of basepairs a read will cover,
starting from its most 5' position (e.g. with a read extension of 1, only the starting position of
each alignment will be considered, default is read length).
"""
    info = {
        'title': 'Genome-wide reads density from BAM',
        'description': __doc__,
        'path': ['Files', 'Bam2density'],
        'output': Bam2DensityForm,
        'in': in_parameters,
        'out': out_parameters,
        'meta': meta,
        }

    def __call__(self, **kw):
        b2wargs = []
        control = None
        samples = kw.get('BamMulti',{}).get('sample', [])
        if not isinstance(samples, list): samples = [samples]
        samples = [os.path.abspath(s) for s in samples if os.path.exists(s)]
        if kw.get('control'):
            control = kw['control']
            b2wargs = ["-c", str(control)]
            assert os.path.exists(str(control)), "Control file not found: '%s'." % control
            control = os.path.abspath(control)
        if kw.get('normalization') is None: nreads = -1
        else: nreads = int(kw['normalization'])
        bamfiles = [track(s, format='bam') for s in samples]
        if nreads < 0:
            if control is None:
                _nreads = [len(set((t[4] for t in b.read()))) for b in bamfiles]
            else:
                b2wargs += ["-r"]
        else:
            _nreads = [nreads for s in samples]
        if kw.get('merge_strands') is None: merge_strands = -1
        else: merge_strands = int(kw['merge_strands'])
        if kw.get('read_extension') is None: read_extension = -1
        else: read_extension = int(kw['read_extension'])
        output = [self.temporary_path(fname=b.name+'_density_') for b in bamfiles]
        format = kw.get("format", "sql")
        with execution(None) as ex:
            files = [bam_to_density( ex, s, output[n], nreads=_nreads[n], 
                                     merge=merge_strands,
                                     read_extension=read_extension,
                                     sql=True, args=b2wargs ) 
                     for n,s in enumerate(samples)]
        info = {'datatype': 'quantitative',
                'read_extension': read_extension}
        if merge_strands >= 0:
            suffixes = ["merged"]
            info['shift'] = merge_strands
        else:
            suffixes = ["fwd", "rev"]
        chrmeta = bamfiles[0].chrmeta
        for suf in suffixes:
            all_s_files = [x for y in files for x in y if x.endswith(suf+".sql")]
            if len(all_s_files) > 1:
                x = self.temporary_path(fname="Density_average_"+suf+".sql")
                tsql = track( x, fields=['start', 'end', 'score'],
                              chrmeta=chrmeta, info={'datatype': 'quantitative'} )
                insql = []
                for f in all_s_files:
                    t = track(f, format='sql', chrmeta=chrmeta)
                    t.save()
                    insql.append(t)
                for c in tsql.chrmeta:
                    tsql.write(merge_scores([t.read(c) for t in insql]),chrom=c)
            else:
                x = all_s_files[0]
                tsql = track( x, format='sql', fields=['start', 'end', 'score'],
                              chrmeta=chrmeta, info=info )
                tsql.save()
            if format in [None,"sql"]:
                outname = x
            else:
                outname = os.path.splitext(x)[0]+"."+format
                convert(x, outname, mode="overwrite")
            self.new_file(outname, 'density_'+suf)
        return self.display_time()
Navigation

Source code for bsPlugins.Bam2Density

Other BBCF projects

Quick search

Navigation