Source code for bsPlugins.MotifSearch

from bsPlugins import *
from bein import execution
from bbcflib.common import fasta_length
from bbcflib.motif import meme
from bbcflib import genrep
from bbcflib.track import track, FeatureStream
import os, tarfile

# Choices for the 'Sequence source' radio buttons, as (value, label) pairs.
input_types = [
    (0, 'Fasta upload'),
    (1, 'Select regions from genome'),
]

# For each radio value, the ids of the form fields associated with it; handed
# to the hiding radio-button widget as its `mapping`.
# NOTE(review): presumably these are the fields left visible for that choice
# -- confirm against the widget documentation.
input_map = {
    0: ['fastafile'],
    1: ['assembly', 'regions'],
}

# Default number of motifs: pre-fills the form field and is the fallback when
# the plugin is called without 'nmotifs'.
_nm = 4

# Options for the form's 'Assembly' selector. Evaluated once, when this module
# is imported.
# NOTE(review): this presumably queries the GenRep service, so importing the
# module may require it to be reachable -- confirm.
assembly_list = genrep.GenRep().assemblies_available()

# Plugin metadata, exposed through MotifSearchPlugin.info['meta'].
meta = {
    'version': "1.0.0",
    'author': "BBCF",
    'contact': "webmaster-bbcf@epfl.ch",
}

# Declared inputs of the plugin: one entry per keyword that __call__ reads.
in_parameters = [
    {'id': 'input_type', 'type': 'radio'},
    {'id': 'fastafile', 'type': 'userfile'},
    {'id': 'assembly', 'type': 'assembly'},
    {'id': 'regions', 'type': 'track'},
    {'id': 'nmotifs', 'type': 'int'},
]

# Declared outputs: the single gzipped tar archive registered by __call__.
out_parameters = [
    {'id': 'meme_archive', 'type': 'file'},
]


class MotifSearchForm(BaseForm):
    # Input form for the motif search plugin. The field ids below match the
    # 'id' entries of `in_parameters`.
    # NOTE(review): the declaration order is kept as-is; it presumably
    # determines the order in which the fields are rendered.

    child = twd.HidingTableLayout()

    # Sequence source selector; `input_map` ties each choice to the fields
    # that belong to it.
    input_type = twd.HidingRadioButtonList(
        label='Sequence source: ',
        options=input_types,
        value=0,
        mapping=input_map,
    )

    # Source 0: sequences uploaded directly as a fasta file.
    fastafile = twb.BsFileField(
        label='Fasta file: ',
        help_text='Sequences to scan',
    )

    # Source 1: an assembly plus a file of regions to extract from it.
    assembly = twf.SingleSelectField(
        label='Assembly: ',
        options=assembly_list,
        prompt_text=None,
        help_text='Assembly to fetch sequences from',
    )
    regions = twb.BsFileField(
        label='Regions: ',
        help_text='Genomic regions to search (e.g. bed)',
        validator=twb.BsFileFieldValidator(),
    )

    # Common to both sources; pre-filled with the module default `_nm`.
    nmotifs = twf.TextField(
        label='Number of motifs: ',
        validator=twc.IntValidator(),
        value=_nm,
        help_text='Number of motifs to search',
    )

    submit = twf.SubmitButton(id="submit", value='Search motifs')


class MotifSearchPlugin(BasePlugin):
    """Search over-represented motifs in a set of a genomic regions using MEME"""
    # NOTE: the class docstring above is read at runtime (it becomes
    # info['description'] below), so it is user-facing text, not just a comment.
    info = {
        'title': 'Motif search by MEME',
        'description': __doc__,
        'path': ['Sequence analysis', 'Motif search'],
        'output': MotifSearchForm,
        'in': in_parameters,
        'out': out_parameters,
        'meta': meta,
    }

    def __call__(self, **kw):
        """Run MEME on the requested sequences and archive the results.

        Keyword arguments read from `kw`:
          input_type -- 0 (default) for an uploaded fasta file, 1 for regions
                        taken from an assembly; the string forms '0'/'1' are
                        accepted, as are the labels listed in `input_types`.
          fastafile  -- path to the fasta file (input_type 0).
          assembly   -- assembly identifier passed to genrep.Assembly
                        (input_type 1).
          regions    -- path to a track file of regions (input_type 1).
          nmotifs    -- number of motifs to search; falls back to `_nm`.
          meme_args  -- optional list of extra MEME arguments; '-nmotifs' is
                        appended unless already present. The caller's list is
                        not modified.

        The MEME output directory and the fasta file are packed into a gzipped
        tar archive, which is registered as the 'meme_archive' output.

        Returns the value of self.display_time().

        Raises ValueError if the fasta/regions file does not exist or if
        `input_type` is not one of the supported values.
        """
        input_type = kw.get('input_type', 0)
        # Form values may arrive as strings; normalise known indices to int.
        if str(input_type) in [str(x[0]) for x in input_types]:
            input_type = int(input_type)

        # Each input_types entry is a (value, label) tuple, so the membership
        # tests below match either the numeric value or the label.
        if input_type in input_types[0]:  # fasta
            fasta = kw.get('fastafile') or ''
            # Fail early and consistently with the regions branch instead of
            # crashing on a missing value or only after MEME has run.
            if not os.path.exists(fasta):
                raise ValueError("File not found: %s" % fasta)
            name = os.path.splitext(os.path.basename(fasta))[0]
            assembly = genrep.Assembly(fasta=fasta)
            size = None  # computed below, once an execution is available
        elif input_type in input_types[1]:  # regions
            assembly = genrep.Assembly(kw.get('assembly'))
            regions_file = kw.get('regions') or ''
            if not os.path.exists(regions_file):
                raise ValueError("File not found: %s" % regions_file)
            regions = track(regions_file, chrmeta=assembly.chrmeta)
            name = regions.name
            gRef = assembly.untar_genome_fasta()
            fasta = self.temporary_path(fname=regions.name + '.fa')
            (fasta, size) = assembly.fasta_from_regions(
                list(regions.read(fields=['chr', 'start', 'end'])),
                out=fasta, path_to_ref=gRef)
        else:
            raise ValueError("Input type not implemented: %s" % input_type)

        background = assembly.statistics(
            self.temporary_path(fname="background"), frequency=True)
        output = self.temporary_path(fname=name + "_meme.tgz")
        # MEME writes next to the fasta file, in '<name>_meme'.
        outdir = os.path.join(os.path.split(fasta)[0], name + "_meme")

        meme_args = kw.get("meme_args") or []
        nmotifs = kw.get('nmotifs') or _nm
        if '-nmotifs' not in meme_args:
            # Build a new list rather than '+=' so that a list supplied by the
            # caller is left untouched.
            meme_args = meme_args + ['-nmotifs', "%i" % int(nmotifs)]

        with execution(None) as ex:
            if size is None:
                size = sum(fasta_length(ex, fasta).values())
            # Floor division keeps maxsize an integer under both Python 2 and
            # true division.
            meme(ex, fasta, outdir, background,
                 maxsize=(size * 3) // 2, args=meme_args)

        tarf = tarfile.open(output, "w:gz")
        try:
            tarf.add(outdir, arcname=os.path.basename(outdir))
            tarf.add(fasta, arcname=os.path.basename(fasta))
        finally:
            # Always release the archive handle, even if adding a member fails.
            tarf.close()

        self.new_file(output, 'meme_archive')
        return self.display_time()

Other BBCF projects