# Source code for bsPlugins.List2Track

from bsPlugins import *
from bbcflib.track import track,FeatureStream
from bbcflib import genrep
import os


class List2TrackForm(BaseForm):
    """Input form of the List2Track plugin.

    Declares four inputs (reference assembly, feature type, a file listing
    IDs, output format) followed by the submit button.
    """
    child = twd.HidingTableLayout()
    # The option list is evaluated once, when this class body is executed.
    assembly = twf.SingleSelectField(label='Assembly: ',
        options=genrep.GenRep().assemblies_available(),
        help_text='Reference genome',
        validator=twc.Validator(required=True), )
    # These three values are the ones List2TrackPlugin.__call__ dispatches on.
    feature_type = twf.SingleSelectField(label='Feature type: ',
        options=['genes','exons','transcripts'],
        prompt_text=None,
        help_text='Choose the kind of genomic features you want to annotate',
        validator=twc.Validator(required=True), )
    # No validator here: the field carries no `required` constraint.
    ids_list = twf.FileField(
        label='IDs list: ',
        help_text='Select the file with the list of IDs',)
    # The selected value is used as the extension of the output file name.
    format = twf.SingleSelectField(label='Output format: ',
        options=["sql","bed"],
        prompt_text=None,
        help_text='Format of the output file', )
    submit = twf.SubmitButton(id="submit", value="Submit")


# Plugin metadata, exposed through List2TrackPlugin.info['meta'].
meta = dict(
    version="1.0.0",
    author="BBCF",
    contact="webmaster-bbcf@epfl.ch",
)

# Declared plugin inputs, exposed through List2TrackPlugin.info['in'].
# NOTE(review): List2TrackPlugin.__call__ also reads 'assembly' and
# 'feature_type' from its keyword arguments, and treats 'ids_list' as
# optional and as a single path -- confirm these declarations are intended.
in_parameters = [
    dict(id='ids_list', type='txt', required=True, multiple=True),
    dict(id='format', type=''),
]

# Declared plugin outputs, exposed through List2TrackPlugin.info['out'].
out_parameters = [dict(id='fulltrack', type='track')]


class List2TrackPlugin(BasePlugin):
    """Create a fully annotated track file from a features type or a subset of Ensembl IDs.
    
Either upload a raw text file with one Ensembl ID on each line, or choose a feature type to fetch them all."""
    # The class docstring above is reused verbatim as the plugin description
    # (via __doc__ below), so its text is user-visible and left untouched.
    info = {
        'title': 'Genome track from IDs',
        'description': __doc__,
        'path': ['Files', 'List2Track'],
        'output': List2TrackForm,
        'in': in_parameters,
        'out': out_parameters,
        'meta': meta,
        }

    # The three helpers below turn one mapping entry `x` into a row
    # (chr,start,end,gene_id|gene_name,score,strand) - bed format.
    # The score is always 0.0.
    def genes_annot(self, id, x):
        """Return the output row for gene `id`; the name is `id|x[0]`."""
        return (x[5], x[1], x[2], id + '|' + x[0], 0.0, x[4])

    def exons_annot(self, id, x):
        """Return the output row for an exon entry; `id` itself is not used."""
        return (x[6], x[3], x[4], x[1] + '|' + x[2], 0.0, x[5])

    def trans_annot(self, id, x):
        """Return the output row for a transcript entry; `id` itself is not used."""
        # NOTE(review): the last field is x[4] here, whereas exons_annot (same
        # chromosome index, 6) uses x[5] -- confirm against the tuple layout
        # returned by Assembly.get_transcript_mapping().
        return (x[6], x[2], x[3], x[0] + '|' + x[1], 0.0, x[4])

    def __call__(self, **kw):
        """Write the annotated track and register it as the 'fulltrack' output.

        Keyword arguments read from `kw`:
          - 'assembly': passed to genrep.Assembly.
          - 'format': extension given to the output file name.
          - 'feature_type': one of 'genes', 'exons', 'transcripts'.
          - 'ids_list' (optional): path to a text file with one ID per line.
            When absent or empty, every feature of the chosen type is written.

        IDs not found in the mapping are written as a placeholder row
        ('NA','0','0',id,0.0,'0'); blank lines in the file are ignored.

        Returns the value of self.display_time().
        Raises ValueError for an unknown 'feature_type', and AssertionError
        (when assertions are enabled) if the IDs file does not exist.
        """
        assembly = genrep.Assembly(kw.get('assembly'))
        out_format = kw['format']
        feature_type = kw['feature_type']
        # Dispatch table: feature type -> (mapping loader, row builder).
        # Only the loader of the requested type is actually called.
        handlers = {
            'genes': (assembly.get_gene_mapping, self.genes_annot),
            'exons': (assembly.get_exon_mapping, self.exons_annot),
            'transcripts': (assembly.get_transcript_mapping, self.trans_annot),
        }
        if feature_type not in handlers:
            # Fail early and explicitly instead of hitting an unbound name later.
            raise ValueError("Unknown feature type: '%s'" % feature_type)
        load_mapping, get_info = handlers[feature_type]
        mapping = load_mapping()

        def _annotate(ids_path):
            """Lazily yield one output row per non-blank line of `ids_path`."""
            with open(ids_path) as ids_file:
                for line in ids_file:
                    feat_id = line.strip()
                    if not feat_id:
                        # A blank line is not an ID; do not emit a row for it.
                        continue
                    entry = mapping.get(feat_id)
                    if entry:
                        yield get_info(feat_id, entry)
                    else:
                        yield ('NA','0','0',feat_id,0.0,'0')

        ids_list = kw.get('ids_list')
        fields = ['chr','start','end','name','score','strand']
        if ids_list:
            assert os.path.exists(str(ids_list)), "File not found: '%s'" % ids_list
            fulltrack = FeatureStream(_annotate(ids_list), fields=fields)
            # Output file is named after the IDs file, minus its extension.
            fname = os.path.splitext(os.path.basename(ids_list))[0]
        else:
            fulltrack = FeatureStream((get_info(g, mapping[g]) for g in mapping),
                                      fields=fields)
            fname = feature_type
        output = self.temporary_path(fname=fname + '.' + out_format)
        out = track(output, chrmeta=assembly)
        out.write(fulltrack)
        self.new_file(output, 'fulltrack')
        return self.display_time()

# nosetests --logging-filter=-tw2 test_List2Track.py
# Navigation
#
# Source code for bsPlugins.List2Track
#
# Other BBCF projects
#
# Quick search
#
# Navigation