Source code for bsPlugins.TopGo

from bsPlugins import *
import rpy2.robjects as robjects
import os, tarfile

# Assembly name pairs. TopGoPlugin.__call__ swaps a value equal to the second
# item of a pair for the first item before handing it to the R script, and
# TopGoForm feeds this same list to its assembly selector.
mart_map = [
    ("GRCh37.p5", "hg19"),
    ("NCBIM37", "mm9"),
    ("EF3", "sacCer2"),
    ("BDGP5.25", "dm3"),
    ("Zv9", "zv9"),
]

# Directory from which TopGo.R is sourced when no "script_path" is supplied.
default_path = "/mnt/common/epfl/share"

# Plugin metadata, exposed under the 'meta' key of the plugin's info dict.
meta = {
    "version": "1.0.0",
    "author": "BBCF",
    "contact": "webmaster-bbcf@epfl.ch",
}

# Declared inputs; the ids match the keyword arguments read by the plugin.
in_parameters = [
    {"id": "gene_list", "type": "userfile", "required": True},
    {"id": "assembly", "type": "assembly"},
    {"id": "num_terms", "type": "int"},
    {"id": "pval", "type": "float"},
]

# Declared outputs: the *_tar entries are used when several files are
# produced, the plain entries when there is a single plot and a single table.
out_parameters = [
    {"id": "TopGO_table_tar", "type": "file"},
    {"id": "TopGO_plots_tar", "type": "file"},
    {"id": "TopGO_table", "type": "txt"},
    {"id": "TopGO_plots", "type": "pdf"},
]


class TopGoForm(BaseForm):
    # Input form of the TopGo plugin (referenced from TopGoPlugin.info).
    # The attribute names match the ids listed in `in_parameters`, which are
    # also the keyword names read by TopGoPlugin.__call__.

    # Required file field for the gene list.
    # NOTE(review): 'ensmbl' in the help text looks like a typo for 'Ensembl';
    # left untouched here because it is a runtime, user-facing string.
    gene_list = twb.BsFileField(label='Genes: ',
                              help_text='Provide a list of ensmbl IDs',
                              validator=twb.BsFileFieldValidator(required=True))
    # Single-choice selector populated from the module-level `mart_map` pairs;
    # no prompt entry is shown (prompt_text=None).
    assembly = twf.SingleSelectField(label='Assembly: ',
                                     options=mart_map,
                                     prompt_text=None,
                                     help_text='Reference genome')
    # Optional integer, pre-filled with 10.
    num_terms = twf.TextField(label='Number of significant terms: ',
                              validator=twc.IntValidator(required=False),
                              value=10,
                              help_text='Number of most significant terms to return')
    # Float validated against the range [0, 1], pre-filled with .05.
    pval = twf.TextField(label='P-value threshold: ',
                         validator=twb.FloatValidator(min=0,max=1),
                         value=.05,
                         help_text='Maximum p-value to include in the output')
    # Button that submits the form.
    submit = twf.SubmitButton(id="submit", value="TopGo analysis")


class TopGoPlugin(BasePlugin):
    """Makes a GO analysis on a list of Ensembl IDs.

    Given a file with one Ensembl ID on each line, it returns a summary table (.txt)
    and GO networks in a pdf. The first regroups the most significant terms concerning
    Biological Processes (BP), Cellular Components (CC) and Molecular Function (MF).
    One can choose the maximum number of each of these terms to include in the output,
    with a threshold on the p-value.
    """
    # NOTE: the class docstring above is reused verbatim as the plugin
    # description ('description': __doc__), so it is user-facing text.
    info = {
        'title': 'Gene Ontology analysis (TopGO)',
        'description': __doc__,
        'path': ['Analysis', 'TopGo'],
        'output': TopGoForm,
        'in': in_parameters,
        'out': out_parameters,
        'meta': meta,
    }

    @staticmethod
    def _r_escape(value):
        """Return *value* as text that is safe inside an R double-quoted literal."""
        # "%s" formatting (rather than str()) keeps unicode values intact.
        text = "%s" % (value,)
        # Backslashes first, otherwise the ones added for quotes get doubled.
        return text.replace('\\', '\\\\').replace('"', '\\"')

    @staticmethod
    def _write_tar(tar_name, paths):
        """Pack *paths* into the gzipped tar *tar_name*, stored under their base names."""
        # The context manager closes the archive even if adding a member fails.
        with tarfile.open(tar_name, "w:gz") as archive:
            for path in paths:
                archive.add(path, arcname=os.path.basename(path))

    def __call__(self, **kw):
        """Run the TopGo.R analysis on a gene list and register the resulting files.

        Keyword arguments read from ``kw``:

        * ``assembly``: assembly name. A value equal to the second item of a
          ``mart_map`` pair is replaced by the first item of that pair; any
          other non-empty value is passed to R unchanged.
        * ``gene_list``: path to the input file; it must exist.
        * ``script_path``: directory containing ``TopGo.R``
          (defaults to ``default_path``).
        * ``num_terms``: converted with ``int``; 10 when missing or empty.
        * ``pval``: converted with ``float``; .05 when missing or empty.

        When R reports more than one plot file, plots and tables are each
        bundled into a ``.tgz`` archive and registered as ``TopGO_plots_tar``
        and ``TopGO_table_tar``. Otherwise the single plot and table are
        registered as ``TopGO_plots`` and ``TopGO_table``.

        Returns the value of ``self.display_time()``.

        Raises ``ValueError`` if no assembly is given or the gene list file
        does not exist, and ``RuntimeError`` if the R script yields no output
        file to register.
        """
        assembly_id = kw.get('assembly') or None
        for mart_id, short_name in mart_map:
            if assembly_id == short_name:
                assembly_id = mart_id
                break
        if assembly_id is None:
            raise ValueError("Please specify an assembly")

        filename = kw.get('gene_list')
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not os.path.exists(str(filename)):
            raise ValueError("File not found: '%s'" % filename)
        script_path = kw.get("script_path", default_path)
        fname = os.path.splitext(os.path.basename(filename))[0]
        pdf = self.temporary_path(fname='TopGO_plots.pdf')
        table = self.temporary_path(fname='TopGO_tables.txt')
        num_terms = int(kw.get('num_terms') or 10)
        pval = float(kw.get('pval') or .05)

        # Strings are escaped so that quotes or backslashes in a path cannot
        # terminate the R literal. The p-value is written with %r because %f
        # keeps only six decimals and would turn e.g. 1e-07 into 0.000000.
        r_strings = tuple(self._r_escape(arg) for arg in
                          (script_path, filename, assembly_id, pdf, table))
        robjects.r("""
source("%s/TopGo.R")
out = multi_topGo("%s","%s","%s","%s",%i,%r)
""" % (r_strings + (num_terms, pval)))

        # `out` is read back from the R global environment: element 0 is
        # iterated for plot paths, element 1 for table paths, taking the
        # first item of each entry.
        out = robjects.r('out')
        pdf_list = [entry[0] for entry in out[0]]
        table_list = [entry[0] for entry in out[1]]

        if len(pdf_list) > 1:
            tar_pdf_name = self.temporary_path('TopGO_plots_'+fname+'.tgz')
            self._write_tar(tar_pdf_name, pdf_list)
            tar_table_name = self.temporary_path(fname='TopGO_tables_'+fname+'.tgz')
            self._write_tar(tar_table_name, table_list)
            self.new_file(tar_pdf_name, 'TopGO_plots_tar')
            self.new_file(tar_table_name, 'TopGO_table_tar')
        else:
            # Fail with a clear message rather than a bare IndexError below.
            if not pdf_list or not table_list:
                raise RuntimeError(
                    "TopGo.R did not produce any output for '%s'" % filename)
            self.new_file(pdf_list[0], 'TopGO_plots')
            self.new_file(table_list[0], 'TopGO_table')
        return self.display_time()

Other BBCF projects