Commit a656e288 authored by David Hoksza's avatar David Hoksza
Browse files
parents 3fed1bfd 2368b33b
......@@ -34,3 +34,19 @@ python opentargets.py -o orphan_id [-s threshold_score]
- The threshold score is by default set to 0 (although OpenTargets recommend 0.6 as a reasonable
threshold).
## Ensembl
The script [vepAllInOne.py](bh19-rare-diseases/associations/vepmining) retrives information about [allele frequencies](https://en.wikipedia.org/wiki/Allele\_frequency) in several populations available in the [Ensembl](http://www.ensembl.org/index.html) databse. This is done throug [this endpoint](https://rest.ensembl.org/documentation/info/vep\_id\_post) of the Ensembl API.
```commandline
python3 vepAllInOne.py -f <single column of rsid input file > -t < float, allele frequency threshold below with variant is retained >
```
It takes as *input* a file with a single column of rs# and give as *output* in the standard out a single column of rs#.
In the output there are the variants from the input that are either not described in the genomic databses contained in Ensembl or for which there is at least one population with allele frequency >= the threshold indicated in the command line. As an example, using a threshold of 0.10 will filter out form the input file all variants that have minor allele frequency eqaul to or greater than 10\% in at least one population among those described in Ensembl. Also variant not present in Ensembl are filtered out under the assumption that they have never been described so far.
This diff is collapsed.
rs1060501114
rs1060501127
rs1060501130
rs1060501135
rs1060501136
rs1060501142
rs1060501145
rs1064792926
rs1131691708
rs12720452
rs137854600
rs137854601
rs137854602
rs137854603
rs137854604
rs137854606
rs137854607
rs137854609
rs137854611
rs137854612
rs137854614
rs137854615
rs137854616
rs137854617
rs137854618
rs185492581
rs185638763
rs193922726
rs199473045
rs199473050
rs199473051
rs199473052
rs199473053
rs199473054
rs199473055
rs199473056
rs199473058
rs199473061
rs199473062
rs199473063
rs199473065
rs199473066
rs199473067
rs199473070
rs199473072
rs199473074
rs199473076
rs199473079
rs199473081
rs199473082
rs199473083
rs199473086
rs199473088
rs199473090
rs199473091
rs199473092
rs199473093
rs199473095
rs199473096
rs199473097
rs199473098
rs199473101
rs199473102
rs199473103
rs199473104
rs199473117
rs199473122
rs199473124
rs199473133
rs199473134
rs199473137
rs199473139
rs199473143
rs199473144
rs199473147
rs199473149
rs199473153
rs199473154
rs199473156
rs199473157
rs199473158
rs199473159
rs199473160
rs199473161
rs199473164
rs199473167
rs199473168
rs199473169
rs199473170
rs199473171
rs199473172
rs199473173
rs199473174
rs199473175
rs199473176
rs199473178
rs199473179
['rs1060501114\n', 'rs1060501127\n', 'rs1060501130\n', 'rs1060501135\n']
[
[
"rs1060501114",
{}
],
[
"rs1060501127",
{}
],
[
"rs1060501130",
{}
],
[
"rs1060501135",
{}
],
[
"rs1060501136",
{}
],
[
"rs1060501142",
{}
],
[
"rs1060501145",
{}
],
[
"rs1064792926",
{}
],
[
"rs1131691708",
{}
]
]
This diff is collapsed.
import requests, sys, json , argparse
listOfDict=[]
listOfrsid=["rs10494366", "rs1060501114", "rs1060501127", "rs1060501130"]
server = "https://rest.ensembl.org"
ext = "/vep/human/id"
headers={ "Content-Type" : "application/json", "Accept" : "application/json"}
strOfDataDictionary=json.dumps({"ids" : listOfrsid })
res = requests.post(server+ext, headers=headers, data=strOfDataDictionary)
#return json.loads(res.text)
decoded = res.json() # a python dictionary
for elem in decoded:
#freq_dict={}
for var in elem["colocated_variants"]:
if 'frequencies' in var:#freq_dict=var["frequencies"]
listOfDict.append(var["frequencies"] )
return listOfDict
#print (len(decoded) )
#print(json.dumps(decoded, indent=2))
import requests, sys, json , argparse
"""
USAGE: python3 vep.py -t <threshold below with variant is retained> -f <disgenet.test.out>
send a request to https://rest.ensembl.org/documentation/info/vep_id_post to retrive information about allele frequencies in the general population (if available) for a list of rsid
INPUT: an output file form disgenet.py or in general any file that contains rsid in the form
"rs1060501145",
(one or more rsid per line, rsdi must have quotes
USAGE: python3 vepAllInOne.py -f <single column of rsid input file > -t < allele frequency threshold below with variant is retained >
OUTPUT: a single column text file with rsids of variants that are either not described in the genomic databses contained in VEP or for which there is at least one population described in VEP with allele frequency >= the threshold indicated in the command line
INPUT: single column of rsid input file
OUTPUT: in the standard out, a single column of rsids of variants that are either not described in the genomic databses contained in VEP or for which there is at least one population described in VEP with allele frequency >= the threshold indicated in the command line
"""
def compareFreq (dictionary, threshold):
variantIsCommon=0; retain=True
for elem in dictionary:
......@@ -60,15 +61,19 @@ def main():
with open(args.f) as myf:
allrsId = myf.readlines()
#thislist=["rs12720452", "rs1060501130"]
result = getfreqfromVEPbulck(allrsId[1:100])
for rsinfo in result:
rsdict=rsinfo[1]; rsname=rsinfo[0]
if not any(rsdict): print (rsname) #, rsdict, "RARE")
elif compareFreq(rsdict, args.t ): print(rsname) #, rsdict, "YAY, RARE" )
#else: print(rsname, rsdict, "DISCARDED")
maxNumbRsIdForRequest=200 # there is a limit in the VEP rest
while len(allrsId) > 0 :
sublist=allrsId[0:maxNumbRsIdForRequest]
allrsId=allrsId[maxNumbRsIdForRequest:]
result = getfreqfromVEPbulck(sublist)
for rsinfo in result:
rsdict=rsinfo[1]; rsname=rsinfo[0]
if not any(rsdict): print (rsname) #, rsdict, "RARE")
elif compareFreq(rsdict, args.t ): print(rsname) #, rsdict, "YAY, RARE" )
#else: print(rsname, rsdict, "DISCARDED")
#print(json.dumps(result, indent=2))
#print (result)
#print(json.dumps(result, indent=2))
#print (result)
if __name__ == "__main__":
main()
rs1060501114
rs1060501127
rs1060501130
rs1060501135
rs1060501136
rs1060501142
rs1060501145
rs1064792926
rs1131691708
rs12720452
rs137854600
rs137854601
rs137854602
rs137854603
rs137854604
rs137854606
rs137854607
rs137854609
rs137854611
rs137854612
rs137854614
rs137854615
rs137854616
rs137854617
rs137854618
rs185492581
rs185638763
rs193922726
rs199473045
rs199473050
rs199473051
rs199473052
rs199473053
rs199473054
rs199473055
rs199473056
rs199473058
rs199473061
rs199473062
rs199473063
rs199473065
rs199473066
rs199473067
rs199473070
rs199473072
rs199473074
rs199473076
rs199473079
rs199473081
rs199473082
rs199473083
rs199473086
rs199473088
rs199473090
rs199473091
rs199473092
rs199473093
rs199473095
rs199473096
rs199473097
rs199473098
rs199473101
rs199473102
rs199473103
rs199473104
rs199473117
rs199473122
rs199473124
rs199473133
rs199473134
rs199473137
rs199473139
rs199473143
rs199473144
rs199473147
rs199473149
rs199473153
rs199473154
rs199473156
rs199473157
rs199473158
rs199473159
rs199473160
rs199473161
rs199473164
rs199473167
rs199473168
rs199473169
rs199473170
rs199473171
rs199473172
rs199473173
rs199473174
rs199473175
rs199473176
rs199473178
rs199473179
rs199473180
rs199473181
rs199473188
rs199473194
rs199473199
rs199473204
rs199473205
rs199473206
rs199473207
rs199473208
rs199473210
rs199473211
rs199473213
rs199473214
rs199473217
rs199473219
rs199473220
rs199473221
rs199473225
rs199473228
rs199473229
rs199473230
rs199473231
rs199473232
rs199473233
rs199473234
rs199473235
rs199473236
rs199473237
rs199473239
rs199473240
rs199473241
rs199473242
rs199473244
rs199473245
rs199473246
rs199473247
rs199473248
rs199473249
rs199473250
rs199473251
rs199473252
rs199473254
rs199473261
rs199473266
rs199473267
rs199473269
rs199473270
rs199473271
rs199473273
rs199473274
rs199473275
rs199473280
rs199473281
rs199473282
rs199473287
rs199473289
rs199473292
rs199473293
rs199473294
rs199473295
rs199473297
rs199473298
rs199473299
rs199473302
rs199473304
rs199473305
rs199473309
rs199473311
rs199473315
rs199473316
......@@ -56,24 +56,6 @@
</repositories>
<dependencies>
<dependency>
<groupId>org.pathvisio</groupId>
<artifactId>core</artifactId>
<version>${pathvisio.version}</version>
</dependency>
<dependency>
<groupId>org.pathvisio</groupId>
<artifactId>desktop</artifactId>
<version>${pathvisio.version}</version>
</dependency>
<dependency>
<groupId>org.pathvisio</groupId>
<artifactId>gui</artifactId>
<version>${pathvisio.version}</version>
</dependency>
<dependency>
<groupId>lcsb.mapviewer</groupId>
<artifactId>model</artifactId>
......
package lcsb.mapviewer.wikipathway;
import org.osgi.framework.BundleActivator;
import org.osgi.framework.BundleContext;
import org.pathvisio.desktop.plugin.Plugin;
/**
* Bundle activator of plugin. Responsible for registering bundle and cleaning
* during finishing.
*
* @author Piotr Gawron
*
*/
public class Activator implements BundleActivator {
/**
* CellDesigner plugin for pathvisio.
*/
private ImportExport plugin;
@Override
public void start(BundleContext context) throws Exception {
plugin = new ImportExport();
context.registerService(Plugin.class.getName(), plugin, null);
}
@Override
public void stop(BundleContext context) throws Exception {
if (plugin != null) {
plugin.done();
}
}
}
package lcsb.mapviewer.wikipathway;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.core.LogEvent;
import org.apache.logging.log4j.core.LoggerContext;
import org.pathvisio.core.model.*;
import org.pathvisio.desktop.PvDesktop;
import org.pathvisio.desktop.plugin.Plugin;
import lcsb.mapviewer.common.MinervaLoggerAppender;
import lcsb.mapviewer.converter.ConverterParams;
import lcsb.mapviewer.converter.model.celldesigner.CellDesignerXmlParser;
import lcsb.mapviewer.model.map.model.Model;
import lcsb.mapviewer.wikipathway.XML.GPMLToModel;
import lcsb.mapviewer.wikipathway.XML.ModelToGPML;
/**
* Pathvisio plugin that allows to import/export CellDesigner file.
*
* @author Piotr Gawron
*
*/
public class ImportExport implements Plugin {
/**
* Default class logger.
*/
private static Logger logger = LogManager.getLogger(ImportExport.class);
/**
* List of extensions supported by import plugin.
*/
private final String[] importExtensions = new String[] { "xml" };
/**
* List of extensions supported by export plugin.
*/
private final String[] exportExtensions = new String[] { "cell" };
@Override
public void init(PvDesktop desktop) {
try {
LoggerContext context = (org.apache.logging.log4j.core.LoggerContext) LogManager.getContext(false);
File file = new File("log4j2.properties");
context.setConfigLocation(file.toURI());
} catch (Exception e) {
e.printStackTrace();
}
ExportCellDesigner exportPlugin = new ExportCellDesigner();
ImportCellDesigner importPlugin = new ImportCellDesigner();
desktop.getSwingEngine().getEngine().addPathwayImporter(importPlugin);
desktop.getSwingEngine().getEngine().addPathwayExporter(exportPlugin);
}
@Override
public void done() {
}
/**
* Implementation of {@link PathwayExporter} that allows PathVisio to import
* from CellDesigner xml file.
*
* @author Piotr Gawron
*
*/
protected class ImportCellDesigner implements PathwayImporter {
/**
* List of warnings that occured during conversion.
*/
private List<LogEvent> warnings = new ArrayList<>();
/**
* Default constructor.
*/
public ImportCellDesigner() {
}
@Override
public String getName() {
return "CellDesigner";
}
@Override
public String[] getExtensions() {
return importExtensions;
}
@Override
public List<String> getWarnings() {
List<String> result = new ArrayList<>();
for (LogEvent event : warnings) {
result.add(event.getMessage().getFormattedMessage());
}
return result;
}
@Override
public boolean isCorrectType(File arg0) {
return true;
}
@Override
public Pathway doImport(File file) throws ConverterException {
MinervaLoggerAppender appender = MinervaLoggerAppender.createAppender();
try {
Pathway pathway = new Pathway();
String fileName = file.getPath();
CellDesignerXmlParser parser = new CellDesignerXmlParser();
Model model = (Model) parser.createModel(new ConverterParams().filename(fileName).sizeAutoAdjust(false));
String tmp = new ModelToGPML(model.getName()).getGPML(model);
InputStream stream = new ByteArrayInputStream(tmp.getBytes(StandardCharsets.UTF_8));
Boolean validate = false;
pathway.readFromXml(stream, validate);
MinervaLoggerAppender.unregisterLogEventStorage(appender);
warnings.addAll(appender.getWarnings());
return pathway;
} catch (Exception e) {
logger.error(e, e);
throw new ConverterException(e);
} finally {
MinervaLoggerAppender.unregisterLogEventStorage(appender);
}
}
}
/**
* Implementation of {@link PathwayExporter} that allows PathVisio to export
* into CellDesigner xml file.
*
* @author Piotr Gawron
*
*/
protected class ExportCellDesigner implements PathwayExporter {
/**