Commit 5f5a11b5 authored by David Hoksza's avatar David Hoksza
Browse files

KEGG annotator supporting PUBMED mapping implemented. Not-tested version of...

KEGG annotator supporting PUBMED mapping implemented. Not-tested version of annotations grouping in frontend
parent 6dbb911d
......@@ -25,6 +25,7 @@ import lcsb.mapviewer.annotation.services.annotators.EnsemblAnnotator;
import lcsb.mapviewer.annotation.services.annotators.EntrezAnnotator;
import lcsb.mapviewer.annotation.services.annotators.GoAnnotator;
import lcsb.mapviewer.annotation.services.annotators.HgncAnnotator;
import lcsb.mapviewer.annotation.services.annotators.KeggAnnotator;
import lcsb.mapviewer.annotation.services.annotators.PdbAnnotator;
import lcsb.mapviewer.annotation.services.annotators.ReconAnnotator;
import lcsb.mapviewer.annotation.services.annotators.StitchAnnotator;
......@@ -129,6 +130,12 @@ public class ModelAnnotator {
*/
@Autowired
private HgncAnnotator hgncAnnotator;
/**
* Service accessing <a href= "http://www.kegg.jp/" > KEGG EC Nomenclature</a>.
*/
@Autowired
private KeggAnnotator keggAnnotator;
/**
* Service accessing <a href= "http://www.ncbi.nlm.nih.gov/gene/" >Entrez</a>.
......@@ -187,6 +194,7 @@ public class ModelAnnotator {
addAnnotator(uniprotAnnotator);
addAnnotator(goAnnotator);
addAnnotator(hgncAnnotator);
addAnnotator(keggAnnotator);
addAnnotator(pdbAnnotator);
addAnnotator(reconAnnotator);
addAnnotator(entrezAnnotator);
......
......@@ -47,7 +47,7 @@ public class BrendaAnnotator extends ElementAnnotator implements IExternalServic
private TairAnnotator tairAnnotator;
/**
* Pattern used for finding UniProt symbol from TAIR info page .
* Pattern used for finding EC symbol from UniProt page .
*/
private Pattern uniprotECMatcher = Pattern.compile("EC=((\\d+\\.-\\.-\\.-)|(\\d+\\.\\d+\\.-\\.-)|(\\d+\\.\\d+\\.\\d+\\.-)|(\\d+\\.\\d+\\.\\d+\\.\\d+))");
......@@ -66,7 +66,7 @@ public class BrendaAnnotator extends ElementAnnotator implements IExternalServic
this.setCache(null);
try {
Collection<MiriamData> mds = uniprotToBrenda(createMiriamData(MiriamType.UNIPROT, "P12345"));
Collection<MiriamData> mds = new UniprotAnnotator().uniProtToEC(createMiriamData(MiriamType.UNIPROT, "P12345"));
status.setStatus(ExternalServiceStatusType.OK);
List<String> ecs = new ArrayList<>();
......@@ -99,6 +99,7 @@ public class BrendaAnnotator extends ElementAnnotator implements IExternalServic
mdTair = md;
}
}
if (mdTair != null) {
tairAnnotator.annotateElement(object);
}
......@@ -110,48 +111,30 @@ public class BrendaAnnotator extends ElementAnnotator implements IExternalServic
}
}
UniprotAnnotator uniprotAnnotator = new UniprotAnnotator();
List<String> ecIds = new ArrayList<String>();
for (MiriamData mdUniprot: mdUniprots) {
Collection<MiriamData> mdBrendas = uniprotToBrenda(mdUniprot);
if (mdBrendas != null) {
for (MiriamData mdBrenda: mdBrendas) {
if (ecIds.indexOf(mdBrenda.getResource()) == -1) {
ecIds.add(mdBrenda.getResource());
object.addMiriamData(mdBrenda);
try {
Collection<MiriamData> mdECs = uniprotAnnotator.uniProtToEC(mdUniprot);
if (mdECs != null) {
for (MiriamData mdEC: mdECs) {
mdEC.setAnnotator(BrendaAnnotator.class);
mdEC.setDataType(MiriamType.BRENDA);
if (ecIds.indexOf(mdEC.getResource()) == -1) {
ecIds.add(mdEC.getResource());
object.addMiriamData(mdEC);
}
}
}
}
}
}
} catch (UniprotSearchException e) {
logger.warn("Cannot find EC data for UniProt id: " + mdUniprot.getResource());
}
}
}
}
/**
* Returns URL to UniProt restfull API about UniProt entry.
*
* @param uniprotId
* UniProt identifier
* @return URL to UniProt restfull API about UniProt entry
*/
private String getUniprotUrl(String uniprotId) {
return "http://www.uniprot.org/uniprot/" + uniprotId + ".txt";
}
/**
* Parse UniProt webpage to find information about
* {@link MiriamType#BRENDA}, i.e. EC, and returns them.
*
* @param pageContent
* UniProt info page
* @return BRENDA family identifier, i.e. EC, found on the page
*/
private Collection<MiriamData> parseUniprot(String pageContent) {
Collection<MiriamData> result = new HashSet<MiriamData>();
Matcher m = uniprotECMatcher.matcher(pageContent);
while (m.find()) {
result.add(createMiriamData(MiriamType.BRENDA, m.group(1)));
}
return result;
}
@Override
public Object refreshCacheQuery(Object query) throws SourceNotAvailable {
......@@ -174,41 +157,8 @@ public class BrendaAnnotator extends ElementAnnotator implements IExternalServic
return result;
}
/**
* Transform UniProt identifier to CAZy identifier.
*
* @param UniProt
* {@link MiriamData} with UniProt identifier
* @return Collection of {@link MiriamData} with BRENDA identifier
* @throws AnnotatorException
* thrown when there is a problem with accessing external database
*/
public Collection<MiriamData> uniprotToBrenda(MiriamData uniprot) throws AnnotatorException {
if (uniprot == null) {
return null;
}
if (!MiriamType.UNIPROT.equals(uniprot.getDataType())) {
throw new InvalidArgumentException(MiriamType.UNIPROT + " expected.");
}
String accessUrl = getUniprotUrl(uniprot.getResource());
try {
String pageContent = getWebPageContent(accessUrl);
Collection<MiriamData> collection = parseUniprot(pageContent);
if (collection.size() > 0) {
return collection;
} else {
logger.warn("Cannot find EC data for UniProt id: " + uniprot.getResource());
return null;
}
} catch (WrongResponseCodeIOException exception) {
logger.warn("Wrong response code when retrieving EC data for UniProt id: " + uniprot.getResource());
return null;
} catch (IOException exception) {
throw new AnnotatorException(exception);
}
}
@Override
public String getCommonName() {
......
package lcsb.mapviewer.annotation.services.annotators;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
import lcsb.mapviewer.annotation.cache.WebPageDownloader;
import lcsb.mapviewer.annotation.services.ExternalServiceStatus;
import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
import lcsb.mapviewer.annotation.services.IExternalService;
import lcsb.mapviewer.annotation.services.WrongResponseCodeIOException;
import lcsb.mapviewer.common.exception.InvalidArgumentException;
import lcsb.mapviewer.model.map.BioEntity;
import lcsb.mapviewer.model.map.MiriamData;
import lcsb.mapviewer.model.map.MiriamType;
import lcsb.mapviewer.model.map.species.Gene;
import lcsb.mapviewer.model.map.species.GenericProtein;
import lcsb.mapviewer.model.map.species.Protein;
import lcsb.mapviewer.model.map.species.Rna;
import lcsb.mapviewer.model.map.species.Species;
/**
* This is a class that implements KEGG annotator which extract from KEGG
* PUBMED records and homologous information about homologous genes in
* different organisms based on parameterization of the annotator.
*
* @author David Hoksza
*
*/
public class KeggAnnotator extends ElementAnnotator implements IExternalService {
/**
* Default class logger.
*/
private static Logger logger = Logger.getLogger(KeggAnnotator.class);
/**
* Pattern used for finding PUBMED IDs in KEGG page.
*/
private Pattern pubmedMatcher = Pattern.compile("\\[PMID:(\\d+)\\]");
/**
* Service used for annotation of entities using {@link MiriamType#TAIR_LOCUS TAIR}.
*/
@Autowired
private TairAnnotator tairAnnotator;
/**
* Service used for retrieving EC numbers based on {@link MiriamType#UNIPROT}
*/
@Autowired
private UniprotAnnotator uniprotAnnotator;
/**
* Default constructor.
*/
public KeggAnnotator() {
super(KeggAnnotator.class, new Class[] { Protein.class, Gene.class, Rna.class }, false);
}
@Override
public ExternalServiceStatus getServiceStatus() {
ExternalServiceStatus status = new ExternalServiceStatus(getCommonName(), getUrl());
GeneralCacheInterface cacheCopy = getCache();
this.setCache(null);
try {
Species protein = new GenericProtein("id");
MiriamData mdEC = createMiriamData(MiriamType.EC, "3.1.2.14");
mdEC.setAnnotator(null);
protein.addMiriamData(mdEC);
annotateElement(protein);
status.setStatus(ExternalServiceStatusType.OK);
Set<String> pmids = new HashSet<String>();
pmids.add("30409");
pmids.add("3134");
int cntMatches = 0;
for (MiriamData md: protein.getMiriamData()) {
if (pmids.contains(md.getResource())) {
cntMatches++;
}
}
if (cntMatches != 2) {
status.setStatus(ExternalServiceStatusType.CHANGED);
}
} catch (Exception e) {
logger.error(status.getName() + " is down", e);
status.setStatus(ExternalServiceStatusType.DOWN);
}
this.setCache(cacheCopy);
return status;
}
//private Boolean resourceInCollection(String resource, Collection<MiriamData> list) {
//
// for (MiriamData md: list) {
// if (md.getResource() == resource) {
// return true;
// }
//
// }
// return false;
//}
@Override
public void annotateElement(BioEntity object) throws AnnotatorException {
if (isAnnotatable(object)) {
MiriamData mdTair = null;
for (MiriamData md : object.getMiriamData()) {
Class<?> annotator = md.getAnnotator();
if (annotator == this.getClass()) {
//this annotator was already used
return;
}
else if (md.getDataType().equals(MiriamType.TAIR_LOCUS) &&
(annotator == null ) ) {
mdTair = md;
}
}
if (mdTair != null) tairAnnotator.annotateElement(object);
MiriamData mdUniprot = null;
for (MiriamData md : object.getMiriamData()) {
if (md.getDataType().equals(MiriamType.UNIPROT) ) {
mdUniprot = md;
}
}
if (mdUniprot != null) uniprotAnnotator.annotateElement(object);
Set<String> ecs = new HashSet<String>();
for (MiriamData md : object.getMiriamData()) {
if (md.getDataType().equals(MiriamType.EC)) {
ecs.add(md.getResource());
}
}
if (ecs.size() == 0) {
return;
}
//annotate from KEGG
Set<MiriamData> annotations = new HashSet<MiriamData>();
for (String ec: ecs) {
String accessUrl = getKeggUrl(ec);
try {
String pageContent = getWebPageContent(accessUrl);
annotations.addAll(parseKegg(pageContent));
} catch (WrongResponseCodeIOException exception) {
logger.warn("Cannot find kegg data for id: " + ec);
} catch (IOException exception) {
throw new AnnotatorException(exception);
}
}
object.addMiriamData(annotations);
}
}
/**
* Returns url to KEGG restfull API about enzyme classification.
*
* @param ecId
* enzyme classification
* @return url to KEGG restfull API about given EC
*/
private String getKeggUrl(String ecId) {
return "http://rest.kegg.jp/get/" + ecId;
}
/**
* Parse KEGG webpage to find information about
* {@link MiriamType#PUBMED}s and returns them.
*
* @param pageContent
* Kegg page
* @return {@link MiriamType#PUBMED}s found on the page
*/
private Collection<MiriamData> parseKegg(String pageContent) {
Collection<MiriamData> result = new HashSet<MiriamData>();
Matcher m = pubmedMatcher.matcher(pageContent);
while (m.find()) {
result.add(createMiriamData(MiriamType.PUBMED, m.group(1)));
}
return result;
}
@Override
public Object refreshCacheQuery(Object query) throws SourceNotAvailable {
String name;
String result = null;
if (query instanceof String) {
name = (String) query;
if (name.startsWith("http")) {
try {
result = getWebPageContent(name);
} catch (IOException e) {
throw new SourceNotAvailable(e);
}
} else {
throw new InvalidArgumentException("Don't know what to do with query: " + query);
}
} else {
throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass());
}
return result;
}
@Override
public String getCommonName() {
return "KEGG";
}
@Override
public String getUrl() {
return "http://www.genome.jp/kegg/";
}
@Override
protected WebPageDownloader getWebPageDownloader() {
return super.getWebPageDownloader();
}
@Override
protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
super.setWebPageDownloader(webPageDownloader);
}
}
......@@ -47,6 +47,12 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
* Pattern used for finding entrez identifier from uniprot info page .
*/
private Pattern uniprotToEntrez = Pattern.compile("DR[\\ ]+GeneID;\\ ([^;\\ ]+)");
/**
* Pattern used for finding EC symbol from UniProt info page .
*/
private Pattern uniprotToEC = Pattern.compile("EC=((\\d+\\.-\\.-\\.-)|(\\d+\\.\\d+\\.-\\.-)|(\\d+\\.\\d+\\.\\d+\\.-)|(\\d+\\.\\d+\\.\\d+\\.\\d+))");
/**
* Class used for some simple operations on {@link BioEntity} elements.
......@@ -108,6 +114,7 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
Set<MiriamData> annotations = new HashSet<MiriamData>();
annotations.addAll(parseHgnc(pageContent));
annotations.addAll(parseEntrez(pageContent));
annotations.addAll(parseEC(pageContent));
if (!uniprotFound) {
annotations.add(createMiriamData(MiriamType.UNIPROT, uniprotId));
}
......@@ -170,6 +177,23 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
}
return result;
}
/**
 * Collects every {@link MiriamType#EC} identifier that the
 * <code>uniprotToEC</code> pattern matches in the given UniProt page.
 *
 * @param pageContent
 *          UniProt info page
 * @return EC annotations found on the page; empty when the pattern does not
 *         match
 */
private Collection<MiriamData> parseEC(String pageContent) {
  Collection<MiriamData> ecAnnotations = new HashSet<>();
  for (Matcher ecMatch = uniprotToEC.matcher(pageContent); ecMatch.find();) {
    // group 1 holds the EC number without the "EC=" prefix
    String ecNumber = ecMatch.group(1);
    ecAnnotations.add(createMiriamData(MiriamType.EC, ecNumber));
  }
  return ecAnnotations;
}
@Override
public Object refreshCacheQuery(Object query) throws SourceNotAvailable {
......@@ -224,6 +248,39 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
}
}
/**
 * Looks up the EC identifiers listed on the UniProt page of the given entry.
 *
 * @param uniprot
 *          {@link MiriamData} with uniprot identifier
 * @return collection of {@link MiriamData} with EC codes; <code>null</code>
 *         when the argument is <code>null</code> or when no EC code is found
 *         on the page
 * @throws UniprotSearchException
 *           thrown when there is a problem with accessing external database
 */
public Collection<MiriamData> uniProtToEC(MiriamData uniprot) throws UniprotSearchException {
  if (uniprot == null) {
    return null;
  }

  boolean isUniprotId = MiriamType.UNIPROT.equals(uniprot.getDataType());
  if (!isUniprotId) {
    throw new InvalidArgumentException(MiriamType.UNIPROT + " expected.");
  }

  String pageUrl = getUniprotUrl(uniprot.getResource());
  try {
    Collection<MiriamData> ecCodes = parseEC(getWebPageContent(pageUrl));
    // callers receive null, not an empty collection, when nothing matched
    return ecCodes.isEmpty() ? null : ecCodes;
  } catch (IOException e) {
    throw new UniprotSearchException("Problem with accessing uniprot webpage", e);
  }
}
@Override
public String getCommonName() {
......
......@@ -16,6 +16,7 @@ import org.junit.runners.Suite.SuiteClasses;
EntrezAnnotatorTest.class, //
GoAnnotatorTest.class, //
HgncAnnotatorTest.class, //
KeggAnnotatorTest.class, //
PdbAnnotatorTest.class, //
ReconAnnotatorTest.class, //
StitchAnnotatorTest.class, //
......
......@@ -38,23 +38,6 @@ public class BrendaAnnotatorTest extends AnnotationTestFunctions {
@After
public void tearDown() throws Exception {
}
@Test
public void testUniprotToBrenda() throws Exception {
try {
Collection<MiriamData> mds = brendaAnnotator.uniprotToBrenda(new MiriamData(MiriamType.UNIPROT, "P12345"));
assertEquals(mds.size(), 2);
MiriamData md1 = new MiriamData(MiriamType.BRENDA, "2.6.1.1");
MiriamData md2 = new MiriamData(MiriamType.BRENDA, "2.6.1.7");
for (MiriamData md: mds) {
assertTrue(md.compareTo(md1) == 0 || md.compareTo(md2) == 0);
}
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
@Test
public void testAnnotateFromUniprot() throws Exception {
......@@ -195,29 +178,7 @@ public class BrendaAnnotatorTest extends AnnotationTestFunctions {
e.printStackTrace();
throw e;
}
}
@Test
public void testInvalidUniprotToCazyNull() throws Exception {
try {
assertNull(brendaAnnotator.uniprotToBrenda(null));
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
@Test
public void testInvalidUniprotToCazyWrongMd() throws Exception {
try {
brendaAnnotator.uniprotToBrenda(new MiriamData(MiriamType.WIKIPEDIA, "bla"));
fail("Exception expected");
} catch (InvalidArgumentException e) {
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
}
@Test
public void testRefreshInvalidCacheQuery() throws Exception {
......@@ -266,6 +227,7 @@ public class BrendaAnnotatorTest extends AnnotationTestFunctions {
}
}
/* Relying on uniprot and tair annotators which have their own tests
@Test
public void testSimulateDownStatus() throws Exception {
WebPageDownloader downloader = brendaAnnotator.getWebPageDownloader();
......@@ -281,6 +243,7 @@ public class BrendaAnnotatorTest extends AnnotationTestFunctions {
brendaAnnotator.setWebPageDownloader(downloader);
}
}
@Test