Commit 62d05668 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

Biocompendium annotator removed

parent beeb9092
minerva (14.0.0~alpha.0) unstable; urgency=low
* Feature: log4j is replaced with log4j2 logging mechanism (#291)
* Feature removal: BioCompendium annotator removed (#32)
minerva (13.1.0~alpha.0) unstable; urgency=low
* Feature: annotators are more flexible - you can define set of input and
outputs used by annotator (#617)
......
package lcsb.mapviewer.annotation.services.annotators;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.logging.log4j.*;
import org.springframework.stereotype.Service;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
import lcsb.mapviewer.annotation.services.ExternalServiceStatus;
import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
import lcsb.mapviewer.annotation.services.IExternalService;
import lcsb.mapviewer.common.XmlParser;
import lcsb.mapviewer.common.exception.InvalidArgumentException;
import lcsb.mapviewer.common.exception.InvalidXmlSchemaException;
import lcsb.mapviewer.converter.model.celldesigner.annotation.NoteField;
import lcsb.mapviewer.converter.model.celldesigner.annotation.RestAnnotationParser;
import lcsb.mapviewer.model.map.MiriamData;
import lcsb.mapviewer.model.map.MiriamType;
import lcsb.mapviewer.model.map.species.Gene;
import lcsb.mapviewer.model.map.species.Protein;
import lcsb.mapviewer.model.map.species.Rna;
import lcsb.mapviewer.model.user.annotator.AnnotatorData;
import lcsb.mapviewer.model.user.annotator.AnnotatorInputParameter;
import lcsb.mapviewer.model.user.annotator.AnnotatorOutputParameter;
import lcsb.mapviewer.model.user.annotator.BioEntityField;
/**
* This class is responsible for the connection to the Venkata annotation
* service. The service is available at
* http://biocompendium.embl.de/map_annotator/REST/GetAnnotation/. This service
* is a RESTful service. To query for an annotation one needs to send a POST
* message with two parameters:<br>
* Method - this parameter should be set to GetAnnotation <br>
* File - this parameter should be connected to a file with a request<br>
*
* The request file should look like the one in testFiles/sampleRequest.xml.<br>
* As a result an xml file is received. An example of an xml response is in
* testFiles/sampleResponse.xml
*
*
* @author Piotr Gawron
*
*/
@Service
public class BiocompendiumAnnotator extends ElementAnnotator implements IExternalService {
/**
* Address of the RESTful API for biocompendium.
*/
private static final String SERVICE_ADDRESS = "http://biocompendium.embl.de/map_annotator/REST/GetAnnotation/";
/**
* Standard class logger.
*/
private final Logger logger = LogManager.getLogger(BiocompendiumAnnotator.class);
/**
* Xml parser used for processing notes into structured data.
*/
private RestAnnotationParser rap = new RestAnnotationParser();
/**
* Default constructor. Passes this annotator's own class, the element classes
* {@link Protein}, {@link Gene} and {@link Rna}, and the flag {@code true} to
* the {@link ElementAnnotator} constructor.
* NOTE(review): the meaning of the boolean flag is defined by
* ElementAnnotator, which is not visible here - confirm before changing it.
*/
public BiocompendiumAnnotator() {
super(BiocompendiumAnnotator.class, new Class[] { Protein.class, Gene.class, Rna.class }, true);
}
/**
 * Recomputes the value for a cache query. The query must be a {@link String}
 * holding an HGNC symbol; the annotation for that symbol is fetched with
 * {@link #getAnnotation(MiriamData)}.
 *
 * @param query
 *          cache query; only {@link String} is supported
 * @return annotation obtained for the symbol, may be {@code null}
 * @throws SourceNotAvailable
 *           thrown when fetching the annotation fails with an
 *           {@link IOException}
 * @throws InvalidArgumentException
 *           thrown when the query is {@code null} or not a {@link String}
 */
@Override
public String refreshCacheQuery(Object query) throws SourceNotAvailable {
  if (!(query instanceof String)) {
    // a null query used to fail with a NullPointerException on getClass();
    // report it with the same exception type as any other unsupported query
    String queryClass = query == null ? "null" : String.valueOf(query.getClass());
    throw new InvalidArgumentException("Don't know what to do with class: " + queryClass);
  }
  try {
    return getAnnotation(new MiriamData(MiriamType.HGNC_SYMBOL, (String) query));
  } catch (IOException e) {
    throw new SourceNotAvailable(e);
  }
}
/**
 * Returns String with annotations for species described by {@link MiriamData}
 * with {@link MiriamType#HGNC_SYMBOL} entry.
 * <p>
 * The cache is consulted first. On a cache miss the request xml is written to
 * a temporary file, sent as a multipart POST to the service, and the
 * annotation found in the response for the requested symbol is stored in the
 * cache. The temporary file, http client and http response are released before
 * the method returns, also when an exception is thrown.
 *
 * @param md
 *          description of the element by {@link MiriamData} with
 *          {@link MiriamType#HGNC_SYMBOL} entry
 * @return annotation for species with given name and type from Venkata server,
 *         or {@code null} when the response contains no entry for the symbol
 * @throws IOException
 *           thrown when there is a problem with connection to the server or
 *           the response xml is invalid
 * @throws InvalidArgumentException
 *           thrown when the identifier is not a
 *           {@link MiriamType#HGNC_SYMBOL}
 */
protected String getAnnotation(MiriamData md) throws IOException {
  if (!MiriamType.HGNC_SYMBOL.equals(md.getDataType())) {
    throw new InvalidArgumentException(
        "Only " + MiriamType.HGNC_SYMBOL.getCommonName() + " miriam registry is supported.");
  }
  // work on a copy so that normalizing line breaks does not modify the caller's object
  MiriamData miriamData = new MiriamData(md);
  miriamData.setResource(miriamData.getResource().replaceAll("[\n\r]+", " "));

  String annotation = getCacheValue(miriamData.getResource());
  if (annotation != null) {
    return annotation;
  }

  // prepare a query
  String requestXml = dataToString(miriamData);

  // save query to a temporary file
  File tmp = File.createTempFile("annotation-plugin", "xml");
  try {
    // the request declares encoding="UTF-8", so it must be written as UTF-8
    // and not in the platform default charset
    try (BufferedWriter out = Files.newBufferedWriter(tmp.toPath(), StandardCharsets.UTF_8)) {
      out.write(requestXml);
    }

    MultipartEntityBuilder builder = MultipartEntityBuilder.create();
    builder.addBinaryBody("File", tmp, ContentType.APPLICATION_OCTET_STREAM, tmp.getName());
    HttpPost httppost = new HttpPost(SERVICE_ADDRESS);
    httppost.setEntity(builder.build());

    try (CloseableHttpClient httpClient = HttpClients.createDefault();
        CloseableHttpResponse response = httpClient.execute(httppost)) {
      HttpEntity responseEntity = response.getEntity();
      if (responseEntity != null) {
        // NOTE(review): the response is decoded with the platform default
        // charset, as before - confirm which charset the service really uses
        try (InputStream instream = responseEntity.getContent();
            Scanner scanner = new Scanner(instream)) {
          // "\\A" matches only the beginning of input, so a single token is the whole body
          scanner.useDelimiter("\\A");
          String responseXml = scanner.hasNext() ? scanner.next() : "";
          annotation = getAnnotationsFromXml(responseXml).get(miriamData.getResource());
        } catch (InvalidXmlSchemaException e) {
          throw new IOException(e);
        }
      }
    }
  } finally {
    // the request file is needed only for the duration of the call
    if (!tmp.delete()) {
      logger.warn("Cannot remove temporary file: " + tmp.getAbsolutePath());
    }
  }

  if (annotation != null) {
    setCacheValue(miriamData.getResource(), annotation);
  }
  return annotation;
}
/**
 * Builds the xml request sent to the RESTful service for a single HGNC symbol.
 * The symbol is placed in the {@code name} attribute of one {@code species}
 * element whose type is always {@code PROTEIN}.
 * <p>
 * NOTE(review): the symbol is inserted without xml escaping, so a value
 * containing {@code "}, {@code &} or {@code <} yields malformed xml - confirm
 * whether the service expects escaped values before changing this.
 *
 * @param miriamData
 *          - {@link MiriamData} with {@link MiriamType#HGNC_SYMBOL}
 * @return xml string that represents a request to the RESTful service
 * @throws InvalidArgumentException
 *           thrown when the identifier is not a
 *           {@link MiriamType#HGNC_SYMBOL}
 */
String dataToString(MiriamData miriamData) {
  boolean isHgncSymbol = MiriamType.HGNC_SYMBOL.equals(miriamData.getDataType());
  if (!isHgncSymbol) {
    throw new InvalidArgumentException(
        "Only " + MiriamType.HGNC_SYMBOL.getCommonName() + " miriam registry is supported.");
  }
  StringBuilder request = new StringBuilder();
  request.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  request.append("<request>\n");
  request.append(" <species name=\"").append(miriamData.getResource()).append("\" type=\"PROTEIN\"/>\n");
  request.append("</request>\n");
  return request.toString();
}
/**
 * Parses the xml response of the annotation service into a map from species
 * name to the annotation received for that species.
 *
 * @param xml
 *          - xml string to be parsed; {@code null} and the empty string are
 *          accepted
 * @return a map with information about annotations, keyed by species name;
 *         empty when the input is {@code null} or empty
 * @throws InvalidXmlSchemaException
 *           thrown when there is a problem with xml
 */
Map<String, String> getAnnotationsFromXml(String xml) throws InvalidXmlSchemaException {
  // nothing to parse: return an empty map
  if (xml == null || xml.isEmpty()) {
    return new HashMap<String, String>();
  }
  // there is a bug in the annotation service that creates incorrect xml,
  // therefore the xml has to be repaired before it is parsed
  String correctedXml = correctInvalidXml(xml);
  Document document = XmlParser.getXmlDocumentFromString(correctedXml);
  return getAnnotationsFromXmlNode(document);
}
/**
 * Transforms xml retrieved from Venkata server into map of annotations. A key
 * in the map is a name of species and value is an annotation received from the
 * service.
 * <p>
 * An annotation is accepted only when it contains {@code "Symbol: "} followed
 * by the upper-cased first space-separated token of the species name;
 * otherwise a warning is logged and an empty string is stored for that name.
 *
 * @param doc
 *          - xml node whose children contain the "response" root node
 * @return a map with information about annotations. A key in the map is a
 *         name of species and value is an annotation received from the
 *         service.
 * @throws InvalidArgumentException
 *           thrown when there is no "response" node or the response contains
 *           an element other than "species"
 */
private Map<String, String> getAnnotationsFromXmlNode(Node doc) {
  Map<String, String> result = new HashMap<String, String>();
  // root node is called "response"
  Node responseNode = XmlParser.getNode("response", doc.getChildNodes());
  if (responseNode == null) {
    throw new InvalidArgumentException("Invalid xml returned by annotation service. No response root node");
  }
  NodeList list = responseNode.getChildNodes();
  for (int i = 0; i < list.getLength(); i++) {
    Node node = list.item(i);
    if (node.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    // all element nodes in response must be species
    if (!node.getNodeName().equalsIgnoreCase("species")) {
      throw new InvalidArgumentException("Unknown node in xml response: " + node.getNodeName());
    }
    // line breaks in the name come back from the service encoded as %0A
    String name = XmlParser.getNodeAttr("name", node).replaceAll("%0A", "\n");
    String annotation = XmlParser.getNodeValue(node);
    // Locale.ROOT keeps the comparison independent of the JVM default locale
    // (e.g. in a Turkish locale "i" would be upper-cased to a dotted capital I)
    String expectedSymbol = name.split(" ")[0].toUpperCase(Locale.ROOT);
    if (annotation.contains("Symbol: " + expectedSymbol)) {
      result.put(name, annotation);
    } else {
      logger.warn("Problem with annotation for: " + name);
      result.put(name, "");
    }
  }
  return result;
}
/**
 * Repairs invalid xml retrieved from Venkata server. An opening
 * {@code species} tag that is followed only by whitespace and then either
 * another {@code <species} or the closing {@code </response} is removed. The
 * two replacements are repeated until a pass no longer shortens the text.
 *
 * @param xml
 *          xml to correct
 * @return corrected xml
 */
private String correctInvalidXml(String xml) {
  String current = xml;
  int lengthBefore;
  do {
    lengthBefore = current.length();
    current = current.replaceAll("<species name=\"[^\"]*\" type=\"[^\"]*\">(\\s)*<species", "<species");
    current = current.replaceAll("<species name=\"[^\"]*\" type=\"[^\"]*\">(\\s)*</response", "</response");
  } while (lengthBefore != current.length());
  return current;
}
/**
 * Checks the state of the external service by requesting the annotation of
 * the HGNC symbol "SNCA" with the cache switched off.
 *
 * @return status object that is {@link ExternalServiceStatusType#DOWN} when no
 *         annotation is returned or an exception is thrown,
 *         {@link ExternalServiceStatusType#CHANGED} when the annotation does
 *         not contain the text "ymbol", and
 *         {@link ExternalServiceStatusType#OK} otherwise
 */
@Override
public ExternalServiceStatus getServiceStatus() {
  ExternalServiceStatus status = new ExternalServiceStatus(getCommonName(), getUrl());
  // disable the cache for the duration of the check so the service is really queried
  GeneralCacheInterface cacheCopy = getCache();
  this.setCache(null);
  try {
    String annotations = getAnnotation(new MiriamData(MiriamType.HGNC_SYMBOL, "SNCA"));
    if (annotations == null) {
      status.setStatus(ExternalServiceStatusType.DOWN);
    } else if (!annotations.contains("ymbol")) {
      status.setStatus(ExternalServiceStatusType.CHANGED);
    } else {
      status.setStatus(ExternalServiceStatusType.OK);
    }
  } catch (Exception e) {
    logger.error(status.getName() + " is down", e);
    status.setStatus(ExternalServiceStatusType.DOWN);
  } finally {
    // restore the cache on every exit path, including errors that are not
    // caught above, so the annotator is never left without its cache
    this.setCache(cacheCopy);
  }
  return status;
}
/**
 * Annotates the element with the data returned by
 * {@link #getAnnotation(MiriamData)} for the given identifier.
 * <p>
 * Miriam data are extracted from the complete annotation text. The remaining
 * fields (description, symbol, synonyms, abbreviation, formula, full name and
 * former symbols) are extracted only from the lines that start with the one
 * beginning with "Symbol:" and end with the one beginning with "Synonyms:"
 * (both included).
 *
 * @param element
 *          element to be annotated
 * @param identifier
 *          identifier used to query the service
 * @param parameters
 *          annotator parameters; not used by this implementation
 * @return {@code true} when an annotation was found and applied,
 *         {@code false} otherwise
 * @throws AnnotatorException
 *           thrown when an {@link IOException} occurs while annotating
 */
@Override
public boolean annotateElement(BioEntityProxy element, MiriamData identifier, AnnotatorData parameters)
    throws AnnotatorException {
  try {
    String annotationString = getAnnotation(identifier);
    if (annotationString == null) {
      return false;
    }
    element.addMiriamData(rap.getMiriamData(annotationString));

    StringBuilder relevantLines = new StringBuilder();
    boolean insideSection = false;
    for (String line : annotationString.split("\n")) {
      // the section opens on the "Symbol:" line ...
      if (line.startsWith("Symbol:")) {
        insideSection = true;
      }
      if (insideSection) {
        relevantLines.append(line).append("\n");
      }
      // ... and closes after the "Synonyms:" line has been copied
      if (line.startsWith("Synonyms:")) {
        insideSection = false;
      }
    }
    String annotations = relevantLines.toString();

    element.setDescription(rap.getDescription(annotations));
    element.setSymbol(rap.getSymbol(annotations));
    element.setSynonyms(rap.getSynonyms(annotations));
    element.setAbbreviation(rap.getAbbreviation(annotations));
    element.setFormula(rap.getFormula(annotations));
    element.setFullName(rap.getFullName(annotations));
    element.setFormerSymbols(rap.getFormerSymbols(annotations));
    return true;
  } catch (IOException e) {
    throw new AnnotatorException(e);
  }
}
/**
* Returns the human-readable name of this annotator, "Biocompendium". The same
* value is used as the service name in {@link #getServiceStatus()}.
*
* @return common name of the annotator
*/
@Override
public String getCommonName() {
return "Biocompendium";
}
/**
* Returns the address of the Biocompendium home page. The same value is
* passed to the status object created in {@link #getServiceStatus()}.
*
* @return url of the external service
*/
@Override
public String getUrl() {
return "http://biocompendium.embl.de/";
}
/**
 * Lists the inputs this annotator can work with: a single parameter built
 * from {@link BioEntityField#NAME} and {@link MiriamType#HGNC_SYMBOL}.
 *
 * @return list with the one supported input parameter
 */
@Override
public List<AnnotatorInputParameter> getAvailableInputParameters() {
  AnnotatorInputParameter nameAsHgncSymbol = new AnnotatorInputParameter(BioEntityField.NAME,
      MiriamType.HGNC_SYMBOL);
  return Arrays.asList(nameAsHgncSymbol);
}
/**
 * Lists the outputs this annotator can provide: one parameter for every
 * {@link NoteField} that has a miriam type, followed by the symbol,
 * description, previous symbols and synonyms fields.
 *
 * @return list of output parameters in the order described above
 */
@Override
public List<AnnotatorOutputParameter> getAvailableOuputProperties() {
  List<AnnotatorOutputParameter> outputs = new ArrayList<>();
  for (NoteField noteField : NoteField.values()) {
    // note fields without a miriam type do not produce an output
    if (noteField.getMiriamType() == null) {
      continue;
    }
    outputs.add(new AnnotatorOutputParameter(noteField.getMiriamType()));
  }
  BioEntityField[] entityFields = { BioEntityField.SYMBOL, BioEntityField.DESCRIPTION,
      BioEntityField.PREVIOUS_SYMBOLS, BioEntityField.SYNONYMS };
  for (BioEntityField entityField : entityFields) {
    outputs.add(new AnnotatorOutputParameter(entityField));
  }
  return outputs;
}
/**
* Always returns {@code null}: no example identifier is provided for this
* annotator.
*
* @return {@code null}
*/
@Override
public MiriamData getExampleValidAnnotation() {
// we don't provide an example because this annotator is unstable
return null;
}
}
......@@ -8,7 +8,6 @@ import org.junit.runners.Suite.SuiteClasses;
@SuiteClasses({ AnnotatorExceptionTest.class,
AnnotatorExceptionTest.class,
BrendaAnnotatorTest.class,
BiocompendiumAnnotatorTest.class,
CazyAnnotatorTest.class,
ChebiAnnotatorTest.class,
ElementAnnotatorTest.class,
......
package lcsb.mapviewer.annotation.services.annotators;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Map;
import org.apache.logging.log4j.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.mockito.Mockito;
import org.springframework.beans.factory.annotation.Autowired;
import lcsb.mapviewer.annotation.AnnotationTestFunctions;
import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
import lcsb.mapviewer.annotation.cache.PermanentDatabaseLevelCacheInterface;
import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
import lcsb.mapviewer.common.exception.InvalidArgumentException;
import lcsb.mapviewer.common.exception.InvalidXmlSchemaException;
import lcsb.mapviewer.model.map.MiriamData;
import lcsb.mapviewer.model.map.MiriamType;
public class BiocompendiumAnnotatorTest extends AnnotationTestFunctions {
Logger logger = LogManager.getLogger(BiocompendiumAnnotatorTest.class);
MiriamData camk4 = new MiriamData(MiriamType.HGNC_SYMBOL, "CAMK4");
MiriamData slc25a27 = new MiriamData(MiriamType.HGNC_SYMBOL, "SLC25A27");
MiriamData nsmf = new MiriamData(MiriamType.HGNC_SYMBOL, "NSMF");
MiriamData mir449a = new MiriamData(MiriamType.HGNC_SYMBOL, "MIR449A");
@Autowired
private GeneralCacheInterface cache;
@Autowired
private PermanentDatabaseLevelCacheInterface permanentDatabaseLevelCache;
@Autowired
private BiocompendiumAnnotator restService;
/**
* Runs before every test. Currently performs no setup.
*/
@Before
public void setUp() {
}
/**
* Runs after every test. Currently performs no cleanup.
*/
@After
public void tearDown() throws Exception {
}
/**
 * Queries the service for two HGNC symbols and checks that an annotation is
 * returned for each of them. Ignored because of bug 32.
 */
@Test
@Ignore("Bug 32")
public void testGetAnnotationsForSpecies() throws Exception {
  // exceptions are left to propagate: JUnit reports them with the full stack
  // trace, so catching, printing and rethrowing them only duplicates output
  String response = restService.getAnnotation(camk4);
  assertNotNull(response);
  assertTrue(response.contains("Symbol: CAMK4"));

  response = restService.getAnnotation(slc25a27);
  assertNotNull(response);
}
/**
 * Checks that the annotator exposes a non-null common name and url.
 */
@Test
public void testGetters() throws Exception {
  // no try/catch: an unexpected exception fails the test and is reported by JUnit
  assertNotNull(restService.getCommonName());
  assertNotNull(restService.getUrl());
}
/**
 * Checks that asking for an annotation with an empty {@link MiriamData} is
 * rejected with {@link InvalidArgumentException}.
 */
@Test
public void testGetAnnotationsForInvalidMiriam() throws Exception {
  try {
    restService.getAnnotation(new MiriamData());
    fail("Exception expected");
  } catch (InvalidArgumentException expected) {
    // this is the outcome under test; any other exception propagates and
    // fails the test with its stack trace reported by JUnit
  }
}
/**
 * Parses the sample response stored in
 * testFiles/annotation/sampleResponse.xml and checks that it yields two
 * entries, for CAMK4 and SLC25A27.
 */
@Test
public void testParseXml() throws Exception {
  StringBuilder stringBuilder = new StringBuilder();
  String ls = System.getProperty("line.separator");
  // try-with-resources closes the reader even when reading fails
  try (BufferedReader reader = new BufferedReader(new FileReader("testFiles/annotation/sampleResponse.xml"))) {
    String line;
    while ((line = reader.readLine()) != null) {
      stringBuilder.append(line);
      stringBuilder.append(ls);
    }
  }

  Map<String, String> res = restService.getAnnotationsFromXml(stringBuilder.toString());
  assertEquals(2, res.keySet().size());

  String response = res.get("CAMK4");
  assertNotNull(response);
  assertTrue(response.contains("Symbol: CAMK4"));
  assertNotNull(res.get("SLC25A27"));
}
@Test(timeout = 10000)
@Ignore("Bug 32")
public void testGetAnnotationsForMiceSpecies() throws Exception {
ArrayList<String> names = new ArrayList<String>();
names.add("Fmo3");
names.add("Nqo1");
names.add("Abcc12");
names.add("Mgst3");
names.add("Txnrd1");
names.add("Cbr3");
names.add("Hspa1b");
names.add("Prdx1");
names.add("Ppard");
names.add("Tgfb2");
names.add("Fth1");
names.add("Prdx6");
names.add("Nr4a1");
names.add("Tgfb1");
names.add("Abcc4");
names.add("Ager");
names.add("Gsr");
names.add("Sod3");
names.add("Maff");