Commit 2a895d17 authored by David Hoksza's avatar David Hoksza
Browse files

tair name; tair and string annotator only annotate based on human annotated miriam data

parent edaf7627
......@@ -208,7 +208,7 @@ public class KeggAnnotator extends ElementAnnotator implements IExternalService
String[] tairLocusNames = m.group(1).trim().split(" ");
for (String tairLocusName : tairLocusNames) {
tairLocusName = tairLocusName.split("\\(")[0]; // some codes are in the form AT1G08510(FATB)
MiriamData md = uniprotAnnotator.uniprotTairLocusToId(tairLocusName);
MiriamData md = uniprotAnnotator.uniprotTairLocusNameToId(tairLocusName);
if (!md.equals(new MiriamData())){
result.add(md);
}
......
......@@ -32,6 +32,12 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
/**
* Service used for annotation of entities using {@link MiriamType#TAIR_LOCUS
* TAIR}.
* Note that STRING annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*/
private TairAnnotator tairAnnotator;
......@@ -54,7 +60,9 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
throws AnnotatorException {
List<MiriamData> mdUniprots = new ArrayList<>();
if (identifier.getDataType().equals(MiriamType.TAIR_LOCUS)) {
mdUniprots.addAll(tairAnnotator.tairToUniprot(identifier));
if (identifier.getAnnotator() == null) {
mdUniprots.addAll(tairAnnotator.tairToUniprot(identifier));
}
} else if (identifier.getDataType().equals(MiriamType.UNIPROT)) {
mdUniprots.add(identifier);
} else {
......
......@@ -5,6 +5,8 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Service;
......@@ -28,6 +30,13 @@ import lcsb.mapviewer.model.user.annotator.AnnotatorOutputParameter;
/**
* This is a class that implements a backend to TAIR.
* Note that TAIR annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*
*
* @author David Hoksza
*
......@@ -39,6 +48,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* Default class logger.
*/
private static Logger logger = Logger.getLogger(TairAnnotator.class);
/**
 * Builds the pattern used to pick the UniProt ID that belongs to one TAIR
 * locus ID out of a tab-separated UniProt result page.
 * <p>
 * The pattern matches {@code <id>\tlocus:<tairId>} and exposes the run of word
 * characters in front of the tab as group 1.
 *
 * @param tairId
 *          TAIR locus ID to look for; it is matched as literal text
 * @return compiled pattern whose group 1 is the identifier preceding the tab
 */
private Pattern getUniprotIdParsePattern(String tairId) {
  // String.valueOf keeps the previous behaviour for a null ID (the literal text "null").
  // Pattern.quote stops regex metacharacters in the ID from changing the pattern.
  // The negative lookahead stops an ID that is a prefix of a longer one
  // (e.g. "220095" vs "locus:2200950") from selecting the wrong row.
  return Pattern.compile("(\\w*)\\tlocus:" + Pattern.quote(String.valueOf(tairId)) + "(?!\\w)");
}
/**
* Default constructor.
......@@ -74,7 +91,13 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public boolean annotateElement(BioEntityProxy object, MiriamData identifier, AnnotatorData parameters)
throws AnnotatorException {
if (identifier.getAnnotator() != null) {
return false;
}
if (identifier.getDataType().equals(MiriamType.TAIR_LOCUS)) {
// UniProt IDs are only obtained from TAIR IDs that were provided by a human annotator (otherwise we
// would also get UniProt IDs for, e.g., homologous genes' TAIR IDs obtained from KEGG).
Collection<MiriamData> collection = tairToUniprot(identifier);
if (collection.size() > 0) {
object.addMiriamData(collection);
......@@ -97,8 +120,8 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* @return URL to UniProt result page with the TAIR mapping
*/
private String getUniProtUrl(String tairId) {
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+" + tairId
+ "&format=list&columns=id";
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+" + tairId + "&format=tab&columns=id,database(tair)";
// return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+"+tairId+"%29&format=list&columns=id";
}
/**
......@@ -110,11 +133,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* uniprot REST API result page
* @return uniprot identifier found on the page
*/
private Collection<MiriamData> parseUniprotUniprot(String pageContent) {
private Collection<MiriamData> parseUniprotUniprot(String pageContent, String tairId) {
Collection<MiriamData> result = new HashSet<MiriamData>();
if (!pageContent.isEmpty()) {
String[] sPageContent = pageContent.split("\\R");
result.add(new MiriamData(MiriamType.UNIPROT, sPageContent[0]));
//the query returns a list of possible matches which needs to be pruned
Matcher m = getUniprotIdParsePattern(tairId).matcher(pageContent);
if (m.find()) {
result.add(new MiriamData(MiriamType.UNIPROT, m.group(1)));
}
}
return result;
}
......@@ -144,7 +170,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
try {
String accessUrl = getUniProtUrl(tair.getResource());
String pageContent = getWebPageContent(accessUrl);
return parseUniprotUniprot(pageContent);
return parseUniprotUniprot(pageContent, tair.getResource());
} catch (WrongResponseCodeIOException exception) {
logger.warn("Wrong reponse code when accessing tair data with id: " + tair.getResource());
return null;
......@@ -185,7 +211,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public MiriamData getExampleValidAnnotation() {
return new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030");
return new MiriamData(MiriamType.TAIR_LOCUS, "2200950");
}
}
......@@ -61,7 +61,7 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
.compile("EC=((\\d+\\.-\\.-\\.-)|(\\d+\\.\\d+\\.-\\.-)|(\\d+\\.\\d+\\.\\d+\\.-)|(\\d+\\.\\d+\\.\\d+\\.\\d+))");
/**
* Pattern used for finding EC symbol from UniProt info page .
* Pattern used for getting Tair Locus ID symbol from UniProt result page.
*/
private Pattern uniprotTairLocusToId = Pattern
.compile("locus:(\\d*)");
......@@ -151,9 +151,8 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
* TAIR identifier
* @return URL to UniProt result page with the TAIR mapping
*/
private String getUniProtTairLocus2IdUrl(String tairLocus) {
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+"+tairLocus+
"&format=tab&columns=id,database(tair) ";
private String getUniProtTairLocus2IdUrl(String tairLocusName) {
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+" + tairLocusName + "%29&format=tab&columns=id,database(tair)";
}
/**
......@@ -281,7 +280,7 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
* @throws UniprotSearchException
* thrown when there is a problem with accessing external database
*/
public MiriamData uniprotTairLocusToId(String tairLocus) throws UniprotSearchException {
public MiriamData uniprotTairLocusNameToId(String tairLocus) throws UniprotSearchException {
String accessUrl = getUniProtTairLocus2IdUrl(tairLocus);
try {
String pageContent = getWebPageContent(accessUrl);
......
......@@ -57,12 +57,12 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
}
@Test
@Ignore("TAIR DB restricts queries by IP")
// @Ignore("TAIR DB restricts queries by IP")
public void testAnnotateTair() throws Exception {
try {
Species bioEntity = new GenericProtein("id");
bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030"));
bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950"));
testedAnnotator.annotateElement(bioEntity);
......@@ -70,18 +70,35 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
for (MiriamData md : bioEntity.getMiriamData()) {
if (md.getDataType().equals(MiriamType.STRING)) {
mdString = md; // there should be only one EC number for that TAIR<->UNIPROT record
mdString = md;
}
}
assertTrue("No STRING annotation extracted from STRING annotator", mdString != null);
assertTrue("Wrong number of annotations extract from STRING annotator", bioEntity.getMiriamData().size() == 3);
assertTrue("Wrong number of annotations extract from STRING annotator", bioEntity.getMiriamData().size() == 2);
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
/**
 * Checks that a TAIR locus record created with an annotator class
 * (KeggAnnotator here) is not expanded by the tested annotator: after the
 * call the entity still holds exactly one miriam record.
 */
@Test
//@Ignore("TAIR DB restricts queries by IP")
public void testAnnotateTairOnlyFromHumanAnnotator() throws Exception {
  try {
    Species protein = new GenericProtein("id");
    MiriamData locusFromKegg = new MiriamData(MiriamType.TAIR_LOCUS, "2200950", KeggAnnotator.class);
    protein.addMiriamData(locusFromKegg);

    testedAnnotator.annotateElement(protein);

    // Only the record added above is expected to be present.
    int recordCount = protein.getMiriamData().size();
    assertTrue(recordCount == 1);
  } catch (Exception failure) {
    failure.printStackTrace();
    throw failure;
  }
}
@Test
public void testAnnotateInvalidEmpty() throws Exception {
......
package lcsb.mapviewer.annotation.services.annotators;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
......@@ -45,7 +46,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species protein = new GenericProtein("id");
protein.setName("bla");
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030"));
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950")); //AT1G01030
tairAnnotator.annotateElement(protein);
......@@ -74,7 +75,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species protein = new GenericProtein("id");
protein.setName("bla");
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G15950"));
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200427"));
protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "P32246")); // Human version of the protein
tairAnnotator.annotateElement(protein);
......@@ -163,6 +164,22 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
throw e;
}
}
/**
 * Checks that a TAIR locus record created with an annotator class
 * (KeggAnnotator here) is left untouched by the TAIR annotator: the protein
 * keeps exactly one miriam record after annotation.
 */
@Test
public void testTairToUniprotFromKEGG() throws Exception {
  // TAIR loci coming from other annotators should be ignored by the TAIR annotator;
  // only TAIR loci provided by a human annotator should be considered.
  try {
    Species entity = new GenericProtein("id");
    entity.setName("bla");
    MiriamData locusFromKegg = new MiriamData(MiriamType.TAIR_LOCUS, "2200427", KeggAnnotator.class);
    entity.addMiriamData(locusFromKegg);

    tairAnnotator.annotateElement(entity);

    // Only the record added above is expected to be present.
    int recordCount = entity.getMiriamData().size();
    assertTrue(recordCount == 1);
  } catch (Exception failure) {
    failure.printStackTrace();
    throw failure;
  }
}
@Test
// @Ignore("TAIR DB restricts queries by IP")
......
......@@ -283,7 +283,7 @@ public class UniprotAnnotatorTest extends AnnotationTestFunctions {
public void testUniprotTairLocusToId() throws Exception {
try {
assertEquals(new MiriamData(MiriamType.TAIR_LOCUS, "2201786"),
uniprotAnnotator.uniprotTairLocusToId("AT1G08510") );
uniprotAnnotator.uniprotTairLocusNameToId("AT1G08510") );
} catch (Exception e) {
e.printStackTrace();
throw e;
......@@ -294,7 +294,7 @@ public class UniprotAnnotatorTest extends AnnotationTestFunctions {
public void testUniprotWrongTairLocusToId() throws Exception {
try {
assertEquals(new MiriamData(),
uniprotAnnotator.uniprotTairLocusToId("XXXXXX"));
uniprotAnnotator.uniprotTairLocusNameToId("bla"));
} catch (Exception e) {
e.printStackTrace();
throw e;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment