Commit f9d69bcf authored by Piotr Gawron's avatar Piotr Gawron
Browse files

Merge branch 'tair-fix' into 'devel_13.1.x'

Tair fix

See merge request minerva/core!774
parents 451c1b87 6c6c9988
minerva (12.3.1~beta.1) unstable; urgency=low
* Bug fix: tair locus identifiers were used improperly - instead of id the
name was used
minerva (13.1.0~beta.0) unstable; urgency=low
* Feature: annotators are more flexible - you can define set of input and
outputs used by annotator (#617)
* Feature: changes in admin panel don't require saving - they are saved
automatically (#676)
* Feature: elements can have custom glyphs used for visualization (#487)
* Small improvement: all bio entities have z-index associated with them
* Small improvement: all bio entities have z-index associated with them
(#487)
* Small improvement: validation of the organism and disease id on map upload
added (#618)
......@@ -51,7 +55,7 @@ minerva (13.1.0~beta.0) unstable; urgency=low
* Small improvement: allow admin to disable CORS check (#802)
* Small improvement: TransparencyZoomLevelVisibility parameter renamed to
SemanticZoomLevelTransparency (#801)
* Small improvement: export/import from SBML support z-index in LAYOUT
* Small improvement: export/import from SBML support z-index in LAYOUT
extension
* Bug fix: progress bar of gene genome mapping upload is refreshing properly
(#728)
......@@ -68,7 +72,7 @@ minerva (13.1.0~beta.0) unstable; urgency=low
* Bug fix: plugin content element width is adjusted when links to tabs are
wrapped in more than one line (#758)
* Bug fix: export to CellDesigner preserves font size (#803)
* Bug fix: layout data was ignored for some reactions when importing from
* Bug fix: layout data was ignored for some reactions when importing from
SBML (#812)
-- Piotr Gawron <piotr.gawron@uni.lu> Mon, 13 May 2019 19:00:00 +0200
......
......@@ -152,6 +152,9 @@ public class KeggAnnotator extends ElementAnnotator implements IExternalService
logger.warn("Cannot find kegg data for id: " + identifier);
} catch (IOException exception) {
throw new AnnotatorException(exception);
} catch (UniprotSearchException e) {
logger.warn(e, e);
return false;
}
}
object.addMiriamData(annotations);
......@@ -181,8 +184,9 @@ public class KeggAnnotator extends ElementAnnotator implements IExternalService
* organisms names. If the value has not been set by the user, null
* will be passed.
* @return {@link MiriamType#PUBMED}s found on the page
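* @throws UniprotSearchException
*           thrown when there is a problem with accessing UniProt while
*           translating TAIR locus names into TAIR locus identifiers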
*/
private Collection<MiriamData> parseKegg(String pageContent, AnnotatorData params) {
private Collection<MiriamData> parseKegg(String pageContent, AnnotatorData params) throws UniprotSearchException {
// Retrieve Pubmeds
Collection<MiriamData> result = new HashSet<>();
......@@ -201,10 +205,13 @@ public class KeggAnnotator extends ElementAnnotator implements IExternalService
} else {
m = athOrthologMatcher.matcher(pageContent);
if (m.find()) {
String[] tairCodes = m.group(1).trim().split(" ");
for (String tairCode : tairCodes) {
tairCode = tairCode.split("\\(")[0]; // some codes are in the form AT1G08510(FATB)
result.add(new MiriamData(MiriamType.TAIR_LOCUS, tairCode));
String[] tairLocusNames = m.group(1).trim().split(" ");
for (String tairLocusName : tairLocusNames) {
tairLocusName = tairLocusName.split("\\(")[0]; // some codes are in the form AT1G08510(FATB)
MiriamData md = uniprotAnnotator.uniprotTairLocusNameToId(tairLocusName);
if (!md.equals(new MiriamData())){
result.add(md);
}
}
}
}
......
......@@ -32,6 +32,12 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
/**
* Service used for annotation of entities using {@link MiriamType#TAIR_LOCUS
* TAIR}.
* Note that STRING annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*/
private TairAnnotator tairAnnotator;
......@@ -54,7 +60,9 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
throws AnnotatorException {
List<MiriamData> mdUniprots = new ArrayList<>();
if (identifier.getDataType().equals(MiriamType.TAIR_LOCUS)) {
mdUniprots.addAll(tairAnnotator.tairToUniprot(identifier));
if (identifier.getAnnotator() == null) {
mdUniprots.addAll(tairAnnotator.tairToUniprot(identifier));
}
} else if (identifier.getDataType().equals(MiriamType.UNIPROT)) {
mdUniprots.add(identifier);
} else {
......
......@@ -5,6 +5,8 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Service;
......@@ -28,6 +30,13 @@ import lcsb.mapviewer.model.user.annotator.AnnotatorOutputParameter;
/**
* This is a class that implements a backend to TAIR.
* Note that TAIR annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*
*
* @author David Hoksza
*
......@@ -39,6 +48,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* Default class logger.
*/
private static Logger logger = Logger.getLogger(TairAnnotator.class);
/**
 * Builds the pattern that extracts the UniProt identifier from the row of a
 * tab-separated UniProt result page that refers to the given TAIR locus
 * identifier.
 *
 * @param tairId
 *          TAIR locus identifier expected after the {@code locus:} prefix
 * @return pattern whose first group is the UniProt identifier that precedes
 *         the tab character and the {@code locus:<tairId>} entry
 */
private Pattern getUniprotIdParsePattern(String tairId) {
  // The id is quoted so that it is always matched literally, and the negative
  // lookahead rejects rows where the id is only a prefix of a longer locus id
  // (e.g. 220095 must not match locus:2200950). At least one character is
  // required for the UniProt id so that an empty identifier is never captured.
  return Pattern.compile("(\\w+)\\tlocus:" + Pattern.quote(tairId) + "(?!\\w)");
}
/**
* Default constructor.
......@@ -74,7 +91,13 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public boolean annotateElement(BioEntityProxy object, MiriamData identifier, AnnotatorData parameters)
throws AnnotatorException {
if (identifier.getAnnotator() != null) {
return false;
}
if (identifier.getDataType().equals(MiriamType.TAIR_LOCUS)) {
// UniProt IDs are only obtained from TAIR IDs which were provided by a human annotator (otherwise we
// would also get UniProt IDs for, e.g., homologous genes' TAIR IDs obtained from KEGG).
Collection<MiriamData> collection = tairToUniprot(identifier);
if (collection.size() > 0) {
object.addMiriamData(collection);
......@@ -97,8 +120,8 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* @return URL to UniProt result page with the TAIR mapping
*/
private String getUniProtUrl(String tairId) {
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+database%3A%28type%3Atair+" + tairId
+ "%29&format=list&columns=id";
return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+" + tairId + "&format=tab&columns=id,database(tair)";
// return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+"+tairId+"%29&format=list&columns=id";
}
/**
......@@ -110,11 +133,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* uniprot REST API result page
* @return uniprot identifier found on the page
*/
private Collection<MiriamData> parseUniprotUniprot(String pageContent) {
private Collection<MiriamData> parseUniprotUniprot(String pageContent, String tairId) {
Collection<MiriamData> result = new HashSet<MiriamData>();
if (!pageContent.isEmpty()) {
String[] sPageContent = pageContent.split("\\R");
result.add(new MiriamData(MiriamType.UNIPROT, sPageContent[0]));
//the query returns a list of possible matches which needs to be pruned
Matcher m = getUniprotIdParsePattern(tairId).matcher(pageContent);
if (m.find()) {
result.add(new MiriamData(MiriamType.UNIPROT, m.group(1)));
}
}
return result;
}
......@@ -144,7 +170,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
try {
String accessUrl = getUniProtUrl(tair.getResource());
String pageContent = getWebPageContent(accessUrl);
return parseUniprotUniprot(pageContent);
return parseUniprotUniprot(pageContent, tair.getResource());
} catch (WrongResponseCodeIOException exception) {
logger.warn("Wrong reponse code when accessing tair data with id: " + tair.getResource());
return null;
......@@ -185,7 +211,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public MiriamData getExampleValidAnnotation() {
return new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030");
return new MiriamData(MiriamType.TAIR_LOCUS, "2200950");
}
}
......@@ -59,6 +59,12 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
*/
private Pattern uniprotToEC = Pattern
.compile("EC=((\\d+\\.-\\.-\\.-)|(\\d+\\.\\d+\\.-\\.-)|(\\d+\\.\\d+\\.\\d+\\.-)|(\\d+\\.\\d+\\.\\d+\\.\\d+))");
/**
 * Pattern used for extracting the TAIR Locus identifier (the digits that
 * follow the {@code locus:} prefix) from a UniProt result page. At least one
 * digit is required so that a bare {@code locus:} never yields an empty
 * identifier.
 */
private Pattern uniprotTairLocusToId = Pattern
    .compile("locus:(\\d+)");
/**
* Default constructor.
......@@ -136,6 +142,18 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
private String getUniprotUrl(String uniprotId) {
return "https://www.uniprot.org/uniprot/" + uniprotId + ".txt";
}
/**
 * Returns URL of the UniProt query that searches the TAIR cross-references for
 * the given TAIR locus name. The result is requested in tab format with two
 * columns: the UniProt entry id and the TAIR cross-reference.
 *
 * @param tairLocusName
 *          TAIR locus name (for instance AT1G08510)
 * @return URL to UniProt result page with the TAIR mapping
 */
private String getUniProtTairLocus2IdUrl(String tairLocusName) {
  StringBuilder url = new StringBuilder("https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+");
  url.append(tairLocusName);
  url.append("%29&format=tab&columns=id,database(tair)");
  return url.toString();
}
/**
* Parse uniprot webpage to find information about {@link MiriamType#ENTREZ} and
......@@ -250,7 +268,34 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
}
}
/**
 * Looks up the TAIR Locus identifier that corresponds to a TAIR Locus name.
 * The lookup goes through UniProt because TAIR i) does not have an API and ii)
 * restricts the number of accesses.
 *
 * @param tairLocus
 *          TAIR Locus name to translate
 * @return {@link MiriamData} of type {@link MiriamType#TAIR_LOCUS} holding the
 *         identifier found in the UniProt response, or an empty
 *         {@link MiriamData} when the response contains no locus identifier
 * @throws UniprotSearchException
 *           thrown when there is a problem with accessing external database
 */
public MiriamData uniprotTairLocusNameToId(String tairLocus) throws UniprotSearchException {
  String queryUrl = getUniProtTairLocus2IdUrl(tairLocus);
  try {
    Matcher locusMatcher = uniprotTairLocusToId.matcher(getWebPageContent(queryUrl));
    if (!locusMatcher.find()) {
      // nothing to report back - callers recognise the empty MiriamData
      logger.warn("No TAIR ID found for locus: " + tairLocus);
      return new MiriamData();
    }
    return new MiriamData(MiriamType.TAIR_LOCUS, locusMatcher.group(1));
  } catch (IOException e) {
    throw new UniprotSearchException("Problem with accessing uniprot webpage", e);
  }
}
@Override
public String getCommonName() {
return MiriamType.UNIPROT.getCommonName();
......
......@@ -189,7 +189,7 @@ public class KeggAnnotatorTest extends AnnotationTestFunctions {
try {
Species protein = new GenericProtein("id");
protein.setName("bla");
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT3G25110"));
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2090285")); //TAIR locus AT3G25110
keggAnnotator.annotateElement(protein);
......
......@@ -57,12 +57,12 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
}
@Test
@Ignore("TAIR DB restricts queries by IP")
// @Ignore("TAIR DB restricts queries by IP")
public void testAnnotateTair() throws Exception {
try {
Species bioEntity = new GenericProtein("id");
bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030"));
bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950"));
testedAnnotator.annotateElement(bioEntity);
......@@ -70,18 +70,35 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
for (MiriamData md : bioEntity.getMiriamData()) {
if (md.getDataType().equals(MiriamType.STRING)) {
mdString = md; // there should be only one EC number for that TAIR<->UNIPROT record
mdString = md;
}
}
assertTrue("No STRING annotation extracted from STRING annotator", mdString != null);
assertTrue("Wrong number of annotations extract from STRING annotator", bioEntity.getMiriamData().size() == 3);
assertTrue("Wrong number of annotations extract from STRING annotator", bioEntity.getMiriamData().size() == 2);
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
/**
 * A TAIR locus that was attached by another annotator (here KEGG) must not be
 * used as an input: the entity is expected to keep only its original
 * annotation.
 */
@Test
public void testAnnotateTairOnlyFromHumanAnnotator() throws Exception {
  Species bioEntity = new GenericProtein("id");
  bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950", KeggAnnotator.class));

  testedAnnotator.annotateElement(bioEntity);

  // no try/catch needed: the test runner already reports any thrown exception
  assertTrue("TAIR locus coming from an annotator should not produce new annotations",
      bioEntity.getMiriamData().size() == 1);
}
@Test
public void testAnnotateInvalidEmpty() throws Exception {
......
package lcsb.mapviewer.annotation.services.annotators;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
......@@ -45,7 +46,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species protein = new GenericProtein("id");
protein.setName("bla");
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030"));
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950")); //AT1G01030
tairAnnotator.annotateElement(protein);
......@@ -74,7 +75,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species protein = new GenericProtein("id");
protein.setName("bla");
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G15950"));
protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200427"));
protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "P32246")); // Human version of the protein
tairAnnotator.annotateElement(protein);
......@@ -156,13 +157,29 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
// @Ignore("TAIR DB restricts queries by IP")
public void testTairToUniprot() throws Exception {
try {
assertTrue(tairAnnotator.tairToUniprot(new MiriamData(MiriamType.TAIR_LOCUS, "AT1G01030"))
assertTrue(tairAnnotator.tairToUniprot(new MiriamData(MiriamType.TAIR_LOCUS, "2200950")) // TAIR locus AT1G01030
.contains(new MiriamData(MiriamType.UNIPROT, "Q9MAN1")));
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
/**
 * TAIR loci coming from annotators should be ignored by the TAIR annotator
 * (only TAIR loci provided by the human annotator should be considered), so
 * the entity is expected to keep only its original annotation.
 */
@Test
public void testTairToUniprotFromKEGG() throws Exception {
  Species protein = new GenericProtein("id");
  protein.setName("bla");
  protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200427", KeggAnnotator.class));

  tairAnnotator.annotateElement(protein);

  // assertEquals reports the actual size on failure; exceptions are reported
  // by the test runner, so no try/catch wrapper is needed
  assertEquals(1, protein.getMiriamData().size());
}
@Test
// @Ignore("TAIR DB restricts queries by IP")
......
......@@ -278,6 +278,30 @@ public class UniprotAnnotatorTest extends AnnotationTestFunctions {
throw e;
}
}
/**
 * TAIR locus name AT1G08510 is expected to be translated into TAIR locus
 * identifier 2201786.
 */
@Test
public void testUniprotTairLocusToId() throws Exception {
  // exceptions are reported by the test runner, so no try/catch wrapper is needed
  assertEquals(new MiriamData(MiriamType.TAIR_LOCUS, "2201786"),
      uniprotAnnotator.uniprotTairLocusNameToId("AT1G08510"));
}
/**
 * An unknown TAIR locus name is expected to be translated into an empty
 * {@link MiriamData}.
 */
@Test
public void testUniprotWrongTairLocusToId() throws Exception {
  // exceptions are reported by the test runner, so no try/catch wrapper is needed
  assertEquals(new MiriamData(),
      uniprotAnnotator.uniprotTairLocusNameToId("bla"));
}
@Test
public void testStatus() throws Exception {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment