From 4c43e5a69cff96a99cb9eae05223442afae0743e Mon Sep 17 00:00:00 2001
From: David Hoksza <david.hoksza@uni.lu>
Date: Tue, 21 Nov 2017 15:23:03 +0100
Subject: [PATCH] CAZy annotator including unit tests.

---
 .../annotation/services/ModelAnnotator.java   |   8 +
 .../services/annotators/CazyAnnotator.java    | 229 ++++++++++++++
 .../services/annotators/TairAnnotator.java    |   2 +-
 .../applicationContext-annotation.xml         |   1 +
 .../annotators/AllAnnotatorTests.java         |   1 +
 .../annotators/CazyAnnotatorTest.java         | 283 ++++++++++++++++++
 .../lcsb/mapviewer/model/map/MiriamType.java  |   8 +
 persist/src/db/11.1.1/fix_db_20171114.sql     |   5 +-
 8 files changed, 535 insertions(+), 2 deletions(-)
 create mode 100644 annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotator.java
 create mode 100644 annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotatorTest.java

diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
index c9d17d4db5..a9eb813ceb 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/ModelAnnotator.java
@@ -17,6 +17,7 @@ import org.springframework.beans.factory.annotation.Autowired;
 
 import lcsb.mapviewer.annotation.services.annotators.AnnotatorException;
 import lcsb.mapviewer.annotation.services.annotators.BiocompendiumAnnotator;
+import lcsb.mapviewer.annotation.services.annotators.CazyAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.ChebiAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.ElementAnnotator;
 import lcsb.mapviewer.annotation.services.annotators.EnsemblAnnotator;
@@ -75,6 +76,12 @@ public class ModelAnnotator {
    */
   @Autowired
   private BiocompendiumAnnotator biocompendiumAnnotator;
+
+  /**
+   * CAZy annotator.
+   */
+  @Autowired
+  private CazyAnnotator cazyAnnotator;
 
   /**
    * Backend to the chebi database.
@@ -153,6 +160,7 @@ public class ModelAnnotator {
     defaultAnnotators = new ArrayList<>();
 
     addAnnotator(biocompendiumAnnotator);
+    addAnnotator(cazyAnnotator);
     addAnnotator(chebiBackend);
     addAnnotator(uniprotAnnotator);
     addAnnotator(goAnnotator);
diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotator.java
new file mode 100644
index 0000000000..6fb6ef830a
--- /dev/null
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotator.java
@@ -0,0 +1,229 @@
+package lcsb.mapviewer.annotation.services.annotators;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
+import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
+import lcsb.mapviewer.annotation.cache.WebPageDownloader;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatus;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
+import lcsb.mapviewer.annotation.services.IExternalService;
+import lcsb.mapviewer.annotation.services.WrongResponseCodeIOException;
+import lcsb.mapviewer.common.exception.InvalidArgumentException;
+import lcsb.mapviewer.model.map.BioEntity;
+import lcsb.mapviewer.model.map.MiriamData;
+import lcsb.mapviewer.model.map.MiriamType;
+import lcsb.mapviewer.model.map.species.Gene;
+import lcsb.mapviewer.model.map.species.Protein;
+import lcsb.mapviewer.model.map.species.Rna;
+
+/**
+ * This is a class that implements a backend to CAZy.
+ *
+ * @author David Hoksza
+ *
+ */
+public class CazyAnnotator extends ElementAnnotator implements IExternalService {
+
+  /**
+   * Default class logger.
+   */
+  private static Logger logger = Logger.getLogger(CazyAnnotator.class);
+
+  /**
+   * Service used for annotation of entities using {@link MiriamType#TAIR_LOCUS
+   * tair}.
+   */
+  @Autowired
+  private TairAnnotator tairAnnotator;
+
+  /**
+   * Pattern used for finding the CAZy family identifier on the CAZy search result page.
+   */
+  private final Pattern cazyIdMatcher = Pattern.compile("\\/((GT|GH|PL|CE|CBM)\\d+(\\_\\d+)?)\\.html");
+
+  /**
+   * Default constructor.
+   */
+  public CazyAnnotator() {
+    super(CazyAnnotator.class, new Class[] { Protein.class, Gene.class, Rna.class }, false);
+  }
+
+  @Override
+  public ExternalServiceStatus getServiceStatus() {
+    ExternalServiceStatus status = new ExternalServiceStatus(getCommonName(), getUrl());
+
+    GeneralCacheInterface cacheCopy = getCache();
+    this.setCache(null);
+
+    try {
+      MiriamData md = uniprotToCazy(new MiriamData(MiriamType.UNIPROT, "Q9SG95"));
+
+      status.setStatus(ExternalServiceStatusType.OK);
+      if (md == null || !md.getResource().equalsIgnoreCase("GH5_7")) {
+        status.setStatus(ExternalServiceStatusType.CHANGED);
+      }
+    } catch (Exception e) {
+      logger.error(status.getName() + " is down", e);
+      status.setStatus(ExternalServiceStatusType.DOWN);
+    }
+    this.setCache(cacheCopy);
+    return status;
+  }
+
+  @Override
+  public void annotateElement(BioEntity object) throws AnnotatorException {
+    if (isAnnotatable(object)) {
+
+      MiriamData mdTair = null;
+      MiriamData mdCazy = null;
+      for (MiriamData md : object.getMiriamData()) {
+        if (md.getDataType().equals(MiriamType.CAZY)) {
+          mdCazy = md;
+        }
+        else if (md.getDataType().equals(MiriamType.TAIR_LOCUS)) {
+          mdTair = md;
+        }
+      }
+
+      if (mdCazy != null) {
+        return;
+      }
+
+      if (mdTair != null) {
+        tairAnnotator.annotateElement(object);
+      }
+
+      List<MiriamData> mdUniprots = new ArrayList<>();
+      for (MiriamData md : object.getMiriamData()) {
+        if (md.getDataType().equals(MiriamType.UNIPROT)) {
+          mdUniprots.add(md);
+        }
+      }
+
+      List<String> cazyIds = new ArrayList<>();
+      for (MiriamData mdUniprot : mdUniprots) {
+        mdCazy = uniprotToCazy(mdUniprot);
+        if (mdCazy != null && !cazyIds.contains(mdCazy.getResource())) {
+          cazyIds.add(mdCazy.getResource());
+          object.addMiriamData(mdCazy);
+        }
+      }
+    }
+  }
+
+  /**
+   * Returns URL to the CAZy search result page for a UniProt identifier.
+   *
+   * @param uniProtId
+   *          UniProt identifier
+   * @return URL to CAZY UniProt accession search result page
+   */
+  private String getCazyUrl(String uniProtId) {
+    return "http://www.cazy.org/search?page=recherche&recherche=" + uniProtId + "&tag=10";
+  }
+
+  /**
+   * Parse CAZy webpage to find information about
+   * {@link MiriamType#CAZY} and returns them.
+   *
+   * @param pageContent
+   *          CAZy info page
+   * @return CAZy family identifier found on the page
+   */
+  private Collection<MiriamData> parseCazy(String pageContent) {
+    Collection<MiriamData> result = new HashSet<>();
+    Matcher m = cazyIdMatcher.matcher(pageContent);
+    if (m.find()) {
+      result.add(new MiriamData(MiriamType.CAZY, m.group(1)));
+    }
+    return result;
+  }
+
+  @Override
+  public Object refreshCacheQuery(Object query) throws SourceNotAvailable {
+    String name;
+    String result = null;
+    if (query instanceof String) {
+      name = (String) query;
+      if (name.startsWith("http")) {
+        try {
+          result = getWebPageContent(name);
+        } catch (IOException e) {
+          throw new SourceNotAvailable(e);
+        }
+      } else {
+        throw new InvalidArgumentException("Don't know what to do with query: " + query);
+      }
+    } else {
+      throw new InvalidArgumentException("Don't know what to do with class: " + query.getClass());
+    }
+    return result;
+  }
+
+  /**
+   * Transform UniProt identifier to CAZy identifier.
+   *
+   * @param uniprot
+   *          {@link MiriamData} with UniProt identifier
+   * @return {@link MiriamData} with CAZy identifier
+   * @throws AnnotatorException
+   *           thrown when there is a problem with accessing external database
+   */
+  public MiriamData uniprotToCazy(MiriamData uniprot) throws AnnotatorException {
+    if (uniprot == null) {
+      return null;
+    }
+
+    if (!MiriamType.UNIPROT.equals(uniprot.getDataType())) {
+      throw new InvalidArgumentException(MiriamType.UNIPROT + " expected.");
+    }
+
+    String accessUrl = getCazyUrl(uniprot.getResource());
+    try {
+      String pageContent = getWebPageContent(accessUrl);
+      Collection<MiriamData> collection = parseCazy(pageContent);
+      if (!collection.isEmpty()) {
+        return collection.iterator().next();
+      } else {
+        logger.warn("Cannot find CAZy data for UniProt id: " + uniprot.getResource());
+        return null;
+      }
+    } catch (WrongResponseCodeIOException exception) {
+      logger.warn("Wrong response code when retrieving CAZy data for UniProt id: " + uniprot.getResource());
+      return null;
+    } catch (IOException exception) {
+      throw new AnnotatorException(exception);
+    }
+  }
+
+  @Override
+  public String getCommonName() {
+    return MiriamType.CAZY.getCommonName();
+  }
+
+  @Override
+  public String getUrl() {
+    return MiriamType.CAZY.getDbHomepage();
+  }
+
+  @Override
+  protected WebPageDownloader getWebPageDownloader() {
+    return super.getWebPageDownloader();
+  }
+
+  @Override
+  protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
+    super.setWebPageDownloader(webPageDownloader);
+  }
+
+}
diff --git a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotator.java b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotator.java
index 7749ccbe43..eda127b27e 100644
--- a/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotator.java
+++ b/annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotator.java
@@ -154,7 +154,7 @@
public class TairAnnotator extends ElementAnnotator implements IExternalService
    * @param tair
    *          {@link MiriamData} with TAIR identifier
    * @return {@link MiriamData} with UniProt identifier
-   * @throws UniprotSearchException
+   * @throws AnnotatorException
    *           thrown when there is a problem with accessing external database
    */
   public MiriamData tairToUniprot(MiriamData tair) throws AnnotatorException {
diff --git a/annotation/src/main/resources/applicationContext-annotation.xml b/annotation/src/main/resources/applicationContext-annotation.xml
index d93c9675f4..4e0956e5d1 100644
--- a/annotation/src/main/resources/applicationContext-annotation.xml
+++ b/annotation/src/main/resources/applicationContext-annotation.xml
@@ -14,6 +14,7 @@
 
   <!-- Annotators -->
   <bean id="BiocompendiumAnnotator" class="lcsb.mapviewer.annotation.services.annotators.BiocompendiumAnnotator"/>
+  <bean id="CazyAnnotator" class="lcsb.mapviewer.annotation.services.annotators.CazyAnnotator"/>
   <bean id="ChebiAnnotator" class="lcsb.mapviewer.annotation.services.annotators.ChebiAnnotator"/>
   <bean id="EnsemblAnnotator" class="lcsb.mapviewer.annotation.services.annotators.EnsemblAnnotator"/>
   <bean id="EntrezAnnotator" class="lcsb.mapviewer.annotation.services.annotators.EntrezAnnotator"/>
diff --git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
index f49dd7668d..1f994c0d3c 100644
--- a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/AllAnnotatorTests.java
@@ -7,6 +7,7 @@ import org.junit.runners.Suite.SuiteClasses;
 @RunWith(Suite.class)
 @SuiteClasses({ AnnotatorExceptionTest.class, //
     BiocompendiumAnnotatorTest.class, //
+    CazyAnnotatorTest.class, //
     ChebiAnnotatorTest.class, //
     ElementAnnotatorTest.class, //
     EnsemblAnnotatorTest.class, //
diff
--git a/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotatorTest.java b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotatorTest.java
new file mode 100644
index 0000000000..66f7e4f58f
--- /dev/null
+++ b/annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/CazyAnnotatorTest.java
@@ -0,0 +1,283 @@
+package lcsb.mapviewer.annotation.services.annotators;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.when;
+
+import java.io.IOException;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+import org.springframework.beans.factory.annotation.Autowired;
+
+import lcsb.mapviewer.annotation.AnnotationTestFunctions;
+import lcsb.mapviewer.annotation.cache.WebPageDownloader;
+import lcsb.mapviewer.annotation.services.ExternalServiceStatusType;
+import lcsb.mapviewer.common.exception.InvalidArgumentException;
+import lcsb.mapviewer.model.map.MiriamData;
+import lcsb.mapviewer.model.map.MiriamType;
+import lcsb.mapviewer.model.map.species.GenericProtein;
+import lcsb.mapviewer.model.map.species.Species;
+
+public class CazyAnnotatorTest extends AnnotationTestFunctions {
+
+  @Autowired
+  CazyAnnotator cazyAnnotator;
+
+  @Before
+  public void setUp() throws Exception {
+  }
+
+  @After
+  public void tearDown() throws Exception {
+  }
+
+  @Test
+  public void testUniprotToCazy() throws Exception {
+    try {
+      assertEquals(new MiriamData(MiriamType.CAZY, "GH5_7"),
+          cazyAnnotator.uniprotToCazy(new MiriamData(MiriamType.UNIPROT, "Q9SG95")));
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testAnnotateFromUniprot() throws Exception {
+    try {
+
+      Species protein = new GenericProtein("id");
+      protein.setName("bla");
+      protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "Q9SG95"));
+
+      cazyAnnotator.annotateElement(protein);
+
+      MiriamData mdCazy = null;
+
+      for (MiriamData md : protein.getMiriamData()) {
+        if (md.getDataType().equals(MiriamType.CAZY)) {
+          mdCazy = md;
+        }
+      }
+
+      assertNotNull("No CAZy annotation extracted from CAZy annotator", mdCazy);
+      assertTrue("Invalid CAZy annotation extracted from CAZy annotator", mdCazy.getResource().equalsIgnoreCase("GH5_7"));
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testAnnotateFromTair() throws Exception {
+    try {
+
+      Species protein = new GenericProtein("id");
+      protein.setName("bla");
+      protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "AT3G53010"));
+
+      cazyAnnotator.annotateElement(protein);
+
+      MiriamData mdCazy = null;
+
+      for (MiriamData md : protein.getMiriamData()) {
+        if (md.getDataType().equals(MiriamType.CAZY)) {
+          mdCazy = md;
+        }
+      }
+
+      assertNotNull("No CAZy annotation extracted from CAZy annotator", mdCazy);
+      assertTrue("Invalid CAZy annotation extracted from CAZy annotator", mdCazy.getResource().equalsIgnoreCase("CE6"));
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+
+  }
+
+  @Test
+  public void testAnnotateMultipleUniprots() throws Exception {
+    try {
+
+      Species protein = new GenericProtein("id");
+      protein.setName("bla");
+      protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "Q9SG95"));
+      protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "Q12540"));
+
+      cazyAnnotator.annotateElement(protein);
+
+      int cntMDs = 0;
+
+      for (MiriamData md : protein.getMiriamData()) {
+        if (md.getDataType().equals(MiriamType.CAZY)) {
+          cntMDs++;
+        }
+      }
+
+      assertEquals("Wrong number of CAZy identifiers extracted from CAZy annotator", 2, cntMDs);
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testAnnotateInvalidEmpty() throws Exception {
+    try {
+      Species protein = new GenericProtein("id");
+      protein.setName("bla");
+      cazyAnnotator.annotateElement(protein);
+
+      assertEquals(0, protein.getMiriamData().size());
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testAnnotateInvalidUniprot() throws Exception {
+    try {
+      Species protein = new GenericProtein("id");
+      protein.addMiriamData(new MiriamData(MiriamType.UNIPROT, "bla"));
+      cazyAnnotator.annotateElement(protein);
+
+      assertEquals(1, protein.getMiriamData().size());
+
+      assertEquals(1, getWarnings().size());
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testAnnotateInvalidTair() throws Exception {
+    try {
+      Species protein = new GenericProtein("id");
+      protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "bla"));
+      cazyAnnotator.annotateElement(protein);
+
+      assertEquals(1, protein.getMiriamData().size());
+
+      assertEquals(1, getWarnings().size());
+
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testInvalidUniprotToCazyNull() throws Exception {
+    try {
+      assertNull(cazyAnnotator.uniprotToCazy(null));
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testInvalidUniprotToCazyWrongMd() throws Exception {
+    try {
+      cazyAnnotator.uniprotToCazy(new MiriamData(MiriamType.WIKIPEDIA, "bla"));
+      fail("Exception expected");
+    } catch (InvalidArgumentException e) {
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testRefreshInvalidCacheQuery() throws Exception {
+    try {
+      cazyAnnotator.refreshCacheQuery("invalid_query");
+      fail("Exception expected");
+    } catch (InvalidArgumentException e) {
+      assertTrue(e.getMessage().contains("Don't know what to do"));
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testRefreshInvalidCacheQuery2() throws Exception {
+    try {
+      cazyAnnotator.refreshCacheQuery(new Object());
+      fail("Exception expected");
+    } catch (InvalidArgumentException e) {
+      assertTrue(e.getMessage().contains("Don't know what to do"));
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testRefreshCacheQuery() throws Exception {
+    try {
+      Object res = cazyAnnotator.refreshCacheQuery("http://google.cz/");
+      assertNotNull(res);
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testStatus() throws Exception {
+    try {
+      assertEquals(ExternalServiceStatusType.OK, cazyAnnotator.getServiceStatus().getStatus());
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+
+  @Test
+  public void testSimulateDownStatus() throws Exception {
+    WebPageDownloader downloader = cazyAnnotator.getWebPageDownloader();
+    try {
+      WebPageDownloader mockDownloader = Mockito.mock(WebPageDownloader.class);
+      when(mockDownloader.getFromNetwork(anyString(), anyString(), anyString())).thenThrow(new IOException());
+      cazyAnnotator.setWebPageDownloader(mockDownloader);
+      assertEquals(ExternalServiceStatusType.DOWN, cazyAnnotator.getServiceStatus().getStatus());
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    } finally {
+      cazyAnnotator.setWebPageDownloader(downloader);
+    }
+  }
+
+  @Test
+  public void testSimulateChangedStatus() throws Exception {
+    WebPageDownloader downloader = cazyAnnotator.getWebPageDownloader();
+    try {
+      WebPageDownloader mockDownloader = Mockito.mock(WebPageDownloader.class);
+      when(mockDownloader.getFromNetwork(anyString(), anyString(), anyString())).thenReturn("GN Name=ACSS2; Synonyms=ACAS2;");
+      cazyAnnotator.setWebPageDownloader(mockDownloader);
+      assertEquals(ExternalServiceStatusType.CHANGED, cazyAnnotator.getServiceStatus().getStatus());
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw e;
+    } finally {
+      cazyAnnotator.setWebPageDownloader(downloader);
+    }
+  }
+
+}
diff --git
a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
index 56e6dd03a0..cb51066a69 100644
--- a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
+++ b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java
@@ -29,6 +29,14 @@ public enum MiriamType {
       "http://commonchemistry.org", //
       new String[] { "urn:miriam:cas" }, //
       new Class<?>[] {}, "MIR:00000237"), //
+
+  /**
+   * The Carbohydrate-Active Enzyme (CAZy) database: http://www.cazy.org/.
+   */
+  CAZY("Carbohydrate-Active enzYmes", //
+      "http://www.cazy.org/", //
+      new String[] { "urn:miriam:cazy" }, //
+      new Class<?>[] {}, "MIR:00000195"), //
 
   /**
    * Consensus CDS: http://identifiers.org/ccds/.
diff --git a/persist/src/db/11.1.1/fix_db_20171114.sql b/persist/src/db/11.1.1/fix_db_20171114.sql
index 4eae7d939e..8dd8092a16 100644
--- a/persist/src/db/11.1.1/fix_db_20171114.sql
+++ b/persist/src/db/11.1.1/fix_db_20171114.sql
@@ -1,2 +1,5 @@
 DELETE FROM cache_type WHERE classname = 'lcsb.mapviewer.annotation.services.annotators.TairAnnotator';
-INSERT INTO cache_type(validity, classname) VALUES (365, 'lcsb.mapviewer.annotation.services.annotators.TairAnnotator');
\ No newline at end of file
+INSERT INTO cache_type(validity, classname) VALUES (365, 'lcsb.mapviewer.annotation.services.annotators.TairAnnotator');
+
+DELETE FROM cache_type WHERE classname = 'lcsb.mapviewer.annotation.services.annotators.CazyAnnotator';
+INSERT INTO cache_type(validity, classname) VALUES (365, 'lcsb.mapviewer.annotation.services.annotators.CazyAnnotator');
-- 
GitLab