Commit 3f3f2fc4 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

mesh parser uses sparQL end point

parent bdbe6a3b
minerva (16.0.7) stable; urgency=medium
* Bug fix (performance): vacuum cron job could cause deadlock and starvation
of db connections
* Bug fix (performance): mesh parser stopped working after mesh API change
-- Piotr Gawron <piotr.gawron@uni.lu> Fri, 14 Jan 2022 13:00:00 +0200
......
package lcsb.mapviewer.annotation.data;
import java.io.Serializable;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import javax.xml.bind.annotation.XmlRootElement;
......@@ -67,7 +69,7 @@ public class MeSH implements Serializable {
* @param synonyms
* list of terms used as names for this object.
*/
public MeSH(String meSHId, String name, String description, List<String> synonyms) {
public MeSH(final String meSHId, final String name, final String description, final List<String> synonyms) {
super();
this.name = name;
this.meSHId = meSHId;
......@@ -88,7 +90,7 @@ public class MeSH implements Serializable {
* the name to set
* @see #name
*/
public void setName(String name) {
public void setName(final String name) {
this.name = name;
}
......@@ -105,7 +107,7 @@ public class MeSH implements Serializable {
* the synonyms to set
* @see #synonyms
*/
public void setSynonyms(List<String> synonyms) {
public void setSynonyms(final List<String> synonyms) {
this.synonyms = synonyms;
}
......@@ -120,7 +122,7 @@ public class MeSH implements Serializable {
* @param meSHId
* database identifier
*/
public void setMeSHId(String meSHId) {
public void setMeSHId(final String meSHId) {
this.meSHId = meSHId;
}
......@@ -135,7 +137,7 @@ public class MeSH implements Serializable {
* @param description
* long description.
*/
public void setDescription(String description) {
public void setDescription(final String description) {
this.description = description;
}
......@@ -162,7 +164,7 @@ public class MeSH implements Serializable {
* @param synonym
* synonym to add
*/
public void addSynonym(String synonym) {
public void addSynonym(final String synonym) {
synonyms.add(synonym);
}
......@@ -172,11 +174,15 @@ public class MeSH implements Serializable {
* @param synonymsToAdd
* synonyms to add
*/
public void addSynonyms(Set<String> synonymsToAdd) {
public void addSynonyms(final Set<String> synonymsToAdd) {
for (String synonym : synonymsToAdd) {
this.synonyms.add(synonym);
}
}
/**
 * Removes a synonym from the list of synonyms of this object.
 *
 * @param synonym
 *          synonym to remove
 */
public void removeSynonym(final String synonym) {
  synonyms.remove(synonym);
}
}
package lcsb.mapviewer.annotation.services;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.net.URLEncoder;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.SerializationException;
import org.apache.http.HttpStatus;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.w3c.dom.Node;
import com.google.gson.Gson;
import com.fasterxml.jackson.databind.ObjectMapper;
import lcsb.mapviewer.annotation.cache.CachableInterface;
import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
......@@ -27,12 +26,6 @@ import lcsb.mapviewer.common.exception.InvalidArgumentException;
import lcsb.mapviewer.model.map.MiriamData;
import lcsb.mapviewer.model.map.MiriamType;
/**
* Class used for accessing and parsing data from MeSH database.
*
* @author Ayan Rota
*
*/
@Service
public class MeSHParser extends CachableInterface implements IExternalService {
......@@ -40,31 +33,31 @@ public class MeSHParser extends CachableInterface implements IExternalService {
* Prefix used in the DB to identify the cache entry.
*/
static final String MESH_PREFIX = "mesh:";
/**
* URL used for fetching a MeSH record by its unique identifier (UI).
*/
private static final String URL_MESH_DATABASE = "https://meshb.nlm.nih.gov/api/record/ui/";
/**
* Url used for searching mesh terms by synonym.
*/
private static final String URL_SEARCH_BY_SYNONYM = "https://meshb.nlm.nih.gov/api/search/record?searchInField=termDescriptor&sort=&size=20&searchType=exactMatch&searchMethod=FullWord&q=";
/**
* Default class logger.
*/
private static final String SPARQL_QUERY_PREFIX = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" +
"PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" +
"PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>\n" +
"PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" +
"PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>\n" +
"PREFIX mesh: <http://id.nlm.nih.gov/mesh/>\n" +
"PREFIX mesh2022: <http://id.nlm.nih.gov/mesh/2022/>\n" +
"PREFIX mesh2021: <http://id.nlm.nih.gov/mesh/2021/>\n" +
"PREFIX mesh2020: <http://id.nlm.nih.gov/mesh/2020/>\n" +
"\n";
private static final String SPARQL_URL = "https://id.nlm.nih.gov/mesh/sparql?format=JSON&inference=true&offset=0&limit=1000&query=";
private Logger logger = LogManager.getLogger();
/**
* Object that serializes {@link MeSH} elements into an XML string and
* deserializes XML into {@link MeSH} objects.
*/
private XmlSerializer<MeSH> meshSerializer;
/**
* Default constructor.
*/
public MeSHParser() {
private ObjectMapper objectMapper;
@Autowired
public MeSHParser(final ObjectMapper objectMapper) {
super(MeSHParser.class);
meshSerializer = new XmlSerializer<>(MeSH.class);
this.objectMapper = objectMapper;
}
@Override
......@@ -88,7 +81,7 @@ public class MeSHParser extends CachableInterface implements IExternalService {
} else {
result = super.refreshCacheQuery(query);
}
} catch (IOException e) {
} catch (SourceNotAvailable e) {
throw new SourceNotAvailable("Problem with accessing Mesh database", e);
}
return result;
......@@ -134,7 +127,7 @@ public class MeSHParser extends CachableInterface implements IExternalService {
if (mesh == null) {
try {
mesh = getMeSHByIdFromDB(meshID);
} catch (IOException e) {
} catch (SourceNotAvailable e) {
throw new AnnotatorException("Problem with accessing MeSH database", e);
}
}
......@@ -160,100 +153,108 @@ public class MeSHParser extends CachableInterface implements IExternalService {
* @return return as mesh object.
* @throws IOException
* thrown when there is problem with accessing web page
* @throws SourceNotAvailable
* @throws AnnotatorException
* thrown when there is a problem with accessing mesh db
*/
private MeSH getMeSHByIdFromDB(final MiriamData meshID) throws IOException {
try {
MeSH result = new MeSH();
String page = getWebPageContent(URL_MESH_DATABASE + meshID.getResource());
Gson gson = new Gson();
Map<?, ?> gsonObject = new HashMap<String, Object>();
gsonObject = gson.fromJson(page, gsonObject.getClass());
Set<String> synonyms = getSynonyms(gsonObject);
String name = getName(gsonObject);
String description = getDescription(gsonObject);
String id = getId(gsonObject);
synonyms.remove(name);
result.addSynonyms(synonyms);
result.setName(name);
result.setDescription(description);
result.setMeSHId(id);
return result;
} catch (WrongResponseCodeIOException e) {
if (e.getResponseCode() == HttpStatus.SC_NOT_FOUND) {
return null;
} else {
throw e;
}
}
}
private MeSH getMeSHByIdFromDB(final MiriamData meshID) throws SourceNotAvailable {
MeSH result = new MeSH();
/**
* Extracts name from gson object.
*
* @param gsonObject
* gson to process
* @return name of {@link MeSH} entry
*/
private String getName(final Map<?, ?> gsonObject) {
Map<?, ?> descriptorTag = (Map<?, ?>) gsonObject.get("DescriptorName");
if (descriptorTag == null) {
descriptorTag = (Map<?, ?>) gsonObject.get("SupplementalRecordName");
result.setName(getName(meshID));
if (result.getName() == null) {
return null;
}
return (String) (((Map<?, ?>) descriptorTag.get("String")).get("t"));
result.addSynonyms(getSynonyms(meshID));
result.setDescription(getDescription(meshID));
result.setMeSHId(meshID.getResource());
result.removeSynonym(result.getName());
return result;
}
/**
* Extracts Mesh id name from gson object.
*
* @param gsonObject
* gson to process
* @return id of {@link MeSH} entry
*/
private String getId(final Map<?, ?> gsonObject) {
Map<?, ?> descriptorTag = (Map<?, ?>) gsonObject.get("DescriptorUI");
if (descriptorTag == null) {
descriptorTag = (Map<?, ?>) gsonObject.get("SupplementalRecordUI");
private String getName(final MiriamData meshID) throws SourceNotAvailable {
String query = (SPARQL_QUERY_PREFIX +
"SELECT * \n" +
"FROM <http://id.nlm.nih.gov/mesh>\n" +
"WHERE {\n" +
" mesh:MESH_ID rdfs:label ?name .\n" +
"}\n").replace("MESH_ID", meshID.getResource());
try {
String page = getWebPageContent(SPARQL_URL + URLEncoder.encode(query, "UTF-8"));
SpqarQLResult result = objectMapper.readValue(page, SpqarQLResult.class);
String name = null;
for (Map<String, Map<String, String>> row : result.getResults()) {
name = (String) ((Map<?, ?>) row.get("name")).get("value");
}
return name;
} catch (Exception e) {
throw new SourceNotAvailable(e);
}
return (String) descriptorTag.get("t");
}
/**
* Extracts Mesh term description from gson object.
*
* @param gsonObject
* gson to process
* @return description of {@link MeSH} entry
*/
private String getDescription(final Map<?, ?> gsonObject) {
Map<?, ?> concepts = (Map<?, ?>) gsonObject.get("_generated");
return (String) concepts.get("PreferredConceptScopeNote");
/**
 * Fetches the scope note (long description) of a MeSH entry from the SPARQL
 * endpoint.
 *
 * @param meshID
 *          identifier of the MeSH entry; its resource is substituted into the
 *          query
 * @return scope note of the preferred concept of the entry, or {@code null}
 *         when the endpoint returns no rows
 * @throws SourceNotAvailable
 *           thrown when downloading or parsing the endpoint response fails
 */
private String getDescription(final MiriamData meshID) throws SourceNotAvailable {
  // Only the preferred concept is queried. The endpoint is called with
  // inference=true, where meshv:concept is expected to match every concept of
  // the record, so taking a row from that result set would pick an arbitrary
  // scope note instead of the preferred one.
  String query = (SPARQL_QUERY_PREFIX +
      "SELECT * \n" +
      "FROM <http://id.nlm.nih.gov/mesh>\n" +
      "WHERE {\n" +
      " mesh:MESH_ID meshv:preferredConcept ?concept .\n" +
      " ?concept meshv:scopeNote ?description .\n" +
      "}\n").replace("MESH_ID", meshID.getResource());
  try {
    String page = getWebPageContent(SPARQL_URL + URLEncoder.encode(query, "UTF-8"));
    SpqarQLResult result = objectMapper.readValue(page, SpqarQLResult.class);
    String description = null;
    for (Map<String, Map<String, String>> row : result.getResults()) {
      description = row.get("description").get("value");
    }
    return description;
  } catch (Exception e) {
    // Broad catch kept on purpose: any failure (network, malformed JSON,
    // unexpected response shape) is reported to callers as SourceNotAvailable.
    throw new SourceNotAvailable(e);
  }
}
/**
* Extracts list of synonyms from gson object.
*
* @param gsonObject
* gson to process
* @return synonyms of {@link MeSH} entry
*/
private Set<String> getSynonyms(final Map<?, ?> gsonObject) {
private Set<String> getSynonyms(final MiriamData meshID) throws SourceNotAvailable {
Set<String> synonyms = new HashSet<>();
Map<?, ?> concepts = (Map<?, ?>) gsonObject.get("ConceptList");
ArrayList<?> conceptList = (ArrayList<?>) concepts.get("Concept");
for (Object object : conceptList) {
Map<?, ?> concept = (Map<?, ?>) object;
ArrayList<?> termList = (ArrayList<?>) ((Map<?, ?>) concept.get("TermList")).get("Term");
for (Object object2 : termList) {
Map<?, ?> term = (Map<?, ?>) object2;
Map<?, ?> synonym = (Map<?, ?>) term.get("String");
synonyms.add((String) synonym.get("t"));
String query = (SPARQL_QUERY_PREFIX +
"SELECT * \n" +
"FROM <http://id.nlm.nih.gov/mesh>\n" +
"WHERE {\n" +
" mesh:MESH_ID meshv:concept ?concept .\n" +
" ?concept meshv:term ?term.\n" +
" ?term rdfs:label ?synonym.\n" +
"}\n").replace("MESH_ID", meshID.getResource());
try {
String page = getWebPageContent(SPARQL_URL + URLEncoder.encode(query, "UTF-8"));
SpqarQLResult result = objectMapper.readValue(page, SpqarQLResult.class);
for (Map<String, Map<String, String>> row : result.getResults()) {
synonyms.add((String) ((Map<?, ?>) row.get("synonym")).get("value"));
}
} catch (Exception e) {
throw new SourceNotAvailable(e);
}
query = (SPARQL_QUERY_PREFIX +
"SELECT * \n" +
"FROM <http://id.nlm.nih.gov/mesh>\n" +
"WHERE {\n" +
" mesh:MESH_ID meshv:concept ?concept .\n" +
" ?concept rdfs:label ?conceptName.\n" +
"}\n").replace("MESH_ID", meshID.getResource());
try {
String page = getWebPageContent(SPARQL_URL + URLEncoder.encode(query, "UTF-8"));
SpqarQLResult result = objectMapper.readValue(page, SpqarQLResult.class);
for (Map<String, Map<String, String>> row : result.getResults()) {
synonyms.add((String) ((Map<?, ?>) row.get("conceptName")).get("value"));
}
} catch (Exception e) {
throw new SourceNotAvailable(e);
}
return synonyms;
}
......@@ -294,17 +295,4 @@ public class MeSHParser extends CachableInterface implements IExternalService {
}
return getMeSH(meshId) != null;
}
/**
 * Collects MeSH identifiers from a parsed json object.
 *
 * @param gsonObject
 *          parsed json; its "hits" entry is read and the nested "hits" list is
 *          iterated
 * @return set of {@link MiriamData} of type {@link MiriamType#MESH_2012}, one
 *         for the "_id" value of every element of the list
 */
private Set<MiriamData> getIdsBySynonymQuery(final Map<?, ?> gsonObject) {
  Map<?, ?> hitsWrapper = (Map<?, ?>) gsonObject.get("hits");
  ArrayList<?> entries = (ArrayList<?>) hitsWrapper.get("hits");

  Set<MiriamData> identifiers = new HashSet<>();
  for (Object entry : entries) {
    String meshId = (String) ((Map<?, ?>) entry).get("_id");
    identifiers.add(new MiriamData(MiriamType.MESH_2012, meshId));
  }
  return identifiers;
}
}
package lcsb.mapviewer.annotation.services;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Jackson-mapped container for the JSON response of a SPARQL query.
 */
public class SpqarQLResult {

  /** Name of the entry in {@link #results} that holds the result rows. */
  private static final String BINDINGS_KEY = "bindings";

  /** Content of the "head" section of the response. */
  @JsonProperty
  private Map<String, List<String>> head;

  /** Content of the "results" section of the response, keyed by entry name. */
  private Map<String, List<Map<String, Map<String, String>>>> results;

  /**
   * Returns the rows stored under the "bindings" entry of the results section.
   *
   * @return list of rows; every row maps a variable name to a map of its
   *         attributes
   */
  public List<Map<String, Map<String, String>>> getResults() {
    final List<Map<String, Map<String, String>>> bindings = results.get(BINDINGS_KEY);
    return bindings;
  }
}
package lcsb.mapviewer.annotation;
import org.springframework.context.annotation.*;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import com.fasterxml.jackson.databind.ObjectMapper;
import lcsb.mapviewer.annotation.cache.MockCacheInterface;
import lcsb.mapviewer.persist.SpringPersistConfig;
......@@ -14,4 +18,9 @@ public class SpringAnnotationTestConfig {
return new MockCacheInterface();
}
/**
 * Provides the {@link ObjectMapper} bean for the test context.
 *
 * @return new {@link ObjectMapper} created with its no-argument constructor
 */
@Bean
public ObjectMapper objectMapper() {
  final ObjectMapper mapper = new ObjectMapper();
  return mapper;
}
}
......@@ -11,13 +11,14 @@ import static org.mockito.Mockito.when;
import java.io.IOException;
import org.apache.http.HttpStatus;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.Test;
import org.mockito.Mockito;
import org.springframework.beans.factory.annotation.Autowired;
import com.fasterxml.jackson.databind.ObjectMapper;
import lcsb.mapviewer.annotation.AnnotationTestFunctions;
import lcsb.mapviewer.annotation.cache.GeneralCacheInterface;
import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
......@@ -36,7 +37,6 @@ public class MeSHParserTest extends AnnotationTestFunctions {
@Test
public void testGetMesh() throws Exception {
// Parkinson disease
MiriamData meshID = new MiriamData(MiriamType.MESH_2012, "D004298");
MeSH mesh = meshParser.getMeSH(meshID);
assertTrue(mesh != null);
......@@ -107,7 +107,7 @@ public class MeSHParserTest extends AnnotationTestFunctions {
public void testGetMEshWithNetworkProblems() throws Exception {
// Parkinson disease
MiriamData meshID = new MiriamData(MiriamType.MESH_2012, "D004298");
MeSHParser parserUnderTest = new MeSHParser();
MeSHParser parserUnderTest = new MeSHParser(new ObjectMapper());
WebPageDownloader webPageDownloader = Mockito.mock(WebPageDownloader.class);
when(webPageDownloader.getFromNetwork(anyString(), anyString(), nullable(String.class)))
.thenThrow(new IOException());
......@@ -193,18 +193,4 @@ public class MeSHParserTest extends AnnotationTestFunctions {
}
}
/**
 * Checks that the service status is reported as CHANGED when every download
 * attempt fails with a "not found" response code.
 */
@Test
public void testSimulateChangeStatus() throws Exception {
  final WebPageDownloader originalDownloader = meshParser.getWebPageDownloader();
  try {
    final WebPageDownloader notFoundDownloader = Mockito.mock(WebPageDownloader.class);
    when(notFoundDownloader.getFromNetwork(anyString(), anyString(), nullable(String.class)))
        .thenThrow(new WrongResponseCodeIOException(null, HttpStatus.SC_NOT_FOUND));
    meshParser.setWebPageDownloader(notFoundDownloader);

    assertEquals(ExternalServiceStatusType.CHANGED, meshParser.getServiceStatus().getStatus());
  } finally {
    // put the original downloader back so the shared parser is left unchanged
    meshParser.setWebPageDownloader(originalDownloader);
  }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment