Commit f1d306f4 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

fix on drugbank parser (after change of their html response) + suggested query list for drugbank

parent c0c56b82
......@@ -12,7 +12,7 @@ import lcsb.mapviewer.model.cache.CacheType;
import lcsb.mapviewer.persist.dao.cache.CacheTypeDao;
/**
* Interface used for retrieveng data that are no longer valid in the cache
* Interface used for retrieving data that are no longer valid in the cache
* (their expire date was reached).
*
* @author Piotr Gawron
......@@ -20,260 +20,260 @@ import lcsb.mapviewer.persist.dao.cache.CacheTypeDao;
*/
public abstract class CachableInterface extends XmlParser {
/**
* Default class logger.
*/
private static Logger logger = Logger.getLogger(CachableInterface.class);
/**
* Refreshes cache data from the original source. Every concrete cacheable
* interface has to provide its own implementation.
*
* @param query
* object used to identify the resource that should be refreshed
* @return newly obtained data from the source
* @throws SourceNotAvailable
* thrown when original source is unavailable
*/
public abstract Object refreshCacheQuery(Object query) throws SourceNotAvailable;
/**
* String containing class name of the class that extends
* {@link CachableInterface}. String name is used because cache layer doesn't
* have access to every possible cacheable interface, therefore it cannot
* enlist it.
*/
private String cacheIdentifier;
/**
* Type of the cache elements associated with this interface.
*/
private CacheType cacheType;
/**
* Cache mechanism used to speed up computation.
*/
@Autowired
private GeneralCacheInterface cache;
/**
* Data access object used to look up the {@link CacheType} associated with
* this interface.
*/
@Autowired
private CacheTypeDao cacheTypeDao;
/**
* This object downloads webpages.
*/
private WebPageDownloader webPageDownloader = new WebPageDownloader();
/**
* Default constructor.
*
* @param clazz
* type that defines this interface
*/
public CachableInterface(final Class<? extends CachableInterface> clazz) {
this.cacheIdentifier = clazz.getCanonicalName();
}
/**
 * Stores a string value in the cache under the given key. Nothing happens
 * when no cache is set.
 *
 * @param key
 *          string that identifies the value
 * @param result
 *          value to put into cache
 */
protected final void setCacheValue(String key, String result) {
  if (cache == null) {
    return;
  }
  cache.setCachedQuery(key, getCacheType(), result);
}
/**
 * Looks up a string value in the cache.
 *
 * @param key
 *          string that identifies the value
 * @return value returned by the cache for the key, or {@code null} when no
 *         cache is set
 */
protected final String getCacheValue(String key) {
  if (cache == null) {
    return null;
  }
  return cache.getStringByQuery(key, getCacheType());
}
/**
 * Returns type of the cache elements associated with this interface. When
 * the type is not known yet it is looked up by class name using
 * {@link #getCacheTypeDao()}.
 *
 * @return the cacheType
 * @see #cacheType
 */
public final CacheType getCacheType() {
  if (cacheType != null) {
    return cacheType;
  }
  cacheType = getCacheTypeDao().getByClassName(cacheIdentifier);
  return cacheType;
}
/**
 * Looks up an xml node in the cache.
 *
 * @param key
 *          string that identifies xml node
 * @return xml node returned by the cache for the key, or {@code null} when no
 *         cache is set
 */
protected final Node getCacheNode(String key) {
  if (cache == null) {
    return null;
  }
  return cache.getXmlNodeByQuery(key, getCacheType());
}
/**
 * Stores an xml node in the cache under the given identifier. Nothing happens
 * when no cache is set.
 *
 * @param id
 *          identifier of the node
 * @param item
 *          node to save
 */
protected void setCacheNode(String id, Node item) {
  if (cache == null) {
    return;
  }
  cache.setCachedQuery(id, getCacheType(), item);
}
/**
 * Returns the cache used by this interface.
 *
 * @return the cache, {@code null} when none was set
 * @see #cache
 */
public final GeneralCacheInterface getCache() {
  return this.cache;
}
/**
* Sets the cache used by this interface for storing and retrieving values.
*
* @param cache
* the cache to set; when it is {@code null} the cache helper methods of
* this class skip the cache
* @see #cache
*/
public final void setCache(GeneralCacheInterface cache) {
this.cache = cache;
}
/**
 * Returns content of the webpage for a given url using GET request without
 * request body.
 *
 * @param accessUrl
 *          webpage url address
 * @return content of the webpage
 * @throws IOException
 *           thrown when there are problems with accessing the webpage
 */
protected final String getWebPageContent(String accessUrl) throws IOException {
  final String content = getWebPageContent(accessUrl, "GET", null);
  return content;
}
/**
 * Returns content of the webpage for a given url. The content is looked up in
 * the cache first; only when it is missing the page is accessed with the
 * given HTTP request method and a non-null result is stored in the cache.
 * <p>
 * The cache key is the url followed by the request body (if any). The HTTP
 * request method is not part of the key, so requests that differ only in the
 * method share one cache entry.
 *
 * @param accessUrl
 *          webpage url address
 * @param httpRequestMethod
 *          type of HTTP request (GET, POST, PUT, PATCH, DELETE, ...)
 * @param data
 *          string to be sent in the body of the request, may be {@code null}
 * @return content of the webpage (taken from cache or from network)
 * @throws IOException
 *           thrown when there are problems with accessing the webpage
 */
protected final String getWebPageContent(String accessUrl, String httpRequestMethod, String data) throws IOException {
  // key format is kept as is: url + body, so existing cache entries stay valid
  String cacheKey = accessUrl;
  if (data != null) {
    cacheKey += data;
  }
  String cached = getCacheValue(cacheKey);
  if (cached != null) {
    return cached;
  }
  String downloaded = webPageDownloader.getFromNetwork(accessUrl, httpRequestMethod, data);
  if (downloaded != null) {
    setCacheValue(cacheKey, downloaded);
  }
  return downloaded;
}
/**
 * Removes html tags from text, replaces every run of line breaks with a
 * single space and unescapes html entities.
 *
 * @param text
 *          text to clean
 * @return text without html tags, or {@code null} when the input was
 *         {@code null}
 */
public String cleanHtml(String text) {
  if (text == null) {
    return null;
  }
  // clean the description from html tags (should be somehow improved...)
  StringBuilder result = new StringBuilder(text.length());
  int copyFrom = 0;
  int tagStart = text.indexOf('<');
  while (tagStart >= 0) {
    // keep everything between the previous tag and this one
    result.append(text, copyFrom, tagStart);
    int tagEnd = text.indexOf('>', tagStart);
    if (tagEnd < 0) {
      // unterminated tag: everything from '<' to the end of text is dropped
      logger.warn("Problem with html code: " + text);
      copyFrom = text.length();
      break;
    }
    copyFrom = tagEnd + 1;
    tagStart = text.indexOf('<', copyFrom);
  }
  result.append(text, copyFrom, text.length());
  return StringEscapeUtils.unescapeHtml4(result.toString().replaceAll("[\n\r]+", " "));
}
/**
 * Returns data access object used for accessing cache types.
 *
 * @return the cacheTypeDao
 * @see #cacheTypeDao
 */
public CacheTypeDao getCacheTypeDao() {
  return this.cacheTypeDao;
}
/**
* Sets data access object that {@link #getCacheType()} uses to look up the
* cache type by class name.
*
* @param cacheTypeDao
* the cacheTypeDao to set
* @see #cacheTypeDao
*/
public void setCacheTypeDao(CacheTypeDao cacheTypeDao) {
this.cacheTypeDao = cacheTypeDao;
}
/**
 * Returns the object that is used for downloading web pages.
 *
 * @return the webPageDownloader
 * @see #webPageDownloader
 */
protected WebPageDownloader getWebPageDownloader() {
  return this.webPageDownloader;
}
/**
* Replaces the object used for downloading web pages when the requested
* content is not found in the cache.
*
* @param webPageDownloader
* the webPageDownloader to set
* @see #webPageDownloader
*/
protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
this.webPageDownloader = webPageDownloader;
}
/**
* Default class logger.
*/
private static Logger logger = Logger.getLogger(CachableInterface.class);
/**
* Refreshes cache data from the original source. Every concrete cacheable
* interface has to provide its own implementation.
*
* @param query
* object used to identify the resource that should be refreshed
* @return newly obtained data from the source
* @throws SourceNotAvailable
* thrown when original source is unavailable
*/
public abstract Object refreshCacheQuery(Object query) throws SourceNotAvailable;
/**
* String containing class name of the class that extends
* {@link CachableInterface}. String name is used because cache layer doesn't
* have access to every possible cacheable interface, therefore it cannot enlist
* it.
*/
private String cacheIdentifier;
/**
* Type of the cache elements associated with this interface.
*/
private CacheType cacheType;
/**
* Cache mechanism used to speed up computation.
*/
@Autowired
private GeneralCacheInterface cache;
/**
* Data access object used to look up the {@link CacheType} associated with
* this interface.
*/
@Autowired
private CacheTypeDao cacheTypeDao;
/**
* This object downloads web pages.
*/
private WebPageDownloader webPageDownloader = new WebPageDownloader();
/**
* Default constructor.
*
* @param clazz
* type that defines this interface
*/
public CachableInterface(final Class<? extends CachableInterface> clazz) {
this.cacheIdentifier = clazz.getCanonicalName();
}
/**
 * Stores a string value in the cache under the given key. Nothing happens
 * when no cache is set.
 *
 * @param key
 *          string that identifies the value
 * @param result
 *          value to put into cache
 */
protected final void setCacheValue(String key, String result) {
  if (cache == null) {
    return;
  }
  cache.setCachedQuery(key, getCacheType(), result);
}
/**
 * Looks up a string value in the cache.
 *
 * @param key
 *          string that identifies the value
 * @return value returned by the cache for the key, or {@code null} when no
 *         cache is set
 */
protected final String getCacheValue(String key) {
  if (cache == null) {
    return null;
  }
  return cache.getStringByQuery(key, getCacheType());
}
/**
 * Returns type of the cache elements associated with this interface. When
 * the type is not known yet it is looked up by class name using
 * {@link #getCacheTypeDao()}.
 *
 * @return the cacheType
 * @see #cacheType
 */
public final CacheType getCacheType() {
  if (cacheType != null) {
    return cacheType;
  }
  cacheType = getCacheTypeDao().getByClassName(cacheIdentifier);
  return cacheType;
}
/**
 * Looks up an xml node in the cache.
 *
 * @param key
 *          string that identifies xml node
 * @return xml node returned by the cache for the key, or {@code null} when no
 *         cache is set
 */
protected final Node getCacheNode(String key) {
  if (cache == null) {
    return null;
  }
  return cache.getXmlNodeByQuery(key, getCacheType());
}
/**
 * Stores an xml node in the cache under the given identifier. Nothing happens
 * when no cache is set.
 *
 * @param id
 *          identifier of the node
 * @param item
 *          node to save
 */
protected void setCacheNode(String id, Node item) {
  if (cache == null) {
    return;
  }
  cache.setCachedQuery(id, getCacheType(), item);
}
/**
 * Returns the cache used by this interface.
 *
 * @return the cache, {@code null} when none was set
 * @see #cache
 */
public final GeneralCacheInterface getCache() {
  return this.cache;
}
/**
* Sets the cache used by this interface for storing and retrieving values.
*
* @param cache
* the cache to set; when it is {@code null} the cache helper methods of
* this class skip the cache
* @see #cache
*/
public final void setCache(GeneralCacheInterface cache) {
this.cache = cache;
}
/**
 * Returns content of the webpage for a given url using GET request without
 * request body.
 *
 * @param accessUrl
 *          webpage url address
 * @return content of the webpage
 * @throws IOException
 *           thrown when there are problems with accessing the webpage
 */
protected final String getWebPageContent(String accessUrl) throws IOException {
  final String content = getWebPageContent(accessUrl, "GET", null);
  return content;
}
/**
 * Returns content of the webpage for a given url. The content is looked up in
 * the cache first; only when it is missing the page is accessed with the
 * given HTTP request method and a non-null result is stored in the cache.
 * <p>
 * The cache key is the url followed by the request body (if any). The HTTP
 * request method is not part of the key, so requests that differ only in the
 * method share one cache entry.
 *
 * @param accessUrl
 *          webpage url address
 * @param httpRequestMethod
 *          type of HTTP request (GET, POST, PUT, PATCH, DELETE, ...)
 * @param data
 *          string to be sent in the body of the request, may be {@code null}
 * @return content of the webpage (taken from cache or from network)
 * @throws IOException
 *           thrown when there are problems with accessing the webpage
 */
protected final String getWebPageContent(String accessUrl, String httpRequestMethod, String data) throws IOException {
  // key format is kept as is: url + body, so existing cache entries stay valid
  String cacheKey = accessUrl;
  if (data != null) {
    cacheKey += data;
  }
  String cached = getCacheValue(cacheKey);
  if (cached != null) {
    return cached;
  }
  String downloaded = webPageDownloader.getFromNetwork(accessUrl, httpRequestMethod, data);
  if (downloaded != null) {
    setCacheValue(cacheKey, downloaded);
  }
  return downloaded;
}
/**
 * Removes html tags from text, replaces every run of line breaks with a
 * single space and unescapes html entities.
 *
 * @param text
 *          text to clean
 * @return text without html tags, or {@code null} when the input was
 *         {@code null}
 */
public String cleanHtml(String text) {
  if (text == null) {
    return null;
  }
  // clean the description from html tags (should be somehow improved...)
  StringBuilder result = new StringBuilder(text.length());
  int copyFrom = 0;
  int tagStart = text.indexOf('<');
  while (tagStart >= 0) {
    // keep everything between the previous tag and this one
    result.append(text, copyFrom, tagStart);
    int tagEnd = text.indexOf('>', tagStart);
    if (tagEnd < 0) {
      // unterminated tag: everything from '<' to the end of text is dropped
      logger.warn("Problem with html code: " + text);
      copyFrom = text.length();
      break;
    }
    copyFrom = tagEnd + 1;
    tagStart = text.indexOf('<', copyFrom);
  }
  result.append(text, copyFrom, text.length());
  return StringEscapeUtils.unescapeHtml4(result.toString().replaceAll("[\n\r]+", " "));
}
/**
 * Returns data access object used for accessing cache types.
 *
 * @return the cacheTypeDao
 * @see #cacheTypeDao
 */
public CacheTypeDao getCacheTypeDao() {
  return this.cacheTypeDao;
}
/**
* Sets data access object that {@link #getCacheType()} uses to look up the
* cache type by class name.
*
* @param cacheTypeDao
* the cacheTypeDao to set
* @see #cacheTypeDao
*/
public void setCacheTypeDao(CacheTypeDao cacheTypeDao) {
this.cacheTypeDao = cacheTypeDao;
}
/**
 * Returns the object that is used for downloading web pages.
 *
 * @return the webPageDownloader
 * @see #webPageDownloader
 */
protected WebPageDownloader getWebPageDownloader() {
  return this.webPageDownloader;
}
/**
* Replaces the object used for downloading web pages when the requested
* content is not found in the cache.
*
* @param webPageDownloader
* the webPageDownloader to set
* @see #webPageDownloader
*/
protected void setWebPageDownloader(WebPageDownloader webPageDownloader) {
this.webPageDownloader = webPageDownloader;
}
}
package lcsb.mapviewer.annotation.services;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import lcsb.mapviewer.annotation.cache.CachableInterface;
import lcsb.mapviewer.annotation.cache.SourceNotAvailable;
import lcsb.mapviewer.annotation.cache.XmlSerializer;
import lcsb.mapviewer.annotation.data.Drug;
import lcsb.mapviewer.annotation.services.annotators.AnnotatorException;
import lcsb.mapviewer.annotation.services.annotators.HgncAnnotator;
import lcsb.mapviewer.common.exception.InvalidArgumentException;
import lcsb.mapviewer.model.Project;
import lcsb.mapviewer.model.map.MiriamData;
import lcsb.mapviewer.model.map.MiriamType;
import lcsb.mapviewer.model.map.model.ModelData;
import lcsb.mapviewer.model.map.species.Element;
import lcsb.mapviewer.persist.dao.ProjectDao;
/**
* Abstract class with some functionalities used by class accessing drug
......@@ -24,180 +35,297 @@ import lcsb.mapviewer.model.map.MiriamType;
*/
public abstract class DrugAnnotation extends CachableInterface {
/**
* Default class logger.
*/
private Logger logger = Logger.getLogger(DrugAnnotation.class);
/**
* Object that allows to serialize {@link Drug} elements into xml string and
* deserialize xml into {@link Drug} objects.
*/
private XmlSerializer<Drug> drugSerializer;
/**
* Object used to access information about organism taxonomy.
*/
@Autowired