Commit a4b223d8 authored by Piotr Gawron
Browse files

when searching for drugs by target take into account organism

parent 904f3cd4
......@@ -3,6 +3,7 @@ package lcsb.mapviewer.annotation.services;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
......@@ -111,11 +112,6 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
@Autowired
private HgncAnnotator hgncAnnotator;
/**
* Object used to access information about organism taxonomy.
*/
@Autowired
private TaxonomyBackend taxonomyBackend;
/**
* Default constructor.
......@@ -180,7 +176,7 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
if (node.getNodeType() == Node.ELEMENT_NODE) {
if (node.getNodeName().equalsIgnoreCase("organism")) {
// node with information about organism
target.setOrganism(taxonomyBackend.getByName(node.getTextContent()));
target.setOrganism(getTaxonomyBackend().getByName(node.getTextContent()));
} else if (node.getNodeName().equalsIgnoreCase("pref_name")) {
// node with information about name
target.setName(node.getTextContent());
......@@ -541,9 +537,9 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
}
@Override
public List<Drug> getDrugListByTarget(MiriamData target) throws DrugSearchException {
List<Drug> result = new ArrayList<Drug>();
List<MiriamData> uniprotData = new ArrayList<MiriamData>();
public List<Drug> getDrugListByTarget(MiriamData target, Collection<MiriamData> organisms) throws DrugSearchException {
List<Drug> result = new ArrayList<>();
List<MiriamData> uniprotData = new ArrayList<>();
if (MiriamType.HGNC_SYMBOL.equals(target.getDataType())) {
try {
uniprotData = hgncAnnotator.hgncToUniprot(target);
......@@ -561,7 +557,10 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
logger.warn("Too many uniprot ids. Only first will be taken");
}
try {
String page = getWebPageContent(URL_TARGET_FROM_UNIPROT + uniprotData.get(0).getResource());
String url = URL_TARGET_FROM_UNIPROT + uniprotData.get(0).getResource();
logger.debug(url);
String page = getWebPageContent(url);
Set<String> drugNames = new HashSet<>();
Document document = getXmlDocumentFromString(page);
......@@ -573,14 +572,17 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
Node node = nodeList.item(i);
if (node.getNodeType() == Node.ELEMENT_NODE) {
if (node.getNodeName().equalsIgnoreCase("target")) {
Node chemblTargetIdNode = getNode("target_chembl_id", node);
MiriamData targetMiriam = new MiriamData(MiriamType.CHEMBL_TARGET, chemblTargetIdNode.getTextContent());
List<Drug> drugs = getDrugsByChemblTarget(targetMiriam);
for (Drug drug : drugs) {
// don't add duplicates
if (!drugNames.contains(drug.getName())) {
drugNames.add(drug.getName());
result.add(drug);
String organismName = getNode("organism", node).getTextContent();
if (organismMatch(organismName, organisms)) {
Node chemblTargetIdNode = getNode("target_chembl_id", node);
MiriamData targetMiriam = new MiriamData(MiriamType.CHEMBL_TARGET, chemblTargetIdNode.getTextContent());
List<Drug> drugs = getDrugsByChemblTarget(targetMiriam);
for (Drug drug : drugs) {
// don't add duplicates
if (!drugNames.contains(drug.getName())) {
drugNames.add(drug.getName());
result.add(drug);
}
}
}
}
......@@ -597,6 +599,7 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
}
/**
* Returns list of drugs found by target identified in the parameter.
*
......@@ -664,23 +667,6 @@ public class ChEMBLParser extends DrugAnnotation implements IExternalService {
super.setWebPageDownloader(webPageDownloader);
}
/**
 * Returns the object used to access information about organism taxonomy.
 *
 * @return the taxonomyBackend
 * @see #taxonomyBackend
 */
protected TaxonomyBackend getTaxonomyBackend() {
  return this.taxonomyBackend;
}
/**
 * Replaces the object used to access information about organism taxonomy.
 *
 * @param taxonomyBackend
 *          the taxonomyBackend to set
 * @see #taxonomyBackend
 */
protected void setTaxonomyBackend(TaxonomyBackend taxonomyBackend) {
this.taxonomyBackend = taxonomyBackend;
}
/**
* @return the hgncAnnotator
* @see #hgncAnnotator
......
......@@ -6,6 +6,9 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import lcsb.mapviewer.annotation.cache.CachableInterface;
import lcsb.mapviewer.annotation.cache.XmlSerializer;
import lcsb.mapviewer.annotation.data.Drug;
......@@ -20,11 +23,22 @@ import lcsb.mapviewer.model.map.MiriamData;
*/
public abstract class DrugAnnotation extends CachableInterface {
/**
* Default class logger.
*/
private Logger logger = Logger.getLogger(DrugAnnotation.class);
/**
* Object that allows to serialize {@link Drug} elements into xml string and
* deserialize xml into {@link Drug} objects.
*/
private XmlSerializer<Drug> drugSerializer;
private XmlSerializer<Drug> drugSerializer;
/**
* Object used to access information about organism taxonomy.
*/
@Autowired
private TaxonomyBackend taxonomyBackend;
/**
* Default constructor. Initializes structures used for transforming
......@@ -59,7 +73,11 @@ public abstract class DrugAnnotation extends CachableInterface {
* thrown when there are problems with finding drug
*/
public abstract List<Drug> getDrugListByTarget(MiriamData target) throws DrugSearchException;
public abstract List<Drug> getDrugListByTarget(MiriamData target, Collection<MiriamData> organisms) throws DrugSearchException;
/**
 * Returns list of drugs for the given target without naming any organism.
 * Delegates to {@link #getDrugListByTarget(MiriamData, Collection)} with a
 * new, empty collection of organisms.
 *
 * @param target
 *          target for which drugs are searched
 * @return whatever the two-argument variant returns for an empty collection
 *         of organisms
 * @throws DrugSearchException
 *           propagated from the two-argument variant
 */
public List<Drug> getDrugListByTarget(MiriamData target) throws DrugSearchException {
  Collection<MiriamData> noOrganisms = new ArrayList<>();
  return getDrugListByTarget(target, noOrganisms);
}
/**
* Returns list of drugs that target at least one protein from the parameter
......@@ -71,7 +89,7 @@ public abstract class DrugAnnotation extends CachableInterface {
* @throws DrugSearchException
* thrown when there are problems with connection to drug database
*/
public List<Drug> getDrugListByTargets(Collection<MiriamData> targets) throws DrugSearchException {
public List<Drug> getDrugListByTargets(Collection<MiriamData> targets, Collection<MiriamData> organisms) throws DrugSearchException {
List<Drug> result = new ArrayList<Drug>();
Set<String> set = new HashSet<String>();
Set<MiriamData> searchedResult = new HashSet<MiriamData>();
......@@ -80,7 +98,7 @@ public abstract class DrugAnnotation extends CachableInterface {
continue;
}
searchedResult.add(md);
List<Drug> drugs = getDrugListByTarget(md);
List<Drug> drugs = getDrugListByTarget(md, organisms);
for (Drug drug : drugs) {
if (!set.contains(drug.getSources().get(0).getResource())) {
result.add(drug);
......@@ -93,6 +111,10 @@ public abstract class DrugAnnotation extends CachableInterface {
return result;
}
/**
 * Returns list of drugs for the given targets without naming any organism.
 * Delegates to {@link #getDrugListByTargets(Collection, Collection)} with a
 * new, empty collection of organisms.
 *
 * @param targets
 *          targets for which drugs are searched
 * @return whatever the two-argument variant returns for an empty collection
 *         of organisms
 * @throws DrugSearchException
 *           propagated from the two-argument variant
 */
public List<Drug> getDrugListByTargets(Collection<MiriamData> targets) throws DrugSearchException {
  Collection<MiriamData> noOrganisms = new ArrayList<>();
  return getDrugListByTargets(targets, noOrganisms);
}
/**
* @return the drugSerializer
* @see #drugSerializer
......@@ -110,4 +132,34 @@ public abstract class DrugAnnotation extends CachableInterface {
this.drugSerializer = drugSerializer;
}
/**
 * Checks if the organism identified by name belongs to the collection of
 * accepted organisms.
 * <p>
 * The organism is accepted (method returns <code>true</code>) without any
 * lookup when there is nothing to compare: the name is <code>null</code> or
 * empty, or the collection of organisms is <code>null</code> or empty. It is
 * also accepted when the taxonomy lookup fails; the problem is then only
 * logged.
 *
 * @param organismName
 *          name of the organism, resolved with
 *          {@link TaxonomyBackend#getByName(String)}
 * @param organisms
 *          accepted organisms; <code>null</code> or empty collection means
 *          that no filtering is applied
 * @return <code>true</code> if no filtering applies, the lookup failed or the
 *         resolved organism is in the collection; <code>false</code>
 *         otherwise
 */
protected boolean organismMatch(String organismName, Collection<MiriamData> organisms) {
  // nothing to check or nothing to filter by: accept (null collection is
  // treated like an empty one instead of causing NullPointerException)
  if (organismName == null || organismName.isEmpty() || organisms == null || organisms.isEmpty()) {
    return true;
  }
  try {
    MiriamData organism = taxonomyBackend.getByName(organismName);
    return organisms.contains(organism);
  } catch (TaxonomySearchException e) {
    // best effort: a failed lookup is logged and the organism is accepted
    logger.error("Problem with taxonomy search for: " + organismName, e);
    return true;
  }
}
/**
 * Returns the object used to access information about organism taxonomy.
 *
 * @return the taxonomyBackend
 * @see #taxonomyBackend
 */
public TaxonomyBackend getTaxonomyBackend() {
  return this.taxonomyBackend;
}
/**
 * Replaces the object used to access information about organism taxonomy.
 *
 * @param taxonomyBackend
 *          the taxonomyBackend to set
 * @see #taxonomyBackend
 */
public void setTaxonomyBackend(TaxonomyBackend taxonomyBackend) {
this.taxonomyBackend = taxonomyBackend;
}
}
......@@ -3,6 +3,7 @@ package lcsb.mapviewer.annotation.services;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
......@@ -107,12 +108,6 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
*/
private Pattern targetPattern = Pattern.compile("(?<=\"/biodb/bio_entities/)([\\s\\S]*?)(?=\")");
/**
* Object used to access information about organism taxonomy.
*/
@Autowired
private TaxonomyBackend taxonomyBackend;
/**
* Default constructor.
*/
......@@ -354,7 +349,7 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
i = page.indexOf("Organism</dt><dd>", i);
i = i + "Organism</dt><dd>".length();
j = page.indexOf("</dd>", i);
target.setOrganism(taxonomyBackend.getByName(page.substring(i, j)));
target.setOrganism(getTaxonomyBackend().getByName(page.substring(i, j)));
// Getting References
i = page.indexOf("<strong>References</strong>", i);
......@@ -554,8 +549,8 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
}
@Override
public List<Drug> getDrugListByTarget(MiriamData target) throws DrugSearchException {
List<Drug> result = new ArrayList<Drug>();
public List<Drug> getDrugListByTarget(MiriamData target, Collection<MiriamData> organisms) throws DrugSearchException {
List<Drug> result = new ArrayList<>();
if (target == null) {
return result;
}
......@@ -571,12 +566,12 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
throw new DrugSearchException("Cannot access drug database", e);
}
Set<String> drugNames = new HashSet<String>();
Set<String> drugNames = new HashSet<>();
Matcher matcher = targetPattern.matcher(page);
while (matcher.find()) {
String drugbankTargetId = matcher.group(0);
drugNames.addAll(getDrugNamesForTarget(new MiriamData(MiriamType.DRUGBANK_TARGET_V4, drugbankTargetId), target));
drugNames.addAll(getDrugNamesForTarget(new MiriamData(MiriamType.DRUGBANK_TARGET_V4, drugbankTargetId), target, organisms));
}
for (String string : drugNames) {
Drug drug = findDrug(string);
......@@ -605,27 +600,28 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
* thrown when there are problems with connection to DrugBank
* database
*/
private Set<String> getDrugNamesForTarget(MiriamData drugbankTarget, MiriamData hgncTarget) throws DrugSearchException {
private Set<String> getDrugNamesForTarget(MiriamData drugbankTarget, MiriamData hgncTarget, Collection<MiriamData> organisms) throws DrugSearchException {
if (!MiriamType.DRUGBANK_TARGET_V4.equals(drugbankTarget.getDataType())) {
throw new InvalidArgumentException("drugbankTarget must be of type: " + MiriamType.DRUGBANK_TARGET_V4);
}
try {
Set<String> drugNames = new HashSet<String>();
Set<String> drugNames = new HashSet<>();
String url = URL_TARGET_DETAIL + drugbankTarget.getResource();
String page = getWebPageContent(URL_TARGET_DETAIL + drugbankTarget.getResource());
String page = getWebPageContent(url);
int id = page.indexOf("<th>DrugBank ID</th>");
if (id < 0) {
int idPosition = page.indexOf("<th>DrugBank ID</th>");
if (idPosition < 0) {
throw new DrugSearchException("Problematic web page for target: " + drugbankTarget + "(" + hgncTarget + ")");
}
int i = page.indexOf("/polypeptides/");
int protienLinkPosition = page.indexOf("/polypeptides/");
// sometimes there might not be an element
if (i >= 0) {
i = i + "/polypeptides/".length(); // 20;
int j = page.indexOf('"', i);
String uniprotId = page.substring(i, j);
if (protienLinkPosition >= 0) {
protienLinkPosition = protienLinkPosition + "/polypeptides/".length(); // 20;
int j = page.indexOf('"', protienLinkPosition);
String uniprotId = page.substring(protienLinkPosition, j);
MiriamData uniprotMiriam = new MiriamData(MiriamType.UNIPROT, uniprotId);
MiriamData hgncMiriam = uniprotAnnotator.uniProtToHgnc(uniprotMiriam);
if (hgncMiriam == null || !hgncMiriam.equals(hgncTarget)) {
......@@ -638,7 +634,18 @@ public class DrugbankHTMLParser extends DrugAnnotation implements IExternalServi
return drugNames;
}
Matcher matcher = drugNamePattern.matcher(page.substring(id));
int organismPosition = page.indexOf("Organism<");
if (organismPosition >= 0) {
int organismStart = page.indexOf("<td>", organismPosition) + "<td>".length();
int organismEnd = page.indexOf("<", organismStart + 1);
String organismName = page.substring(organismStart, organismEnd);
if (!organismMatch(organismName, organisms)) {
logger.debug("Organism doesn't match. Found" + organismName + ". Expected: " + organisms);
return drugNames;
}
}
Matcher matcher = drugNamePattern.matcher(page.substring(idPosition));
while (matcher.find()) {
drugNames.add(matcher.group(1));
......
......@@ -694,6 +694,34 @@ public class ChEMBLParserTest extends AnnotationTestFunctions {
}
/**
 * Searches ChEMBL for drugs targeting HGNC symbol GRIN3B with the organism
 * filter set to taxonomy id "-1" (presumably matching no real organism) and
 * expects a non-null, empty result.
 */
@Test
public void testFindDrugByHgncTargetAndFilteredOutByOrganism() throws Exception {
  try {
    List<MiriamData> organismFilter = new ArrayList<>();
    MiriamData organism = new MiriamData(MiriamType.TAXONOMY, "-1");
    organismFilter.add(organism);

    MiriamData hgncTarget = new MiriamData(MiriamType.HGNC_SYMBOL, "GRIN3B");
    List<Drug> foundDrugs = chemblParser.getDrugListByTarget(hgncTarget, organismFilter);

    assertNotNull(foundDrugs);
    assertEquals("No drugs for this organisms should be found", 0, foundDrugs.size());
  } catch (Exception ex) {
    ex.printStackTrace();
    throw ex;
  }
}
/**
 * Searches ChEMBL for drugs targeting HGNC symbol GRIN3B with the organism
 * filter set to {@link TaxonomyBackend#HUMAN_TAXONOMY} and expects a
 * non-null result containing at least one drug.
 */
@Test
public void testFindDrugByHgncTargetAndFilteredByOrganism() throws Exception {
  try {
    List<MiriamData> organismFilter = new ArrayList<>();
    organismFilter.add(TaxonomyBackend.HUMAN_TAXONOMY);

    MiriamData hgncTarget = new MiriamData(MiriamType.HGNC_SYMBOL, "GRIN3B");
    List<Drug> foundDrugs = chemblParser.getDrugListByTarget(hgncTarget, organismFilter);

    assertNotNull(foundDrugs);
    assertTrue(!foundDrugs.isEmpty());
  } catch (Exception ex) {
    ex.printStackTrace();
    throw ex;
  }
}
@Test
public void testFindDrugsByRepeatingHgncTargets() throws Exception {
try {
......
......@@ -2,6 +2,7 @@ package lcsb.mapviewer.annotation.services;
import static org.junit.Assert.assertEquals;
import java.util.Collection;
import java.util.List;
import org.apache.log4j.Logger;
......@@ -47,7 +48,7 @@ public class DrugAnnotationTest extends AnnotationTestFunctions {
}
@Override
public List<Drug> getDrugListByTarget(MiriamData target) {
public List<Drug> getDrugListByTarget(MiriamData target, Collection<MiriamData> organisms) {
// TODO Auto-generated method stub
return null;
}
......
......@@ -302,6 +302,34 @@ public class DrugbankHTMLParserTest extends AnnotationTestFunctions {
}
}
/**
 * Searches DrugBank for drugs targeting HGNC symbol NFKB2 with the organism
 * filter set to taxonomy id "-1" (presumably matching no real organism) and
 * expects a non-null, empty result.
 */
@Test
public void testFindDrugByHgncTargetAndFilteredOutByOrganism() throws Exception {
  try {
    List<MiriamData> organismFilter = new ArrayList<>();
    MiriamData organism = new MiriamData(MiriamType.TAXONOMY, "-1");
    organismFilter.add(organism);

    MiriamData hgncTarget = new MiriamData(MiriamType.HGNC_SYMBOL, "NFKB2");
    List<Drug> foundDrugs = drugBankHTMLParser.getDrugListByTarget(hgncTarget, organismFilter);

    assertNotNull(foundDrugs);
    assertEquals("No drugs for this organisms should be found", 0, foundDrugs.size());
  } catch (Exception ex) {
    ex.printStackTrace();
    throw ex;
  }
}
/**
 * Searches DrugBank for drugs targeting HGNC symbol NFKB2 with the organism
 * filter set to {@link TaxonomyBackend#HUMAN_TAXONOMY} and expects a
 * non-null result containing at least one drug.
 */
@Test
public void testFindDrugByHgncTargetAndFilteredByOrganism() throws Exception {
  try {
    List<MiriamData> organismFilter = new ArrayList<>();
    organismFilter.add(TaxonomyBackend.HUMAN_TAXONOMY);

    MiriamData hgncTarget = new MiriamData(MiriamType.HGNC_SYMBOL, "NFKB2");
    List<Drug> foundDrugs = drugBankHTMLParser.getDrugListByTarget(hgncTarget, organismFilter);

    assertNotNull(foundDrugs);
    assertTrue(!foundDrugs.isEmpty());
  } catch (Exception ex) {
    ex.printStackTrace();
    throw ex;
  }
}
@Test
public void testFindDrugByInvalidTarget() throws Exception {
try {
......
......@@ -46,9 +46,6 @@ public class DrugRestImpl extends BaseRestImpl {
@Autowired
private IUserService userService;
@Autowired
private ElementMatcher elementMatcher;
public List<Map<String, Object>> getDrugsByQuery(String token, String projectId, String columns, String query) throws SecurityException, QueryException {
AuthenticationToken authenticationToken = userService.getToken(token);
Model model = modelService.getLastModelByProjectId(projectId, authenticationToken);
......
......@@ -282,14 +282,14 @@ public class DrugService implements IDrugService {
}
}
try {
List<Drug> drugs = drugBankParser.getDrugListByTargets(targetsMiriam);
List<Drug> drugs = drugBankParser.getDrugListByTargets(targetsMiriam, searchCriteria.getOrganisms());
drugList.addAll(drugs);
} catch (DrugSearchException e) {
logger.error("Problem with accessing drugBank parser", e);
}
try {
List<Drug> drugs = chEMBLParser.getDrugListByTargets(targetsMiriam);
List<Drug> drugs = chEMBLParser.getDrugListByTargets(targetsMiriam, searchCriteria.getOrganisms());
drugList.addAll(drugs);
} catch (DrugSearchException e) {
logger.error("Problem with accessing chembl parser", e);
......@@ -386,9 +386,10 @@ public class DrugService implements IDrugService {
}
}
double counter = 0.0;
List<MiriamData> organisms = new ArrayList<>();
for (MiriamData md : targetMiriams) {
try {
List<Drug> chemicalList = drugBankParser.getDrugListByTarget(md);
List<Drug> chemicalList = drugBankParser.getDrugListByTarget(md, organisms);
for (Drug chemical : chemicalList) {
cacheMiriamData(chemical);
}
......@@ -396,7 +397,7 @@ public class DrugService implements IDrugService {
logger.error("Problem with accessing info about drugbank for target: " + md, e);
}
try {
List<Drug> chemicalList = chEMBLParser.getDrugListByTarget(md);
List<Drug> chemicalList = chEMBLParser.getDrugListByTarget(md, organisms);
for (Drug chemical : chemicalList) {
cacheMiriamData(chemical);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment