Commit 35be18e3 authored by Piotr Gawron's avatar Piotr Gawron

Merge branch 'branch' into 'master'

Anubuthi code

See merge request piotr.gawron/drug-api!1
parents 304534b6 2d5f01b8
Pipeline #9286 passed with stage
in 44 seconds
/target/
/bin/
.classpath
.project
.settings/
/target/
test:
image: maven:3.6.0-jdk-8
stage: test
script:
- mvn test
- awk -F"," '{ instructions += $4 + $5; covered += $5 } END { print covered, "/", instructions, "instructions covered"; print 100*covered/instructions, "%covered" }' target/site/jacoco/jacoco.csv
......@@ -136,6 +136,29 @@
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.11.2</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.11.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.8.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.6</version>
</dependency>
</dependencies>
......@@ -163,6 +186,33 @@
<path>/</path>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
</configuration>
</plugin>
<!-- Code Coverage report generation -->
<plugin>
<groupId>org.jacoco</groupId>
<artifactId>jacoco-maven-plugin</artifactId>
<version>0.7.9</version>
<executions>
<execution>
<goals>
<goal>prepare-agent</goal>
</goals>
</execution>
<execution>
<id>generate-code-coverage-report</id>
<phase>test</phase>
<goals>
<goal>report</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package drugbankParse;
import java.util.List;
/**
 * Plain data holder describing a single drug entry.
 *
 * <p>Every property is a simple read/write bean property. No validation or
 * copying is performed: getters hand back exactly the reference that was
 * passed to the matching setter (or {@code null} if the setter was never
 * called).
 */
public class Drug {

    // --- name -------------------------------------------------------------

    private String name;

    /** @return the drug name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the drug name to store */
    public void setName(String name) {
        this.name = name;
    }

    // --- source identifier ------------------------------------------------

    private Identifier sourceIdentifier;

    /** @return the identifier of this drug in its source database, or {@code null} if none was set */
    public Identifier getSourceIdentifier() {
        return sourceIdentifier;
    }

    /** @param sourceIdentifier the identifier of this drug in its source database */
    public void setSourceIdentifier(Identifier sourceIdentifier) {
        this.sourceIdentifier = sourceIdentifier;
    }

    // --- description ------------------------------------------------------

    private String description;

    /** @return the free-text description, or {@code null} if none was set */
    public String getDescription() {
        return description;
    }

    /** @param description the free-text description to store */
    public void setDescription(String description) {
        this.description = description;
    }

    // --- blood-brain barrier ----------------------------------------------

    private Boolean bloodBrainBarrier;

    /**
     * @return the stored blood-brain-barrier flag; this is a boxed value and
     *         may be {@code null}, so callers must not auto-unbox it blindly
     */
    public Boolean isBloodBrainBarrier() {
        return bloodBrainBarrier;
    }

    /** @param bloodBrainBarrier the blood-brain-barrier flag, {@code null} allowed */
    public void setBloodBrainBarrier(Boolean bloodBrainBarrier) {
        this.bloodBrainBarrier = bloodBrainBarrier;
    }

    // --- approval ---------------------------------------------------------

    private Boolean approved;

    /**
     * @return the stored approval flag; this is a boxed value and may be
     *         {@code null}, so callers must not auto-unbox it blindly
     */
    public Boolean isApproved() {
        return approved;
    }

    /** @param approved the approval flag, {@code null} allowed */
    public void setApproved(Boolean approved) {
        this.approved = approved;
    }

    // --- brand names ------------------------------------------------------

    private List<String> brandNames;

    /** @return the stored list of brand names (the same instance that was set) */
    public List<String> getBrandNames() {
        return brandNames;
    }

    /** @param brandNames the list of brand names to store (kept by reference) */
    public void setBrandNames(List<String> brandNames) {
        this.brandNames = brandNames;
    }

    // --- synonyms ---------------------------------------------------------

    private List<String> synonyms;

    /** @return the stored list of synonyms (the same instance that was set) */
    public List<String> getSynonyms() {
        return synonyms;
    }

    /** @param synonyms the list of synonyms to store (kept by reference) */
    public void setSynonyms(List<String> synonyms) {
        this.synonyms = synonyms;
    }

    // --- targets ----------------------------------------------------------

    private List<Target> targets;

    /** @return the stored list of targets (the same instance that was set) */
    public List<Target> getTargets() {
        return targets;
    }

    /** @param targets the list of targets to store (kept by reference) */
    public void setTargets(List<Target> targets) {
        this.targets = targets;
    }
}
package drugbankParse;
/**
 * Pairs a database type with the identifier of one record inside that
 * database. Both parts are plain read/write bean properties with no
 * validation.
 */
public class Identifier {

    /**
     * The type of database for which we have the identifier.
     */
    private IdentifierType databaseIdentifier;

    /** @return the database type, or {@code null} if none was set */
    public IdentifierType getDatabaseIdentifier() {
        return databaseIdentifier;
    }

    /** @param databaseIdentifier the database type this identifier belongs to */
    public void setDatabaseIdentifier(IdentifierType databaseIdentifier) {
        this.databaseIdentifier = databaseIdentifier;
    }

    /**
     * The unique identifier in a database described by {@link #databaseIdentifier}.
     */
    private String resourceIdentifier;

    /** @return the identifier within the database, or {@code null} if none was set */
    public String getResourceIdentifier() {
        return resourceIdentifier;
    }

    /** @param resourceIdentifier the identifier within the database */
    public void setResourceIdentifier(String resourceIdentifier) {
        this.resourceIdentifier = resourceIdentifier;
    }
}
package drugbankParse;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * Kinds of external databases an {@link Identifier} can point into, each
 * carrying the URI(s) that name the database.
 */
public enum IdentifierType {
    DRUG_BANK(new String[] {"urn:miriam:drugbank"}),
    UNIPROT(new String[] {"urn:miriam:uniprot"}),
    TAXONOMY(new String[] {"urn:miriam:taxonomy"}),
    ;

    /** Read-only list of URIs for this database type. */
    private final List<String> uris;

    private IdentifierType(String[] uris) {
        // Enum constants are shared by the whole JVM, so the list must not be
        // writable: a bare Arrays.asList view would let any caller overwrite
        // entries via set(...). The array is cloned so the list is also
        // detached from whatever array was passed in.
        this.uris = Collections.unmodifiableList(Arrays.asList(uris.clone()));
    }

    /**
     * @return an unmodifiable list of the URIs naming this database type;
     *         mutating calls throw {@link UnsupportedOperationException}
     */
    public List<String> getUris() {
        return uris;
    }
}
package drugbankParse;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.text.StringEscapeUtils;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that splits a document into one XML string per outermost
 * {@code <drug>} element.
 *
 * <p>Every SAX event is re-serialized into a text buffer. When the outermost
 * {@code <drug>} element closes, the buffer content is stored in
 * {@link #getDrugData()}. {@code <drug>} elements nested inside another
 * {@code <drug>} stay part of the enclosing fragment.
 */
public class MyHandler extends DefaultHandler {
    /** Completed fragments, one serialized outermost {@code <drug>} element each. */
    List<String> drugData = new ArrayList<>();
    /** Serialization buffer for the events seen since the last reset. */
    StringBuilder data = new StringBuilder();
    /** Number of {@code <drug>} elements currently open. */
    int depthCounter = 0;

    /** @return the fragments collected so far (the live internal list) */
    public List<String> getDrugData() {
        return drugData;
    }

    /** @param drugData the list that subsequently completed fragments are appended to */
    public void setDrugData(List<String> drugData) {
        this.drugData = drugData;
    }

    /**
     * Writes the start tag with all its attributes to the buffer. The buffer
     * is cleared first when this tag opens an outermost {@code <drug>}.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if ("drug".equalsIgnoreCase(qName)) {
            // Start a fresh fragment based on nesting depth, not on whether the
            // element happens to carry attributes: a nested <drug> with
            // attributes must not wipe the enclosing fragment, and a top-level
            // <drug> without attributes must not inherit earlier output.
            if (depthCounter == 0) {
                data.setLength(0);
            }
            depthCounter++;
        }
        data.append('<').append(qName);
        for (int i = 0; i < attributes.getLength(); i++) {
            // Attribute values arrive unescaped from the parser; they have to
            // be escaped again (like text nodes are) or a value containing
            // '"', '&' or '<' makes the fragment malformed.
            data.append(' ')
                .append(attributes.getQName(i))
                .append("=\"")
                .append(StringEscapeUtils.escapeXml10(attributes.getValue(i)))
                .append('"');
        }
        data.append('>');
    }

    /** Appends the XML-escaped character data to the buffer. */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        data.append(StringEscapeUtils.escapeXml10(new String(ch, start, length)));
    }

    /**
     * Writes the end tag to the buffer and, when it closes the outermost
     * {@code <drug>}, stores the buffer content as a finished fragment.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        data.append("</").append(qName).append('>');
        if ("drug".equalsIgnoreCase(qName)) {
            depthCounter--;
            if (depthCounter == 0) {
                drugData.add(data.toString());
            }
        }
    }
}
package drugbankParse;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Turns an XML file of {@code <drug>} elements into a list of {@link Drug}
 * objects.
 *
 * <p>The file is first split into one XML string per drug by
 * {@link SAXDrugParser}; each string is then parsed into a DOM tree from
 * which the individual fields are read.
 */
public class Parser {
    Logger logger = LogManager.getLogger(Parser.class);

    /**
     * Parses the given file and builds one {@link Drug} per drug fragment.
     *
     * @param filename path of the XML file to read
     * @return the parsed drugs, in the order the fragments were produced;
     *         empty if there were none
     * @throws ParserException if the file cannot be split into fragments or a
     *         fragment cannot be parsed
     */
    public List<Drug> getInformation(String filename) throws ParserException {
        List<Drug> druglist = new ArrayList<>();
        List<String> drugData = new SAXDrugParser().parsedDrugs(filename);
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            // The target extractor keeps no per-drug state, so one instance
            // serves every fragment.
            TargetFields fillTargets = new TargetFields();
            for (String drugXml : drugData) {
                InputStream stream = new ByteArrayInputStream(drugXml.getBytes(StandardCharsets.UTF_8));
                Document doc = db.parse(stream);
                Element e = doc.getDocumentElement();

                Drug sampleDrug = new Drug();
                sampleDrug.setName(extractDrugName(e));
                logger.debug(sampleDrug.getName());
                sampleDrug.setDescription(extractDrugDescription(e));
                logger.debug(sampleDrug.getDescription());
                sampleDrug.setApproved(extractDrugApproved(e));
                sampleDrug.setSynonyms(extractSynonyms(e));
                for (String s : sampleDrug.getSynonyms()) {
                    logger.debug("synonym is: {}", s);
                }
                sampleDrug.setBrandNames(extractBrandNames(e));
                for (String s : sampleDrug.getBrandNames()) {
                    logger.debug("brand name is: {}", s);
                }
                sampleDrug.setTargets(fillTargets.extractTargets(e));
                logger.debug("size of target is: {}", sampleDrug.getTargets().size());
                sampleDrug.setSourceIdentifier(extractSourceIdentifier(e));
                sampleDrug.setBloodBrainBarrier(extractBloodBrainBarrier(e));
                druglist.add(sampleDrug);
                logger.debug("Drug added to list successfully");
            }
        } catch (ParserConfigurationException | IOException | SAXException e) {
            logger.debug("something is wrong");
            throw new ParserException(e);
        }
        return druglist;
    }

    // ************** Drug ******************/

    /**
     * Returns the text content of the first node in {@code nodes}, or an
     * empty string when the list is empty. Using the text content (rather
     * than the value of the first child node) also works for elements that
     * have no child at all, such as {@code <name/>}.
     */
    private static String firstText(NodeList nodes) {
        if (nodes == null || nodes.getLength() == 0) {
            return "";
        }
        return nodes.item(0).getTextContent();
    }

    /** @return text of the first {@code <name>} descendant, or "" if there is none */
    private String extractDrugName(Element e) {
        return firstText(e.getElementsByTagName("name"));
    }

    /** @return text of the first {@code <description>} descendant, or "" if there is none */
    private String extractDrugDescription(Element e) {
        return firstText(e.getElementsByTagName("description"));
    }

    /**
     * Derives the approval state from the {@code <group>} entries of the
     * first {@code <groups>} element.
     *
     * <ul>
     * <li>exactly one group and it is "approved": {@code true}</li>
     * <li>several groups and one of them is "approved": ambiguous,
     * {@code null}</li>
     * <li>no group is "approved" (or there are no groups at all):
     * {@code false}</li>
     * </ul>
     *
     * @param e the drug element
     * @return {@code true}, {@code false} or {@code null} as described above
     */
    private Boolean extractDrugApproved(Element e) {
        NodeList groupsList = e.getElementsByTagName("groups");
        if (groupsList.getLength() == 0) {
            return false;
        }
        NodeList groups = ((Element) groupsList.item(0)).getElementsByTagName("group");
        boolean approvedSeen = false;
        for (int k = 0; k < groups.getLength() && !approvedSeen; k++) {
            approvedSeen = groups.item(k).getTextContent().equalsIgnoreCase("approved");
        }
        if (!approvedSeen) {
            return false;
        }
        if (groups.getLength() == 1) {
            return true;
        }
        logger.info("the drug is ambigous");
        return null;
    }

    /**
     * Collects the text of every {@code <synonym>} found below any
     * {@code <synonyms>} descendant of the drug element. Empty synonym
     * elements are skipped.
     */
    private List<String> extractSynonyms(Element e) {
        // NOTE(review): getElementsByTagName searches all descendants, so
        // <synonyms> blocks nested deeper in the drug (if the format has any)
        // are included as well - confirm that this is intended.
        List<String> listOfSynonyms = new ArrayList<>();
        NodeList containers = e.getElementsByTagName("synonyms");
        for (int i = 0; i < containers.getLength(); i++) {
            NodeList synonymList = ((Element) containers.item(i)).getElementsByTagName("synonym");
            for (int k = 0; k < synonymList.getLength(); k++) {
                String synonym = synonymList.item(k).getTextContent();
                if (!synonym.isEmpty()) {
                    listOfSynonyms.add(synonym);
                }
            }
        }
        return listOfSynonyms;
    }

    /**
     * Collects the distinct {@code <labeller>} texts of all
     * {@code <product>} elements below {@code <products>}. The order of the
     * returned list is unspecified. Products without a labeller, or with an
     * empty one, contribute nothing.
     */
    private List<String> extractBrandNames(Element e) {
        // NOTE(review): the value used as "brand name" is the product's
        // <labeller>; verify this is the intended source field.
        HashSet<String> distinctNames = new HashSet<String>();
        NodeList containers = e.getElementsByTagName("products");
        for (int i = 0; i < containers.getLength(); i++) {
            NodeList prodList = ((Element) containers.item(i)).getElementsByTagName("product");
            for (int j = 0; j < prodList.getLength(); j++) {
                String labeller = firstText(((Element) prodList.item(j)).getElementsByTagName("labeller"));
                if (!labeller.isEmpty()) {
                    distinctNames.add(labeller);
                }
            }
        }
        return new ArrayList<>(distinctNames);
    }

    /**
     * Builds the drug's own identifier from the first {@code <drugbank-id>}
     * descendant.
     *
     * @return an identifier typed {@link IdentifierType#DRUG_BANK}; its
     *         resource identifier stays {@code null} when the element is
     *         missing
     */
    private Identifier extractSourceIdentifier(Element e) {
        Identifier sampleIdentifier = new Identifier();
        sampleIdentifier.setDatabaseIdentifier(IdentifierType.DRUG_BANK);
        NodeList resourceIdentifierList = e.getElementsByTagName("drugbank-id");
        if (resourceIdentifierList.getLength() > 0) {
            // getNodeValue() is always null for element nodes; the id lives in
            // the element's text content.
            sampleIdentifier.setResourceIdentifier(resourceIdentifierList.item(0).getTextContent());
        }
        return sampleIdentifier;
    }

    /**
     * Placeholder: no blood-brain-barrier information is extracted yet.
     *
     * @return always {@code null}
     */
    private Boolean extractBloodBrainBarrier(Element e) {
        return null;
    }
}
package drugbankParse;
/**
 * Checked exception signalling that parsing failed; it always wraps the
 * underlying failure as its cause.
 */
public class ParserException extends Exception {

    /**
     * @param cause the failure that made parsing impossible; it is available
     *        through {@link #getCause()}
     */
    public ParserException(Throwable cause) {
        super(cause);
    }
}
package drugbankParse;
import java.io.File;
import java.io.IOException;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
/**
 * Runs a SAX parse over a file with a {@link MyHandler} and returns what the
 * handler collected.
 */
public class SAXDrugParser {

    /**
     * Parses the file at {@code filename} and returns the handler's drug data.
     *
     * @param filename path of the XML file to parse
     * @return the list obtained from {@link MyHandler#getDrugData()} after the parse
     * @throws ParserException if the SAX parser cannot be configured, the
     *         document is not well-formed, or the file cannot be read
     */
    public List<String> parsedDrugs(String filename) throws ParserException {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        final MyHandler collector = new MyHandler();
        try {
            factory.newSAXParser().parse(new File(filename), collector);
            return collector.getDrugData();
        } catch (ParserConfigurationException | SAXException | IOException failure) {
            throw new ParserException(failure);
        }
    }
}
package drugbankParse;
import java.util.List;
/**
 * Plain data holder describing one target of a drug.
 *
 * <p>Every property is a simple read/write bean property without validation;
 * getters return exactly the reference handed to the matching setter (or
 * {@code null} if it was never set).
 */
public class Target {

    // --- name -------------------------------------------------------------

    private String name;

    /** @return the target name, or {@code null} if none was set */
    public String getName() {
        return name;
    }

    /** @param name the target name to store */
    public void setName(String name) {
        this.name = name;
    }

    // --- organism ---------------------------------------------------------

    private Identifier organism;

    /** @return the identifier of the organism, or {@code null} if none was set */
    public Identifier getOrganism() {
        return organism;
    }

    /** @param organism the identifier of the organism */
    public void setOrganism(Identifier organism) {
        this.organism = organism;
    }

    // --- source identifier ------------------------------------------------

    private Identifier sourceIdentifier;

    /** @return the identifier of this target in its source database, or {@code null} if none was set */
    public Identifier getSourceIdentifier() {
        return sourceIdentifier;
    }

    /** @param sourceIdentifier the identifier of this target in its source database */
    public void setSourceIdentifier(Identifier sourceIdentifier) {
        this.sourceIdentifier = sourceIdentifier;
    }

    // --- further identifiers ----------------------------------------------

    private List<Identifier> identifiers;

    /** @return the stored list of additional identifiers (the same instance that was set) */
    public List<Identifier> getIdentifiers() {
        return identifiers;
    }

    /** @param identifiers the list of additional identifiers (kept by reference) */
    public void setIdentifiers(List<Identifier> identifiers) {
        this.identifiers = identifiers;
    }

    // --- type -------------------------------------------------------------

    private String type;

    /** @return the target type text, or {@code null} if none was set */
    public String getType() {
        return type;
    }

    /** @param type the target type text to store */
    public void setType(String type) {
        this.type = type;
    }
}
package drugbankParse;
import java.util.ArrayList;
import java.util.List;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Reads the {@code <targets>/<target>} entries of a drug element and converts
 * each of them into a {@link Target}.
 */
public class TargetFields {
    Logger logger = LogManager.getLogger(TargetFields.class);

    /**
     * Builds one {@link Target} per {@code <target>} element found below any
     * {@code <targets>} descendant of {@code e}.
     *
     * @param e the drug element
     * @return the targets in document order; empty if there are none
     */
    public List<Target> extractTargets(Element e) {
        List<Target> listOfTargets = new ArrayList<>();
        NodeList containers = e.getElementsByTagName("targets");
        for (int j = 0; j < containers.getLength(); j++) {
            NodeList targetList = ((Element) containers.item(j)).getElementsByTagName("target");
            for (int k = 0; k < targetList.getLength(); k++) {
                Element tarEle = (Element) targetList.item(k);
                // A new object per <target>: reusing one instance would make
                // every list entry alias the data of the last target.
                Target target = new Target();
                target.setName(extractTargetName(tarEle));
                target.setSourceIdentifier(extractTargetSourceIdentifier(tarEle));
                target.setOrganism(extractTargetOrganism(tarEle));
                target.setIdentifiers(extractTargetIdentifiers(tarEle));
                target.setType(extractTargetType(tarEle));
                listOfTargets.add(target);
            }
        }
        return listOfTargets;
    }

    /**
     * Returns the text content of the first node in {@code nodes}, or an
     * empty string when the list is empty. Text content (rather than the
     * value of the first child node) also works for empty elements.
     */
    private static String firstText(NodeList nodes) {
        if (nodes == null || nodes.getLength() == 0) {
            return "";
        }
        return nodes.item(0).getTextContent();
    }

    /** @return text of the first {@code <name>} descendant of the target, or "" if there is none */
    private String extractTargetName(Element tarEle) {
        return firstText(tarEle.getElementsByTagName("name"));
    }

    /**
     * Builds the target's source identifier from its first {@code <id>}
     * descendant.
     *
     * @return the identifier, or {@code null} when the target has no
     *         {@code <id>} element
     */
    private Identifier extractTargetSourceIdentifier(Element tarEle) {
        NodeList sourceIdentifierList = tarEle.getElementsByTagName("id");
        if (sourceIdentifierList.getLength() == 0) {
            return null;
        }
        Identifier sampleIdentifier = new Identifier();
        // NOTE(review): the <id> value is labelled as a UniProt identifier;
        // confirm that the target <id> really is a UniProt accession.
        sampleIdentifier.setDatabaseIdentifier(IdentifierType.UNIPROT);
        sampleIdentifier.setResourceIdentifier(firstText(sourceIdentifierList));
        return sampleIdentifier;
    }

    /**
     * Reads the organism of a target from the {@code ncbi-taxonomy-id}
     * attribute of {@code target -> polypeptide -> organism}.
     *
     * <p>If a polypeptide lists several organisms they must all carry the
     * same taxonomy id; otherwise a warning is logged and that polypeptide
     * yields no organism. When the target has several polypeptides, the last
     * one that has organism elements decides the result.
     *
     * @return a {@link IdentifierType#TAXONOMY} identifier, or {@code null}
     *         when no organism is present or the organisms disagree
     */
    private Identifier extractTargetOrganism(Element tarEle) {
        Identifier organism = null;
        NodeList polypeptideList = tarEle.getElementsByTagName("polypeptide");
        for (int i = 0; i < polypeptideList.getLength(); i++) {
            NodeList orgList = ((Element) polypeptideList.item(i)).getElementsByTagName("organism");
            if (orgList.getLength() == 0) {
                continue;
            }
            String taxonomyId = ((Element) orgList.item(0)).getAttribute("ncbi-taxonomy-id");
            boolean consistent = true;
            // Compare from the second entry onwards; there is no predecessor
            // for index 0.
            for (int j = 1; j < orgList.getLength(); j++) {
                String otherId = ((Element) orgList.item(j)).getAttribute("ncbi-taxonomy-id");
                if (!taxonomyId.contentEquals(otherId)) {
                    logger.warn("Multiple organisms for a single target");
                    consistent = false;
                    break;
                }
            }
            if (consistent) {
                organism = new Identifier();
                organism.setDatabaseIdentifier(IdentifierType.TAXONOMY);
                organism.setResourceIdentifier(taxonomyId);
            } else {
                // Ambiguous organism: report nothing rather than a half-filled identifier.
                organism = null;
            }
        }
        return organism;
    }

    /**
     * Collects the external identifiers of the target's first
     * {@code <polypeptide>} whose {@code <resource>} is "UniProtKB"
     * (case-insensitive).
     *
     * @return the matching identifiers typed {@link IdentifierType#UNIPROT};
     *         empty if there is no polypeptide or no match
     */
    private List<Identifier> extractTargetIdentifiers(Element tarEle) {
        List<Identifier> idList = new ArrayList<>();
        NodeList polypeptideList = tarEle.getElementsByTagName("polypeptide");
        if (polypeptideList.getLength() == 0) {
            return idList;
        }
        Element polyEle = (Element) polypeptideList.item(0);
        NodeList containers = polyEle.getElementsByTagName("external-identifiers");
        for (int m = 0; m < containers.getLength(); m++) {
            NodeList extIdentifiers = ((Element) containers.item(m)).getElementsByTagName("external-identifier");
            for (int t = 0; t < extIdentifiers.getLength(); t++) {
                Element extIdEle = (Element) extIdentifiers.item(t);
                NodeList resourceList = extIdEle.getElementsByTagName("resource");
                NodeList identifierList = extIdEle.getElementsByTagName("identifier");
                if (resourceList.getLength() == 0 || identifierList.getLength() == 0) {
                    continue;
                }
                if ("UniProtKB".equalsIgnoreCase(firstText(resourceList))) {
                    Identifier id = new Identifier();
                    // The entry was selected because its resource is UniProtKB,
                    // so it is a UniProt identifier, not a DrugBank one.
                    id.setDatabaseIdentifier(IdentifierType.UNIPROT);
                    id.setResourceIdentifier(firstText(identifierList));
                    idList.add(id);
                }
            }
        }
        return idList;
    }

    /**
     * Builds the target type from the {@code <action>} entries of the first
     * {@code <actions>} element.
     *
     * @return the action texts joined with ", " (a single action is returned
     *         as is); "" when there are no actions
     */
    private String extractTargetType(Element tarEle) {
        StringBuilder joined = new StringBuilder();
        NodeList actionList = tarEle.getElementsByTagName("actions");
        if (actionList.getLength() > 0) {
            NodeList aList = ((Element) actionList.item(0)).getElementsByTagName("action");
            for (int i = 0; i < aList.getLength(); i++) {
                String action = aList.item(i).getTextContent();
                if (action.isEmpty()) {
                    continue;
                }
                // Separate the entries; plain concatenation would fuse
                // several actions into one unreadable word.
                if (joined.length() > 0) {
                    joined.append(", ");
                }
                joined.append(action);
            }
        }
        return joined.toString();
    }
}
name=PropertiesConfig
property.filename =logs
appenders =console,file
appender.console.type = Console
appender.console.name = STDOUT
appender.console.layout.type = PatternLayout
appender.console.layout.pattern = [%-5level] %d{yyyy-MM-dd HH:mm:ss.SSS} [%t] %l - %msg%n
appender.file.type = File
appender.file.name = LOGFILE
appender.file.fileName=${filename}/Mylogs.log