diff --git a/converter/src/main/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParser.java b/converter/src/main/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParser.java index c5562b249b74ece02c3f1c4d858b1c5fa1a64a1a..a80ab4bd6ae5aa9100ba60750e0b30950965ad2a 100644 --- a/converter/src/main/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParser.java +++ b/converter/src/main/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParser.java @@ -26,168 +26,177 @@ import lcsb.mapviewer.model.map.MiriamType; */ public class XmlAnnotationParser extends XmlParser { - /** - * Deafult logger. - */ - private static Logger logger = Logger.getLogger(XmlAnnotationParser.class.getName()); - - /** - * Default constructor. - */ - public XmlAnnotationParser() { - } - - /** - * This method parse the xml string passed as an argument. All information - * obtained by the method are stored in local variables: miriamDataSet, - * speciesId and can be accessed later on. - * - * @param data - * - xml string to be parsed - * - * @return collection of miriam objects that were obtained from the xml node - * @throws InvalidXmlSchemaException - * thrown when there is a problem with xml - */ - public Set<MiriamData> parse(String data) throws InvalidXmlSchemaException { - // start from creating a DOM parser and parse the whole document - Document doc = getXmlDocumentFromString(data); - - NodeList root = doc.getChildNodes(); - - // process the whole schema - return parseRdfNode(root); - } - - /** - * Retrieves set of Miriam annotation from xml rdf node. - * - * @param root - * xml node list - * @return set of miriam annotations. - * @throws InvalidXmlSchemaException - * thrown when there is a problem with xml - */ - public Set<MiriamData> parseRdfNode(NodeList root) throws InvalidXmlSchemaException { - Node rdf = getNode("rdf:RDF", root); - return parseRdfNode(rdf); - } - - /** - * Retrieves set of Miriam annotation from xml rdf node. - * - * @param rdf - * xml node - * @return set of miriam annotations. - * @throws InvalidXmlSchemaException - * thrown when there is a problem with xml - */ - public Set<MiriamData> parseRdfNode(Node rdf) throws InvalidXmlSchemaException { - Set<MiriamData> miriamDataSet = new HashSet<MiriamData>(); - if (rdf != null) { - Node description = getNode("rdf:Description", rdf.getChildNodes()); - if (description != null) { - NodeList list = description.getChildNodes(); - for (int i = 0; i < list.getLength(); i++) { - Node node = list.item(i); - if (node.getNodeType() == Node.ELEMENT_NODE) { - miriamDataSet.addAll(parseMiriamNode(node)); - } - } - } else { - throw new InvalidXmlSchemaException("rdf:Description node not found"); - } - } else { - throw new InvalidXmlSchemaException("rdf:Rdf node not found"); - } - return miriamDataSet; - } - - /** - * This method converts a xml node into MiriamData object (annotation of a - * species). - * - * @param node - * - xml node that contains representation of a single annotation of - * a species - * @return MiriamData object that represents annotation of a species - * @throws InvalidXmlSchemaException - * thrown when there is a problem with xml - */ - public Set<MiriamData> parseMiriamNode(Node node) throws InvalidXmlSchemaException { - Set<MiriamData> result = new HashSet<MiriamData>(); - NodeList list = node.getChildNodes(); - String relationTypeString = node.getNodeName(); - MiriamRelationType relationType = MiriamRelationType.getTypeByStringRepresentation(relationTypeString); - Node bag = getNode("rdf:Bag", list); - if (bag == null) { - throw new InvalidXmlSchemaException("No rdf:Bag node found"); - } - list = bag.getChildNodes(); - List<Node> nodes = getNodes("rdf:li", list); - for (Node li : nodes) { - String dataTypeUri = getNodeAttr("rdf:resource", li); - if (dataTypeUri == null || dataTypeUri.isEmpty()) { - throw new InvalidXmlSchemaException("rdf:li does not have a rdf:resource attribute"); - } - - try { - MiriamData md = MiriamType.getMiriamByUri(dataTypeUri); - if (relationType == null) { - logger.warn("Unknown relation type: " + relationTypeString + ". For miriam uri: " + dataTypeUri + "."); - } else { - if (!MiriamType.PUBMED.equals(md.getDataType())) { - md.setRelationType(relationType); - } - result.add(md); - } - } catch (InvalidArgumentException e) { - logger.warn(e.getMessage()); - } - } - return result; - } - - /** - * This method converts a set of MiriamData into xml string that can be put to - * CellDesigner schema. - * - * @param data - * - a set of MiriamData to be converted. - * @return xml string representation of the input data - */ - public String dataSetToXmlString(Collection<MiriamData> data) { - StringBuilder result = new StringBuilder(""); - result.append( - "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" " + "xmlns:dc=\"http://purl.org/dc/elements/1.1/\" " - + "xmlns:dcterms=\"http://purl.org/dc/terms/\" " + "xmlns:vCard=\"http://www.w3.org/2001/vcard-rdf/3.0#\" " - + "xmlns:bqbiol=\"http://biomodels.net/biology-qualifiers/\" " + "xmlns:bqmodel=\"http://biomodels.net/model-qualifiers/\">\n"); - result.append("<rdf:Description rdf:about=\"#\">\n"); - for (MiriamData miriamData : data) { - result.append(miriamDataToXmlString(miriamData)); - } - result.append("</rdf:Description>\n"); - result.append("</rdf:RDF>\n"); - return result.toString(); - } - - /** - * This method converts a single MiriamData into xml string that can be put to - * CellDesigner schema. - * - * @param data - * - a MiriamData to be converted. - * @return xml string representation of the input data - */ - public String miriamDataToXmlString(MiriamData data) { - StringBuilder result = new StringBuilder(""); - result.append("<" + data.getRelationType().getStringRepresentation() + ">\n"); - result.append("<rdf:Bag>\n"); - result.append("<rdf:li rdf:resource=\"" + data.getDataType().getUris().get(0) + ":" + data.getResource().replaceAll(":", "%3A") + "\"/>\n"); - result.append("</rdf:Bag>\n"); - result.append("</" + data.getRelationType().getStringRepresentation() + ">\n"); - - return result.toString(); - } + /** + * Deafult logger. + */ + private static Logger logger = Logger.getLogger(XmlAnnotationParser.class.getName()); + + /** + * Default constructor. + */ + public XmlAnnotationParser() { + } + + /** + * This method parse the xml string passed as an argument. All information + * obtained by the method are stored in local variables: miriamDataSet, + * speciesId and can be accessed later on. + * + * @param data + * - xml string to be parsed + * + * @return collection of miriam objects that were obtained from the xml node + * @throws InvalidXmlSchemaException + * thrown when there is a problem with xml + */ + public Set<MiriamData> parse(String data) throws InvalidXmlSchemaException { + // start from creating a DOM parser and parse the whole document + Document doc = getXmlDocumentFromString(data); + + NodeList root = doc.getChildNodes(); + + // process the whole schema + return parseRdfNode(root); + } + + /** + * Retrieves set of Miriam annotation from xml rdf node. + * + * @param root + * xml node list + * @return set of miriam annotations. + * @throws InvalidXmlSchemaException + * thrown when there is a problem with xml + */ + public Set<MiriamData> parseRdfNode(NodeList root) throws InvalidXmlSchemaException { + Node rdf = getNode("rdf:RDF", root); + return parseRdfNode(rdf); + } + + /** + * Retrieves set of Miriam annotation from xml rdf node. + * + * @param rdf + * xml node + * @return set of miriam annotations. + * @throws InvalidXmlSchemaException + * thrown when there is a problem with xml + */ + public Set<MiriamData> parseRdfNode(Node rdf) throws InvalidXmlSchemaException { + Set<MiriamData> miriamDataSet = new HashSet<>(); + if (rdf != null) { + Node description = getNode("rdf:Description", rdf.getChildNodes()); + if (description != null) { + NodeList list = description.getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + Node node = list.item(i); + if (node.getNodeType() == Node.ELEMENT_NODE) { + String relationTypeString = node.getNodeName(); + MiriamRelationType relationType = MiriamRelationType.getTypeByStringRepresentation(relationTypeString); + if (relationType != null) { + miriamDataSet.addAll(parseMiriamNode(node)); + } else { + logger.warn("RDF relation type is not supported: " + relationTypeString); + } + } + } + } else { + throw new InvalidXmlSchemaException("rdf:Description node not found"); + } + } else { + throw new InvalidXmlSchemaException("rdf:Rdf node not found"); + } + return miriamDataSet; + } + + /** + * This method converts a xml node into MiriamData object (annotation of a + * species). + * + * @param node + * - xml node that contains representation of a single annotation of a + * species + * @return MiriamData object that represents annotation of a species + * @throws InvalidXmlSchemaException + * thrown when there is a problem with xml + */ + public Set<MiriamData> parseMiriamNode(Node node) throws InvalidXmlSchemaException { + Set<MiriamData> result = new HashSet<>(); + NodeList list = node.getChildNodes(); + String relationTypeString = node.getNodeName(); + MiriamRelationType relationType = MiriamRelationType.getTypeByStringRepresentation(relationTypeString); + Node bag = getNode("rdf:Bag", list); + if (bag == null) { + throw new InvalidXmlSchemaException("No rdf:Bag node found"); + } + list = bag.getChildNodes(); + List<Node> nodes = getNodes("rdf:li", list); + for (Node li : nodes) { + String dataTypeUri = getNodeAttr("rdf:resource", li); + if (dataTypeUri == null || dataTypeUri.isEmpty()) { + throw new InvalidXmlSchemaException("rdf:li does not have a rdf:resource attribute"); + } + + try { + MiriamData md = MiriamType.getMiriamByUri(dataTypeUri); + if (relationType == null) { + logger.warn("Unknown relation type: " + relationTypeString + ". For miriam uri: " + dataTypeUri + "."); + } else { + if (!MiriamType.PUBMED.equals(md.getDataType())) { + md.setRelationType(relationType); + } + result.add(md); + } + } catch (InvalidArgumentException e) { + logger.warn(e.getMessage()); + } + } + return result; + } + + /** + * This method converts a set of MiriamData into xml string that can be put to + * CellDesigner schema. + * + * @param data + * - a set of MiriamData to be converted. + * @return xml string representation of the input data + */ + public String dataSetToXmlString(Collection<MiriamData> data) { + StringBuilder result = new StringBuilder(""); + result.append( + "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\" " + + "xmlns:dc=\"http://purl.org/dc/elements/1.1/\" " + + "xmlns:dcterms=\"http://purl.org/dc/terms/\" " + "xmlns:vCard=\"http://www.w3.org/2001/vcard-rdf/3.0#\" " + + "xmlns:bqbiol=\"http://biomodels.net/biology-qualifiers/\" " + + "xmlns:bqmodel=\"http://biomodels.net/model-qualifiers/\">\n"); + result.append("<rdf:Description rdf:about=\"#\">\n"); + for (MiriamData miriamData : data) { + result.append(miriamDataToXmlString(miriamData)); + } + result.append("</rdf:Description>\n"); + result.append("</rdf:RDF>\n"); + return result.toString(); + } + + /** + * This method converts a single MiriamData into xml string that can be put to + * CellDesigner schema. + * + * @param data + * - a MiriamData to be converted. + * @return xml string representation of the input data + */ + public String miriamDataToXmlString(MiriamData data) { + StringBuilder result = new StringBuilder(""); + result.append("<" + data.getRelationType().getStringRepresentation() + ">\n"); + result.append("<rdf:Bag>\n"); + result.append("<rdf:li rdf:resource=\"" + data.getDataType().getUris().get(0) + ":" + + data.getResource().replaceAll(":", "%3A") + "\"/>\n"); + result.append("</rdf:Bag>\n"); + result.append("</" + data.getRelationType().getStringRepresentation() + ">\n"); + + return result.toString(); + } } diff --git a/converter/src/test/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParserTest.java b/converter/src/test/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParserTest.java index 68b70d80c391bc064dba2fefeca97284f69671b0..748d392229730dc572aa6a3d0ea978f1ce00f445 100644 --- a/converter/src/test/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParserTest.java +++ b/converter/src/test/java/lcsb/mapviewer/converter/annotation/XmlAnnotationParserTest.java @@ -83,6 +83,20 @@ public class XmlAnnotationParserTest extends ConverterTestFunctions { } } + @Test + public void testParseRdfWithCreatorTag() throws Exception { + try { + XmlAnnotationParser xap = new XmlAnnotationParser(); + String xml = readFile("testFiles/annotation/rdf_with_creator_tag.xml"); + + Set<MiriamData> set = xap.parse(xml); + assertTrue(set.size() > 0); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + } + @Test public void testParseInvalidRdf2() throws Exception { try { diff --git a/converter/testFiles/annotation/rdf_with_creator_tag.xml b/converter/testFiles/annotation/rdf_with_creator_tag.xml new file mode 100644 index 0000000000000000000000000000000000000000..fc1d44356eaa607d7bfd5000710b639cbb24edb7 --- /dev/null +++ b/converter/testFiles/annotation/rdf_with_creator_tag.xml @@ -0,0 +1,74 @@ +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:dcterms="http://purl.org/dc/terms/" + xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" + xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" + xmlns:bqmodel="http://biomodels.net/model-qualifiers/"> + <rdf:Description rdf:about="#metaid_0000002"> + <dc:creator> + <rdf:Bag> + <rdf:li rdf:parseType="Resource"> + <vCard:N rdf:parseType="Resource"> + <vCard:Family>Roberts</vCard:Family> + <vCard:Given>Matthew Grant</vCard:Given> + </vCard:N> + <vCard:EMAIL>mroberts@embl.ac.uk</vCard:EMAIL> + <vCard:ORG rdf:parseType="Resource"> + <vCard:Orgname>EMBL-EBI</vCard:Orgname> + </vCard:ORG> + </rdf:li> + <rdf:li rdf:parseType="Resource"> + <vCard:N rdf:parseType="Resource"> + <vCard:Family>Lloyd</vCard:Family> + <vCard:Given>Catherine</vCard:Given> + </vCard:N> + <vCard:EMAIL>c.lloyd@auckland.ac.nz</vCard:EMAIL> + <vCard:ORG rdf:parseType="Resource"> + <vCard:Orgname>University of Auckland</vCard:Orgname> + </vCard:ORG> + </rdf:li> + </rdf:Bag> + </dc:creator> + <dcterms:created rdf:parseType="Resource"> + <dcterms:W3CDTF>2010-06-25T13:23:00Z</dcterms:W3CDTF> + </dcterms:created> + <dcterms:modified rdf:parseType="Resource"> + <dcterms:W3CDTF>2018-03-08T16:30:35Z</dcterms:W3CDTF> + </dcterms:modified> + <dcterms:modified rdf:parseType="Resource"> + <dcterms:W3CDTF>2018-03-08T16:30:35Z</dcterms:W3CDTF> + </dcterms:modified> + <bqmodel:is> + <rdf:Bag> + <rdf:li + rdf:resource="http://identifiers.org/biomodels.db/MODEL1006230061" /> + </rdf:Bag> + </bqmodel:is> + <bqmodel:isDescribedBy> + <rdf:Bag> + <rdf:li rdf:resource="http://identifiers.org/pubmed/13678598" /> + </rdf:Bag> + </bqmodel:isDescribedBy> + <bqbiol:isVersionOf> + <rdf:Bag> + <rdf:li rdf:resource="http://identifiers.org/go/GO:0002250" /> + </rdf:Bag> + </bqbiol:isVersionOf> + <bqbiol:hasTaxon> + <rdf:Bag> + <rdf:li rdf:resource="http://identifiers.org/taxonomy/9606" /> + </rdf:Bag> + </bqbiol:hasTaxon> + <bqbiol:isDescribedBy> + <rdf:Bag> + <rdf:li rdf:resource="http://identifiers.org/pubmed/13678598" /> + </rdf:Bag> + </bqbiol:isDescribedBy> + <bqmodel:is> + <rdf:Bag> + <rdf:li + rdf:resource="http://identifiers.org/biomodels.db/BIOMD0000000684" /> + </rdf:Bag> + </bqmodel:is> + </rdf:Description> +</rdf:RDF> \ No newline at end of file diff --git a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java index 16be247cdd9510a05f251e136da04c44ceea924c..94384aa60ebe99127629b5ccba12b0bc4b5fc3ef 100644 --- a/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java +++ b/model/src/main/java/lcsb/mapviewer/model/map/MiriamType.java @@ -108,7 +108,7 @@ public enum MiriamType { CLINICAL_TRIALS_GOV("ClinicalTrials.gov", "https://clinicaltrials.gov/", new String[] { "urn:miriam:clinicaltrials" }, - new Class<?>[] { }, "MIR:00000137"), + new Class<?>[] {}, "MIR:00000137"), /** * Clusters of Orthologous Groups: https://www.ncbi.nlm.nih.gov/COG/. @@ -178,7 +178,7 @@ public enum MiriamType { */ GO("Gene Ontology", "http://amigo.geneontology.org/amigo", - new String[] { "urn:miriam:obo.go", "urn:miriam:go" }, + new String[] { "urn:miriam:obo.go", "urn:miriam:go", "http://identifiers.org/go/" }, new Class<?>[] { Phenotype.class, Compartment.class, Complex.class }, "MIR:00000022"), /** @@ -358,7 +358,7 @@ public enum MiriamType { */ PUBMED("PubMed", "http://www.ncbi.nlm.nih.gov/PubMed/", - new String[] { "urn:miriam:pubmed" }, + new String[] { "urn:miriam:pubmed", "http://identifiers.org/pubmed/" }, new Class<?>[] { BioEntity.class }, "MIR:00000015", new Class<?>[] { Reaction.class }),