Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Devrim Gunyel
core
Commits
2a895d17
Commit
2a895d17
authored
Apr 01, 2019
by
David Hoksza
Browse files
tair name; tair and string annotator only annotate based on human annotated miriam data
parent
edaf7627
Changes
7
Hide whitespace changes
Inline
Side-by-side
annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/KeggAnnotator.java
View file @
2a895d17
...
...
@@ -208,7 +208,7 @@ public class KeggAnnotator extends ElementAnnotator implements IExternalService
String
[]
tairLocusNames
=
m
.
group
(
1
).
trim
().
split
(
" "
);
for
(
String
tairLocusName
:
tairLocusNames
)
{
tairLocusName
=
tairLocusName
.
split
(
"\\("
)[
0
];
// some codes are in the form AT1G08510(FATB)
MiriamData
md
=
uniprotAnnotator
.
uniprotTairLocusToId
(
tairLocusName
);
MiriamData
md
=
uniprotAnnotator
.
uniprotTairLocus
Name
ToId
(
tairLocusName
);
if
(!
md
.
equals
(
new
MiriamData
())){
result
.
add
(
md
);
}
...
...
annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/StringAnnotator.java
View file @
2a895d17
...
...
@@ -32,6 +32,12 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
/**
* Service used for annotation of entities using {@link MiriamType#TAIR_LOCUS
* TAIR}.
* Note that STRING annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*/
private
TairAnnotator
tairAnnotator
;
...
...
@@ -54,7 +60,9 @@ public class StringAnnotator extends ElementAnnotator implements IExternalServic
throws
AnnotatorException
{
List
<
MiriamData
>
mdUniprots
=
new
ArrayList
<>();
if
(
identifier
.
getDataType
().
equals
(
MiriamType
.
TAIR_LOCUS
))
{
mdUniprots
.
addAll
(
tairAnnotator
.
tairToUniprot
(
identifier
));
if
(
identifier
.
getAnnotator
()
==
null
)
{
mdUniprots
.
addAll
(
tairAnnotator
.
tairToUniprot
(
identifier
));
}
}
else
if
(
identifier
.
getDataType
().
equals
(
MiriamType
.
UNIPROT
))
{
mdUniprots
.
add
(
identifier
);
}
else
{
...
...
annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotator.java
View file @
2a895d17
...
...
@@ -5,6 +5,8 @@ import java.util.Arrays;
import
java.util.Collection
;
import
java.util.HashSet
;
import
java.util.List
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.log4j.Logger
;
import
org.springframework.stereotype.Service
;
...
...
@@ -28,6 +30,13 @@ import lcsb.mapviewer.model.user.annotator.AnnotatorOutputParameter;
/**
* This is a class that implements a backend to TAIR.
* Note that TAIR annotation process will annotate only
* records which have a TAIR ID assigned by a human annotator.
* Otherwise, it would generate UniProt miriam records also for
* TAIR IDs generated from, e.g., KEGG annotator, i.e. for homologues
* and these UniProt IDs would be indistinguishable from the
* UniProt IDs describing the molecule.
*
*
* @author David Hoksza
*
...
...
@@ -39,6 +48,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* Default class logger.
*/
private
static
Logger
logger
=
Logger
.
getLogger
(
TairAnnotator
.
class
);
/**
 * Builds the pattern used to extract the UniProt identifier that is mapped to
 * the given TAIR locus ID in a tab-separated UniProt result page, i.e. in rows
 * of the form {@code <uniprot id><TAB>locus:<tair id>}.
 *
 * @param tairId
 *          TAIR locus ID that the row must refer to; it is matched literally
 * @return pattern whose first capturing group is the UniProt identifier
 */
private Pattern getUniprotIdParsePattern(String tairId) {
  // Pattern.quote keeps regex metacharacters in the identifier from being
  // interpreted; the negative lookahead prevents matching a longer locus ID
  // that merely starts with tairId (e.g. 2200950 vs 22009501).
  return Pattern.compile("(\\w*)\\tlocus:" + Pattern.quote(tairId) + "(?!\\w)");
}
/**
* Default constructor.
...
...
@@ -74,7 +91,13 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public
boolean
annotateElement
(
BioEntityProxy
object
,
MiriamData
identifier
,
AnnotatorData
parameters
)
throws
AnnotatorException
{
if
(
identifier
.
getAnnotator
()
!=
null
)
{
return
false
;
}
if
(
identifier
.
getDataType
().
equals
(
MiriamType
.
TAIR_LOCUS
))
{
// UniProt IDs are only obtained from TAIR IDs which were provided by a human annotator (otherwise we would
// also get UniProt IDs for, e.g., homologous genes' TAIR IDs obtained from KEGG)
Collection
<
MiriamData
>
collection
=
tairToUniprot
(
identifier
);
if
(
collection
.
size
()
>
0
)
{
object
.
addMiriamData
(
collection
);
...
...
@@ -97,8 +120,8 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* @return URL to UniProt result page with the TAIR mapping
*/
private
String
getUniProtUrl
(
String
tairId
)
{
return
"https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+"
+
tairId
+
"
&format=list&columns=id"
;
return
"https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+"
+
tairId
+
"&format=tab&columns=id,database(tair)"
;
// return "https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+"+tairId+"%29
&format=list&columns=id";
}
/**
...
...
@@ -110,11 +133,14 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
* uniprot REST API result page
* @return uniprot identifier found on the page
*/
private
Collection
<
MiriamData
>
parseUniprotUniprot
(
String
pageContent
)
{
private
Collection
<
MiriamData
>
parseUniprotUniprot
(
String
pageContent
,
String
tairId
)
{
Collection
<
MiriamData
>
result
=
new
HashSet
<
MiriamData
>();
if
(!
pageContent
.
isEmpty
())
{
String
[]
sPageContent
=
pageContent
.
split
(
"\\R"
);
result
.
add
(
new
MiriamData
(
MiriamType
.
UNIPROT
,
sPageContent
[
0
]));
//the query returns a list of possible matches which needs to be pruned
Matcher
m
=
getUniprotIdParsePattern
(
tairId
).
matcher
(
pageContent
);
if
(
m
.
find
())
{
result
.
add
(
new
MiriamData
(
MiriamType
.
UNIPROT
,
m
.
group
(
1
)));
}
}
return
result
;
}
...
...
@@ -144,7 +170,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
try
{
String
accessUrl
=
getUniProtUrl
(
tair
.
getResource
());
String
pageContent
=
getWebPageContent
(
accessUrl
);
return
parseUniprotUniprot
(
pageContent
);
return
parseUniprotUniprot
(
pageContent
,
tair
.
getResource
()
);
}
catch
(
WrongResponseCodeIOException
exception
)
{
logger
.
warn
(
"Wrong reponse code when accessing tair data with id: "
+
tair
.
getResource
());
return
null
;
...
...
@@ -185,7 +211,7 @@ public class TairAnnotator extends ElementAnnotator implements IExternalService
@Override
public
MiriamData
getExampleValidAnnotation
()
{
return
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
AT1G0103
0"
);
return
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
220095
0"
);
}
}
annotation/src/main/java/lcsb/mapviewer/annotation/services/annotators/UniprotAnnotator.java
View file @
2a895d17
...
...
@@ -61,7 +61,7 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
.
compile
(
"EC=((\\d+\\.-\\.-\\.-)|(\\d+\\.\\d+\\.-\\.-)|(\\d+\\.\\d+\\.\\d+\\.-)|(\\d+\\.\\d+\\.\\d+\\.\\d+))"
);
/**
* Pattern used for
finding EC
symbol from UniProt
info
page
.
* Pattern used for
getting Tair Locus ID
symbol from UniProt
result
page.
*/
private
Pattern
uniprotTairLocusToId
=
Pattern
.
compile
(
"locus:(\\d*)"
);
...
...
@@ -151,9 +151,8 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
* TAIR identifier
* @return URL to UniProt result page with the TAIR mapping
*/
private
String
getUniProtTairLocus2IdUrl
(
String
tairLocus
)
{
return
"https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair%29+"
+
tairLocus
+
"&format=tab&columns=id,database(tair) "
;
private
String
getUniProtTairLocus2IdUrl
(
String
tairLocusName
)
{
return
"https://www.uniprot.org/uniprot/?query=database%3A%28type%3Atair+"
+
tairLocusName
+
"%29&format=tab&columns=id,database(tair)"
;
}
/**
...
...
@@ -281,7 +280,7 @@ public class UniprotAnnotator extends ElementAnnotator implements IExternalServi
* @throws UniprotSearchException
* thrown when there is a problem with accessing external database
*/
public
MiriamData
uniprotTairLocusToId
(
String
tairLocus
)
throws
UniprotSearchException
{
public
MiriamData
uniprotTairLocus
Name
ToId
(
String
tairLocus
)
throws
UniprotSearchException
{
String
accessUrl
=
getUniProtTairLocus2IdUrl
(
tairLocus
);
try
{
String
pageContent
=
getWebPageContent
(
accessUrl
);
...
...
annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/StringAnnotatorTest.java
View file @
2a895d17
...
...
@@ -57,12 +57,12 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
}
@Test
@Ignore
(
"TAIR DB restricts queries by IP"
)
//
@Ignore("TAIR DB restricts queries by IP")
public
void
testAnnotateTair
()
throws
Exception
{
try
{
Species
bioEntity
=
new
GenericProtein
(
"id"
);
bioEntity
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
AT1G0103
0"
));
bioEntity
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
220095
0"
));
testedAnnotator
.
annotateElement
(
bioEntity
);
...
...
@@ -70,18 +70,35 @@ public class StringAnnotatorTest extends AnnotationTestFunctions {
for
(
MiriamData
md
:
bioEntity
.
getMiriamData
())
{
if
(
md
.
getDataType
().
equals
(
MiriamType
.
STRING
))
{
mdString
=
md
;
// there should be only one EC number for that TAIR<->UNIPROT record
mdString
=
md
;
}
}
assertTrue
(
"No STRING annotation extracted from STRING annotator"
,
mdString
!=
null
);
assertTrue
(
"Wrong number of annotations extract from STRING annotator"
,
bioEntity
.
getMiriamData
().
size
()
==
3
);
assertTrue
(
"Wrong number of annotations extract from STRING annotator"
,
bioEntity
.
getMiriamData
().
size
()
==
2
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
throw
e
;
}
}
/**
 * Checks that a TAIR locus which was attached by an automatic annotator (here
 * {@link KeggAnnotator}) is not used as a source for further annotation: after
 * annotating, the entity must still hold only the single TAIR record it
 * started with.
 */
@Test
//@Ignore("TAIR DB restricts queries by IP")
public void testAnnotateTairOnlyFromHumanAnnotator() throws Exception {
  Species bioEntity = new GenericProtein("id");
  bioEntity.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200950", KeggAnnotator.class));

  testedAnnotator.annotateElement(bioEntity);

  // Any exception propagates to the test runner, which reports the stack
  // trace itself, so no catch/print/rethrow wrapper is needed.
  assertTrue("TAIR locus provided by an annotator must not produce new annotations",
      bioEntity.getMiriamData().size() == 1);
}
@Test
public
void
testAnnotateInvalidEmpty
()
throws
Exception
{
...
...
annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/TairAnnotatorTest.java
View file @
2a895d17
package
lcsb.mapviewer.annotation.services.annotators
;
import
static
org
.
junit
.
Assert
.
assertEquals
;
import
static
org
.
junit
.
Assert
.
assertFalse
;
import
static
org
.
junit
.
Assert
.
assertNull
;
import
static
org
.
junit
.
Assert
.
assertTrue
;
import
static
org
.
junit
.
Assert
.
fail
;
...
...
@@ -45,7 +46,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species
protein
=
new
GenericProtein
(
"id"
);
protein
.
setName
(
"bla"
);
protein
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"AT1G01030
"
));
protein
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
2200950"
));
//
AT1G01030
tairAnnotator
.
annotateElement
(
protein
);
...
...
@@ -74,7 +75,7 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
Species
protein
=
new
GenericProtein
(
"id"
);
protein
.
setName
(
"bla"
);
protein
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
AT1G15950
"
));
protein
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"
2200427
"
));
protein
.
addMiriamData
(
new
MiriamData
(
MiriamType
.
UNIPROT
,
"P32246"
));
// Human version of the protein
tairAnnotator
.
annotateElement
(
protein
);
...
...
@@ -163,6 +164,22 @@ public class TairAnnotatorTest extends AnnotationTestFunctions {
throw
e
;
}
}
/**
 * Checks that TAIR loci coming from automatic annotators (here
 * {@link KeggAnnotator}) are ignored by the TAIR annotator; only TAIR loci
 * provided by a human annotator should be considered. After annotating, the
 * protein must still hold only the single TAIR record it started with.
 */
@Test
public void testTairToUniprotFromKEGG() throws Exception {
  Species protein = new GenericProtein("id");
  protein.setName("bla");
  protein.addMiriamData(new MiriamData(MiriamType.TAIR_LOCUS, "2200427", KeggAnnotator.class));

  tairAnnotator.annotateElement(protein);

  // assertEquals reports the actual number of records on failure; exceptions
  // propagate to the test runner, which prints the stack trace itself.
  assertEquals("TAIR locus provided by an annotator must not produce new annotations",
      1, protein.getMiriamData().size());
}
@Test
// @Ignore("TAIR DB restricts queries by IP")
...
...
annotation/src/test/java/lcsb/mapviewer/annotation/services/annotators/UniprotAnnotatorTest.java
View file @
2a895d17
...
...
@@ -283,7 +283,7 @@ public class UniprotAnnotatorTest extends AnnotationTestFunctions {
public
void
testUniprotTairLocusToId
()
throws
Exception
{
try
{
assertEquals
(
new
MiriamData
(
MiriamType
.
TAIR_LOCUS
,
"2201786"
),
uniprotAnnotator
.
uniprotTairLocusToId
(
"AT1G08510"
)
);
uniprotAnnotator
.
uniprotTairLocus
Name
ToId
(
"AT1G08510"
)
);
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
throw
e
;
...
...
@@ -294,7 +294,7 @@ public class UniprotAnnotatorTest extends AnnotationTestFunctions {
public
void
testUniprotWrongTairLocusToId
()
throws
Exception
{
try
{
assertEquals
(
new
MiriamData
(),
uniprotAnnotator
.
uniprotTairLocusToId
(
"
XXXXXX
"
));
uniprotAnnotator
.
uniprotTairLocus
Name
ToId
(
"
bla
"
));
}
catch
(
Exception
e
)
{
e
.
printStackTrace
();
throw
e
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment