Commit de62cea2 authored by Emma Schymanski
Browse files

Update Transformations data

... ran Transformations.R to update stats and files. TODO: two new entries in MassDiffs_ge5_wAnnotation that need annotating.
parent 664f9a38
......@@ -36,12 +36,13 @@ which(is.na(SLE_tp_output$TP_Unique_CIDs))
SLE_cids <- na.exclude(SLE_tp_output$TP_Unique_CIDs)
length(SLE_cids)
# 4771 on June 5th, 4744 on June 12th (S79 was missing), 4786 June 13th.
# 4880 Oct 19th.
# Read the transformations table from the file named in the TP output.
# Use FALSE rather than the shorthand F: F is an ordinary variable that can
# be reassigned, whereas FALSE is a reserved word.
SLE_trans <- read.csv(SLE_tp_output$TP_File_Name, stringsAsFactors = FALSE)
# check CID count
length(unique(na.exclude(c(SLE_trans$predecessorcid,SLE_trans$successorcid))))
# 4771 on June 5th
# 4771 on June 5th. 4880 Oct 19th.
# Legacy code to retrieve per CID (much slower) is now below.
......@@ -63,7 +64,7 @@ ChEMBL_trans <- read.csv(ChEMBL_tp_output$TP_File_Name,stringsAsFactors = F)
ChEMBL_cids <- unique(c(ChEMBL_trans$substratecid,ChEMBL_trans$metabolitecid))
length(ChEMBL_cids)
# 1418 on June 5th and 13th
# 1418 on June 5th and 13th and Oct 19th
#### Merging NORMAN-SLE and ChEMBL Data #####
......@@ -76,7 +77,7 @@ colnames(ChEMBL_trans)
SLE_cols <- c("cid","predecessor", "predecessorcid", "transformation", "successor",
"successorcid", "evidencedoi", "evidenceref", "sourcecomment",
"sourcecommentfull", "datasetdoi", "datasetref", "enzyme",
"biosystem", "cmpdname")
"biosystem")#, "cmpdname")
# rename because CID col is incorrectly recognised
colnames(SLE_trans) <- SLE_cols
......@@ -122,7 +123,7 @@ all_trans <- all_trans[,-which(colnames(all_trans) %in% col_headers_rm)]
# now unique-ify, since some trans are duplicated, especially SLE
all_trans <- unique(all_trans)
length(all_trans$predecessor)
# 5844 entries, June 5th and June 13th
# 5844 entries, June 5th and June 13th. 5971 Oct 19th.
# write out merged file - to subdir and main dir
......@@ -134,7 +135,7 @@ write.csv(all_trans,paste0(PC_tp_dir,"PubChem_all_transformations.csv",
# get all CIDs
all_CIDs <- unique(na.omit(c(all_trans$predecessorcid,all_trans$successorcid)))
length(all_CIDs) #6007 - June 5th; 6022 June 13th
length(all_CIDs) #6007 - June 5th; 6022 June 13th; 6109 Oct 19th.
# retrieve the information using tips from Rick
selected_properties <- c("MolecularFormula","ExactMass","XlogP",
......@@ -164,7 +165,7 @@ NA_pred <- which(is.na(all_trans$predecessorcid))
# Drop rows that have no predecessor CID.
# The guard is required: with an empty index, all_trans[-NA_pred, ] would
# select zero rows instead of leaving the table untouched.
# The comparison belongs outside length(); length(NA_pred > 0) only worked
# because the length of the logical vector equals the length of NA_pred.
if (length(NA_pred) > 0) {
  all_trans <- all_trans[-NA_pred, ]
}
length(all_trans$predecessorcid) # June 5: 5822; June 13: 5844
length(all_trans$predecessorcid) # June 5: 5822; June 13: 5844. Oct 19th 5971
all_trans$CID_to_CID <- ""
......@@ -263,6 +264,14 @@ writeLines(summary_text)
# Range of XlogP Differences: -9.9,9.4
# Range of Mass Differences: -893.1581377,749.104645569
## October 19:
# Unique Transformation Entries: 5971
# Unique Reactions by CID: 5056
# Unique Reactions by IK: 5045
# Unique Reactions by IKFB: 4783
# Range of XlogP Differences: -9.9,9.4
# Range of Mass Differences: -893.1581377,749.104645569
summary_file <- "Transformations_Summary_Stats.txt"
writeLines(summary_text,con=summary_file)
......
This source diff could not be displayed because it is too large. You can view the blob instead.
"MassDiff","Count"
15.9949,1247
176.0321,508
-14.0157,428
15.9949,1253
176.0321,510
-14.0157,430
79.9568,105
-28.0313,103
-28.0313,105
-2.0157,83
13.9793,71
2.0157,65
18.0106,57
0,55
13.9793,72
2.0157,67
18.0106,60
0,57
31.9898,52
29.9742,48
-42.047,43
42.0106,43
29.9742,43
-42.047,42
-15.9949,32
14.0157,28
177.0399,24
......@@ -29,47 +29,49 @@
1.9793,15
-30.0106,14
-43.9898,13
-1.0078,13
-29.9742,13
-12.0364,12
-1.0078,12
0.984,12
-72.0211,11
-44.0262,12
-38.0157,11
-44.0262,11
-72.0211,11
79.9663,10
-76.0313,10
-61.9923,10
-15.0109,9
179.1715,9
-27.9949,9
-90.047,9
271.1071,8
-27.9949,8
45.9958,8
-3.9868,8
-0.0364,8
-40.0313,8
-0.0364,7
-58.0419,7
15.9585,8
-20.0062,8
162.0528,7
192.027,7
-58.0419,7
57.0215,7
192.027,7
-29.9928,7
-207.0684,7
-33.961,7
18.9946,7
15.9585,7
44.9851,7
-181.0487,7
-20.0062,7
3.9949,6
-1.9957,6
306.076,6
-56.0262,6
-57.0215,6
-71.0371,6
44.9851,6
-86.0368,6
-49.9923,5
14.9871,5
-68.0626,5
-43.0058,5
-42.0218,5
14.9633,5
-99.0684,5
-123.9748,5
......@@ -80,14 +82,13 @@
-166.0491,5
-4.0313,4
-176.0321,4
-1.9957,4
-147.1048,4
-100.016,4
-100.0524,4
-64.008,4
-146.0579,4
26.0732,4
-125.8966,4
0.9476,4
116.0473,4
-45.9974,4
-70.0419,4
......@@ -95,6 +96,8 @@
-83.0735,4
-241.0045,4
-255.0201,4
-30.047,4
-64.008,4
-44.0011,4
-164.0837,4
-112.1252,4
......@@ -104,17 +107,15 @@
-96.0211,4
-82.0783,4
-25.9793,4
-116.0473,3
-49.9968,4
-13.0316,3
-108.0211,3
-54.047,3
-2.0156,3
43.9898,3
-164.0637,3
178.0477,3
2.0156,3
-893.1581,3
65.9412,3
212.0086,3
-96.1303,3
-260.1889,3
-72.0575,3
......@@ -129,11 +130,11 @@
-14.052,3
-63.9312,3
-41.0629,3
-30.047,3
-90.0236,3
-26.052,3
62.0093,3
-85.0528,3
65.9412,3
-76.008,3
15.0235,3
-182.0376,3
......@@ -141,10 +142,15 @@
-75.0473,3
-119.0371,3
-134.0844,3
11.9636,3
-13.9615,3
-63.9619,3
-116.0473,3
212.0086,3
-235.0376,3
-144.0535,3
43.9898,3
-16.0313,3
-106.0089,3
-346.0545,3
-183.0132,3
......@@ -153,6 +159,7 @@
-94.0419,3
-180.0647,3
-83.1099,3
-85.9638,3
-51.0109,3
-171.0895,2
-152.0637,2
......@@ -170,23 +177,24 @@
-129.0426,2
-45.9782,2
-123.1048,2
-3.0235,2
-45.0215,2
-3.0235,2
-27.0109,2
220.0219,2
-108.0375,2
227.1576,2
119.0041,2
-82.003,2
46.0055,2
0.9476,2
-26.0269,2
-28.0425,2
-56.0374,2
-44.0374,2
-31.0184,2
132.0423,2
2.9997,2
60.0211,2
29.0027,2
-221.1416,2
-24,2
160.0372,2
162.0164,2
-75.9716,2
-66.0218,2
-55.0786,2
......@@ -210,6 +218,10 @@
-60.0364,2
-10.0207,2
-74.0287,2
206.0063,2
-118.0089,2
289.0732,2
26.0031,2
-32.0262,2
-103.8801,2
-69.9191,2
......@@ -219,6 +231,7 @@
30.0106,2
-152.0061,2
-136.0289,2
160.0372,2
-186.1521,2
-92.0029,2
-130.9596,2
......@@ -231,7 +244,6 @@
-274.2045,2
-82.151,2
-131.9004,2
11.9636,2
-4.0232,2
72.0114,2
-31.9721,2
......@@ -240,7 +252,6 @@
-27.0473,2
-14.0156,2
27.9949,2
-118.0089,2
-164.0473,2
-58.0241,2
14.9923,2
......@@ -262,15 +273,19 @@
-195.0644,2
-297.0419,2
47.9847,2
-42.0218,2
220.0219,2
-21.034,2
193.0348,2
-22.0419,2
-88.0524,2
-125.0841,2
-166.0794,2
-98.0592,2
-44.9851,2
-30.0218,2
60.0211,2
-137.9872,2
162.0164,2
-87.032,2
-125.8967,2
-36.0034,2
......@@ -280,23 +295,22 @@
12.0081,2
-27.0837,2
-198.0753,2
-12.0112,2
-28.9902,2
-95.0371,2
-16.0313,2
-122.0368,2
26.0031,2
289.0732,2
16.0313,2
-34.0055,2
-76.0524,2
-0.0476,2
-166.0913,2
-122.0368,2
-177.0651,2
-189.9952,2
-135.9606,2
-84.0575,2
-23.016,2
-89.0265,2
-284.0321,2
-166.0794,2
-140.9884,2
-12.0728,2
-176.0249,2
......@@ -305,7 +319,6 @@
-124.0807,2
-295.0892,2
-194.2762,2
-132.0245,1
-88.016,1
-37.9923,1
-134.0498,1
......@@ -315,11 +328,6 @@
-162.0164,1
-204.0634,1
-142.023,1
-167.0735,1
-334.1893,1
-521.3213,1
-354.2056,1
-41.0993,1
156.0503,1
-81.8974,1
64.016,1
......@@ -328,9 +336,9 @@
20.0132,1
-31.9989,1
-216.115,1
-147.0684,1
140.0554,1
-61.9559,1
-46.0219,1
-65.9872,1
-88.0677,1
32.9977,1
......@@ -341,11 +349,6 @@
85.0891,1
43.0422,1
4.0313,1
222.0376,1
218.0063,1
-104.0278,1
8.8986,1
749.1046,1
-211.9867,1
-146.0368,1
148.0372,1
......@@ -357,26 +360,13 @@
194.0427,1
-178.0477,1
-27.0235,1
-173.0299,1
236.0168,1
59.0007,1
43.0058,1
-89.0441,1
178.0477,1
-202.0918,1
-8.0051,1
-36,1
-40.0061,1
-58.0167,1
-226.0217,1
-14.9552,1
208.0583,1
29.0027,1
63.9619,1
-261.0637,1
-319.0692,1
-138.9858,1
-57.9513,1
-148.1113,1
51.9255,1
19.9818,1
-12.9714,1
190.0477,1
......@@ -399,6 +389,7 @@
-140.0698,1
-267.1695,1
-116.095,1
-147.0684,1
-1.9712,1
-254.0943,1
-262.8768,1
......@@ -409,8 +400,8 @@
-71.9363,1
-144.1051,1
-131.0735,1
-37.0429,1
-224.1313,1
-37.0429,1
-97.0276,1
-421.1277,1
-278.1179,1
......@@ -421,6 +412,10 @@
-322.1958,1
-73.0164,1
-72.0324,1
-167.0735,1
-334.1893,1
-521.3213,1
-354.2056,1
-88.0444,1
-194.0215,1
-125.0225,1
......@@ -458,6 +453,8 @@
182.0732,1
-226.0372,1
-180.0939,1
-149.0841,1
-165.0206,1
-150.1045,1
-445.9963,1
-346.0027,1
......@@ -506,10 +503,16 @@
-261.9964,1
-104.0029,1
93.9725,1
206.0063,1
-172.1212,1
-102.0317,1
-188.1161,1
-108.0245,1
-308.0957,1
-150.0919,1
-122.0038,1
-124.0194,1
272.115,1
-168.2242,1
-202.9694,1
-278.0014,1
-84.1051,1
......@@ -557,7 +560,6 @@
-110.0116,1
-109.064,1
-84.0687,1
-56.0374,1
-164.0757,1
-222.0811,1
-206.0862,1
......@@ -723,6 +725,13 @@
-74.0844,1
-102.0793,1
-274.1039,1
236.0168,1
59.0007,1
43.0058,1
-89.0441,1
-202.0918,1
-8.0051,1
-36,1
-127.1361,1
-169.1579,1
-211.1797,1
......@@ -730,10 +739,27 @@
-179.0866,1
-180.0587,1
-150.0317,1
-60.0575,1
-76.0888,1
-74.0732,1
-88.0888,1
-102.0681,1
-104.0837,1
-264.1573,1
-220.1311,1
-234.1467,1
-236.1624,1
-176.1049,1
-132.0786,1
-146.0943,1
-148.1099,1
-186.0367,1
-244.0422,1
-0.9952,1
-47.0007,1
-132.0245,1
-261.0637,1
-319.0692,1
-307.0248,1
-69.9691,1
-352.8855,1
......@@ -765,20 +791,22 @@
-91.0786,1
-92.0262,1
-102.0946,1
-14.9552,1
-132.001,1
-135.9847,1
-150.9956,1
-129.9853,1
-217.9229,1
-1.0317,1
-138.9858,1
181.0712,1
-156.0818,1
132.0059,1
37.9769,1
-352.9445,1
-351.9605,1
29.0265,1
291.0889,1
-153.0709,1
-118.1021,1
-107.9162,1
-123.8934,1
20.0262,1
......@@ -786,6 +814,7 @@
-52.0313,1
-44.0085,1
-174.1521,1
749.1046,1
113.0412,1
-460.0314,1
-35.9322,1
......@@ -816,18 +845,21 @@
-226.9888,1
-152.0334,1
-123.0433,1
222.0376,1
218.0063,1
-104.0278,1
8.8986,1
27.9585,1
-185.0841,1
-161.076,1
20.9375,1
3.9837,1
1.968,1
-63.0473,1
-144.0211,1
-121.0528,1
-105.0215,1
170.0943,1
-108.0245,1
43.0184,1
-46.0219,1
-159.8346,1
-127.0997,1
-113.1204,1
......@@ -835,17 +867,16 @@
-273.1477,1
-178.0662,1
-255.0314,1
-25.068,1
-57.0578,1
2.9633,1
18.9582,1
16.9425,1
205.0348,1
60.98,1
17.9742,1
-334.9784,1
-333.9944,1
-149.0841,1
-165.0206,1
272.115,1
-168.2242,1
-308.0957,1
-150.0919,1
-122.0038,1
-124.0194,1
-63.9797,1
-144.0423,1
198.0211,1
......@@ -866,10 +897,37 @@
-179.1133,1
-166.0816,1
-165.0976,1
-22.0019,1
15.9749,1
148.0008,1
-176.016,1
-201.0725,1
-207.1259,1
-133.9143,1
-183.9111,1
-233.9079,1
-283.9047,1
-47.9505,1
-99.9936,1
-185.9574,1
-235.9542,1
-63.9961,1
-213.9865,1
-113.9929,1