Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Fractalis
fractalis
Commits
be9f918c
Commit
be9f918c
authored
Jun 07, 2018
by
Sascha Herzinger
Browse files
Fixed a very critical bug in array_stats that messed with the statistics
parent
b09f0529
Pipeline
#5290
failed with stages
in 2 minutes and 50 seconds
Changes
2
Pipelines
1
Show whitespace changes
Inline
Side-by-side
fractalis/analytics/tasks/shared/array_stats.py
View file @
be9f918c
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
from copy import deepcopy
from copy import deepcopy
from typing import List, TypeVar
from typing import List, TypeVar
from collections import OrderedDict
import logging
import logging
import pandas as pd
import pandas as pd
...
@@ -115,7 +116,7 @@ def get_limma_stats(df: pd.DataFrame, subsets: List[List[T]]) -> pd.DataFrame:
...
@@ -115,7 +116,7 @@ def get_limma_stats(df: pd.DataFrame, subsets: List[List[T]]) -> pd.DataFrame:
r_data = pandas2ri.py2ri(df)
r_data = pandas2ri.py2ri(df)
# py2ri is stupid and makes too many assumptions.
# py2ri is stupid and makes too many assumptions.
# These two lines restore the column order
# These two lines restore the column order
r_data.colnames = list(set(ids))
r_data.colnames = list(OrderedDict.fromkeys(ids))
r_data = r_data.rx(robj.StrVector(ids))
r_data = r_data.rx(robj.StrVector(ids))
r_fit = r['lmFit'](r_data, r_design)
r_fit = r['lmFit'](r_data, r_design)
...
@@ -160,13 +161,13 @@ def get_deseq2_stats(df: pd.DataFrame,
...
@@ -160,13 +161,13 @@ def get_deseq2_stats(df: pd.DataFrame,
df = df[flattened_subsets]
df = df[flattened_subsets]
# filter rows with too few reads
# filter rows with too few reads
total_row_counts = df.sum(axis=1)
total_row_counts = df.sum(axis=1)
keep = total_row_counts[total_row_counts > min_total_row_count].index
keep = total_row_counts[total_row_counts >= min_total_row_count].index
df = df.loc[keep]
df = df.loc[keep]
# pandas df -> R df
# pandas df -> R df
r_count_data = pandas2ri.py2ri(df)
r_count_data = pandas2ri.py2ri(df)
# py2ri is stupid and makes too many assumptions.
# py2ri is stupid and makes too many assumptions.
# These two lines restore the column order
# These two lines restore the column order
r_count_data.colnames = list(set(flattened_subsets))
r_count_data.colnames = list(OrderedDict.fromkeys(flattened_subsets))
r_count_data = r_count_data.rx(robj.StrVector(flattened_subsets))
r_count_data = r_count_data.rx(robj.StrVector(flattened_subsets))
# see package documentation
# see package documentation
...
@@ -183,5 +184,5 @@ def get_deseq2_stats(df: pd.DataFrame,
...
@@ -183,5 +184,5 @@ def get_deseq2_stats(df: pd.DataFrame,
# R result table to Python pandas
# R result table to Python pandas
r_res = r['as.data.frame'](r_res)
r_res = r['as.data.frame'](r_res)
results = pandas2ri.ri2py(r_res)
results = pandas2ri.ri2py(r_res)
results.insert(0, 'feature', list(r['row.names'](r_res)))
return results
return results
fractalis/analytics/tasks/volcanoplot/main.py
View file @
be9f918c
...
@@ -44,17 +44,13 @@ class VolcanoTask(AnalyticTask):
...
@@ -44,17 +44,13 @@ class VolcanoTask(AnalyticTask):
"the subset sample ids do not match the data."
"the subset sample ids do not match the data."
logger.error(error)
logger.error(error)
raise ValueError(error)
raise ValueError(error)
# make matrix of input data
# make matrix of input data
df = df.pivot(index='feature', columns='id', values='value')
df = df.pivot(index='feature', columns='id', values='value')
features = list(df.index)
# compute the stats (p / fC) for the selected ranking method
# compute the stats (p / fC) for the selected ranking method
stats = array_stats.get_stats(df=df,
stats = array_stats.get_stats(df=df,
subsets=subsets,
subsets=subsets,
params=params,
params=params,
ranking_method=ranking_method)
ranking_method=ranking_method)
return {
return {
'features': features,
'stats': stats.to_dict(orient='list')
'stats': stats.to_dict(orient='list')
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment