Commit 8e6c31fc authored by Valentina Galata
Browse files

workflow: utils.py: fixed a bug in ave_gene_cov (issue #128)

parent 8821d03f
......@@ -293,15 +293,21 @@ def ave_gene_cov(cov_file, genes):
last_contig_cov = [] # array of cov. values for bases 1..N (idx 0..N-1)
for line in ifile:
contig_id, base, coverage = line.strip().split("\t")
assert contig_id != ""
if contig_id != last_contig_id:
# ave. cov. for genes from this contig
contig_genes = genes.contig_id == last_contig_id
if contig_id != "":
if last_contig_id != "": # "" at the beginning before having parsed the 1st contig
contig_genes = genes.contig_id == last_contig_id # genes/proteins of last contig
# ave. cov. (note: start/end w.r.t. Python indexing)
genes.loc[contig_genes, "ave_cov"] = genes.loc[contig_genes,:].apply(lambda x: sum(last_contig_cov[(x.start-1):x.end]) / (x.end-x.start+1), axis=1)
# reset
last_contig_id = contig_id
last_contig_cov = []
last_contig_cov.append(float(coverage))
# reached the end of the file --> process last contig
assert last_contig_id != ""
contig_genes = genes.contig_id == last_contig_id # genes/proteins of last contig
# ave. cov. (note: start/end w.r.t. Python indexing)
genes.loc[contig_genes, "ave_cov"] = genes.loc[contig_genes,:].apply(lambda x: sum(last_contig_cov[(x.start-1):x.end]) / (x.end-x.start+1), axis=1)
return genes
def mmseqs2_tsv(ifile_path):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment