Skip to content
Snippets Groups Projects
Commit 1608905c authored by Piotr Gawron
Browse files

--skipNonStandardChromosomes parameter added to skip nonstandard chromosomes

parent 416e24e2
No related branches found
No related tags found
1 merge request: !3 "Add filtering for non standard chromosomes"
......@@ -47,6 +47,8 @@ def download_ucsc_file(filename):
inputFile = sys.argv[1]
db = sys.argv[2]
skipNonStandardChromosomes = sys.argv[3] if 3 < len(sys.argv) else None
basename = ntpath.basename(inputFile).replace(".txt.gz", "")
download_ucsc_file("fetchChromSizes")
......@@ -59,10 +61,15 @@ with open(chrom_sizes, 'w') as output_chrom_sizes:
chromosomes = {}
unknown_chromosomes = {}
if skipNonStandardChromosomes == "--skipNonStandardChromosomes":
print("Skipping non standard chromosomes")
with open(chrom_sizes) as f:
lines = f.readlines()
for line in lines:
chromosomes[line.split("\t")[0]] = True
chromosomeName = line.split("\t")[0]
if skipNonStandardChromosomes != "--skipNonStandardChromosomes" or "_" not in chromosomeName:
chromosomes[chromosomeName] = True
output_file_unsorted = basename + ".bed"
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment