aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Nicholas Noll <nbnoll@eml.cc> 2020-08-19 12:16:21 -0700
committer Nicholas Noll <nbnoll@eml.cc> 2020-08-19 12:16:21 -0700
commit 8f8ce839b3edebd591e83bad8628302e995ab2e2 (patch)
tree 1593bd0525dd702141779fd422a28ad95f3d9348
parent 8d5626243ce5560161534cba114301fd3a4dd382 (diff)
feat: filter plasmids now works on the directory level
-rwxr-xr-x scripts/filter_plasmids.py 32
1 files changed, 19 insertions, 13 deletions
diff --git a/scripts/filter_plasmids.py b/scripts/filter_plasmids.py
index 4ef2670..71aa23f 100755
--- a/scripts/filter_plasmids.py
+++ b/scripts/filter_plasmids.py
@@ -19,18 +19,24 @@ def open(path, *args, **kwargs):
else:
return builtins.open(path, *args, **kwargs)
-# def main(args):
-# for d in args:
+def main(args):
+    """For each name in args, copy all but the first FASTA record of every file in data/<name>/assemblies to data/<name>-plasmid/assemblies."""
+    for arg in args:
+        in_dir = f"data/{arg}/assemblies"
+        if not os.path.exists(in_dir):
+            print(f"{in_dir} doesn't exist. skipping...")
+            continue
+
+        out_dir = f"data/{arg}-plasmid/assemblies"
+        os.makedirs(out_dir, exist_ok=True)  # creates missing parents; no error if the directory already exists
+
+        for path in glob(f"{in_dir}/*.f?a*"):  # output keeps the input file name with any '.gz' removed
+            with open(path, 'rt') as fd, open(f"{out_dir}/{os.path.basename(path).replace('.gz', '')}", 'w') as wtr:
+                for i, rec in enumerate(parse_fasta(fd)):
+                    if i == 0:  # drop the first record of each file (presumably the chromosome -- confirm)
+                        continue
+                    wtr.write(str(rec))
+                    wtr.write('\n')
-from time import time
if __name__ == "__main__":
- for path in glob("data/staph/assemblies/*.fna.gz"):
- with open(path, 'rt') as fd, open("test.fa", 'w') as wtr:
- for rec in parse_fasta(fd):
- # print(str(rec))
- wtr.write(str(rec))
- wtr.write('\n')
- seqs = [record for record in parse_fasta(fd)]
- break
-
- # main(sys.argv[1:])
+ main(sys.argv[1:])