diff options
author | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 12:16:21 -0700 |
---|---|---|
committer | Nicholas Noll <nbnoll@eml.cc> | 2020-08-19 12:16:21 -0700 |
commit | 8f8ce839b3edebd591e83bad8628302e995ab2e2 (patch) | |
tree | 1593bd0525dd702141779fd422a28ad95f3d9348 | |
parent | 8d5626243ce5560161534cba114301fd3a4dd382 (diff) |
feat: filter plasmids now works on the directory level
-rwxr-xr-x | scripts/filter_plasmids.py | 32 |
1 files changed, 19 insertions, 13 deletions
diff --git a/scripts/filter_plasmids.py b/scripts/filter_plasmids.py
index 4ef2670..71aa23f 100755
--- a/scripts/filter_plasmids.py
+++ b/scripts/filter_plasmids.py
@@ -19,18 +19,24 @@ def open(path, *args, **kwargs):
     else:
         return builtins.open(path, *args, **kwargs)
 
-# def main(args):
-#     for d in args:
+def main(args):  # args: dataset names; each maps to the directory data/<name>/assemblies
+    for arg in args:
+        in_dir = f"data/{arg}/assemblies"
+        if not os.path.exists(in_dir):
+            print(f"{in_dir} doesn't exist. skipping...")
+            continue
+        # Filtered copies are written to a sibling "<name>-plasmid" tree.
+        out_dir = f"data/{arg}-plasmid/assemblies"
+        # os.makedirs (os.mkdirs does not exist) also creates missing parents.
+        os.makedirs(out_dir, exist_ok=True)
+        # Output keeps the input file name, minus any '.gz' (written uncompressed).
+        for path in glob(f"{in_dir}/*.f?a*"):
+            with open(path, 'rt') as fd, open(f"{out_dir}/{os.path.basename(path).replace('.gz', '')}", 'w') as wtr:
+                for i, rec in enumerate(parse_fasta(fd)):
+                    if i == 0:  # skip the first record (presumably the chromosome -- confirm)
+                        continue
+                    wtr.write(str(rec))
+                    wtr.write('\n')
 
-from time import time
 if __name__ == "__main__":
-    for path in glob("data/staph/assemblies/*.fna.gz"):
-        with open(path, 'rt') as fd, open("test.fa", 'w') as wtr:
-            for rec in parse_fasta(fd):
-                # print(str(rec))
-                wtr.write(str(rec))
-                wtr.write('\n')
-        seqs = [record for record in parse_fasta(fd)]
-        break
-
-    # main(sys.argv[1:])
+    main(sys.argv[1:])