In [1]:
import collections
import csv

# GFF annotation file inspected by this notebook.
# NOTE(review): this is an absolute, machine-specific path; the recorded run
# of the next cell failed with FileNotFoundError on it, so confirm the file
# exists at this location before re-running.
gff_file = "/home/jovyan/work/scratch/2017_output_all_data/genome/GCF_000007805.1_ASM780v1_genomic.gff"
In [2]:
def check_key(key_to_check, gff_file):
    """Print the values of one attribute that are shared by several gene features.

    The file is read as tab-separated text. Lines starting with ``#`` are
    ignored, and only rows whose third column equals ``"gene"`` are inspected.
    The ninth column of such a row is parsed as ``key=value`` pairs separated
    by ``;`` and the value stored under ``key_to_check`` is counted.

    Parameters
    ----------
    key_to_check : str
        Attribute key to audit (for example ``"locus_tag"``).
    gff_file : str or path-like
        Path of the GFF file to read.

    Returns
    -------
    None
        Results are printed: a ``key:`` line, a ``genes without key:`` line
        (only when at least one gene lacks the attribute) and a
        ``duplicates:`` line listing every value seen more than once, in
        order of first appearance.

    Raises
    ------
    OSError
        If ``gff_file`` cannot be opened (e.g. ``FileNotFoundError``).
    """
    print("key:", key_to_check)
    value_counts = collections.Counter()
    missing_count = 0
    with open(gff_file, newline='') as handle:
        data_lines = (line for line in handle if not line.startswith('#'))
        # QUOTE_NONE: GFF is plain tab-separated text, not CSV. With the
        # default dialect a field that starts with '"' would swallow the
        # following tabs and newlines.
        reader = csv.reader(data_lines, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            # Blank lines and sequence lines after a ##FASTA directive have
            # fewer than nine columns; they are not feature rows.
            if len(row) < 9 or row[2] != "gene":
                continue
            # Split on the first '=' only so values containing '=' survive.
            # Segments without '=' (such as the empty one left by a trailing
            # ';') are ignored.
            attributes = dict(
                pair.split("=", 1) for pair in row[8].split(";") if "=" in pair
            )
            if key_to_check in attributes:
                value_counts[attributes[key_to_check]] += 1
            else:
                # Keep auditing instead of aborting with KeyError.
                missing_count += 1
    if missing_count:
        print("genes without key:", missing_count)
    print("duplicates:", [value for value, count in value_counts.items() if count > 1])
# Attribute keys whose values are checked for duplicates among gene features.
all_keys = ["locus_tag", "ID", "Name", "Dbxref"]

# One report per key, each followed by a separator line.
for key_to_check in all_keys:
    check_key(key_to_check, gff_file)
    print("---------")
key: locus_tag
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-2-08a3fe36cafd> in <module>()
20 all_keys = ['locus_tag', 'ID', 'Name', 'Dbxref']
21 for key_to_check in all_keys:
---> 22 check_key(key_to_check, gff_file)
23 print("---------")
<ipython-input-2-08a3fe36cafd> in check_key(key_to_check, gff_file)
2 print("key:", key_to_check)
3 name_list = []
----> 4 with open(gff_file, newline='') as csvfile:
5 gffreader = csv.reader((row for row in csvfile if not row.startswith('#')),delimiter='\t')
6 for row in gffreader:
FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/work/scratch/2017_output_all_data/genome/GCF_000007805.1_ASM780v1_genomic.gff'