In [1]:
import collections
import csv
import os

# Path of the GFF annotation file inspected by this notebook.
# The original absolute path is kept as the default so existing runs behave the
# same; set the GFF_FILE environment variable to point at the file on machines
# where that directory does not exist (avoids editing the notebook itself).
gff_file = os.environ.get(
    "GFF_FILE",
    "/home/jovyan/work/scratch/2017_output_all_data/genome/GCF_000007805.1_ASM780v1_genomic.gff",
)
In [2]:
def check_key(key_to_check, gff_file):
    """Report values of one GFF attribute that appear on more than one gene row.

    Reads ``gff_file`` as tab-separated text, ignores lines starting with ``#``,
    and for every row whose third column is ``"gene"`` parses the ninth column
    (``tag=value`` pairs separated by ``;``) and collects the value stored
    under ``key_to_check``.

    Args:
        key_to_check: Attribute tag to look up in column 9 (e.g. ``"locus_tag"``).
        gff_file: Path of the GFF file to scan.

    Returns:
        List of values that occur on more than one gene row, in order of first
        appearance. The same information is printed to stdout.

    Raises:
        OSError: If ``gff_file`` cannot be opened (e.g. ``FileNotFoundError``).
    """
    print("key:", key_to_check)
    name_list = []
    missing = 0
    with open(gff_file, newline='') as csvfile:
        data_lines = (line for line in csvfile if not line.startswith('#'))
        # GFF fields are never quoted; QUOTE_NONE keeps a stray '"' in an
        # attribute from being interpreted as CSV quoting.
        gffreader = csv.reader(data_lines, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in gffreader:
            # Blank lines and sequence lines of a trailing ##FASTA section do
            # not have nine columns; skip them instead of raising IndexError.
            if len(row) < 9 or row[2] != "gene":
                continue
            kv_dict = {}
            for kv in row[8].split(";"):
                # partition() splits on the first "=" only, so values that
                # themselves contain "=" stay intact; pieces without "="
                # (e.g. the empty string after a trailing ";") are ignored.
                tag, sep, value = kv.partition("=")
                if sep:
                    kv_dict[tag.strip()] = value
            if key_to_check in kv_dict:
                name_list.append(kv_dict[key_to_check])
            else:
                # A gene lacking the attribute should not abort the whole scan.
                missing += 1

    name_counter = collections.Counter(name_list)
    duplicates = [item for (item, count) in name_counter.items() if count > 1]

    if missing:
        print("genes without this key:", missing)
    print("duplicates:", duplicates)
    return duplicates

# Attribute tags from column 9 of the GFF file to check for duplicate values.
all_keys = ['locus_tag', 'ID', 'Name', 'Dbxref']
# Run the duplicate check once per tag; each call re-reads the file from disk.
for key_to_check in all_keys:
    check_key(key_to_check, gff_file)
    # Visual separator between the reports for consecutive tags.
    print("---------")
key: locus_tag
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-2-08a3fe36cafd> in <module>()
     20 all_keys = ['locus_tag', 'ID', 'Name', 'Dbxref']
     21 for key_to_check in all_keys:
---> 22     check_key(key_to_check, gff_file)
     23     print("---------")

<ipython-input-2-08a3fe36cafd> in check_key(key_to_check, gff_file)
      2     print("key:", key_to_check)
      3     name_list = []
----> 4     with open(gff_file, newline='') as csvfile:
      5         gffreader = csv.reader((row for row in csvfile if not row.startswith('#')),delimiter='\t')
      6         for row in gffreader:

FileNotFoundError: [Errno 2] No such file or directory: '/home/jovyan/work/scratch/2017_output_all_data/genome/GCF_000007805.1_ASM780v1_genomic.gff'