#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Convert etm '*.text' data files to the newer etm format, writing the
# results as '*.txt' files when run with -m (see `desc` below).
#
# NOTE(review): this copy of the file is damaged — the original physical
# line breaks were lost, and a span in the middle was truncated (see the
# large NOTE further down).  Line structure and indentation below were
# reconstructed from the collapsed text; confirm against version control
# before running.
from __future__ import print_function #, unicode_literals
from __future__ import absolute_import, division
import sys, re, os, os.path, fnmatch, codecs, locale
from optparse import OptionParser

# User-facing description passed to OptionParser (runtime string — left as found).
desc = """\
Walk the current directory and its subdirectories, opening '*.text' files and reporting the changes needed for the new etm format. If the option '-m' is used, changes will be made and and the results saved to files with the extension '.txt', overwriting existing files if necessary. """

# Change the following if you have an abbreviationsFile
abbreviationsFile = ''

# encoding = "UTF-8"
# Encoding used for reading '*.text' and writing '*.txt' files.
# NOTE(review): getdefaultlocale()[1] can be None on some systems — codecs.open
# would then fail; verify on the target platforms.
file_encoding = locale.getdefaultlocale()[1]

from datetime import datetime, timedelta, time
from time import gmtime, strftime
from dateutil.parser import parse
from textwrap import wrap
# import uuid
import bisect

#!!!!!!!!!!!!!!!!!!!!! Change to True for production !!!!!!!!!!!!!!!!!!!!
skip_existing = False
# add_timestamps = False
# add_timestamps = True
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# FIXME: incorporate project line defaults into individual entries and then omit the project line
wrap_lines = 63
default_time = "00:00"          # time appended to a date-only @s entry

# Patterns used when splitting file lines into items and fields.
comment_regex = re.compile(r'\s*#')                                # comment lines to skip
at_regex = re.compile(r'\s+@')                                     # @key field separators
item_regex = re.compile(r'^\s*(\*|\&|\~|\+|\-|\!|\_+|\.+|\$)\s+(\S.*)$')  # item leader + content
ws_regex = re.compile(r'\s+')                                      # runs of whitespace
leadingspace_regex = re.compile(r'^([ \t]+)') # leading space or tab characters

from etmQt import etmData

try:
    from os.path import relpath
except ImportError: # python < 2.6
    from os.path import curdir, abspath, sep, commonprefix, pardir, join

    def relpath(path, start=curdir):
        """Return a relative version of a path"""
        if not path:
            raise ValueError("no path specified")
        start_list = abspath(start).split(sep)
        path_list = abspath(path).split(sep)
        # Work out how much of the filepath is shared by start and path.
        i = len(commonprefix([start_list, path_list]))
        rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
        if not rel_list:
            return curdir
        return join(*rel_list)

def add2list(lst, item):
    """Add item to lst if not already present using bisect to maintain order."""
    try:
        i = bisect.bisect_left(lst, item)
    # NOTE(review): bare except silently hides any error (including
    # KeyboardInterrupt); also `return()` returns an empty tuple, not None.
    except:
        print("error adding", item, "\n ", lst)
        return()
    if i != len(lst) and lst[i] == item:
        # item already present — keep the list duplicate-free
        return()
    bisect.insort(lst, item)

# Output order of the @key fields emitted by hash2Str.
sort_str = u'dbsezaAgckltpUriCSWwMmuo-+fn'
sort_keys = [x for x in sort_str]

# ---------------------------------------------------------------------------
# NOTE(review): SOURCE IS CORRUPTED HERE.  Everything between the "(?" in the
# next statement and " level:" below was lost — most likely a span containing
# '<' characters was stripped during extraction.  The missing text must have
# included at least: the body of parens_regex and the definitions of
# time_regex, extent_regex, weekday_regex, paren_keys, sdict and rfmt (all
# referenced later but never defined in the visible text), and the header of
# the line-parsing function (presumably lines2hsh — its tail, which builds
# `hsh` from `parts` and the prerequisite map `r`, survives below).
# Preserved verbatim; recover the missing span from version control.
# Indentation of the surviving fragment is a guess.
# ---------------------------------------------------------------------------
parens_regex = re.compile(r'(? level:
    r[key] = []
if level-1 in r:
    # collect every item recorded at lower levels as prerequisites
    tmp = []
    for i in range(level):
        tmp.extend(r[i])
    hsh[u'preq'] = tmp
else:
    return({})
for part in parts:
    if len(part) >= 2:
        if part[0] == 'n':
            # remove leading spaces and tabs but leave other leading whitespace
            # and remove all trailing whitespace
            content = leadingspace_regex.sub('', part[1:].rstrip())
        else:
            # replace consecutive whitespace characters with single space
            # for all fields other than notes
            content = " ".join([x for x in ws_regex.split(part[1:]) if x])
        hsh[part[0]] = content
return(hsh)
# --------------------------- end corrupted span ----------------------------

def hash2Str(hsh):
    """Serialize an item hash back into a single new-format text line.

    Normalizes dates/times (@d/@s/@+/@-), converts extents to "+minutes",
    and emits the remaining fields as "@key value" in sort_keys order.
    Returns '' when the hash produces no fields.
    """
    start_date = ''
    if u'DS' not in hsh:
        hsh[u'DS'] = ''
    if u'DT' in hsh:
        del hsh[u'DT']
    if 'r' in hsh and hsh['r'] == 'l':
        # drop the "list" repetition marker — presumably the default; TODO confirm
        del hsh['r']
    if 'leader' not in hsh:
        print("missing leader", hsh)
        hsh['leader'] = '?'
    elif hsh['leader'] in ['*', '~', '-', '+', '!']:
    # else:
        if 'd' in hsh:
            # @d (date) plus @s (time list) are combined into full datetimes
            start_date = hsh['d']
            del hsh['d']
            if 's' in hsh:
                # combine
                value = hsh['s']
                m = parens_regex.search(value)
                if m:
                    # skip range()
                    # print("parens_regex match", m.groups())
                    value = parens_regex.sub("%s" % m.group(1), value)
                times = value.split(',')
                hsh['s'] = "%s %s" % (start_date, times[0])
                if len(times) > 1 and 'r' not in hsh:
                    # add the multiple starting datetimes to @+
                    hsh[u'+'] = ', '.join(["%s %s" % (start_date, x.strip()) for x in times])
                    if '-' in hsh:
                        # expand excluded dates to one entry per (date, time) pair
                        ex = []
                        # print("hsh['-']", hsh['-'], times)
                        exdates = hsh['-'].split(',')
                        for date in exdates:
                            # NOTE(review): `time` here shadows the
                            # datetime.time imported at the top of the file
                            for time in times:
                                ex.append("%s %s" % (date, time))
                        hsh['-'] = ', '.join(ex)
                    # print('multiple s', hsh)
            elif hsh['leader'] == '*':
                # all day
                hsh['leader'] = '^'
                hsh['s'] = start_date
            elif 's' not in hsh:
                hsh['s'] = "%s %s" % (start_date, default_time)
            # if hsh['leader'] == '~':
            #     print('action', hsh)
        elif '+' in hsh:
            # use the first item from @+ as @s
            value = hsh['+']
            m = parens_regex.search(value)
            if m:
                # print("+ parens_regex match", m.groups())
                value = parens_regex.sub("%s" % m.group(1), value)
            datetimes = value.split(',')
            hsh['s'] = datetimes[0]
            hsh['+'] = ", ".join(datetimes)
            # print('+ not d', hsh)
    if hsh['DS']:
        # print "DS", hsh['DS']
        sl = ["%s %s" % (hsh['leader'], hsh['DS'])]
    else:
        sl = []
    # Emit the remaining fields in canonical sort_keys order.
    # NOTE(review): paren_keys, weekday_regex, time_regex, extent_regex and
    # sdict are referenced below but defined in the lost span above.
    for key in sort_keys:
        if key in hsh and hsh[key]:
            value = hsh[key]
            if key in paren_keys:
                if key == 'w':
                    # move the ordinal in front of the weekday name — TODO confirm
                    m = weekday_regex.search(value)
                    while m:
                        value = weekday_regex.sub("%s%s" % (m.group(2), m.group(1)), value, count=1)
                        m = weekday_regex.search(value)
                # oldm = oldparens_regex.search(value)
                m = parens_regex.search(value)
                # if oldm:
                #     print "old", oldm.groups()
                #     value = oldparens_regex.sub(" %s" % oldm.group(1), value)
                if m:
                    # print "new", m.groups()
                    # print("sort keys parens_regex match", m.groups(), value)
                    value = parens_regex.sub("%s" % m.group(1), value)
            if key == 'a':
                # fix signs
                pass
                # value = ', '.join(["-%s" % x.strip() for x in
                #     value.split(',')])
            elif key == 'f':
                # keep only the last (most recent?) finish entry — TODO confirm
                value = value.split(',')[-1]
            if key == 'e':
                # fix extent: convert an end time or H:M pair into "+minutes"
                m = time_regex.search(value)
                if m and not m.group(1):
                    etime = parse("%s %s" % (start_date, m.group(2)))
                    stime = parse(hsh['s'])
                    # if etime < stime:
                    #     print("time reversal", stime, etime, m.group(2))
                    # NOTE(review): timedelta.seconds ignores the .days
                    # component, so a negative/overnight extent silently
                    # wraps — verify this is intended
                    minutes = (etime - stime).seconds//60
                    value = "+%s" % minutes
                    # print("got etime", m.group(2), etime - stime, minutes)
                else:
                    m = extent_regex.search(value)
                    if m:
                        minutes = int(m.group(1))*60 + int(m.group(2))
                        value = "+%s" % minutes
            # map old key names to new @key names
            if key in sdict:
                nkey = sdict[key]
            elif key == 'n':
                nkey = '@d'
            else:
                nkey = "@%s" % key
            sl.append("%s %s" % (nkey, value))
    if sl:
        return(" ".join(sl))
    else:
        return('')

def proj2hsh(s):
    """
    Returns a hash corresponding to the project line s.

    Returns (msgs, hsh): msgs is a list of error strings (currently can
    only ever be empty — see NOTE) and hsh maps 'P' to the project name
    and each '@x value' part to {'x': 'value'}.
    """
    hsh = {}
    msgs = []
    parts = [x.strip() for x in s.split('@')]
    # NOTE(review): str.split always returns at least one element, so the
    # else branch below is unreachable dead code.
    if len(parts) > 0:
        proj = parts.pop(0).strip()
    else:
        msgs.append(" Could not parse project line '%s'" % s)
        return(msgs, {})
    if len(proj) > 0 and proj[0] in ['~', '*', '!', '-', '+']:
        # line starts with an item leader, so it is not a project name
        hsh[u'P'] = ''
    else:
        hsh[u'P'] = proj
    for part in parts:
        hsh[part[0]] = part[1:].strip()
    return(msgs, hsh)

def hashes(physical_lines, f, dfltHash):
    """A generator returning a hash for each physical line in physical lines
    each with an id based on the line numbers and the file's relative path, f.
    Prerequisites are computed for each task and added to the hash using the
    logic in which '+' adds the previous item to the current one as
    prerequisites for the next group of lower level tasks.

    NOTE(review): lines2hsh, called below, is defined in the corrupted span
    near the top of the file.
    """
    linenum = 1 # we removed the first project line
    logical_line = []
    linenums = []
    r = {}
    r[0] = []
    for line in physical_lines:
        linenums.append(linenum)
        linenum += 1
        if comment_regex.match(line):
            continue
        stripped = line.strip()
        if stripped and stripped[0] in ['*', '+', '-', '!', '~']:
            # a new item leader starts a new logical line; flush the previous one
            if logical_line:
                yield lines2hsh(''.join(logical_line), linenums, f, r, dfltHash)
                logical_line = []
                linenums = []
            logical_line.append(line)
        elif stripped:
            # a line which does not continue, end of logical line
            logical_line.append(line)
    if logical_line:
        # end of sequence implies end of last logical line
        yield lines2hsh(''.join(logical_line), linenums, f, r, dfltHash)

def getFileHashes(f, r, w=False):
    """
    Process the lines in file f into hashes and add the relevant entries.

    f: absolute path of the '*.text' file; r: its path relative to the
    common prefix (used in messages); w: when True, write the converted
    lines to f with its extension changed to '.txt', otherwise print them.
    """
    global contexts, keywords, locations
    # NOTE(review): rfmt is never defined in the visible source (lost span?);
    # mtime, msgs, l and n are computed but never used.
    mtime = strftime(rfmt, gmtime(os.path.getmtime(f)))
    newf = "%s.txt" % os.path.splitext(f)[0]
    newr = "%s.txt" % os.path.splitext(r)[0]
    fo = codecs.open(f, 'r', file_encoding)
    lines = fo.readlines()
    fo.close()
    msgs = []
    dfltHash = {}
    # first line of the file is the project line
    ms, hsh = proj2hsh(lines.pop(0))
    hsh['leader'] = '='
    s = hash2Str(hsh)
    if not lines:
        print("\nskipping empty file:", r)
        return()
    if w:
        print("\nwriting:", newr)
        fo = codecs.open(newf, 'w', file_encoding)
    else:
        print("\nprocessing:", f)
    if s:
        if w:
            fo.write("= %s\n" % s)
        else:
            print("= %s" % s)
    for h in hashes(lines, r, dfltHash):
        if h:
            s = hash2Str(h)
            l = s.split('\n')
            n = len(l) + 1
            if w:
                fo.write("%s\n" % (s))
            else:
                print("%s" % (s))
    if w:
        fo.close()

def getFiles(d=None):
    """yield the list of files in topdir and its subdirectories whose names
    match pattern.

    Returns (common_prefix, filelist) where filelist is a sorted list of
    (full_path, rel_path) tuples.  When d is None the etm data directories
    are scanned instead.
    """
    pattern='[!.]*.text'
    filelist = []
    if d:
        paths = [d]
    else:
        # NOTE(review): etmActions, etmEvents, etmNotes, etmTasks and etmdata
        # are not defined anywhere in the visible source — presumably they
        # come from the lost span or were meant to come from etmQt.etmData.
        paths = [etmActions, etmEvents, etmNotes, etmTasks, etmdata]
    common_prefix = os.path.commonprefix(paths)
    for d in paths:
        if d:
            for path, subdirs, names in os.walk(d, followlinks=True):
                for name in names:
                    if fnmatch.fnmatch(name, pattern):
                        full_path = os.path.join(path,name)
                        rel_path = relpath(full_path, common_prefix)
                        tup = (full_path, rel_path)
                        # print("file tuple", tup)
                        add2list(filelist, tup)
    return(common_prefix, filelist)

def getHashes(make_changes=False):
    """Walk the current directory and convert (or preview) every matching file."""
    # dtz_today, today, soondate = etmData.getToday()
    cwd = os.getcwd()
    common_prefix, filelist = getFiles(cwd)
    print("Starting from", common_prefix)
    if make_changes:
        print(' Writing changes to *.txt files')
    else:
        print(" Showing but not writing changes")
    msgs = []
    for f,r in filelist:
        getFileHashes(f,r, make_changes)

def main():
    """Parse command-line options and run the conversion."""
    usage = "%prog [options]"
    parser = OptionParser(usage=usage, description = desc)
    parser.add_option("-m", "--make_changes",
        action="store_true", dest="make_changes",
        help="Actually make changes and save the result with the file's extension changed to .txt.")
    options, args = parser.parse_args()
    getHashes(options.make_changes)
    print("\n-----------------------------------------------")
    if options.make_changes:
        print("Wrote changes to *.txt files")
    else:
        print("Run again with '-m' to write the changes to *.txt files")

if __name__ == '__main__':
    main()