From 24a0618550c568f3a1c6318387ce78559dc1ef6d Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 20:02:31 +0200 Subject: Initial commit: Implement code in python --- python/etatsbasen.py | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100755 python/etatsbasen.py (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py new file mode 100755 index 0000000..24e6267 --- /dev/null +++ b/python/etatsbasen.py @@ -0,0 +1,108 @@ +#! /usr/bin/env python3 +import argparse +import sys +import os +from email.utils import parseaddr +import csv +import re + +VERSION="python-etatsbasen-v0.1" +DEFAULT_CATEGORIES = "12,14,17,18,27,33,38,66,68,76" +DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" + +def cleanup_email(string): + fix1 = re.sub(r"^mailto:?", "", string) + if fix1 == valid_email(fix1): + return fix1 + # Split on ' ?[,/] ?' + split = re.split(r' ?[,/] ?', string) + for fix2 in split: + if fix2 == valid_email(fix2): + return fix2 + return False + +def valid_email(string): + # Think about using https://pypi.python.org/pypi/validate_email ? + name, email = parseaddr(string) + if (email == string and '@' in email): + return email + +rename = { + 'tailid': 'id', + 'email': 'request_email', + 'name_nb': 'name', + 'name_nn': 'name.nn', + 'name_en': 'name.en' + }; + +def filter_orgstructid(row, categories): + if row == None: + return None + if int(row["orgstructid"]) in categories: + return row + else: + print("Skipping tailid %s: orgstructid not in selected categories (%s not in %s)" % (row['tailid'], row['orgstructid'], categories), file=sys.stderr) + return None + +def filter_email(row): + if row == None: + return None + if row['email'] == "": + print("Skipping tailid %s: No email specified" % (row['tailid']), file=sys.stderr) + return None # No email, skip + elif not valid_email(row['email']): + fixed = cleanup_email(row['email']) + if fixed: + print("Replaced email for tailid %s: \"%s\" -> \"%s\"" % (row['tailid'], row['email'], fixed), file=sys.stderr) + row['email'] = fixed + else: + print("Skipping tailid %s: Invalid email (%s)" % (row['tailid'], row['email']), file=sys.stderr) + return None # Invalid email, skip + return row + + + +def printCSV(options): + print(options) + with open(options["inputfile"], newline='') as csvfile: + reader = csv.DictReader(csvfile, delimiter=',', quotechar='"') + filtered_rows = [] + for row in reader: + row = filter_orgstructid(row, options["categories"]) + row = filter_email(row) + if row != None: + filtered_rows.append(row) + pass + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') + parser.add_argument('-c', metavar="all|c1[,c2,c3,..]", default=DEFAULT_CATEGORIES, help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) + parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) + parser.add_argument('-o', metavar="h1[,h2,h3...] ", help="Include only these headers in output (id or name)") + parser.add_argument('-v', help="Print version (%s) and exit" % (VERSION), action='store_true') + args = parser.parse_args() + + options = {} + + if args.v: + print("version: %s" % (VERSION)) + sys.exit(0) + + if os.path.isfile(args.f): + options["inputfile"] = args.f + else: + print("%s: No such file" % (args.f), file=sys.stderr) + sys.exit(0) + + if args.o: + options["headers"] = args.o.split(',') + else: + options["headers"] = None + try: + options["categories"] = [ int(x) for x in args.c.split(',') ] + except ValueError as ve: + print("Failed to parse \"-c %s\"; Categories must comma separated list of only integers" % (args.c), file=sys.stderr) + sys.exit(0) + + printCSV(options) -- cgit v1.2.3 From 693d31aace56df93911e9114b6c5cfe7d30bfe36 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 20:08:58 +0200 Subject: Python: Fix handling of "all" categories --- python/etatsbasen.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 24e6267..65785a3 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -38,6 +38,8 @@ rename = { def filter_orgstructid(row, categories): if row == None: return None + if len(categories) == 1 and categories[0] == "all": + return row if int(row["orgstructid"]) in categories: return row else: @@ -61,7 +63,6 @@ def filter_email(row): return row - def printCSV(options): print(options) with open(options["inputfile"], newline='') as csvfile: @@ -100,7 +101,10 @@ if __name__ == "__main__": else: options["headers"] = None try: - options["categories"] = [ int(x) for x in args.c.split(',') ] + if args.c == "all": + options["categories"] = ["all"] + else: + options["categories"] = [ int(x) for x in args.c.split(',') ] except ValueError as ve: print("Failed to parse \"-c %s\"; Categories must comma separated list of only integers" % (args.c), file=sys.stderr) sys.exit(0) -- cgit v1.2.3 From 45ed93506f8e5f4481e27316c920267fde1287bc Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 20:45:19 +0200 Subject: Python: Implement filtering of columns --- python/etatsbasen.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 65785a3..b58fedc 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -10,6 +10,8 @@ VERSION="python-etatsbasen-v0.1" DEFAULT_CATEGORIES = "12,14,17,18,27,33,38,66,68,76" DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" +DEFAULT_COLUMNS = "url_nb,url_en,kommunenummer,orgid,orgstructid,parentid"; + def cleanup_email(string): fix1 = re.sub(r"^mailto:?", "", string) if fix1 == valid_email(fix1): @@ -63,6 +65,18 @@ def filter_email(row): return row + +def filter_column(row, headers): + if row == None: + return None + if len(headers) == 1 and headers[0] == "all": + return row # We should include all headers, shortcut + filtered_row = {} + for key in row: + if key in headers: + filtered_row[key] = row[key] + return filtered_row + def printCSV(options): print(options) with open(options["inputfile"], newline='') as csvfile: @@ -71,8 +85,10 @@ def printCSV(options): for row in reader: row = filter_orgstructid(row, options["categories"]) row = filter_email(row) + row = filter_column(row, options["headers"]) if row != None: filtered_rows.append(row) + print(filtered_rows) pass @@ -80,7 +96,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') parser.add_argument('-c', metavar="all|c1[,c2,c3,..]", default=DEFAULT_CATEGORIES, help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) - parser.add_argument('-o', metavar="h1[,h2,h3...] ", help="Include only these headers in output (id or name)") + parser.add_argument('-o', metavar="all|headerName1[,headername2,...] ", default=DEFAULT_COLUMNS, help="Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) parser.add_argument('-v', help="Print version (%s) and exit" % (VERSION), action='store_true') args = parser.parse_args() @@ -95,11 +111,12 @@ if __name__ == "__main__": else: print("%s: No such file" % (args.f), file=sys.stderr) sys.exit(0) - - if args.o: - options["headers"] = args.o.split(',') + + if args.o == "all": + options["headers"] = ["all"] else: - options["headers"] = None + options["headers"] = args.o.split(',') + try: if args.c == "all": options["categories"] = ["all"] -- cgit v1.2.3 From 6f31168d14ed8931d0550abe310a671e969e5431 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 20:45:39 +0200 Subject: Python: Implement rename of columns --- python/etatsbasen.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index b58fedc..9db040f 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -12,6 +12,14 @@ DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" DEFAULT_COLUMNS = "url_nb,url_en,kommunenummer,orgid,orgstructid,parentid"; +RENAME_HEADERS = { + 'tailid': 'id', + 'email': 'request_email', + 'name_nb': 'name', + 'name_nn': 'name.nn', + 'name_en': 'name.en' + }; + def cleanup_email(string): fix1 = re.sub(r"^mailto:?", "", string) if fix1 == valid_email(fix1): @@ -29,13 +37,6 @@ def valid_email(string): if (email == string and '@' in email): return email -rename = { - 'tailid': 'id', - 'email': 'request_email', - 'name_nb': 'name', - 'name_nn': 'name.nn', - 'name_en': 'name.en' - }; def filter_orgstructid(row, categories): if row == None: @@ -48,6 +49,7 @@ def filter_orgstructid(row, categories): print("Skipping tailid %s: orgstructid not in selected categories (%s not in %s)" % (row['tailid'], row['orgstructid'], categories), file=sys.stderr) return None + def filter_email(row): if row == None: return None @@ -65,6 +67,17 @@ def filter_email(row): return row +def renameHeader(row): + if row == None: + return None + renamed_row = {} + for key in row: + if key in RENAME_HEADERS: + renamed_row[RENAME_HEADERS[key]] = row[key] + else: + renamed_row[key] = row[key] + return renamed_row + def filter_column(row, headers): if row == None: @@ -85,6 +98,7 @@ def printCSV(options): for row in reader: row = filter_orgstructid(row, options["categories"]) row = filter_email(row) + row = renameHeader(row) row = filter_column(row, options["headers"]) if row != None: filtered_rows.append(row) -- cgit v1.2.3 From 263b059b9953417928e90738e69d19bbbce47ba3 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 20:56:41 +0200 Subject: Python: Readme and code of js-version does not use -o the same way, implement code-version (multiple -o) only --- python/etatsbasen.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 9db040f..f56102a 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -10,7 +10,7 @@ VERSION="python-etatsbasen-v0.1" DEFAULT_CATEGORIES = "12,14,17,18,27,33,38,66,68,76" DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" -DEFAULT_COLUMNS = "url_nb,url_en,kommunenummer,orgid,orgstructid,parentid"; +DEFAULT_COLUMNS = ["url_nb","url_en","kommunenummer","orgid","orgstructid","parentid"]; RENAME_HEADERS = { 'tailid': 'id', @@ -110,10 +110,9 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') parser.add_argument('-c', metavar="all|c1[,c2,c3,..]", default=DEFAULT_CATEGORIES, help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) - parser.add_argument('-o', metavar="all|headerName1[,headername2,...] ", default=DEFAULT_COLUMNS, help="Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) + parser.add_argument('-o', action='append', metavar="-o headerName1 [-o headername2 ...] ", help="Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) parser.add_argument('-v', help="Print version (%s) and exit" % (VERSION), action='store_true') args = parser.parse_args() - options = {} if args.v: @@ -126,10 +125,15 @@ if __name__ == "__main__": print("%s: No such file" % (args.f), file=sys.stderr) sys.exit(0) - if args.o == "all": - options["headers"] = ["all"] + if args.o: + for value in args.o: + if "," in value: + # Hard fail if someone uses old syntax + print("Failed to parse \"-o %s\"; Old syntax with comma separated list not supported" % (" -o ".join(args.o)), file=sys.stderr) + sys.exit(0) + options["headers"] = args.o else: - options["headers"] = args.o.split(',') + options["headers"] = DEFAULT_COLUMNS try: if args.c == "all": @@ -139,5 +143,5 @@ if __name__ == "__main__": except ValueError as ve: print("Failed to parse \"-c %s\"; Categories must comma separated list of only integers" % (args.c), file=sys.stderr) sys.exit(0) - + printCSV(options) -- cgit v1.2.3 From 67d1f62a4041451a431ebcef1859b60fb95862f7 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 21:10:17 +0200 Subject: Trim row values --- python/etatsbasen.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index f56102a..6964cd0 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -90,6 +90,14 @@ def filter_column(row, headers): filtered_row[key] = row[key] return filtered_row +def trim_row(row): + if row == None: + return None + trimmed_row = {} + for key in row: + trimmed_row[key] = row[key].strip() + return trimmed_row + def printCSV(options): print(options) with open(options["inputfile"], newline='') as csvfile: @@ -99,6 +107,7 @@ def printCSV(options): row = filter_orgstructid(row, options["categories"]) row = filter_email(row) row = renameHeader(row) + row = trim_row(row) row = filter_column(row, options["headers"]) if row != None: filtered_rows.append(row) -- cgit v1.2.3 From d7981620cbe23639c40286e2a4acd23749840b93 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 21:10:31 +0200 Subject: Placeholdercode for missing functions --- python/etatsbasen.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 6964cd0..3d58525 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -98,6 +98,17 @@ def trim_row(row): trimmed_row[key] = row[key].strip() return trimmed_row + +def add_tags(row): + if row == None: + return None + return row + +def add_url(row): + if row == None: + return None + return row + def printCSV(options): print(options) with open(options["inputfile"], newline='') as csvfile: @@ -108,7 +119,10 @@ def printCSV(options): row = filter_email(row) row = renameHeader(row) row = trim_row(row) + row = add_tags(row) + row = add_url(row) row = filter_column(row, options["headers"]) + if row != None: filtered_rows.append(row) print(filtered_rows) -- cgit v1.2.3 From de8ea0be1ccc8b07c12bf584e53f152c5012c8e6 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 21:11:39 +0200 Subject: Wrap print + sys.exit in fatal, implement same handling of -c as js-code --- python/etatsbasen.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 3d58525..79b6481 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -7,7 +7,7 @@ import csv import re VERSION="python-etatsbasen-v0.1" -DEFAULT_CATEGORIES = "12,14,17,18,27,33,38,66,68,76" +DEFAULT_CATEGORIES = [12,14,17,18,27,33,38,66,68,76] DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" DEFAULT_COLUMNS = ["url_nb","url_en","kommunenummer","orgid","orgstructid","parentid"]; @@ -20,6 +20,10 @@ RENAME_HEADERS = { 'name_en': 'name.en' }; +def fatal(msg): + print(msg, file=sys.stderr) + sys.exit(0) + def cleanup_email(string): fix1 = re.sub(r"^mailto:?", "", string) if fix1 == valid_email(fix1): @@ -131,7 +135,7 @@ def printCSV(options): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') - parser.add_argument('-c', metavar="all|c1[,c2,c3,..]", default=DEFAULT_CATEGORIES, help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) + parser.add_argument('-c', action='append', metavar="-c all|-c 12 -c 14...]", help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) parser.add_argument('-o', action='append', metavar="-o headerName1 [-o headername2 ...] ", help="Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) parser.add_argument('-v', help="Print version (%s) and exit" % (VERSION), action='store_true') @@ -145,26 +149,26 @@ if __name__ == "__main__": if os.path.isfile(args.f): options["inputfile"] = args.f else: - print("%s: No such file" % (args.f), file=sys.stderr) - sys.exit(0) + fatal("%s: No such file" % (args.f)) if args.o: for value in args.o: if "," in value: # Hard fail if someone uses old syntax - print("Failed to parse \"-o %s\"; Old syntax with comma separated list not supported" % (" -o ".join(args.o)), file=sys.stderr) - sys.exit(0) + fatal("Failed to parse \"-o %s\"; Old syntax with comma separated list not supported" % (" -o ".join(args.o))) options["headers"] = args.o else: options["headers"] = DEFAULT_COLUMNS try: - if args.c == "all": - options["categories"] = ["all"] + if args.c[0] == "all": + if len(args.c) == 1: + options["categories"] = ["all"] + else: + fatal("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) else: - options["categories"] = [ int(x) for x in args.c.split(',') ] + options["categories"] = [ int(x) for x in args.c ] except ValueError as ve: - print("Failed to parse \"-c %s\"; Categories must comma separated list of only integers" % (args.c), file=sys.stderr) - sys.exit(0) + print("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) printCSV(options) -- cgit v1.2.3 From 9082362c39ae6ce972298a158cb3f3e88077e619 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:04:57 +0200 Subject: Use js regexp for email, add additional email-fix --- python/etatsbasen.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 79b6481..d3db8a7 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -33,14 +33,14 @@ def cleanup_email(string): for fix2 in split: if fix2 == valid_email(fix2): return fix2 + fix3 = re.sub(r"\.$", "", string) + if fix3 == valid_email(fix3): + return fix3 return False def valid_email(string): - # Think about using https://pypi.python.org/pypi/validate_email ? - name, email = parseaddr(string) - if (email == string and '@' in email): - return email - + if re.match(r"^(([^<>()[\]\\.,;:\s@\"]+(\.[^<>()[\]\\.,;:\s@\"]+)*)|(\".+\"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$", string): + return string def filter_orgstructid(row, categories): if row == None: -- cgit v1.2.3 From a1e163bfd6d65d7216971e1a7db267fc58150858 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:05:48 +0200 Subject: Implement add_url --- python/etatsbasen.py | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index d3db8a7..7f969b8 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -111,6 +111,12 @@ def add_tags(row): def add_url(row): if row == None: return None + if row["url_nb"].strip() != "": + row["home_page"] = row["url_nb"] + elif row ["url_en"].strip() != "": + row["home_page"] = row["url_en"] + else: + row["home_page"] = "" return row def printCSV(options): -- cgit v1.2.3 From 4a0e0ed91626dbb0c122174a76169519d8e163f9 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:06:56 +0200 Subject: -c and -o don't work / don't do what one expects, reimplement them --- python/etatsbasen.py | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 7f969b8..74b95ce 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -120,10 +120,10 @@ def add_url(row): return row def printCSV(options): - print(options) - with open(options["inputfile"], newline='') as csvfile: - reader = csv.DictReader(csvfile, delimiter=',', quotechar='"') + with open(options["inputfile"], "r") as csvfile: + reader = csv.DictReader(csvfile, delimiter=',', quotechar='"', strict=True) filtered_rows = [] + header = reader.fieldnames for row in reader: row = filter_orgstructid(row, options["categories"]) row = filter_email(row) @@ -135,15 +135,20 @@ def printCSV(options): if row != None: filtered_rows.append(row) - print(filtered_rows) + writer = csv.DictWriter(sys.stdout, delimiter=',', quotechar='"', lineterminator="\n", quoting=csv.QUOTE_MINIMAL, fieldnames=options["headers"], extrasaction='raise') + #writer.writeheader() + print("#%s" % (",".join(options["headers"]))) + for row in filtered_rows: + writer.writerow(row) pass if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') - parser.add_argument('-c', action='append', metavar="-c all|-c 12 -c 14...]", help="Categories to include (default: \"%s\")" % (DEFAULT_CATEGORIES)) + parser.add_argument('-c', action='append', metavar="all | -c 12 -c 14 -c ...", help="Categories to include (default: \"%s\")" % ("all")) parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) - parser.add_argument('-o', action='append', metavar="-o headerName1 [-o headername2 ...] ", help="Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) + #parser.add_argument('-o', action='append', metavar="-o headerName1 [-o headername2 ...] ", help="(i don't really work) ... Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) + parser.add_argument('-u', action='append', metavar="headerName1 -u ...", help="Columns and order of columns to output (default: %s)" % (",".join(DEFAULT_COLUMNS))) parser.add_argument('-v', help="Print version (%s) and exit" % (VERSION), action='store_true') args = parser.parse_args() options = {} @@ -157,24 +162,23 @@ if __name__ == "__main__": else: fatal("%s: No such file" % (args.f)) - if args.o: - for value in args.o: - if "," in value: - # Hard fail if someone uses old syntax - fatal("Failed to parse \"-o %s\"; Old syntax with comma separated list not supported" % (" -o ".join(args.o))) - options["headers"] = args.o + if args.u: + options["headers"] = args.u else: options["headers"] = DEFAULT_COLUMNS - try: - if args.c[0] == "all": - if len(args.c) == 1: - options["categories"] = ["all"] + if args.c: + try: + if args.c[0] == "all": + if len(args.c) == 1: + options["categories"] = ["all"] + else: + fatal("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) else: - fatal("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) - else: - options["categories"] = [ int(x) for x in args.c ] - except ValueError as ve: - print("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) + options["categories"] = [ int(x) for x in args.c ] + except ValueError as ve: + print("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) + else: + options["categories"] = ["all"] #DEFAULT_CATEGORIES printCSV(options) -- cgit v1.2.3 From b72ec80e243860e64a7d70a64ee29bf65ae6f7ae Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:07:08 +0200 Subject: Implement add_tags --- python/etatsbasen.py | 1 + 1 file changed, 1 insertion(+) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 74b95ce..bd3462f 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -106,6 +106,7 @@ def trim_row(row): def add_tags(row): if row == None: return None + row['tag_string'] = row['orgstructid'] return row def add_url(row): -- cgit v1.2.3 From 287863f7facc4513bbe889db2abc95c67337265e Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:07:27 +0200 Subject: Use correct DEFAULT_COLUMNS --- python/etatsbasen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index bd3462f..6fc2e25 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -10,7 +10,7 @@ VERSION="python-etatsbasen-v0.1" DEFAULT_CATEGORIES = [12,14,17,18,27,33,38,66,68,76] DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" -DEFAULT_COLUMNS = ["url_nb","url_en","kommunenummer","orgid","orgstructid","parentid"]; +DEFAULT_COLUMNS = ["id","request_email","name","name.nn","name.en","tag_string","home_page"] RENAME_HEADERS = { 'tailid': 'id', -- cgit v1.2.3 From dbb32585ac281e5b536fb6cc1c887eb41bae4eea Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:07:39 +0200 Subject: Use correct default inputfile --- python/etatsbasen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index 6fc2e25..e8ed078 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -8,7 +8,7 @@ import re VERSION="python-etatsbasen-v0.1" DEFAULT_CATEGORIES = [12,14,17,18,27,33,38,66,68,76] -DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv" +DEFAULT_FILENAME = "etatsbasen.csv" DEFAULT_COLUMNS = ["id","request_email","name","name.nn","name.en","tag_string","home_page"] -- cgit v1.2.3 From 6288dcf13aac994faeff47ee7482a0928987cc69 Mon Sep 17 00:00:00 2001 From: Anders Einar Hilden Date: Sun, 5 Jul 2015 23:25:35 +0200 Subject: Set DEFAULT_CATEGORIES to actual value --- python/etatsbasen.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'python/etatsbasen.py') diff --git a/python/etatsbasen.py b/python/etatsbasen.py index e8ed078..df23ec8 100755 --- a/python/etatsbasen.py +++ b/python/etatsbasen.py @@ -7,7 +7,7 @@ import csv import re VERSION="python-etatsbasen-v0.1" -DEFAULT_CATEGORIES = [12,14,17,18,27,33,38,66,68,76] +DEFAULT_CATEGORIES = ["all"] DEFAULT_FILENAME = "etatsbasen.csv" DEFAULT_COLUMNS = ["id","request_email","name","name.nn","name.en","tag_string","home_page"] @@ -146,7 +146,7 @@ def printCSV(options): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Tool for exporting etatsbasen-data to a file that can be imported into alaveteli.') - parser.add_argument('-c', action='append', metavar="all | -c 12 -c 14 -c ...", help="Categories to include (default: \"%s\")" % ("all")) + parser.add_argument('-c', action='append', metavar="all | -c 12 -c 14 -c ...", help="Categories to include (default: \"%s\")" % (",".join(DEFAULT_CATEGORIES))) parser.add_argument('-f', metavar="file", default=DEFAULT_FILENAME, help="File to read from (default: \"%s\")" % (DEFAULT_FILENAME)) #parser.add_argument('-o', action='append', metavar="-o headerName1 [-o headername2 ...] ", help="(i don't really work) ... Include only these headers/columns in output (post-rename)(default: \"%s\")" % (DEFAULT_COLUMNS)) parser.add_argument('-u', action='append', metavar="headerName1 -u ...", help="Columns and order of columns to output (default: %s)" % (",".join(DEFAULT_COLUMNS))) @@ -180,6 +180,6 @@ if __name__ == "__main__": except ValueError as ve: print("Failed to parse \"-c %s\"; Categories must be integers or only \"-c all\"" % (" -c ".join(args.c))) else: - options["categories"] = ["all"] #DEFAULT_CATEGORIES + options["categories"] = DEFAULT_CATEGORIES printCSV(options) -- cgit v1.2.3