about | summary | refs | log | tree | commit | diff | stats
path: root/python/etatsbasen.py
diff options
context:
space:
mode:
author: Anders Einar Hilden <hildenae@gmail.com> 2015-07-05 20:45:39 +0200
committer: Anders Einar Hilden <hildenae@gmail.com> 2015-07-05 20:45:39 +0200
commit: 6f31168d14ed8931d0550abe310a671e969e5431 (patch)
tree: ecb507fc055d68e490b40c9f31adaf9b8494bba3 /python/etatsbasen.py
parent: 45ed93506f8e5f4481e27316c920267fde1287bc (diff)
Python: Implement rename of columns
Diffstat (limited to 'python/etatsbasen.py')
-rwxr-xr-x python/etatsbasen.py 28
1 files changed, 21 insertions, 7 deletions
diff --git a/python/etatsbasen.py b/python/etatsbasen.py
index b58fedc..9db040f 100755
--- a/python/etatsbasen.py
+++ b/python/etatsbasen.py
@@ -12,6 +12,14 @@ DEFAULT_FILENAME = "etatsbasen-small.csv" # "etatsbasen.csv"
DEFAULT_COLUMNS = "url_nb,url_en,kommunenummer,orgid,orgstructid,parentid";
+RENAME_HEADERS = {  # maps a row key to the key renameHeader() emits instead
+    'tailid': 'id',
+    'email': 'request_email',
+    'name_nb': 'name',
+    'name_nn': 'name.nn',
+    'name_en': 'name.en',
+}
+
def cleanup_email(string):
fix1 = re.sub(r"^mailto:?", "", string)
if fix1 == valid_email(fix1):
@@ -29,13 +37,6 @@ def valid_email(string):
if (email == string and '@' in email):
return email
-rename = {
- 'tailid': 'id',
- 'email': 'request_email',
- 'name_nb': 'name',
- 'name_nn': 'name.nn',
- 'name_en': 'name.en'
- };
def filter_orgstructid(row, categories):
if row == None:
@@ -48,6 +49,7 @@ def filter_orgstructid(row, categories):
print("Skipping tailid %s: orgstructid not in selected categories (%s not in %s)" % (row['tailid'], row['orgstructid'], categories), file=sys.stderr)
return None
+
def filter_email(row):
if row == None:
return None
@@ -65,6 +67,17 @@ def filter_email(row):
return row
+def renameHeader(row):
+    """Return a copy of row with keys renamed per RENAME_HEADERS.
+
+    A None row is passed through as None. Keys that have no entry in
+    RENAME_HEADERS are copied unchanged.
+    """
+    if row is None:
+        return None
+    # .get(key, key) falls back to the original key when no rename exists.
+    return {RENAME_HEADERS.get(key, key): value for key, value in row.items()}
+
def filter_column(row, headers):
if row == None:
@@ -85,6 +98,7 @@ def printCSV(options):
for row in reader:
row = filter_orgstructid(row, options["categories"])
row = filter_email(row)
+ row = renameHeader(row)
row = filter_column(row, options["headers"])
if row != None:
filtered_rows.append(row)