diff options
author | Petter Reinholdtsen <pere@hungry.com> | 2012-07-13 14:54:10 +0200 |
---|---|---|
committer | Petter Reinholdtsen <pere@hungry.com> | 2012-07-13 14:54:10 +0200 |
commit | 82a02bfae556f1a5709acb43df631108587547b8 (patch) | |
tree | 8d4c5721439ab8343d7415c290cfab3a249a704a | |
parent | aa84ea74af30b388221244b944239e85ea689db2 (diff) |
Add test script to run scrapers locally.
-rwxr-xr-x | run-scraper | 9 | ||||
-rw-r--r-- | testlib/scraperwiki.py | 12 |
2 files changed, 21 insertions, 0 deletions
diff --git a/run-scraper b/run-scraper
new file mode 100755
index 0000000..1f9637b
--- /dev/null
+++ b/run-scraper
@@ -0,0 +1,9 @@
+#!/bin/sh
+#
+# Run scraperwiki scrapers with a fake scraperwiki library, to allow
+# the scrapers to be tested locally.
+
+file=$1
+export PYTHONPATH=`pwd`/testlib
+
+python $file
diff --git a/testlib/scraperwiki.py b/testlib/scraperwiki.py
new file mode 100644
index 0000000..baf2b57
--- /dev/null
+++ b/testlib/scraperwiki.py
@@ -0,0 +1,12 @@
+def scrape(url):
+    print "Scraping %s" % url
+    if -1 != url.find("file://"):
+        f = open(url.replace("file://", ""), "r")
+        content = f.read()
+        f.close()
+        return content
+    else:
+        return ""
+
+def pdftoxml(pdfcontent, options):
+    return pdfcontent