diff options
-rwxr-xr-x | run-scraper | 17 |
-rw-r--r-- | testlib/scraperwiki.py | 22 |
2 files changed, 39 insertions, 0 deletions
diff --git a/run-scraper b/run-scraper
new file mode 100755
index 0000000..1f9637b
--- /dev/null
+++ b/run-scraper
@@ -0,0 +1,17 @@
+#!/bin/sh
+#
+# Run scraperwiki scrapers with a fake scraperwiki library, to allow
+# the scrapers to be tested locally.
+#
+# Usage: run-scraper <scraper.py>   (run from the directory containing testlib/)
+
+if [ "$#" -lt 1 ]; then
+    echo "Usage: $0 <scraper.py>" >&2
+    exit 1
+fi
+
+file=$1
+# Quote the substitution so a working directory containing spaces survives.
+export PYTHONPATH="$(pwd)/testlib"
+
+python "$file"
diff --git a/testlib/scraperwiki.py b/testlib/scraperwiki.py
new file mode 100644
index 0000000..baf2b57
--- /dev/null
+++ b/testlib/scraperwiki.py
@@ -0,0 +1,22 @@
+"""Fake scraperwiki library so scrapers can be tested locally."""
+
+_FILE_SCHEME = "file://"
+
+
+def scrape(url):
+    """Return the content behind url without using the network.
+
+    URLs starting with file:// are read from the local filesystem; any
+    other URL yields an empty string.
+    """
+    print("Scraping %s" % url)
+    if not url.startswith(_FILE_SCHEME):
+        return ""
+    # Strip only the leading scheme; the rest of the path is kept verbatim.
+    with open(url[len(_FILE_SCHEME):], "r") as f:
+        return f.read()
+
+
+def pdftoxml(pdfcontent, options=""):
+    """Return pdfcontent unchanged; options is accepted but ignored."""
+    return pdfcontent