blob: 901acecfea7bee59dc090c5455a5fb98c9f29263 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
|
import urllib2
def scrape(url):
    """Return the raw content found at *url*.

    URLs that start with ``file://`` are read from the local filesystem:
    the leading scheme is stripped and the remainder is used as the path.
    Every other URL is fetched with ``urllib2.urlopen``.

    Args:
        url: Location to read, e.g. ``file:///tmp/page.html`` or
            ``http://example.com/``.

    Returns:
        Whatever ``read()`` yields for the opened file or HTTP response.

    Raises:
        IOError: If the local file cannot be opened or read.
        urllib2.URLError: If the remote fetch fails.
    """
    # Parenthesised single-argument form prints the same text whether it is
    # parsed as a Python 2 print statement or a print() call.
    print("Scraping %s" % url)

    file_scheme = "file://"
    # Only a *leading* scheme marks a local file; a "file://" that merely
    # appears inside another URL (e.g. in a query string) must not divert
    # the request to the filesystem.
    if url.startswith(file_scheme):
        # Strip just the prefix so a later "file://" in the path survives.
        local_path = url[len(file_scheme):]
        with open(local_path, "r") as handle:
            return handle.read()

    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # The response object is not guaranteed to be a context manager,
        # so close it explicitly even when read() raises.
        response.close()
def pdftoxml(pdfcontent, options):
    """Return *pdfcontent* unchanged.

    No conversion is performed here; the input object is handed straight
    back to the caller.
    NOTE(review): presumably a placeholder for a real PDF-to-XML
    converter -- confirm against callers.

    Args:
        pdfcontent: The PDF data to (nominally) convert.
        options: Accepted for interface compatibility; currently ignored.

    Returns:
        The very same object that was passed as *pdfcontent*.
    """
    passthrough = pdfcontent
    return passthrough
def swimport(scrapername):
    """Return ``None`` for any *scrapername*.

    The argument is not inspected and nothing is imported or loaded.
    NOTE(review): looks like a stub standing in for a real scraper-import
    helper -- confirm against callers.

    Args:
        scrapername: Name of the scraper to import; currently ignored.

    Returns:
        Always ``None``.
    """
    # Explicit None keeps the "nothing was imported" result obvious.
    return None
|