import httplib
import htmllib
import formatter
import re, sys
match = None
if len(sys.argv) > 1:
url = sys.argv[1]
match = re.match('^http:\/\/([^\/]+)(\/.*)$', url)
if match:
host, path = match.groups()
else:
print "Usage: python ", sys.argv[0], " http://host/[path]"
sys.exit(1)
h = httplib.HTTP(host)
h.putrequest('GET', path)
h.putheader('Accept', 'text/html')
h.putheader('Accept', 'text/plain')
h.endheaders()
errcode, errmsg, headers = h.getreply()
if errcode == 200:
data = h.getfile().read()
f = formatter.AbstractFormatter(formatter.DumbWriter())
parser = htmllib.HTMLParser(f)
parser.feed(data)
parser.close()
else:
print errcode, ": Failed to fetch", url
sys.exit(1)