Using Python urllib2
Jump to navigation
Jump to search
Example
#!/usr/bin/env python
import getopt
import gzip
import io
import StringIO
import sys
import urllib2
import zlib
#-------------------------------------------------------------------------------
# Page to fetch, plus the Referer and User-Agent header values sent with it.
URL = "http://svtapps/mdcs"
referer = "http://svtapps/"
uagent = (
    "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; "
    ".NET CLR 1.1.4322; .NET CLR 2.0.50727)"
)
#-------------------------------------------------------------------------------
def get_page():
    """Fetch URL and return the response body, decompressed if necessary.

    The request carries the module-level ``referer`` and ``uagent`` values
    as its Referer and User-Agent headers and advertises gzip/deflate
    support.  The final URL reported by the response (``geturl()``) is
    printed inside double square brackets before the body is decoded.

    Returns:
        Whatever ``decode()`` returns for the response.

    Raises:
        Anything ``opener.open()`` or ``decode()`` raises; once the response
        has been opened it is closed before the exception propagates.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]
    usock = opener.open(URL)
    try:
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement.
        print("[[%s]]" % usock.geturl())
        return decode(usock)
    finally:
        # Release the connection even when decoding fails.
        usock.close()
#-------------------------------------------------------------------------------
def decode(page):
    """Return the body of *page*, undoing any gzip/deflate content encoding.

    Args:
        page: Response-like object providing ``info()`` (whose result has a
            ``get()`` for header lookup) and ``read()``.

    Returns:
        The body read from *page*: decompressed when the Content-Encoding
        header is ``gzip``, ``x-gzip`` or ``deflate`` (compared
        case-insensitively), otherwise exactly as read.

    Raises:
        zlib.error: If a ``deflate`` body is neither a zlib-wrapped stream
            nor a raw deflate stream.
    """
    # The header may be absent (None) or differ in case/whitespace.
    encoding = (page.info().get("Content-Encoding") or "").strip().lower()
    # Always read the body so callers receive content, never the response
    # object itself, even when nothing is compressed.
    content = page.read()
    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw deflate stream with no zlib header.
            return zlib.decompress(content, -zlib.MAX_WBITS)
    if encoding in ('gzip', 'x-gzip'):
        return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()
    return content
#-------------------------------------------------------------------------------
def usage():
    """Write the command-line usage summary to standard error."""
    help_text = "\nUsage:\n$ get_page.py\n"
    sys.stderr.write(help_text)
#-------------------------------------------------------------------------------
def main(argv):
    """Parse command-line options, then fetch and print the page.

    Args:
        argv: Argument list without the program name (``sys.argv[1:]``).

    Options:
        -h, --help            Show usage and exit with status 0.
        -d, --debug           Increase the debug level by one.
        -D N, --debug_cnt=N   Set the debug level to the integer N.
        -v, --verbose         Set the verbose flag.

    An unknown option or a non-integer debug count shows usage and exits
    with status 2.  The parsed settings are stored in the module globals
    ``debug_lvl`` and ``verbose_flg``.
    """
    global debug_lvl, verbose_flg
    # Give both settings a defined starting value: "-d" increments
    # debug_lvl, which fails on an unbound name otherwise.
    debug_lvl = 0
    verbose_flg = False
    #----- Process command line arguments ----------------------------
    try:
        opts, _args = getopt.getopt(
            argv, "dD:hv", ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # A non-numeric count is a usage error, not a traceback.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True
    page = get_page()
    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement.
    print(page)
#-------------------------------------------------------------------------------
# Run main() with the command-line arguments (program name excluded) only
# when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main(sys.argv[1:])
#-------------------------------------------------------------------------------