Using Python urllib2
Jump to navigation
Jump to search
Example
#!/usr/bin/env python
"""Fetch one web page with urllib2 and print its decompressed body.

The request carries a browser-like Referer and User-Agent and advertises
gzip/deflate support; a compressed response is inflated before it is
printed.  Module names are Python 2's, with a fallback import so the same
script also runs on Python 3.
"""

import sys
import gzip
import zlib
import getopt
import io

try:
    import urllib2
except ImportError:
    # Python 3 moved the urllib2 API (build_opener, ...) to urllib.request.
    import urllib.request as urllib2

#-------------------------------------------------------------------------------

URL = 'http://svtapps/mdcs'
referer = 'http://svtapps/'
uagent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'

# Option state filled in by main() from the command line.
debug_lvl = 0
verbose_flg = False

#-------------------------------------------------------------------------------

def get_page():
    """Request URL and return the response body with compression removed.

    Also prints the URL reported by the response as ``[[url]]`` on stdout.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]
    usock = opener.open(URL)
    try:
        print("[[%s]]" % usock.geturl())
        return decode(usock)
    finally:
        # Close the connection even when reading or inflating fails.
        usock.close()

#-------------------------------------------------------------------------------

def decode(page):
    """Return the body of ``page`` as bytes, undoing gzip/deflate encoding.

    ``page`` is a response object such as ``opener.open()`` returns: it must
    provide ``info()`` (headers with a ``get`` method) and ``read()``.
    A response without a recognised Content-Encoding is returned as read.
    """
    encoding = (page.info().get("Content-Encoding") or "").strip().lower()
    content = page.read()

    if encoding in ('gzip', 'x-gzip'):
        return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib
            # header; a negative wbits tells zlib to expect exactly that.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    return content

#-------------------------------------------------------------------------------

def _write_body(data):
    """Write the body bytes followed by a newline to stdout."""
    # Flush pending text output first so it stays ahead of the raw bytes.
    sys.stdout.flush()
    # Python 3 needs the underlying binary stream for bytes; Python 2's
    # sys.stdout accepts them directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    out.write(data)
    out.write(b"\n")
    out.flush()

#-------------------------------------------------------------------------------

def usage():
    """Write the usage text to stderr."""
    USAGE = """
Usage:
    $ get_page.py
"""
    sys.stderr.write(USAGE)

#-------------------------------------------------------------------------------

def main(argv):
    """Parse the command-line options, then fetch and print the page.

    argv -- argument list without the program name (``sys.argv[1:]``).

    Exits with status 2 on an unknown option or a non-integer ``-D`` value,
    and with status 0 after ``-h``/``--help``.
    """
    global debug_lvl, verbose_flg

    #----- Process command line arguments ----------------------------
    try:
        opts, _args = getopt.getopt(
            argv, "dD:hv", ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    _write_body(get_page())

#-------------------------------------------------------------------------------

if __name__ == "__main__":
    main(sys.argv[1:])

#-------------------------------------------------------------------------------