Difference between revisions of "Python gzip decompression"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) (New page: See. Category:Python Category:Internet Category:Examples) |
PeterHarding (talk | contribs) |
||
Line 1: | Line 1: | ||
=Python gzip Module= | |||
Using the gzip module to decode gzip- or deflate-compressed web page content... | |||
Adding 'Accept-Encoding: gzip,deflate' to a web request header tells the server it may return the page content compressed (gzip or deflate)... | |||
==Sample Script== | |||
<pre> | |||
#!/usr/bin/env python | |||
import sys | |||
import gzip | |||
import getopt | |||
import urllib2 | |||
import StringIO | |||
#------------------------------------------------------------------------------- | |||
# Page fetched by get_page(), plus the Referer and User-Agent header values
# it sends with the request.
URL = 'http://svtapps/mdcs'
referer = 'http://svtapps/'
uagent = (
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
    '.NET CLR 1.1.4322; .NET CLR 2.0.50727)'
)
#------------------------------------------------------------------------------- | |||
def get_page():
    """Fetch the module-level ``URL`` and return the result of ``decode()``.

    The request is sent with the module-level ``referer`` and ``uagent``
    values and advertises ``Accept-Encoding: gzip,deflate``.  The URL
    reported by the response (``geturl()``) is echoed to standard output as
    ``[[url]]`` before the response is decoded.

    Returns:
        Whatever ``decode()`` returns for the opened response.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]
    usock = opener.open(URL)
    try:
        # Parenthesised single-argument form prints the same text whether
        # ``print`` is a statement (Python 2) or a function.
        print("[[%s]]" % usock.geturl())
        return decode(usock)
    finally:
        # Release the connection even when reading or decompressing fails.
        usock.close()
#------------------------------------------------------------------------------- | |||
def decode(page):
    """Return the body of an HTTP response, undoing gzip/deflate compression.

    Args:
        page: A response object; only ``page.info().get("Content-Encoding")``
            and ``page.read()`` are used.

    Returns:
        The response body as a byte string.  It is decompressed when the
        ``Content-Encoding`` header is ``gzip``, ``x-gzip`` or ``deflate``
        (compared case-insensitively) and returned as read otherwise.

    Raises:
        zlib.error: If a ``deflate`` body is neither a zlib-wrapped nor a raw
            DEFLATE stream.
        IOError: If a ``gzip`` body is not valid gzip data.
    """
    # Imported here so this function does not rely on the file's import block
    # (``zlib`` was used but never imported).
    import io
    import zlib

    encoding = (page.info().get("Content-Encoding") or "").strip().lower()
    content = page.read()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib header;
            # a negative window size tells zlib not to expect one.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    if encoding in ('gzip', 'x-gzip'):
        unzipper = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return unzipper.read()
        finally:
            unzipper.close()

    # Not compressed: hand back the body itself, not the response object.
    return content
#------------------------------------------------------------------------------- | |||
def usage():
    """Write the command-line usage text to standard error."""
    help_lines = ["", "Usage:", "$ get_page.py", ""]
    sys.stderr.write("\n".join(help_lines))
#------------------------------------------------------------------------------- | |||
def main(argv):
    """Parse the command-line options, then fetch and print the page.

    Recognised options:
        -h, --help            write the usage text to stderr and exit 0
        -d, --debug           raise the debug level by one
        -D N, --debug_cnt=N   set the debug level to the integer N
        -v, --verbose         set the verbose flag

    Args:
        argv: Command-line arguments without the program name.

    Side effects:
        Sets the module-level ``debug_lvl`` and ``verbose_flg``, and prints
        the value returned by ``get_page()`` to standard output.

    Exits with status 2 (after writing the usage text) on an unknown option
    or a non-integer ``-D`` value.
    """
    global debug_lvl, verbose_flg

    # Start from known values so ``-d`` can increment the level instead of
    # failing on an unbound name.
    debug_lvl = 0
    verbose_flg = False

    #----- Process command line arguments ----------------------------
    try:
        opts, _args = getopt.getopt(
            argv, "dD:hv", ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # Treat a malformed count like any other bad option.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    page = get_page()
    # Parenthesised single-argument form works as a Python 2 print statement
    # and as a Python 3 function call.
    print(page)
#------------------------------------------------------------------------------- | |||
# Run only when executed as a script; importing the module has no side effects.
if __name__ == "__main__":
    # Drop the program name so main() sees just the options.
    main(sys.argv[1:])
#------------------------------------------------------------------------------- | |||
</pre> | |||
[[Category:Python]] | [[Category:Python]] | ||
[[Category:httplib]] | |||
[[Category:urllib]] | |||
[[Category:Internet]] | [[Category:Internet]] | ||
[[Category:Examples]] | [[Category:Examples]] |
Revision as of 14:32, 12 September 2008
Python gzip Module
Using the gzip module to decode gzip- or deflate-compressed web page content...
Adding 'Accept-Encoding: gzip,deflate' to a web request header tells the server it may return the page content compressed (gzip or deflate)...
Sample Script
#!/usr/bin/env python
"""Fetch a web page, accepting gzip/deflate compression, and print its body."""
import sys
import gzip
import getopt
import io
import zlib

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 provides the same opener API under urllib.request.
    import urllib.request as urllib2

#-------------------------------------------------------------------------------

# Page to fetch, and the Referer / User-Agent header values sent with it.
URL = 'http://svtapps/mdcs'
referer = 'http://svtapps/'
uagent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'

# Option state set by main().
debug_lvl = 0
verbose_flg = False

#-------------------------------------------------------------------------------

def get_page():
    """Fetch ``URL`` and return its body as decoded by ``decode()``.

    The URL reported by the response is echoed to standard output as
    ``[[url]]``.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]
    usock = opener.open(URL)
    try:
        print("[[%s]]" % usock.geturl())
        return decode(usock)
    finally:
        # Release the connection even when reading or decompressing fails.
        usock.close()

#-------------------------------------------------------------------------------

def decode(page):
    """Return the body of an HTTP response, undoing gzip/deflate compression.

    Args:
        page: A response object; only ``page.info().get("Content-Encoding")``
            and ``page.read()`` are used.

    Returns:
        The body as a byte string, decompressed when ``Content-Encoding`` is
        ``gzip``, ``x-gzip`` or ``deflate`` and returned as read otherwise.
    """
    encoding = (page.info().get("Content-Encoding") or "").strip().lower()
    content = page.read()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib header;
            # a negative window size tells zlib not to expect one.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    if encoding in ('gzip', 'x-gzip'):
        unzipper = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return unzipper.read()
        finally:
            unzipper.close()

    # Not compressed: hand back the body itself, not the response object.
    return content

#-------------------------------------------------------------------------------

def usage():
    """Write the command-line usage text to standard error."""
    USAGE = """
Usage:
$ get_page.py
"""
    sys.stderr.write(USAGE)

#-------------------------------------------------------------------------------

def _emit(body):
    """Write the page bytes followed by a newline to standard output."""
    # Python 3 exposes the byte stream as sys.stdout.buffer; Python 2's
    # sys.stdout already accepts byte strings.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(body)
    out.write(b"\n")

#-------------------------------------------------------------------------------

def main(argv):
    """Parse the command-line options, then fetch and print the page.

    Recognised options:
        -h, --help            write the usage text to stderr and exit 0
        -d, --debug           raise the debug level by one
        -D N, --debug_cnt=N   set the debug level to the integer N
        -v, --verbose         set the verbose flag

    Exits with status 2 (after writing the usage text) on an unknown option
    or a non-integer ``-D`` value.
    """
    global debug_lvl, verbose_flg

    debug_lvl = 0
    verbose_flg = False

    #----- Process command line arguments ----------------------------
    try:
        opts, _args = getopt.getopt(
            argv, "dD:hv", ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # Treat a malformed count like any other bad option.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    _emit(get_page())

#-------------------------------------------------------------------------------

if __name__ == "__main__":
    main(sys.argv[1:])

#-------------------------------------------------------------------------------