Difference between revisions of "Python gzip decompression"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) (New page: See. Category:Python Category:Internet Category:Examples) |
PeterHarding (talk | contribs) |
||
| Line 1: | Line 1: | ||
=Python gzip Module= | |||
Using the gzip module to decode gzip- or deflate-compressed web page content... | |||
Adding 'Accept-Encoding: gzip,deflate' to a web request header tells the server it may return the page content compressed (gzip or deflate)... | |||
==Sample Script== | |||
<pre> | |||
#!/usr/bin/env python | |||
import sys | |||
import gzip | |||
import getopt | |||
import urllib2 | |||
import StringIO | |||
#------------------------------------------------------------------------------- | |||
URL = 'http://svtapps/mdcs' | |||
referer = 'http://svtapps/' | |||
uagent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)' | |||
#------------------------------------------------------------------------------- | |||
def get_page(): | |||
opener = urllib2.build_opener() | |||
opener.addheaders = [ | |||
('Referer', referer), | |||
('User-Agent', uagent), | |||
('Accept-Encoding', 'gzip,deflate') | |||
] | |||
usock = opener.open(URL) | |||
url = usock.geturl() | |||
print "[[%s]]" % url | |||
page = decode(usock) | |||
usock.close() | |||
return page | |||
#------------------------------------------------------------------------------- | |||
def decode(page): | |||
encoding = page.info().get("Content-Encoding") | |||
if encoding in ('gzip', 'x-gzip', 'deflate'): | |||
content = page.read() | |||
if encoding == 'deflate': | |||
data = StringIO.StringIO(zlib.decompress(content)) | |||
else: | |||
data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content)) | |||
page = data.read() | |||
return page | |||
#------------------------------------------------------------------------------- | |||
def usage(): | |||
USAGE = """ | |||
Usage: | |||
$ get_page.py | |||
""" | |||
sys.stderr.write(USAGE) | |||
#------------------------------------------------------------------------------- | |||
def main(argv): | |||
global debug_flg, verbose_flg, sundry_flg, id, no_orders, no_sundries | |||
loop_cnt = 1 | |||
examine_flg = False | |||
#----- Process command line arguments ---------------------------- | |||
try: | |||
opts, args = getopt.getopt(argv, "dD:hv", | |||
["debug", "debug_cnt=", "help","verbose"]) | |||
except getopt.GetoptError: | |||
usage() | |||
sys.exit(2) | |||
else: | |||
for opt, arg in opts: | |||
if opt in ("-h", "--help"): | |||
usage() | |||
sys.exit(0) | |||
elif opt in ("-d", "--debug"): | |||
debug_lvl += 1 | |||
elif opt in ("-D", "--debug_cnt"): | |||
debug_lvl = int(arg) | |||
elif opt in ("-v", "--verbose"): | |||
verbose_flg = True | |||
page = get_page() | |||
print page | |||
#------------------------------------------------------------------------------- | |||
if __name__ == "__main__": | |||
main(sys.argv[1:]) | |||
#------------------------------------------------------------------------------- | |||
</pre> | |||
[[Category:Python]] | [[Category:Python]] | ||
[[Category:httplib]] | |||
[[Category:urllib]] | |||
[[Category:Internet]] | [[Category:Internet]] | ||
[[Category:Examples]] | [[Category:Examples]] | ||
Revision as of 14:32, 12 September 2008
Python gzip Module
Using the gzip module to decode gzip- or deflate-compressed web page content...
Adding 'Accept-Encoding: gzip,deflate' to a web request header tells the server it may return the page content compressed (gzip or deflate)...
Sample Script
#!/usr/bin/env python
import getopt
import gzip
import io
import StringIO
import sys
import urllib2
import zlib
#-------------------------------------------------------------------------------
# Target page for the sample request.
URL = 'http://svtapps/mdcs'
# Value sent as the Referer request header by get_page().
referer = 'http://svtapps/'
# Value sent as the User-Agent request header by get_page(); split across two
# literals only to keep the line length manageable.
uagent = (
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
    '.NET CLR 1.1.4322; .NET CLR 2.0.50727)'
)
#-------------------------------------------------------------------------------
def get_page():
    """Fetch URL, advertising gzip/deflate support, and return the page.

    The request carries the module-level ``referer`` and ``uagent`` values
    plus an ``Accept-Encoding: gzip,deflate`` header.  The final URL (after
    any redirects, as reported by ``geturl()``) is printed inside double
    square brackets before the body is processed.

    Returns:
        Whatever ``decode()`` produces for the response.

    Raises:
        Any exception raised by ``opener.open()`` or ``decode()``; the
        response is closed first in either case.
    """
    opener = urllib2.build_opener()
    # Assigning (rather than appending) replaces the opener's default headers.
    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]
    usock = opener.open(URL)
    try:
        # Parenthesised single-argument form prints the same under Python 2.
        print("[[%s]]" % usock.geturl())
        return decode(usock)
    finally:
        # Close the connection even when decoding fails.
        usock.close()
#-------------------------------------------------------------------------------
def decode(page):
    """Return the body of an HTTP response, decompressing it if required.

    Args:
        page: a urllib2-style response object.  It must provide ``info()``,
            whose result supports ``get(header_name)``, and ``read()``.

    Returns:
        The response payload as a byte string.  Bodies whose
        Content-Encoding is ``gzip`` or ``x-gzip`` are gunzipped, ``deflate``
        bodies are inflated, and any other (or missing) encoding yields the
        bytes exactly as read.

    Raises:
        IOError / zlib.error: if the body is not valid for the encoding the
            server declared.
    """
    # Header values are case-insensitive tokens; tolerate a missing header
    # and stray whitespace as well.
    encoding = (page.info().get("Content-Encoding") or '').strip().lower()
    content = page.read()

    if encoding in ('gzip', 'x-gzip'):
        return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()

    if encoding == 'deflate':
        try:
            # Standard form: deflate data wrapped in a zlib header/trailer.
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a bare deflate stream with no zlib wrapper;
            # a negative window size tells zlib not to expect a header.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    # Not compressed: hand back the body itself, not the response object.
    return content
#-------------------------------------------------------------------------------
def usage():
    """Write the command-line usage summary to standard error."""
    sys.stderr.write("""
Usage:
$ get_page.py
""")
#-------------------------------------------------------------------------------
def main(argv):
    """Parse command-line options, then fetch and print the page.

    Args:
        argv: list of command-line arguments, without the program name.

    Recognised options:
        -h, --help           print usage to stderr and exit with status 0
        -d, --debug          increase the debug level by one
        -D N, --debug_cnt=N  set the debug level to the integer N
        -v, --verbose        set the module-level verbose flag

    An unknown option, or a non-integer value for -D, prints the usage text
    and exits with status 2.

    Side effects:
        Sets the module-level ``debug_lvl`` (always) and ``verbose_flg``
        (only when -v is given), then prints the value returned by
        ``get_page()``.
    """
    global debug_lvl, verbose_flg

    # Must start from a defined value so that "-d" can increment it.
    debug_lvl = 0

    #----- Process command line arguments ----------------------------
    try:
        opts, _args = getopt.getopt(argv, "dD:hv",
                                    ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # Treat a malformed level like any other bad option.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    page = get_page()
    # Parenthesised single-argument form prints the same under Python 2.
    print(page)
#-------------------------------------------------------------------------------
# Run the command-line interface only when executed as a script, passing
# every argument after the program name.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
#-------------------------------------------------------------------------------