Difference between revisions of "Python gzip decompression"

From PeformIQ Upgrade
Jump to navigation Jump to search
Line 113: Line 113:


[[Category:Python]]
[[Category:Python]]
[[Category:httplib]]
[[Category:. httplib]]
[[Category:urllib]]
[[Category:. urllib]]
[[Category:Internet]]
[[Category:Internet]]
[[Category:Examples]]
[[Category:Examples]]

Revision as of 11:33, 30 September 2008

Python gzip Module

Using the gzip module to decode gzip- or deflate-compressed web page content...

Adding an 'Accept-Encoding: gzip,deflate' header to a web request tells the server that it may return the page content compressed, so the response body must be decompressed before use...

Sample Script

#!/usr/bin/env python

import getopt
import gzip
import io
import StringIO
import sys
import urllib2
import zlib

#-------------------------------------------------------------------------------

# Page to fetch, and the Referer header value sent along with the request.
URL = 'http://svtapps/mdcs'
referer = 'http://svtapps/'

# User-Agent header value sent with the request (an MSIE 6.0 identification).
uagent = (
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
    '.NET CLR 1.1.4322; .NET CLR 2.0.50727)'
)

#-------------------------------------------------------------------------------

def get_page():
   """Fetch the module-level URL and return its (decoded) body.

   The request carries the module-level ``referer`` and ``uagent`` values
   and advertises gzip/deflate support; the response is handed to
   ``decode()`` so the caller gets the page content rather than the
   compressed stream.

   The URL reported by the response is printed to stdout as ``[[url]]``
   before the body is decoded.
   """
   opener = urllib2.build_opener()

   opener.addheaders = [
      ('Referer', referer),
      ('User-Agent', uagent),
      # Tell the server it may compress the response body.
      ('Accept-Encoding', 'gzip,deflate'),
   ]

   usock = opener.open(URL)

   try:
      # Single-argument parenthesised form prints identically on Python 2.
      print("[[%s]]" % usock.geturl())

      return decode(usock)
   finally:
      # Release the connection even if reading or decompressing fails.
      usock.close()

#-------------------------------------------------------------------------------

def decode(page):
    """Return the body of *page*, undoing gzip/deflate content encoding.

    page -- a response object providing ``info()`` (whose result supports
            ``.get(header_name)``) and ``read()``.

    Returns the body as read from the response: decompressed when the
    Content-Encoding header is ``gzip``, ``x-gzip`` or ``deflate``
    (compared case-insensitively), and unchanged for any other or
    missing encoding. The body is always returned, never the response
    object itself, so the result stays usable after the response has
    been closed.

    Raises whatever ``gzip``/``zlib`` raise when the body is not valid
    compressed data.
    """
    # The header may be absent (None) or differ in case/whitespace.
    encoding = (page.info().get("Content-Encoding") or '').strip().lower()
    content = page.read()

    if encoding in ('gzip', 'x-gzip'):
        unzipper = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
        try:
            return unzipper.read()
        finally:
            unzipper.close()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib
            # header; a negative window size tells zlib to expect that.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    return content

#-------------------------------------------------------------------------------

def usage():
   """Write the command-line usage text to standard error."""
   # Spelled out line by line so the blank-looking lines keep their
   # leading whitespace exactly.
   text = "\n".join((
      "",
      "",
      "     Usage:",
      "     ",
      "       $ get_page.py",
      "       ",
      "   ",
   ))

   sys.stderr.write(text)
   
#-------------------------------------------------------------------------------

def main(argv):
   """Parse command-line options, then fetch and print the page.

   argv -- the argument list without the program name (e.g. sys.argv[1:]).

   Options:
     -h, --help         print the usage text and exit with status 0
     -d, --debug        increase the debug level by one
     -D N, --debug_cnt=N  set the debug level to the integer N
     -v, --verbose      set the verbose flag

   An unknown option or a non-integer -D value prints the usage text and
   exits with status 2. The parsed settings are stored in the module
   globals ``debug_lvl`` and ``verbose_flg``.
   """
   global debug_lvl, verbose_flg

   # Start from known values so "-d" has something to increment.
   debug_lvl   = 0
   verbose_flg = False

   #----- Process command line arguments ----------------------------

   try:
      opts, args = getopt.getopt(argv, "dD:hv",
              ["debug", "debug_cnt=", "help", "verbose"])
   except getopt.GetoptError:
      usage()
      sys.exit(2)

   for opt, arg in opts:
      if opt in ("-h", "--help"):
         usage()
         sys.exit(0)
      elif opt in ("-d", "--debug"):
         debug_lvl += 1
      elif opt in ("-D", "--debug_cnt"):
         try:
            debug_lvl = int(arg)
         except ValueError:
            # Treat a malformed level like any other bad command line.
            usage()
            sys.exit(2)
      elif opt in ("-v", "--verbose"):
         verbose_flg = True

   page = get_page()

   # Single-argument parenthesised form prints identically on Python 2.
   print(page)

#-------------------------------------------------------------------------------

# Run main() only when executed as a script; the program name is dropped
# so main() receives just the option arguments.
if __name__ == "__main__":
   main(sys.argv[1:])

#-------------------------------------------------------------------------------