Difference between revisions of "Python gzip decompression"

From PeformIQ Upgrade
Jump to navigation Jump to search
 
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
See.
=Python gzip Module=
 
Using the gzip module to decode gzip- or deflate-compressed web page content...
 
Adding 'Accept-Encoding: gzip,deflate' to a web request's headers tells the server that it may return the page content compressed (gzip or deflate), so the client must decompress it...
 
==Sample Script==
 
<pre>
#!/usr/bin/env python
 
import sys
import gzip
import getopt
import urllib2
import StringIO
 
#-------------------------------------------------------------------------------
 
# Page fetched by get_page().
URL = 'http://svtapps/mdcs'

# Value sent in the 'Referer' request header.
referer = 'http://svtapps/'

# Value sent in the 'User-Agent' request header (split only for line length;
# the two pieces concatenate into a single string).
uagent = (
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
    '.NET CLR 1.1.4322; .NET CLR 2.0.50727)'
)
 
#-------------------------------------------------------------------------------
 
def get_page():
    """Fetch ``URL`` and return its (decompressed) content.

    The request carries the module-level ``referer`` and ``uagent`` values
    plus an ``Accept-Encoding: gzip,deflate`` header. The URL reported by the
    response (``geturl()``) is printed between double square brackets before
    the body is handed to ``decode()``.

    Returns:
        Whatever ``decode()`` returns for the opened response.
    """
    opener = urllib2.build_opener()

    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]

    usock = opener.open(URL)

    # try/finally so the connection is released even when reading or
    # decompressing the body raises.
    try:
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("[[%s]]" % usock.geturl())

        return decode(usock)
    finally:
        usock.close()
 
#-------------------------------------------------------------------------------
 
def decode(page):
    """Return the body of an HTTP response with its content encoding removed.

    Args:
        page: Response object exposing ``info()`` (returning a header mapping
            with a ``get`` method) and ``read()``.

    Returns:
        The response body as a byte string. Bodies labelled ``gzip`` or
        ``x-gzip`` are gunzipped, bodies labelled ``deflate`` are inflated,
        and anything else is returned exactly as read.

    Raises:
        IOError: (or a subclass) if a gzip-labelled body is not valid gzip.
        zlib.error: if a deflate-labelled body cannot be inflated.
    """
    # Imported here because the module never imported zlib (the deflate
    # branch used to fail with NameError); io.BytesIO wraps the raw bytes.
    import io
    import zlib

    # Header values are case-insensitive and may be absent altogether.
    encoding = (page.info().get("Content-Encoding") or '').strip().lower()
    content = page.read()

    if encoding in ('gzip', 'x-gzip'):
        return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib
            # header; a negative window size tells zlib to expect that.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    # Not compressed: return the body itself, never the response object.
    return content
 
#-------------------------------------------------------------------------------
 
def usage():
    """Write the command-line help text to standard error.

    The text lists every option accepted by ``main()``'s getopt
    specification, so ``-h`` and option errors show the caller what is
    actually supported.
    """
    USAGE = """

    Usage:

      $ get_page.py [-d] [-D LEVEL] [-v] [-h]

    Options:

      -d, --debug            increase the debug level by one
      -D, --debug_cnt=LEVEL  set the debug level to LEVEL (an integer)
      -v, --verbose          set the verbose flag
      -h, --help             show this message and exit

"""

    sys.stderr.write(USAGE)
 
#-------------------------------------------------------------------------------
 
def main(argv):
    """Parse the command line, fetch the page and print it.

    Args:
        argv: Command-line arguments without the program name
            (e.g. ``sys.argv[1:]``).

    Recognised options: ``-d/--debug`` (increment the debug level),
    ``-D/--debug_cnt N`` (set the debug level), ``-v/--verbose`` (set the
    module-level ``verbose_flg``) and ``-h/--help``.

    Exits with status 2 after printing the usage text when the options are
    invalid, and with status 0 after ``-h``.
    """
    # verbose_flg is the only module-level name this function assigns; the
    # other names formerly declared global were never written here.
    global verbose_flg

    # Must start at a known value: '-d' increments it, which previously
    # raised UnboundLocalError. Nothing in this function reads it yet.
    debug_lvl = 0

    #----- Process command line arguments ----------------------------

    try:
        opts, _args = getopt.getopt(argv, "dD:hv",
                                    ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # Treat a non-numeric level like any other bad option.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    page = get_page()

    # Single-argument parenthesised form works as both the Python 2 print
    # statement and the Python 3 print function.
    print(page)
 
#-------------------------------------------------------------------------------
 
# Script entry point: run main() with the command-line arguments (program
# name stripped) only when executed directly, not when imported.
if __name__ == "__main__":
  main(sys.argv[1:])
 
#-------------------------------------------------------------------------------
</pre>


[[Category:Python]]
[[Category:Python]]
[[Category:Python httplib]]
[[Category:Python urllib]]
[[Category:Internet]]
[[Category:Internet]]
[[Category:Examples]]
[[Category:Examples]]

Latest revision as of 14:05, 1 August 2015

Python gzip Module

Using the gzip module to decode gzip- or deflate-compressed web page content...

Adding 'Accept-Encoding: gzip,deflate' to a web request's headers tells the server that it may return the page content compressed (gzip or deflate), so the client must decompress it...

Sample Script

#!/usr/bin/env python

import sys
import gzip
import getopt
import urllib2
import StringIO

#-------------------------------------------------------------------------------

# Page fetched by get_page().
URL = 'http://svtapps/mdcs'

# Value sent in the 'Referer' request header.
referer = 'http://svtapps/'

# Value sent in the 'User-Agent' request header (split only for line length;
# the two pieces concatenate into a single string).
uagent = (
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; '
    '.NET CLR 1.1.4322; .NET CLR 2.0.50727)'
)

#-------------------------------------------------------------------------------

def get_page():
    """Fetch ``URL`` and return its (decompressed) content.

    The request carries the module-level ``referer`` and ``uagent`` values
    plus an ``Accept-Encoding: gzip,deflate`` header. The URL reported by the
    response (``geturl()``) is printed between double square brackets before
    the body is handed to ``decode()``.

    Returns:
        Whatever ``decode()`` returns for the opened response.
    """
    opener = urllib2.build_opener()

    opener.addheaders = [
        ('Referer', referer),
        ('User-Agent', uagent),
        ('Accept-Encoding', 'gzip,deflate'),
    ]

    usock = opener.open(URL)

    # try/finally so the connection is released even when reading or
    # decompressing the body raises.
    try:
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("[[%s]]" % usock.geturl())

        return decode(usock)
    finally:
        usock.close()

#-------------------------------------------------------------------------------

def decode(page):
    """Return the body of an HTTP response with its content encoding removed.

    Args:
        page: Response object exposing ``info()`` (returning a header mapping
            with a ``get`` method) and ``read()``.

    Returns:
        The response body as a byte string. Bodies labelled ``gzip`` or
        ``x-gzip`` are gunzipped, bodies labelled ``deflate`` are inflated,
        and anything else is returned exactly as read.

    Raises:
        IOError: (or a subclass) if a gzip-labelled body is not valid gzip.
        zlib.error: if a deflate-labelled body cannot be inflated.
    """
    # Imported here because the module never imported zlib (the deflate
    # branch used to fail with NameError); io.BytesIO wraps the raw bytes.
    import io
    import zlib

    # Header values are case-insensitive and may be absent altogether.
    encoding = (page.info().get("Content-Encoding") or '').strip().lower()
    content = page.read()

    if encoding in ('gzip', 'x-gzip'):
        return gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()

    if encoding == 'deflate':
        try:
            return zlib.decompress(content)
        except zlib.error:
            # Some servers send a raw DEFLATE stream without the zlib
            # header; a negative window size tells zlib to expect that.
            return zlib.decompress(content, -zlib.MAX_WBITS)

    # Not compressed: return the body itself, never the response object.
    return content

#-------------------------------------------------------------------------------

def usage():
    """Write the command-line help text to standard error.

    The text lists every option accepted by ``main()``'s getopt
    specification, so ``-h`` and option errors show the caller what is
    actually supported.
    """
    USAGE = """

    Usage:

      $ get_page.py [-d] [-D LEVEL] [-v] [-h]

    Options:

      -d, --debug            increase the debug level by one
      -D, --debug_cnt=LEVEL  set the debug level to LEVEL (an integer)
      -v, --verbose          set the verbose flag
      -h, --help             show this message and exit

"""

    sys.stderr.write(USAGE)
   
#-------------------------------------------------------------------------------

def main(argv):
    """Parse the command line, fetch the page and print it.

    Args:
        argv: Command-line arguments without the program name
            (e.g. ``sys.argv[1:]``).

    Recognised options: ``-d/--debug`` (increment the debug level),
    ``-D/--debug_cnt N`` (set the debug level), ``-v/--verbose`` (set the
    module-level ``verbose_flg``) and ``-h/--help``.

    Exits with status 2 after printing the usage text when the options are
    invalid, and with status 0 after ``-h``.
    """
    # verbose_flg is the only module-level name this function assigns; the
    # other names formerly declared global were never written here.
    global verbose_flg

    # Must start at a known value: '-d' increments it, which previously
    # raised UnboundLocalError. Nothing in this function reads it yet.
    debug_lvl = 0

    #----- Process command line arguments ----------------------------

    try:
        opts, _args = getopt.getopt(argv, "dD:hv",
                                    ["debug", "debug_cnt=", "help", "verbose"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif opt in ("-d", "--debug"):
            debug_lvl += 1
        elif opt in ("-D", "--debug_cnt"):
            try:
                debug_lvl = int(arg)
            except ValueError:
                # Treat a non-numeric level like any other bad option.
                usage()
                sys.exit(2)
        elif opt in ("-v", "--verbose"):
            verbose_flg = True

    page = get_page()

    # Single-argument parenthesised form works as both the Python 2 print
    # statement and the Python 3 print function.
    print(page)

#-------------------------------------------------------------------------------

# Script entry point: run main() with the command-line arguments (program
# name stripped) only when executed directly, not when imported.
if __name__ == "__main__":
   main(sys.argv[1:])

#-------------------------------------------------------------------------------