Difference between revisions of "Http get.py"

From PeformIQ Upgrade
Jump to navigation Jump to search
Line 226: Line 226:


Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).
Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).


def decode (page):
def decode (page):
Line 240: Line 239:
     return page
     return page


 
"""
</pre>
</pre>


[[Category:Python]]
[[Category:Python]]
[[Category:. httplib]]
[[Category:. httplib]]

Revision as of 10:10, 4 December 2008

#!/usr/bin/env python

import re
import zlib
import gzip
import socket
import StringIO

#-----------------------------------------------------------------------

# Host name and TCP port that setup() connects to.
SVTAPPS  = "svtapps"
PORT     = 80

# Pre-compiled pattern matching the literal text 'Transfer-Encoding'.
# NOTE(review): not referenced anywhere in the visible code -- confirm it is still needed.
p_Encoding = re.compile('Transfer-Encoding')

#-----------------------------------------------------------------------

#   'Accept'             : 'text/plain, text/html',
#   'Accept-Encoding'    : 'gzip, deflate',

# The two header dictionaries below are disabled: they sit inside a bare
# string literal, so nothing in it is executed.
# NOTE(review): before re-enabling, check that
#   - SITE is defined (no definition is visible in this part of the file),
#   - the post_headers keys other than 'Host' end in ':' while the
#     get_headers keys do not,
#   - the get_headers 'User-Agent' value lacks the closing ')' that the
#     post_headers value has.
"""
get_headers = {
   'Accept-Encoding'    : 'gzip, deflate',
   'Accept'             : '*/*',
   'Accept-Language'    : 'en-au',
   'Host'               : SITE,
   'Connection'         : 'Keep-Alive',
   'User-Agent'         :  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727'
}


post_headers = {
   'Accept:'            : 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*',
   'Accept-Language:'   : 'en-au',
   'Content-Type:'      : 'application/x-www-form-urlencoded',
   'Accept-Encoding:'   : 'gzip, deflate',
   'User-Agent:'        : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
   'Host'               : SITE,
   'Connection:'        : 'Keep-Alive',
   'Cache-Control:'     : 'no-cache',
}
"""

#-----------------------------------------------------------------------

def dump(x):
   """Print each character of x next to its code in hexadecimal.

   x -- the string to dump.  One line is printed per character, made of
        the character's repr() followed by its ord() value as four hex
        digits.  Nothing is printed for an empty string.
   """
   # Iterate directly over the characters; the earlier index-based loop
   # used its counter without ever initialising it.
   for ch in x:
      print("%s %04x" % (repr(ch), ord(ch)))

#-----------------------------------------------------------------------

# Parser state.  It is kept at module level because parse() is called once
# per received buffer and has to resume where the previous call stopped.

headers = {}               # header name -> value, filled in by parse()

HEADER  = 0
BODY    = 1
context = HEADER           # part of the response currently being parsed

data_encoding     = None   # not used by the visible code
transfer_encoding = None   # set to 'chunked' when that header value is seen
chunk_length      = None   # None: a chunk-size line is expected next
                           # > 0 : that many payload bytes (+ CRLF) are pending
                           # 0   : last chunk seen, waiting for the final blank line
body              = ''     # chunk payloads accumulated so far

def parse(s):
   """Consume as much of the buffered response text s as is complete.

   The status line and headers are read line by line into `headers`.
   After the blank line that ends them, a 'Transfer-Encoding: chunked'
   body is decoded chunk by chunk into the module-level `body`.

   Only chunked bodies are reassembled.  With any other transfer encoding
   the body lines are consumed and dropped, and the function never reports
   completion.

   s -- unparsed text: the residue returned by the previous call with the
        newly received data appended.

   Returns a tuple (finished, text):
     finished -- True once the zero-size chunk and the blank line ending
                 the chunked body have been seen.
     text     -- the complete decoded body when finished, otherwise the
                 unconsumed remainder of s to pass back on the next call.

   Raises ValueError if a chunk-size line is not a hexadecimal number.
   """
   global body
   global context
   global transfer_encoding
   global chunk_length

   finished = False

   while True:
      if chunk_length:
         # A payload is pending.  Wait until the payload AND the CRLF that
         # follows it are buffered, so that a CRLF arriving in a later
         # recv() is never mistaken for an empty chunk-size line.
         if len(s) < chunk_length + 2:
            break
         body += s[:chunk_length]
         s = s[chunk_length + 2:]
         print("Chopped out %d bytes" % chunk_length)
         chunk_length = None
         continue

      # Everything else is <CR><LF> delimited text.
      idx = s.find('\r\n')
      if idx < 0:
         break                      # incomplete line: wait for more data

      l = s[:idx]
      s = s[idx+2:]

      if context == HEADER:
         if len(l) > 0:
            # Lines without ': ' (such as the status line) are skipped.
            idx = l.find(': ')
            if idx > 0:
               tag = l[:idx]
               value = l[idx+2:]
               print("Tag [%s]  Value [%s]" % (tag, value))
               headers[tag] = value
         else:
            # The blank line ends the header section.
            context = BODY
            if headers.get('Transfer-Encoding') == 'chunked':
               transfer_encoding = 'chunked'
      elif transfer_encoding == 'chunked':
         if chunk_length == 0:
            # The last chunk has been seen.  Any non-empty line here is a
            # trailer and is ignored; the blank line ends the body.
            if not l:
               finished = True
               s = body
               break
            continue

         print("chunk -> [%s]" % l)
         chunk_length = int(l, 16)
         print("chunk_length -> %d [%d]" % (chunk_length, len(s)))
         if chunk_length == 0:
            print("# Should be done...")

   return (finished, s)

#-----------------------------------------------------------------------

def setup():
   """Open a TCP connection to (SVTAPPS, PORT) and return the socket.

   The caller owns the returned socket and is responsible for closing it.
   Any error raised by connect() is propagated after the half-created
   socket has been closed.
   """
   # Create an INET, STREAMing socket
   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

   try:
      s.connect((SVTAPPS, PORT))
   except Exception:
      # Do not leak the descriptor when the connection cannot be made.
      s.close()
      raise

   return s

#-----------------------------------------------------------------------

# Accept-Encoding: gzip, deflate

# The request sent by get().  HTTP/1.1 requires every line of the request
# head, including the blank line that ends it, to be terminated by CRLF,
# so the lines are joined explicitly instead of relying on the newlines of
# a triple-quoted string (which are bare LF).
request = "\r\n".join([
   "GET /mdcs HTTP/1.1",
   "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*",
   "Accept-Encoding: gzip, deflate",
   "Accept-Language: en-au",
   "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
   "Host: svtapps",
   "Connection: Keep-Alive",
   "",     # blank line ending the header section
   "",
])

def get():
   """Send `request` to the server, read the whole response and print the page.

   The response is fed to parse() buffer by buffer until parse() reports
   that the body is complete.  The body is then decoded according to the
   'Content-Encoding' response header (gzip/x-gzip, deflate, or left
   untouched when absent or unknown) and printed.

   Raises IOError if the server closes the connection before parse()
   reports completion.  Errors from the socket calls and from the
   decompressors are propagated.
   """
   s = setup()

   try:
      print("len(request) = %d" % len(request))

      # NOTE(review): send() may transmit fewer bytes than requested; the
      # count is only printed, not checked.
      n = s.send(request)

      print("send() -> %d" % n)

      residue = ""

      while True:
         recv = s.recv(1024)

         if not recv:
            # An empty read means the peer closed the connection; without
            # this check the loop would spin forever.
            raise IOError("connection closed before the response was complete")

         (done, residue) = parse(residue + recv)

         print(">**> %s" % done)

         if done: break
   finally:
      # Always release the socket, whether the exchange succeeded or not.
      s.close()

   # Once parse() reports completion, residue holds the reassembled body.
   print(len(residue))

   # Decode according to what the server actually sent rather than
   # assuming gzip.
   encoding = headers.get('Content-Encoding')
   if encoding in ('gzip', 'x-gzip'):
      data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(residue))
      page = data.read()
   elif encoding == 'deflate':
      page = zlib.decompress(residue)
   else:
      page = residue
   print(page)


#-----------------------------------------------------------------------

def main():
   """Script entry point: perform the single fetch implemented by get()."""
   get()

#-----------------------------------------------------------------------

# NOTE(review): this call runs whenever the module is loaded, including on
# import; consider guarding it with `if __name__ == "__main__":`.
main()

#-----------------------------------------------------------------------

"""
# zlib.decompressobj().decompress('x\x9c' + binary_str)


So you can do the same from your Python code. Just add ('Accept-Encoding', 'gzip,deflate') to the request headers. Check the following code chunk:

opener = urllib2.build_opener()
opener.addheaders = [('Referer', referer),
('User-Agent', uagent),
('Accept-Encoding', 'gzip,deflate')]
usock = opener.open(url)
url = usock.geturl()
data = decode(usock)
usock.close()
return data

Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).

def decode (page):
    encoding = page.info().get("Content-Encoding")    
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        content = page.read()
        if encoding == 'deflate':
            data = StringIO.StringIO(zlib.decompress(content))
        else:
            data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
        page = data.read()

    return page

"""