Difference between revisions of "Http get.py"

From PeformIQ Upgrade
Jump to navigation Jump to search
(New page: <pre> #!/usr/bin/env python import re import zlib import gzip import socket import StringIO #----------------------------------------------------------------------- SVTAPPS = "svtapps...)
 
 
(4 intermediate revisions by the same user not shown)
Line 1: Line 1:
<pre>
<pre>
#!/usr/bin/env python
#!/usr/bin/env python
Line 11: Line 10:
#-----------------------------------------------------------------------
#-----------------------------------------------------------------------


SVTAPPS  = "svtapps"
HOST    = "host"
PORT    = 80
PORT    = 80


Line 143: Line 142:
   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)


   s.connect((SVTAPPS, PORT))
   s.connect((HOST, PORT))


   return s
   return s
Line 152: Line 151:


request = """\
request = """\
GET /mdcs HTTP/1.1
GET /test HTTP/1.1
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*
Accept-Encoding: gzip, deflate
Accept-Encoding: gzip, deflate
Accept-Language: en-au
Accept-Language: en-au
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
Host: svtapps
Host: host
Connection: Keep-Alive
Connection: Keep-Alive
"""
"""


Line 227: Line 225:


Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).
Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).


def decode (page):
def decode (page):
Line 241: Line 238:
     return page
     return page


 
"""
</pre>
</pre>


[[Category:Python]]
[[Category:Python]]
[[Category:Py httplib]]
[[Category:Python httplib]]
[[Category:Examples]]

Latest revision as of 15:07, 1 August 2015

#!/usr/bin/env python

import re
import zlib
import gzip
import socket
import StringIO

#-----------------------------------------------------------------------

HOST     = "host"
PORT     = 80

p_Encoding = re.compile('Transfer-Encoding')

#-----------------------------------------------------------------------

#   'Accept'             : 'text/plain, text/html',
#   'Accept-Encoding'    : 'gzip, deflate',

"""
get_headers = {
   'Accept-Encoding'    : 'gzip, deflate',
   'Accept'             : '*/*',
   'Accept-Language'    : 'en-au',
   'Host'               : SITE,
   'Connection'         : 'Keep-Alive',
   'User-Agent'         :  'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727'
}


post_headers = {
   'Accept:'            : 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*',
   'Accept-Language:'   : 'en-au',
   'Content-Type:'      : 'application/x-www-form-urlencoded',
   'Accept-Encoding:'   : 'gzip, deflate',
   'User-Agent:'        : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
   'Host'               : SITE,
   'Connection:'        : 'Keep-Alive',
   'Cache-Control:'     : 'no-cache',
}
"""

#-----------------------------------------------------------------------

def dump(x):
   while i < len(x):
      print "%s %04x" % (repr(x[i]), ord(x[i]))
      i += 1

#-----------------------------------------------------------------------

headers = {}

HEADER  = 0
BODY    = 1
context = HEADER

data_encoding    = None
tranfer_encoding = None
chunk_length     = None
body             = ''

def parse(s):
   global body
   global context
   global transfer_encoding
   global chunk_length

   cnt = 0

   finished = False

   while True:
      if chunk_length:
         if len(s) < chunk_length:
             break
         else:
             data = s[:chunk_length]
             body += data
             s = s[chunk_length:]
             print "Chopped out %d bytes" % chunk_length
             chunk_length = None
             idx = s.find('\r\n')
             s = s[2:]
             # print "[%s] -> %d - %d" % (s, len(s), idx)
             continue
      else:  # <CR><LF> delimited text
         idx = s.find('\r\n')

         if idx >= 0:
            l = s[:idx]

            s = s[idx+2:]

            if context == HEADER:
               if len(l) > 0:
                  idx = l.find(': ')
                  if idx > 0:
                     tag = l[:idx]
                     value = l[idx+2:]
                     print "Tag [%s]  Value [%s]" % (tag, value)
                     headers[tag] = value
               else:
                  context = BODY
                  if headers.has_key('Transfer-Encoding'):
                     if headers['Transfer-Encoding'] == 'chunked':
                        transfer_encoding = 'chunked'
            else:
               if transfer_encoding == 'chunked':
                  if not chunk_length:
                     print "chunk -> [%s]" % l
                     chunk_length = int(l, 16)
                     print "chunk_length -> %d [%d]" % (chunk_length, len(s))
                     # print s
                     if chunk_length == 0:  # Should be done...
                        print  "# Should be done..."
                        idx = s.find('\r\n')
                        print "idx -> %d" % idx
                        if idx == 0:
                           finished = True
                           s = body
                           break
                     if len(s) < chunk_length:
                        break
                     else:
                        continue
                  else:
                     print "Should not get here!"
                     break
         else:
            break

   return (finished, s)

#-----------------------------------------------------------------------

def setup():
   # Create an INET, STREAMing socket

   s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

   s.connect((HOST, PORT))

   return s

#-----------------------------------------------------------------------

# Accept-Encoding: gzip, deflate

request = """\
GET /test HTTP/1.1
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*
Accept-Encoding: gzip, deflate
Accept-Language: en-au
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
Host: host
Connection: Keep-Alive
"""

def get():
   s = setup()

   print "len(request) = %d" % len(request)

   n = s.send(request)

   print"send() -> %d" % n

   cnt = 0

   residue = ""

   while True:
      recv = s.recv(1024)

      buf = residue + recv

      i = 0

      (done, residue) = parse(buf)

      print ">**> %s" % done

      cnt += 1

      if done: break

   print len(residue)

   # print zlib.decompress(residue)
   # print residue
   data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(residue))
   page = data.read()
   print page


#-----------------------------------------------------------------------

def main():
   get()

#-----------------------------------------------------------------------

main()

#-----------------------------------------------------------------------

"""
# zlib.decompressobj().decompress('x\x9c' + binary_str)


So you can do the same from you Python code. Just add ('Accept-Encoding', 'gzip,deflate') in the request header. Check the following code chunk:

opener = urllib2.build_opener()
opener.addheaders = [('Referer', referer),
('User-Agent', uagent),
('Accept-Encoding', 'gzip,deflate')]
usock = opener.open(url)
url = usock.geturl()
data = decode(usock)
usock.close()
return data

Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).

def decode (page):
    encoding = page.info().get("Content-Encoding")    
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        content = page.read()
        if encoding == 'deflate':
            data = StringIO.StringIO(zlib.decompress(content))
        else:
            data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
        page = data.read()

    return page

"""