Difference between revisions of "Http get.py"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
| (One intermediate revision by the same user not shown) | |||
| Line 10: | Line 10: | ||
#----------------------------------------------------------------------- | #----------------------------------------------------------------------- | ||
HOST = "host" | |||
PORT = 80 | PORT = 80 | ||
| Line 142: | Line 142: | ||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) | ||
s.connect(( | s.connect((HOST, PORT)) | ||
return s | return s | ||
| Line 151: | Line 151: | ||
request = """\ | request = """\ | ||
GET / | GET /test HTTP/1.1 | ||
Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */* | Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */* | ||
Accept-Encoding: gzip, deflate | Accept-Encoding: gzip, deflate | ||
Accept-Language: en-au | Accept-Language: en-au | ||
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727) | User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727) | ||
Host: | Host: host | ||
Connection: Keep-Alive | Connection: Keep-Alive | ||
""" | """ | ||
| Line 243: | Line 242: | ||
[[Category:Python]] | [[Category:Python]] | ||
[[Category: | [[Category:Python httplib]] | ||
[[Category:Examples]] | [[Category:Examples]] | ||
Latest revision as of 15:07, 1 August 2015
#!/usr/bin/env python
import re
import zlib
import gzip
import socket
import StringIO
#-----------------------------------------------------------------------
# Host name and TCP port handed to socket.connect() in setup().
# NOTE(review): "host" looks like a placeholder value -- confirm the real
# target before running.
HOST = "host"
PORT = 80
# Pre-compiled pattern for the literal header name 'Transfer-Encoding'; it is
# not referenced by any code visible in this file.
p_Encoding = re.compile('Transfer-Encoding')
#-----------------------------------------------------------------------
# 'Accept' : 'text/plain, text/html',
# 'Accept-Encoding' : 'gzip, deflate',
"""
get_headers = {
'Accept-Encoding' : 'gzip, deflate',
'Accept' : '*/*',
'Accept-Language' : 'en-au',
'Host' : SITE,
'Connection' : 'Keep-Alive',
'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727'
}
post_headers = {
'Accept:' : 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*',
'Accept-Language:' : 'en-au',
'Content-Type:' : 'application/x-www-form-urlencoded',
'Accept-Encoding:' : 'gzip, deflate',
'User-Agent:' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Host' : SITE,
'Connection:' : 'Keep-Alive',
'Cache-Control:' : 'no-cache',
}
"""
#-----------------------------------------------------------------------
def dump(x):
    """Print every element of *x* as its repr followed by its ordinal in hex.

    One line is written per element, formatted as ``<repr> <4-digit hex>``.
    An empty *x* prints nothing.

    The previous implementation used an index variable that was never
    initialised and therefore failed on the first loop test; iterating
    over *x* directly removes the index altogether.
    """
    for ch in x:
        print("%s %04x" % (repr(ch), ord(ch)))
#-----------------------------------------------------------------------
# Parser state shared between successive parse() calls.
headers = {}                # response header name -> value, filled by parse()
HEADER = 0                  # context value: still reading the header section
BODY = 1                    # context value: reading the message body
context = HEADER
data_encoding = None        # not referenced by parse()
transfer_encoding = None    # becomes 'chunked' once the headers announce it
chunk_length = None         # None: a chunk-size line is expected next;
                            # 0: the last chunk was seen, trailer follows;
                            # > 0: that many payload bytes are still due
body = ''                   # de-chunked body accumulated so far


def _is_chunked(hdrs):
    """Return True when *hdrs* carries a Transfer-Encoding ending in 'chunked'."""
    for tag, value in hdrs.items():
        # Header names and transfer-coding names are case-insensitive.
        if tag.lower() == 'transfer-encoding':
            codings = [c.strip().lower() for c in value.split(',')]
            if codings[-1] == 'chunked':
                return True
    return False


def parse(s):
    """Incrementally parse an HTTP response with a chunked body.

    The function is called repeatedly with the text that is still
    unprocessed (the residue returned by the previous call followed by the
    newly received data).  Progress is kept in the module-level variables
    ``headers``, ``context``, ``transfer_encoding``, ``chunk_length`` and
    ``body``.

    Parameters:
        s -- unprocessed response text.

    Returns:
        A tuple ``(finished, data)``.  While the response is incomplete,
        ``finished`` is False and ``data`` is the input that could not be
        consumed yet and must be passed in again with more data appended.
        Once the terminating zero-size chunk and the blank line after it
        have been read, ``finished`` is True and ``data`` is the complete
        de-chunked body.

    Raises:
        ValueError -- if a chunk-size line is not a hexadecimal number.

    Only chunked bodies are decoded.  For any other framing the function
    prints "Should not get here!" and returns the input unconsumed.
    """
    global body
    global context
    global transfer_encoding
    global chunk_length

    finished = False
    while True:
        if chunk_length:
            # A chunk is its payload plus a trailing CRLF.  Wait until both
            # have arrived so the CRLF is never mistaken for a size line.
            if len(s) < chunk_length + 2:
                break
            body += s[:chunk_length]
            s = s[chunk_length + 2:]
            print("Chopped out %d bytes" % chunk_length)
            chunk_length = None
            continue

        # Everything else is <CR><LF> delimited text.
        idx = s.find('\r\n')
        if idx < 0:
            break                       # incomplete line: need more data
        line = s[:idx]

        if context == HEADER:
            s = s[idx + 2:]
            if line:
                # The status line has no colon and is skipped.  White space
                # after the colon is optional, so split on ':' and strip.
                tag, sep, value = line.partition(':')
                if sep and tag:
                    value = value.strip()
                    print("Tag [%s] Value [%s]" % (tag, value))
                    headers[tag] = value
            else:
                # An empty line ends the header section.
                context = BODY
                if _is_chunked(headers):
                    transfer_encoding = 'chunked'
            continue

        if transfer_encoding != 'chunked':
            # Unsupported framing: leave the data untouched for the caller.
            print("Should not get here!")
            break

        s = s[idx + 2:]
        if chunk_length == 0:
            # Trailer section after the last chunk: an empty line ends the
            # message, any other line is a trailer field and is ignored.
            if not line:
                finished = True
                s = body
                break
            continue

        print("chunk -> [%s]" % line)
        # Drop optional chunk extensions (";name=value") before converting.
        chunk_length = int(line.split(';', 1)[0].strip(), 16)
        print("chunk_length -> %d [%d]" % (chunk_length, len(s)))
        if chunk_length == 0:  # Should be done...
            print("# Should be done...")
            print("idx -> %d" % s.find('\r\n'))
    return (finished, s)
#-----------------------------------------------------------------------
def setup():
    """Open a connection to (HOST, PORT) and return the connected socket."""
    # AF_INET + SOCK_STREAM selects an IPv4 stream socket.
    connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    target = (HOST, PORT)
    connection.connect(target)
    return connection
#-----------------------------------------------------------------------
# Accept-Encoding: gzip, deflate
# Request text sent by get().  HTTP/1.1 requires every line to end in CRLF
# and the header section to be closed by an empty line, so the message is
# assembled from its individual lines instead of a newline-separated literal.
request = "\r\n".join([
    "GET /test HTTP/1.1",
    "Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-shockwave-flash, */*",
    "Accept-Encoding: gzip, deflate",
    "Accept-Language: en-au",
    "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
    "Host: host",
    "Connection: Keep-Alive",
    "",     # terminates the last header line
    "",     # empty line that ends the header section
])
def get():
    """Send ``request`` to the server, read the response and print the page.

    The response is fed to parse() piece by piece until it reports that the
    body is complete.  The body is then decoded according to the
    Content-Encoding recorded in ``headers`` (gzip, x-gzip or deflate; any
    other value is printed unchanged) and written to standard output.

    Raises:
        IOError -- if the server closes the connection before parse()
                   reports a complete response.
    """
    s = setup()
    try:
        print("len(request) = %d" % len(request))
        # sendall() keeps writing until the whole request is out; a plain
        # send() may transmit only part of it.
        s.sendall(request)
        print("send() -> %d" % len(request))
        done = False
        residue = ""
        while not done:
            received = s.recv(1024)
            if not received:
                # recv() returns an empty string once the peer has closed
                # the connection; without this check the loop never ends.
                raise IOError("connection closed before the response was complete")
            (done, residue) = parse(residue + received)
            print(">**> %s" % done)
    finally:
        # Release the socket whether or not the exchange succeeded.
        s.close()
    print(len(residue))

    # Header names are case-insensitive, so look the encoding up by scanning.
    encoding = None
    for tag, value in headers.items():
        if tag.lower() == 'content-encoding':
            encoding = value.strip().lower()

    if encoding in ('gzip', 'x-gzip'):
        data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(residue))
        page = data.read()
    elif encoding == 'deflate':
        try:
            page = zlib.decompress(residue)
        except zlib.error:
            # Some servers send a raw deflate stream without the zlib header.
            page = zlib.decompress(residue, -zlib.MAX_WBITS)
    else:
        # No (or an unknown) content encoding: the body is used as received.
        page = residue
    print(page)
#-----------------------------------------------------------------------
def main():
    """Script entry point: perform the demonstration request."""
    get()


#-----------------------------------------------------------------------
# Run the request only when executed as a script, so that importing this
# module does not open a network connection as a side effect.
if __name__ == "__main__":
    main()
#-----------------------------------------------------------------------
"""
# zlib.decompressobj().decompress('x\x9c' + binary_str)
So you can do the same from your Python code. Just add ('Accept-Encoding', 'gzip,deflate') to the request headers. Check the following code chunk:
opener = urllib2.build_opener()
opener.addheaders = [('Referer', referer),
('User-Agent', uagent),
('Accept-Encoding', 'gzip,deflate')]
usock = opener.open(url)
url = usock.geturl()
data = decode(usock)
usock.close()
return data
Note the decode() function used in the code. Yes, you have to decode the content (if it's compressed).
def decode (page):
encoding = page.info().get("Content-Encoding")
if encoding in ('gzip', 'x-gzip', 'deflate'):
content = page.read()
if encoding == 'deflate':
data = StringIO.StringIO(zlib.decompress(content))
else:
data = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
page = data.read()
return page
"""