Difference between revisions of "Python - httplib"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
| Line 7: | Line 7: | ||
[[Parsing WhitePages Search Results HTML]] | [[Parsing WhitePages Search Results HTML]] | ||
=File Download Example= | |||
<pre> | <pre> | ||
#!/usr/bin/env python | |||
# | |||
# | |||
# | |||
#------------------------------------------------------------------------------- | |||
import re | |||
import sys | |||
import urllib | |||
import httplib | |||
import binascii | |||
#------------------------------------------------------------------------------- | |||
SITE = 'hx404' | |||
URL = '/CustomerPortalWeb/login.portal' | |||
params = urllib.urlencode({'aaa' : 1}) | |||
get_headers = { | |||
'Accept-Language' : 'en-au', | |||
'Accept' : 'text/plain', | |||
'Content-Type' : 'text/html; charset=utf-8', | |||
'Connection' : 'Keep-Alive', | |||
'Host' : SITE, | |||
'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)' | |||
} | |||
post_headers = { | |||
'Accept-Language' : 'en-au', | |||
'Accept-Encoding' : 'gzip, deflate', | |||
'Content-Type' : 'application/x-www-form-urlencoded', | |||
'Host' : SITE, | |||
'Connection' : 'Keep-Alive', | |||
'Cache-Control' : 'no-cache', | |||
'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)' | |||
} | |||
# 'Content-Length' : len(request), | |||
# 'Cookie' : 'JSESSIONID=%s' % JSESSIONID | |||
#------------------------------------------------------------------------------- | |||
def log_header(idx, resp): | |||
of = open('%04d.hdr' % idx, 'w') | |||
of.write("resp.__dict__ -> '%s'\n" % resp.__dict__) | |||
of.write("Status %s Reason [%s]\n" % (resp.status, resp.reason)) | |||
of.write("Msg -> '%s'\n" % resp.msg) | |||
of.write("Msg.__dict__ -> '%s'\n" % resp.msg.__dict__) | |||
#xxx = "Msg.__dict__ -> '%s'" % resp.msg.__dict__['dict']['set-cookie'] | |||
#print xxx | |||
of.close() | |||
#------------------------------------------------------------------------------- | |||
def log_body(idx, resp_body): | |||
of = open('%04d.bdy' % idx, 'w') | |||
of.write(resp_body); | |||
of.close() | |||
#------------------------------------------------------------------------------- | |||
def do(): | |||
conn = httplib.HTTPConnection(SITE) | |||
#--------------------------------------------------------------------- | |||
idx = 1 | |||
print ">>>>> GET /CustomerPortalWeb/login.portal <<<<<" | |||
conn.request("GET", '/CustomerPortalWeb/login.portal', None, get_headers) | |||
resp = conn.getresponse() | |||
log_header(idx, resp) | |||
m = re.search('JSESSIONID=(.*);', resp.msg.__dict__['dict']['set-cookie']) | |||
if m: | |||
print m.group(1) | |||
JSESSIONID = m.group(1) | |||
resp_body = resp.read() | |||
log_body(idx, resp_body) | |||
print resp_body | |||
# <form action="http://hx404:80/CustomerPortalWeb/login/login.do;jsessionid=vgp9GDVS6JyTly0v6NfsHG0rt1pLyvpMLxYnJf9MXsk3Yn0T2SZ3!1111094026" method="post"> | |||
#--------------------------------------------------------------------- | |||
idx = 2 | |||
print ">>>>> POST /CustomerPortalWeb/login/login.do <<<<<" | |||
print ">>>>> JSESSIONID = %s " % JSESSIONID | |||
URL = "/CustomerPortalWeb/login/login.do;jsessionid=%s" % JSESSIONID | |||
# form_data = { | |||
# '{actionForm.username}' : 'svtest035@svt', | |||
# '{actionForm.password}' : 'Welcome1' | |||
# } | |||
form_data = { | |||
'{actionForm.username}' : 'cpcustomeradmin', | |||
'{actionForm.password}' : 'August2007' | |||
} | |||
params = urllib.urlencode(form_data) | |||
post_headers['Content-Length'] = len(params) | |||
post_headers['Cookie'] = 'JSESSIONID=%s' % JSESSIONID | |||
conn.request("POST", URL, params, post_headers) | |||
resp = conn.getresponse() | |||
log_header(idx, resp) | |||
resp_body = resp.read() | |||
log_body(idx, resp_body) | |||
#--------------------------------------------------------------------- | |||
idx = 3 | |||
print ">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<" | |||
get_headers['Cookie'] = 'JSESSIONID=%s' % JSESSIONID | |||
conn.request("GET", '/CustomerPortalWeb/ausPost.portal', None, get_headers) | |||
resp = conn.getresponse() | |||
log_header(idx, resp) | |||
resp_body = resp.read() | |||
log_body(idx, resp_body) | |||
#--------------------------------------------------------------------- | |||
idx = 4 | |||
args = { | |||
'_nfpb' : 'true', | |||
'_pageLabel' : 'ImportUserPage' | |||
} | |||
ue_args = urllib.urlencode(args) | |||
print ">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<" | |||
get_headers['Cookie'] = 'JSESSIONID=%s' % JSESSIONID | |||
conn.request("GET", '/CustomerPortalWeb/ausPost.portal?%s' % ue_args, None, get_headers) | |||
resp = conn.getresponse() | |||
log_header(idx, resp) | |||
resp_body = resp.read() | |||
log_body(idx, resp_body) | |||
#--------------------------------------------------------------------- | |||
conn.close() | |||
#------------------------------------------------------------------------------- | |||
def main(args): | |||
do() | |||
#------------------------------------------------------------------------------- | |||
if __name__ == "__main__": | |||
main(sys.argv[1:]) | |||
#------------------------------------------------------------------------------- | |||
""" | |||
Regex Stuff: | |||
regex = re.compile("\\n *") | |||
(name, cnt) = re.subn('esb:', '', node_name) | |||
value = re.sub(r'\n *', 'N/A', value) | |||
""" | |||
""" | |||
FILE DOWNLOAD: | |||
h.putrequest('POST', '/scripts/cgi.exe?') | |||
h.putheader('Content-length', '%d'%len(params)) | |||
h.putheader('Accept', 'text/plain') | |||
h.putheader('Host', 'test.site.com') | |||
h.endheaders() | |||
h.send(params) | |||
reply, msg, hdrs = h.getreply() | |||
data = h.getfile().read() | |||
file('test.file', 'w').write(data) | |||
h.close() | |||
""" | |||
""" | |||
Accept-Language: en-au | |||
Content-Type: application/x-www-form-urlencoded | |||
Accept-Encoding: gzip, deflate | |||
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727) | |||
Host: hx414:6304 | |||
Content-Length: 54 | |||
Connection: Keep-Alive | |||
Cache-Control: no-cache | |||
Cookie: JSESSIONID=jpm7G5hJbx6pYdhTr3GRRQrXsknrFcxdF7VdhcVPctThHdQxJjsC!2061771890 | |||
""" | |||
</pre> | </pre> | ||
Revision as of 16:01, 15 February 2008
Examples
Whitepages
See the script that scrapes search results from the Whitepages site:
Parsing WhitePages Search Results HTML
File Download Example
#!/usr/bin/env python
#
#
#
#-------------------------------------------------------------------------------
import re
import sys
import urllib
import httplib
import binascii
#-------------------------------------------------------------------------------
# Host name of the portal server: do() opens its connection to it and it is
# also sent as the Host header in both header sets below.
SITE = 'hx404'
# Path of the login page.
# NOTE(review): not read by the code shown here -- do() spells the path out
# and binds its own local URL; confirm before removing.
URL = '/CustomerPortalWeb/login.portal'
# Sample url-encoded payload.
# NOTE(review): do() builds its own local params, so this value appears
# unused in the code shown here; confirm before removing.
params = urllib.urlencode({'aaa' : 1})
# Base headers for the GET requests issued by do().
get_headers = {
'Accept-Language' : 'en-au',
'Accept' : 'text/plain',
'Content-Type' : 'text/html; charset=utf-8',
'Connection' : 'Keep-Alive',
'Host' : SITE,
'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'
}
# Base headers for the login POST issued by do().
post_headers = {
'Accept-Language' : 'en-au',
'Accept-Encoding' : 'gzip, deflate',
'Content-Type' : 'application/x-www-form-urlencoded',
'Host' : SITE,
'Connection' : 'Keep-Alive',
'Cache-Control' : 'no-cache',
'User-Agent' : 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'
}
# Content-Length and Cookie are deliberately absent from the dicts above:
# do() supplies Cookie (and Content-Length for the POST) per request, once
# the session id is known.
#-------------------------------------------------------------------------------
def log_header(idx, resp):
    """Dump the status and header details of *resp* to the file ``NNNN.hdr``.

    idx  -- request sequence number; formatted as four zero-padded digits
            to build the file name (relative to the working directory).
    resp -- response object providing ``status``, ``reason`` and ``msg``
            attributes (in this script, the value returned by
            ``conn.getresponse()``).

    Four lines are written: the raw ``__dict__`` of the response, the
    status/reason pair, the message object, and the message's ``__dict__``.
    Any error raised by ``open`` or by formatting/writing propagates.
    """
    # The context manager closes the file even when one of the writes (or
    # the attribute lookups feeding it) raises; the original leaked the
    # handle in that case.
    with open('%04d.hdr' % idx, 'w') as out:
        out.write("resp.__dict__ -> '%s'\n" % resp.__dict__)
        out.write("Status %s Reason [%s]\n" % (resp.status, resp.reason))
        out.write("Msg -> '%s'\n" % resp.msg)
        out.write("Msg.__dict__ -> '%s'\n" % resp.msg.__dict__)
#-------------------------------------------------------------------------------
def log_body(idx, resp_body):
    """Write a response body verbatim to the file ``NNNN.bdy``.

    idx       -- request sequence number; formatted as four zero-padded
                 digits to build the file name (relative to the working
                 directory).
    resp_body -- the body exactly as read from the response.

    Any error raised by ``open`` or ``write`` propagates.
    """
    # Binary mode keeps the bytes exactly as received: text mode ('w')
    # rewrites newline bytes on Windows, which corrupts downloaded content.
    # The context manager closes the file even if the write fails.
    with open('%04d.bdy' % idx, 'wb') as out:
        out.write(resp_body)
#-------------------------------------------------------------------------------
def _session_id_from_cookie(set_cookie):
    """Return the JSESSIONID value found in a Set-Cookie header, or None."""
    if not set_cookie:
        return None
    # Stop at the first ';', ',' or whitespace. The previous greedy
    # 'JSESSIONID=(.*);' ran on to the LAST ';' (swallowing attributes such
    # as "path=/") and did not match at all without a trailing ';'.
    found = re.search(r'JSESSIONID=([^;,\s]+)', set_cookie)
    if found is None:
        return None
    return found.group(1)


def _exchange(conn, idx, method, url, body, headers):
    """Send one request on *conn*, log header and body under *idx*, return (resp, body)."""
    conn.request(method, url, body, headers)
    resp = conn.getresponse()
    log_header(idx, resp)
    # The body is read in full before returning so that the same connection
    # can be reused for the next request.
    resp_body = resp.read()
    log_body(idx, resp_body)
    return resp, resp_body


def do():
    """Run the scripted portal session: fetch login page, log in, fetch two pages.

    Steps (each response is logged via log_header/log_body as 0001..0004):
      1. GET the login page and take the JSESSIONID from its Set-Cookie header.
      2. POST the login form to login.do, with the session id both in the
         URL (";jsessionid=") and in the Cookie header.
      3. GET ausPost.portal with the session cookie.
      4. GET ausPost.portal again with the ImportUserPage query arguments.

    The module-level ``get_headers`` / ``post_headers`` are only read; the
    per-request Cookie and Content-Length go into copies, so a repeated call
    does not start out with a stale cookie.

    Raises RuntimeError when the login page response carries no JSESSIONID
    cookie; connection and I/O errors propagate. The connection is closed in
    all cases.
    """
    conn = httplib.HTTPConnection(SITE)
    try:
        #---------------------------------------------------------------------
        print(">>>>> GET /CustomerPortalWeb/login.portal <<<<<")
        resp, resp_body = _exchange(
            conn, 1, "GET", '/CustomerPortalWeb/login.portal', None, get_headers)
        # getheader() returns None when the header is absent, instead of the
        # KeyError raised by indexing the message's internal dict.
        session_id = _session_id_from_cookie(resp.getheader('set-cookie'))
        if session_id is None:
            raise RuntimeError(
                "no JSESSIONID cookie in the login page response "
                "(see 0001.hdr / 0001.bdy)")
        print(session_id)
        print(resp_body)

        session_cookie = 'JSESSIONID=%s' % session_id

        #---------------------------------------------------------------------
        print(">>>>> POST /CustomerPortalWeb/login/login.do <<<<<")
        print(">>>>> JSESSIONID = %s " % session_id)
        # The login form's action URL carries the session id as a path
        # parameter, e.g.
        #   http://hx404:80/CustomerPortalWeb/login/login.do;jsessionid=<id>
        login_url = "/CustomerPortalWeb/login/login.do;jsessionid=%s" % session_id
        # NOTE(review): credentials are hard-coded in source; move them to
        # configuration or the environment before reusing this script.
        form_data = {
            '{actionForm.username}' : 'cpcustomeradmin',
            '{actionForm.password}' : 'August2007'
        }
        form_body = urllib.urlencode(form_data)
        login_headers = dict(post_headers)
        login_headers['Content-Length'] = str(len(form_body))
        login_headers['Cookie'] = session_cookie
        # NOTE(review): post_headers advertises "gzip, deflate"; the body is
        # logged as received, with no decompression step -- confirm intent.
        _exchange(conn, 2, "POST", login_url, form_body, login_headers)

        #---------------------------------------------------------------------
        print(">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<")
        page_headers = dict(get_headers)
        page_headers['Cookie'] = session_cookie
        _exchange(conn, 3, "GET", '/CustomerPortalWeb/ausPost.portal', None, page_headers)

        #---------------------------------------------------------------------
        args = {
            '_nfpb' : 'true',
            '_pageLabel' : 'ImportUserPage'
        }
        ue_args = urllib.urlencode(args)
        print(">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<")
        _exchange(conn, 4, "GET", '/CustomerPortalWeb/ausPost.portal?%s' % ue_args,
                  None, page_headers)
    finally:
        # Always release the socket, including when a step above raises.
        conn.close()
#-------------------------------------------------------------------------------
def main(args):
    """Command-line entry point: run the scripted portal session.

    args -- command-line arguments without the program name. Currently
            unused; accepted so the signature matches the call made from
            the ``__main__`` guard.
    """
    do()
#-------------------------------------------------------------------------------
# Run the session only when this file is executed as a script, not when it
# is imported; the program name is stripped from the argument list.
if __name__ == "__main__":
    main(sys.argv[1:])
#-------------------------------------------------------------------------------
"""
Regex Stuff:
regex = re.compile("\\n *")
(name, cnt) = re.subn('esb:', '', node_name)
value = re.sub(r'\n *', 'N/A', value)
"""
"""
FILE DOWNLOAD:
h.putrequest('POST', '/scripts/cgi.exe?')
h.putheader('Content-length', '%d'%len(params))
h.putheader('Accept', 'text/plain')
h.putheader('Host', 'test.site.com')
h.endheaders()
h.send(params)
reply, msg, hdrs = h.getreply()
data = h.getfile().read()
file('test.file', 'w').write(data)
h.close()
"""
"""
Accept-Language: en-au
Content-Type: application/x-www-form-urlencoded
Accept-Encoding: gzip, deflate
User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
Host: hx414:6304
Content-Length: 54
Connection: Keep-Alive
Cache-Control: no-cache
Cookie: JSESSIONID=jpm7G5hJbx6pYdhTr3GRRQrXsknrFcxdF7VdhcVPctThHdQxJjsC!2061771890
"""