Difference between revisions of "Python - httplib"
Jump to navigation
Jump to search
PeterHarding (talk | contribs) |
PeterHarding (talk | contribs) |
||
Line 1: | Line 1: | ||
__TOC__ | |||
=Examples= | =Examples= | ||
Line 7: | Line 10: | ||
[[Parsing WhitePages Search Results HTML]] | [[Parsing WhitePages Search Results HTML]] | ||
=File Download Example= | ==File Download Example== | ||
<pre> | <pre> |
Revision as of 16:01, 15 February 2008
Examples
Whitepages
See the script which page-scrapes search results off the Whitepages site...
Parsing WhitePages Search Results HTML
File Download Example
#!/usr/bin/env python
#
# Walk through the Customer Portal login sequence with httplib and dump the
# headers and body of every response to numbered files (NNNN.hdr / NNNN.bdy)
# in the current directory.
#
#-------------------------------------------------------------------------------

import re
import sys
import urllib
import binascii

try:                                    # Python 2 module layout
    import httplib
    from urllib import urlencode
except ImportError:                     # Python 3 renamed/moved both
    import http.client as httplib
    from urllib.parse import urlencode

#-------------------------------------------------------------------------------

SITE = 'hx404'
URL = '/CustomerPortalWeb/login.portal'

params = urlencode({'aaa': 1})

USER_AGENT = ('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; '
              '.NET CLR 1.1.4322; .NET CLR 2.0.50727)')

get_headers = {
    'Accept-Language': 'en-au',
    'Accept': 'text/plain',
    'Content-Type': 'text/html; charset=utf-8',
    'Connection': 'Keep-Alive',
    'Host': SITE,
    'User-Agent': USER_AGENT,
}

# 'Content-Length' and 'Cookie' are added per request inside do().
post_headers = {
    'Accept-Language': 'en-au',
    'Accept-Encoding': 'gzip, deflate',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Host': SITE,
    'Connection': 'Keep-Alive',
    'Cache-Control': 'no-cache',
    'User-Agent': USER_AGENT,
}

# Cookie values cannot contain ';', ',' or whitespace, so stop at the first
# of those instead of greedily running to the last ';' in the header.
SESSION_COOKIE_RE = re.compile(r'JSESSIONID=([^;,\s]+)')

#-------------------------------------------------------------------------------

def extract_session_id(set_cookie):
    """Return the JSESSIONID value found in a Set-Cookie header string.

    Returns None when `set_cookie` is empty/None or carries no JSESSIONID.
    """
    if not set_cookie:
        return None
    m = SESSION_COOKIE_RE.search(set_cookie)
    if m is None:
        return None
    return m.group(1)

#-------------------------------------------------------------------------------

def log_header(idx, resp):
    """Dump the status line and header objects of `resp` to '<idx>.hdr'.

    `idx` is zero-padded to four digits to build the file name.
    """
    with open('%04d.hdr' % idx, 'w') as of:
        of.write("resp.__dict__ -> '%s'\n" % resp.__dict__)
        of.write("Status %s Reason [%s]\n" % (resp.status, resp.reason))
        of.write("Msg -> '%s'\n" % resp.msg)
        of.write("Msg.__dict__ -> '%s'\n" % resp.msg.__dict__)

#-------------------------------------------------------------------------------

def log_body(idx, resp_body):
    """Write the response body `resp_body` to '<idx>.bdy'.

    The file is opened in binary mode so the bytes read from the socket are
    stored unchanged (no newline translation).
    """
    with open('%04d.bdy' % idx, 'wb') as of:
        of.write(resp_body)

#-------------------------------------------------------------------------------

def _fetch(conn, idx, method, url, body, headers):
    """Send one request on `conn`, log header and body under `idx`.

    Returns a (response, body) tuple.
    """
    conn.request(method, url, body, headers)
    resp = conn.getresponse()
    log_header(idx, resp)
    resp_body = resp.read()
    log_body(idx, resp_body)
    return resp, resp_body

#-------------------------------------------------------------------------------

def do():
    """Run the four-step portal session: login page, login POST, two GETs.

    Raises RuntimeError if the login page response does not set a JSESSIONID
    cookie, since every later request depends on it.
    """
    conn = httplib.HTTPConnection(SITE)

    try:
        #---------------------------------------------------------------------
        # 1. Fetch the login page to obtain a session cookie.

        print(">>>>> GET /CustomerPortalWeb/login.portal <<<<<")

        resp, resp_body = _fetch(conn, 1, "GET",
                                 '/CustomerPortalWeb/login.portal',
                                 None, get_headers)

        session_id = extract_session_id(resp.getheader('set-cookie'))

        if session_id:
            print(session_id)

        print(resp_body)

        # The response has already been dumped to 0001.hdr / 0001.bdy, so
        # failing here still leaves something to debug with.
        if session_id is None:
            raise RuntimeError('no JSESSIONID cookie in login page response')

        cookie = 'JSESSIONID=%s' % session_id

        # The login page contains a form such as:
        # <form action="http://hx404:80/CustomerPortalWeb/login/login.do;jsessionid=vgp9GDVS6JyTly0v6NfsHG0rt1pLyvpMLxYnJf9MXsk3Yn0T2SZ3!1111094026" method="post">

        #---------------------------------------------------------------------
        # 2. Post the login form.

        print(">>>>> POST /CustomerPortalWeb/login/login.do <<<<<")
        print(">>>>> JSESSIONID = %s " % session_id)

        login_url = "/CustomerPortalWeb/login/login.do;jsessionid=%s" % session_id

        # NOTE(review): credentials are hard-coded in the script; move them
        # to arguments or the environment before using this outside a test rig.
        # Alternative test account: 'svtest035@svt' / 'Welcome1'
        form_data = {
            '{actionForm.username}': 'cpcustomeradmin',
            '{actionForm.password}': 'August2007',
        }

        login_params = urlencode(form_data)

        # Work on copies so the module-level header templates stay pristine.
        login_headers = dict(post_headers)
        login_headers['Content-Length'] = str(len(login_params))
        login_headers['Cookie'] = cookie

        _fetch(conn, 2, "POST", login_url, login_params, login_headers)

        #---------------------------------------------------------------------
        # 3. Fetch the portal landing page with the session cookie.

        session_headers = dict(get_headers)
        session_headers['Cookie'] = cookie

        print(">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<")

        _fetch(conn, 3, "GET", '/CustomerPortalWeb/ausPost.portal',
               None, session_headers)

        #---------------------------------------------------------------------
        # 4. Fetch the "Import User" page.

        args = {
            '_nfpb': 'true',
            '_pageLabel': 'ImportUserPage',
        }

        ue_args = urlencode(args)

        print(">>>>> GET /CustomerPortalWeb/ausPost.portal <<<<<")

        _fetch(conn, 4, "GET", '/CustomerPortalWeb/ausPost.portal?%s' % ue_args,
               None, session_headers)

        #---------------------------------------------------------------------
    finally:
        # Release the socket even when a request or a log write fails.
        conn.close()

#-------------------------------------------------------------------------------

def main(args):
    """Script entry point; `args` (command-line arguments) is currently unused."""
    do()

#-------------------------------------------------------------------------------

if __name__ == "__main__":
    main(sys.argv[1:])

#-------------------------------------------------------------------------------
#
# Regex Stuff:
#
#   regex = re.compile("\\n *")
#
#   (name, cnt) = re.subn('esb:', '', node_name)
#
#   value = re.sub(r'\n *', 'N/A', value)
#
#-------------------------------------------------------------------------------
#
# FILE DOWNLOAD (old-style httplib.HTTP interface):
#
#   h.putrequest('POST', '/scripts/cgi.exe?')
#   h.putheader('Content-length', '%d'%len(params))
#   h.putheader('Accept', 'text/plain')
#   h.putheader('Host', 'test.site.com')
#   h.endheaders()
#   h.send(params)
#   reply, msg, hdrs = h.getreply()
#   data = h.getfile().read()
#   file('test.file', 'w').write(data)
#   h.close()
#
#-------------------------------------------------------------------------------
#
# Sample request headers captured from the browser:
#
#   Accept-Language: en-au
#   Content-Type: application/x-www-form-urlencoded
#   Accept-Encoding: gzip, deflate
#   User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
#   Host: hx414:6304
#   Content-Length: 54
#   Connection: Keep-Alive
#   Cache-Control: no-cache
#   Cookie: JSESSIONID=jpm7G5hJbx6pYdhTr3GRRQrXsknrFcxdF7VdhcVPctThHdQxJjsC!2061771890
#
#-------------------------------------------------------------------------------