<?xml version="1.0" encoding="UTF-8"?>
<!-- Encoding is declared explicitly because the entry summaries below contain non-ASCII characters (arrow and minus-sign glyphs in the diff table). -->
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<!--
		Feed-level metadata for the revision history of the wiki page
		"Python - HTML Parsers".
		  * link rel="self"      : the URL of this Atom document.
		  * link rel="alternate" : the HTML history page for the same wiki page.
		Ampersands inside the URLs are written as &amp; because a bare ampersand
		is not allowed in XML text or attribute values.
	-->
	<id>https://performiq.com/kb/index.php?action=history&amp;feed=atom&amp;title=Python_-_HTML_Parsers</id>
	<title>Python - HTML Parsers - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://performiq.com/kb/index.php?action=history&amp;feed=atom&amp;title=Python_-_HTML_Parsers"/>
	<link rel="alternate" type="text/html" href="https://performiq.com/kb/index.php?title=Python_-_HTML_Parsers&amp;action=history"/>
	<updated>2026-05-18T12:38:17Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.37.1</generator>
	<!--
		Entry for diff=3057 (compared against the previous revision), authored by
		PeterHarding. The summary is type="html": its text is entity-escaped HTML
		holding a two-sided diff table. The only row marked as changed swaps the
		file name passed to open() from BO_1001_Body_01.txt to Some.html; the
		other rows are unchanged context lines.
		Sequences such as &amp;quot; are two levels of escaping (XML wrapped
		around HTML) and are intentional; do not collapse them.
		The whitespace inside the summary is part of the payload, so the element
		must not be re-indented.
		NOTE(review): the title and the updated element say 07:35 (the updated
		value carries a Z suffix) while the diff header text says 17:35;
		presumably a local-time rendering. Confirm against the wiki settings.
	-->
	<entry>
		<id>https://performiq.com/kb/index.php?title=Python_-_HTML_Parsers&amp;diff=3057&amp;oldid=prev</id>
		<title>PeterHarding at 07:35, 23 April 2009</title>
		<link rel="alternate" type="text/html" href="https://performiq.com/kb/index.php?title=Python_-_HTML_Parsers&amp;diff=3057&amp;oldid=prev"/>
		<updated>2009-04-23T07:35:38Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;table style=&quot;background-color: #fff; color: #202122;&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #202122; text-align: center;&quot;&gt;Revision as of 17:35, 23 April 2009&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l10&quot;&gt;Line 10:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 10:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;import re&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;import re&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;−&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;f_in = open(&amp;quot;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;BO_1001_Body_01&lt;/del&gt;.&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;txt&lt;/del&gt;&amp;quot;, &amp;quot;r&amp;quot;)&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot; data-marker=&quot;+&quot;&gt;&lt;/td&gt;&lt;td style=&quot;color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;f_in = open(&amp;quot;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;Some&lt;/ins&gt;.&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;html&lt;/ins&gt;&amp;quot;, &amp;quot;r&amp;quot;)&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;br/&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;data = f_in.read()&lt;/div&gt;&lt;/td&gt;&lt;td class=&quot;diff-marker&quot;&gt;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #202122; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;data = f_in.read()&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>PeterHarding</name></author>
	</entry>
	<!--
		Entry for diff=3056, authored by PeterHarding: the revision whose title
		and summary begin with "New page:".
		The summary is type="html" (entity-escaped HTML). Its first escaped
		paragraph is the truncated edit summary; the escaped div that follows
		carries the initial wikitext of the page, one source line per escaped
		br element. That wikitext includes a Python script (written with print
		statements) that reads a file and walks through it using two compiled
		regular expressions.
		Leading spaces on lines inside the summary are the script's indentation
		and are part of the payload, so the element must not be re-indented.
		Sequences such as &amp;lt; and &amp;quot; are two levels of escaping
		(XML wrapped around HTML) and are intentional.
	-->
	<entry>
		<id>https://performiq.com/kb/index.php?title=Python_-_HTML_Parsers&amp;diff=3056&amp;oldid=prev</id>
		<title>PeterHarding: New page: =parsers=  See BeautifulSoup ...  ==Examples==  &lt;pre&gt; #!/usr/bin/env python  import re  f_in = open(&quot;BO_1001_Body_01.txt&quot;, &quot;r&quot;)  data = f_in.read()  s = re.compile(&#039;(&lt;[^ &gt;]*)&#039;) e = re.comp...</title>
		<link rel="alternate" type="text/html" href="https://performiq.com/kb/index.php?title=Python_-_HTML_Parsers&amp;diff=3056&amp;oldid=prev"/>
		<updated>2009-04-23T07:35:11Z</updated>

		<summary type="html">&lt;p&gt;New page: =parsers=  See BeautifulSoup ...  ==Examples==  &amp;lt;pre&amp;gt; #!/usr/bin/env python  import re  f_in = open(&amp;quot;BO_1001_Body_01.txt&amp;quot;, &amp;quot;r&amp;quot;)  data = f_in.read()  s = re.compile(&amp;#039;(&amp;lt;[^ &amp;gt;]*)&amp;#039;) e = re.comp...&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;=parsers=&lt;br /&gt;
&lt;br /&gt;
See BeautifulSoup ...&lt;br /&gt;
&lt;br /&gt;
==Examples==&lt;br /&gt;
&lt;br /&gt;
&amp;lt;pre&amp;gt;&lt;br /&gt;
#!/usr/bin/env python&lt;br /&gt;
&lt;br /&gt;
import re&lt;br /&gt;
&lt;br /&gt;
f_in = open(&amp;quot;BO_1001_Body_01.txt&amp;quot;, &amp;quot;r&amp;quot;)&lt;br /&gt;
&lt;br /&gt;
data = f_in.read()&lt;br /&gt;
&lt;br /&gt;
s = re.compile(&amp;#039;(&amp;lt;[^ &amp;gt;]*)&amp;#039;)&lt;br /&gt;
e = re.compile(&amp;#039;&amp;gt;&amp;#039;)&lt;br /&gt;
&lt;br /&gt;
idx = 0&lt;br /&gt;
&lt;br /&gt;
cnt = 0&lt;br /&gt;
&lt;br /&gt;
l   = len(data) - 2&lt;br /&gt;
&lt;br /&gt;
tag_end = None&lt;br /&gt;
&lt;br /&gt;
while True:&lt;br /&gt;
   m = s.search(data[idx:])&lt;br /&gt;
&lt;br /&gt;
   if m:&lt;br /&gt;
      # print m.group(0)&lt;br /&gt;
      start  = m.start(0)&lt;br /&gt;
      offset =  m.end(0)&lt;br /&gt;
&lt;br /&gt;
      # print idx, start, offset&lt;br /&gt;
&lt;br /&gt;
      if tag_end:&lt;br /&gt;
         # print &amp;quot;%s %s&amp;quot; % (tag_end, idx + start)&lt;br /&gt;
&lt;br /&gt;
         if idx + start != tag_end:  # have some data!&lt;br /&gt;
            text = data[tag_end:idx + start]&lt;br /&gt;
            print &amp;quot;%s&amp;quot; % text&lt;br /&gt;
&lt;br /&gt;
      n = e.search(data[idx + offset:])&lt;br /&gt;
&lt;br /&gt;
      if n:&lt;br /&gt;
         tag_end = idx + offset+ n.end(0)&lt;br /&gt;
         print data[idx + start:tag_end]&lt;br /&gt;
      else:&lt;br /&gt;
         rest = data[idx + offset:]&lt;br /&gt;
         print &amp;quot;No end tag&amp;quot;&lt;br /&gt;
         print &amp;quot;Rest: [%s]&amp;quot; % rest&lt;br /&gt;
         break&lt;br /&gt;
&lt;br /&gt;
   else:&lt;br /&gt;
      rest = data[idx:]&lt;br /&gt;
      print &amp;quot;Rest: [%s]&amp;quot; % rest&lt;br /&gt;
      break&lt;br /&gt;
&lt;br /&gt;
   idx = tag_end&lt;br /&gt;
   # print data[idx:idx + 80]&lt;br /&gt;
&lt;br /&gt;
   cnt += 1&lt;br /&gt;
&lt;br /&gt;
f_in.close()&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
[[Category:Python]]&lt;br /&gt;
[[Category:HTML]]&lt;/div&gt;</summary>
		<author><name>PeterHarding</name></author>
	</entry>
</feed>