Python Parse HTML

How to parse HTML in Python using the HTMLParser class from the standard library's html.parser module.

# 
# Python parse HTML
#
from html.parser import  HTMLParser

class AnHTMLParser(HTMLParser):
  """HTML parser that reports every comment it encounters.

  For each comment, the comment text and the position returned by
  getpos() are printed to standard output.
  """

  def handle_comment(self, data):
    """Print the comment text followed by the parser's current position.

    Args:
      data: Text found between the comment delimiters.
    """
    # getpos() returns a (line number, offset) pair
    line_number, offset = self.getpos()
    print("Encountered comment: ", data)
    print("\tAt line: ", line_number, " position ", offset)



def main(path="samplehtml.html"):
  """Print an HTML file and report the comments found in it.

  The file contents are echoed to standard output and then fed to an
  AnHTMLParser instance, which prints each comment and its position.

  Args:
    path: HTML file to read. Defaults to "samplehtml.html", resolved
      against the current working directory.

  Raises:
    OSError: If the file cannot be opened or read.
    UnicodeDecodeError: If the file is not valid UTF-8.
  """
  # The context manager closes the file even if reading fails
  with open(path, encoding="utf-8") as f:
    contents = f.read()
  print(contents)

  # instantiate the parser and feed it the HTML
  parser = AnHTMLParser()
  parser.feed(contents)
  # close() makes the parser process any input it is still buffering
  parser.close()

# Run the demo only when executed as a script, not when imported
if __name__ == "__main__":
  main()
  


--------------------------------------------------
#Output
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Sample HTML Document</title>
    <meta name="description" content="This is a sample HTML file" />
    <meta name="author" content="Administrator" />
    <meta name="viewport" content="width=device-width; initial-scale=1.0" />
    <!-- Replace favicon.ico & apple-touch-icon.png in the root of your domain and delete these references -->
    <link rel="shortcut icon" href="/favicon.ico" />
    <link rel="apple-touch-icon" href="/apple-touch-icon.png" />
  </head>

  <body>
    <div>
      <header>
        <h1>HTML Sample File</h1>
      </header>
      <nav>
        <p>
          <a href="/">Home</a>
        </p>
        <p>
          <a href="/contact">Contact</a>
        </p>
      </nav>
      <div>

      </div>
      <footer>
        <p>&copy; Copyright by Administrator</p>
      </footer>
    </div>
  </body>
</html>

Encountered comment:   Replace favicon.ico & apple-touch-icon.png in the root of your domain and delete these references 
	At line:  9  position  4

Comments

Popular posts from this blog

How to write to a file in Kotlin

Python Tkinter example