BeautifulSoup: parse HTML code examples

Example 1: use beautifulsoup

#start


from bs4 import BeautifulSoup
import requests

# Download the page. The timeout (seconds) keeps the script from hanging
# forever if the server never answers.
req = requests.get('https://www.slickcharts.com/sp500', timeout=10)
# Raise requests.HTTPError on a 4xx/5xx response instead of silently
# parsing an error page.
req.raise_for_status()
# Build the parse tree with Python's built-in HTML parser.
soup = BeautifulSoup(req.text, 'html.parser')

Example 2: beautifulsoup

>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print soup.prettify()
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(text="bad")
u'bad'
>>> soup.i
<i>HTML</i>
#
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
#
>>> print soup.prettify()
<?xml version="1.0" encoding="utf-8">
<tag1>
Some
<tag2 />
bad
<tag3>
XML
</tag3>
</tag1>

Example 3: beautiful soup 4

from bs4 import BeautifulSoup

# Option A: parse an open file handle. Naming the parser avoids
# GuessedAtParserWarning and keeps the resulting tree the same on every
# machine, regardless of which optional parsers are installed.
with open("index.html") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Option B: parse markup held in a string (this rebinds `soup`).
soup = BeautifulSoup("<html>a web page</html>", "html.parser")

Example 4: children beautiful soup

# Find the first <li class="text"> in an existing `soup` object.
li = soup.find('li', class_='text')
# find() returns None when nothing matches, so fall back to an empty list
# rather than failing with AttributeError on None.
# recursive=False restricts the search to direct children of the <li>.
children = li.find_all('a', recursive=False) if li is not None else []
for child in children:
    print(child)

Example 5: beautifulsoup find class

# Collect every <div> whose CSS class includes "stylelistrow".
# find_all() is the current name of the legacy findAll() alias; `class_`
# carries a trailing underscore because `class` is a Python keyword.
mydivs = soup.find_all("div", class_="stylelistrow")

Example 6: python beautifulsoup find_all

import re

# A compiled regular expression is matched against tag names:
# every tag whose name starts with "b" (e.g. <body>, <b>).
soup.find_all(re.compile("^b"))
# A plain string matches that exact tag name only.
soup.find_all('b')
# A list matches any of the listed tag names.
soup.find_all(["a", "b"])

Tags:

Misc Example