BeautifulSoup: parsing HTML (code examples)
Example 1: Fetch a page with requests and parse it with BeautifulSoup
#start
from bs4 import BeautifulSoup
import requests

# Download the page. The timeout keeps the script from waiting forever on an
# unresponsive server, and raise_for_status() stops on a 4xx/5xx response
# instead of silently parsing an error page.
req = requests.get('https://www.slickcharts.com/sp500', timeout=10)
req.raise_for_status()

# Parse the response body with Python's built-in HTML parser.
soup = BeautifulSoup(req.text, 'html.parser')
Example 2: Parse malformed HTML and XML with BeautifulSoup
>>> from bs4 import BeautifulSoup
>>> soup = BeautifulSoup("<p>Some<b>bad<i>HTML")
>>> print soup.prettify()
<html>
<body>
<p>
Some
<b>
bad
<i>
HTML
</i>
</b>
</p>
</body>
</html>
>>> soup.find(text="bad")
u'bad'
>>> soup.i
<i>HTML</i>
#
>>> soup = BeautifulSoup("<tag1>Some<tag2/>bad<tag3>XML", "xml")
#
>>> print soup.prettify()
<?xml version="1.0" encoding="utf-8">
<tag1>
Some
<tag2 />
bad
<tag3>
XML
</tag3>
</tag1>
Example 3: Create a BeautifulSoup object from a file or a string (Beautiful Soup 4)
from bs4 import BeautifulSoup

# Parse from an open file handle. Naming the parser avoids bs4's
# "no parser was explicitly specified" warning and keeps the result the same
# on machines that have different parsers installed.
with open("index.html") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Parse from a string of markup (this rebinds ``soup``).
soup = BeautifulSoup("<html>a web page</html>", "html.parser")
Example 4: Find the direct children of a tag with BeautifulSoup
# Locate the first <li class="text"> in the document.
li = soup.find("li", {"class": "text"})

# Collect the <a> tags that are direct children of that <li>;
# recursive=False skips deeper descendants. find() returns None when nothing
# matches, so fall back to an empty list instead of raising AttributeError.
children = li.find_all("a", recursive=False) if li is not None else []

for child in children:
    print(child)
Example 5: Find elements by CSS class with BeautifulSoup
# All <div> tags whose CSS classes include "stylelistrow".
# find_all() is the current name; findAll is the legacy BS3 spelling.
mydivs = soup.find_all("div", class_="stylelistrow")
Example 6: Filtering by tag name with find_all() in BeautifulSoup (Python)
import re

# Tags whose name matches the regex, i.e. starts with "b" (<body>, <b>, ...).
soup.find_all(re.compile(r"^b"))

# Tags named exactly "b".
soup.find_all('b')

# Tags named either "a" or "b".
soup.find_all(["a", "b"])