scrape site with bs4 code example
Example 1: web scraper python
>>> from bs4 import BeautifulSoup
>>> raw_html = open('contrived.html').read()
>>> html = BeautifulSoup(raw_html, 'html.parser')
>>> for p in html.select('p'):
... if p['id'] == 'walrus':
... print(p.text)
'I am the walrus'
Example 2: BeautifulSoup - scraping list from html
from bs4 import BeautifulSoup
# Minimal HTML document used as the parsing fixture for this example:
# a title, two paragraphs (one with a class) and an unordered list of names.
SIMPLE_HTML = '''<html>
<head></head>
<body>
<h1>This is a title</h1>
<p class="subtitle">Lorem ipsum dolor sit amet.</p>
<p>Here's another p without a class</p>
<ul>
<li>Sarah</li>
<li>Mary</li>
<li>Charlotte</li>
<li>Carl</li>
</ul>
</body>
</html>'''
# Parse the fixture once at module level; 'html.parser' selects Python's
# built-in HTML parser, so no additional parser package is needed.
simple_soup = BeautifulSoup(SIMPLE_HTML, 'html.parser')
# Find list from html
def find_list():
    """Print the text of every ``<li>`` element found in ``simple_soup``.

    Reads the module-level ``simple_soup`` object, collects the text of each
    list item into a list of strings and prints that list. Returns ``None``.
    """
    list_items = simple_soup.find_all('li')
    # get_text() always yields a str, whereas .string is None for an <li>
    # that contains nested tags or more than one child node.
    my_list = [item.get_text() for item in list_items]
    print(my_list)


find_list()