Python BeautifulSoup example
Example 1: Use BeautifulSoup
#start
from bs4 import BeautifulSoup
import requests
# Download the S&P 500 listing page. The timeout keeps the script from
# hanging indefinitely if the server never answers.
req = requests.get('https://www.slickcharts.com/sp500', timeout=10)
# Fail loudly on a 4xx/5xx answer instead of silently parsing an error page.
req.raise_for_status()
# Parse the returned HTML with the 'html.parser' backend.
soup = BeautifulSoup(req.text, 'html.parser')
Example 2: Web scraper in Python
def simple_get(url, timeout=10):
    """
    Fetch the content at `url` with an HTTP GET request.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to the underlying ``get`` call.

    Returns:
        The raw response body (``resp.content``) when ``is_good_response``
        accepts the response, otherwise None. None is also returned when
        the request raises a ``RequestException``; the error is reported
        through ``log_error`` first.
    """
    # Imported locally so the function works even when the enclosing
    # module did not bring these names into scope.
    from contextlib import closing

    from requests import get
    from requests.exceptions import RequestException

    try:
        # closing() releases the streamed connection when the block exits.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """
    Return True if the response looks like HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains "html" (compared case-insensitively). A response
    without a Content-Type header is treated as not HTML instead of
    raising KeyError.
    """
    # .get() plus "or ''" covers both a missing header and an explicit None.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
def log_error(e):
    """
    Report an error.

    The current implementation simply prints `e` to standard output;
    swap the body for any other reporting mechanism as needed.
    """
    message = e
    print(message)
Example 3: BeautifulSoup example
import requests
# Fetch the sample page. The timeout keeps the script from hanging
# indefinitely if the server never answers.
page = requests.get(
    "http://dataquestio.github.io/web-scraping-pages/simple.html",
    timeout=10,
)
# A bare `page` expression only shows output in an interactive session;
# print it so running the file as a script displays the response too.
print(page)