mybeautiful soup collect urls code example
Example 1: BeautifulSoup - scraping the link of the website
import requests
from bs4 import BeautifulSoup
page = requests.get('http://www.example.com')
soup = BeautifulSoup(page.content, 'html.parser')
print(soup.select_one('p a').attrs['href']) # get the link of the website
Example 2: how to pass the current url to beautiful soup html
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
def parse_content(html_doc):
soup = BeautifulSoup(html_doc, 'html.parser')
print (soup.title.text)
driver = webdriver.Chrome()
driver.get("http://www.python.org")
assert "Python" in driver.title
elem = driver.find_element_by_name("q")
elem.clear()
elem.send_keys("pycon")
elem.send_keys(Keys.RETURN)
assert "No results found." not in driver.page_source
html_doc = driver.page_source
driver.close()
parse_content(html_doc)