BeautifulSoup link scraping code examples

Example 1: BeautifulSoup - scraping a link from a website

import requests
from bs4 import BeautifulSoup

# download the page and parse it with the built-in html.parser
page = requests.get('http://www.example.com')
soup = BeautifulSoup(page.content, 'html.parser')

# select the first <a> inside a <p> and print its href attribute
print(soup.select_one('p a')['href'])
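
If the page has no <p> containing an <a>, select_one returns None and the lookup above raises an error. A minimal defensive sketch, assuming the same example.com page:

link = soup.select_one('p a')
if link is not None:
    print(link['href'])
else:
    print('no matching link found')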

Example 2: Use Scrapy to scrape a book store

import scrapy


class BookScraper(scrapy.Spider):
    name = "bookscrape"

    start_urls = [
        'http://books.toscrape.com/'
    ]

    def parse(self, response):
        # each book card sits in a Bootstrap column with the col-lg-3 class
        all_books = response.css('.col-lg-3')

        for book in all_books:
            # image source, title, and price for every book card
            img_link = book.css('a img::attr(src)').get()
            title = book.css('h3 a::attr(title)').get()
            price = book.css('div.product_price p.price_color::text').get()

            yield {
                'image_url': img_link,
                'book_title': title,
                'product_price': price,
            }

        # follow the pagination link until there are no more pages
        next_page = response.css('li.next a::attr(href)').get()
        if next_page is not None:
            yield response.follow(next_page, callback=self.parse)
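
The spider above is normally run from a Scrapy project with scrapy crawl bookscrape, or standalone with scrapy runspider. As a rough sketch, it can also be driven programmatically with CrawlerProcess; the FEEDS setting (Scrapy 2.x) and the books.json output file are assumptions, not part of the original example:

from scrapy.crawler import CrawlerProcess

# write the scraped items to books.json (file name is an arbitrary choice)
process = CrawlerProcess(settings={
    'FEEDS': {'books.json': {'format': 'json'}},
})
process.crawl(BookScraper)
process.start()  # blocks until the crawl finishes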

Example 3: Get all href links from a website with BeautifulSoup (Python)

import re
import urllib.request

from bs4 import BeautifulSoup

# fetch the page and parse it
html_page = urllib.request.urlopen("https://arstechnica.com")
soup = BeautifulSoup(html_page, 'html.parser')

# find every <a> whose href starts with http:// or https://
for link in soup.find_all('a', attrs={'href': re.compile(r"^https?://")}):
    print(link.get('href'))
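
The regex above only matches absolute URLs, so relative hrefs (e.g. /gadgets/) are skipped. A small variant, assuming you also want those, resolves them against the base URL with urllib.parse.urljoin:

import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup

base_url = "https://arstechnica.com"
soup = BeautifulSoup(urllib.request.urlopen(base_url), 'html.parser')

for link in soup.find_all('a', href=True):
    # turn relative hrefs into absolute URLs; absolute hrefs pass through unchanged
    print(urljoin(base_url, link['href']))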