web scrapping with python code example
Example 1: python web scraping
import requests
from bs4 import BeautifulSoup
URL = 'https://www.monster.com/jobs/search/?q=Software-Developer&where=Australia'
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
Example 2: web scraping python
import os
import requests
from bs4 import BeautifulSoup
url = "https://www.google.com/"
reponse = requests.get(url)
if reponse.ok:
soup = BeautifulSoup(reponse.text, "lxml")
title = str(soup.find("title"))
title = title.replace("<title>", "")
title = title.replace("</title>", "")
print("The title is : " + str(title))
os.system("pause")
Example 3: web scraping python
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
url = 'http://web.mta.info/developers/turnstile.html'
response = requests.get(url)
soup = BeautifulSoup(response.text, "html.parser")
line_count = 1
for one_a_tag in soup.findAll('a'):
if line_count >= 36:
link = one_a_tag['href']
download_url = 'http://web.mta.info/developers/'+ link
urllib.request.urlretrieve(download_url,'./'+link[link.find('/turnstile_')+1:])
time.sleep(1)
line_count +=1
Example 4: web scraping python
Python OneStop Solution..
link: https://github.com/itzanuragsinghania/Python-One-Stop
Example 5: web scraping python
import scrapy
from ..items import SampletestItem
class QuoteTestSpider(scrapy.Spider):
name = 'quote_test'
start_urls = ['https://quotes.toscrape.com/']
def parse(self, response):
items = SampletestItem()
quotes = response.css("div.quote")
for quote in quotes:
items['title'] = quote.css("span.text::text").get()
items['author'] = quote.css(".author::text").get()
items['tags'] = quote.css(".tags .tag::text").getall()
yield items
next_page = response.css(".next a::attr(href)").get()
if next_page is not None:
next_url = response.urljoin(next_page)
yield scrapy.Request(next_url, callback=self.parse)
Example 6: web scraper python
>>> raw_html = simple_get('http://www.fabpedigree.com/james/mathmen.htm')
>>> html = BeautifulSoup(raw_html, 'html.parser')
>>> for i, li in enumerate(html.select('li')):
print(i, li.text)
0 Isaac Newton
Archimedes
Carl F. Gauss
Leonhard Euler
Bernhard Riemann
1 Archimedes
Carl F. Gauss
Leonhard Euler
Bernhard Riemann
2 Carl F. Gauss
Leonhard Euler
Bernhard Riemann
3 Leonhard Euler
Bernhard Riemann
4 Bernhard Riemann