Python BeautifulSoup: get all href links from a web page (code examples)
Example 1: Get all href links from a website with BeautifulSoup in Python (as a function that returns a list)
import re
import urllib2
from contextlib import closing

from BeautifulSoup import BeautifulSoup

# NOTE: `urllib2` and the `BeautifulSoup` module (BeautifulSoup 3) exist only
# on Python 2. On Python 3 the equivalents are `urllib.request` and `bs4`.

# Absolute links only. The scheme is `https?` rather than a bare `http` so
# that https:// links -- the majority on most sites -- are not dropped.
ABSOLUTE_LINK = re.compile(r"^https?://")


def getLinks(url):
    """Return the href of every absolute http(s) link on the page at *url*.

    Args:
        url: Address of the page to download and scan.

    Returns:
        A list with the ``href`` value of each ``<a>`` tag whose href starts
        with ``http://`` or ``https://``, in document order. Relative links
        and anchors without an href are skipped.

    Raises:
        urllib2.URLError: If the page cannot be fetched.
    """
    # urllib2 responses are not context managers, so `closing` is what
    # guarantees the connection is released even if parsing fails.
    with closing(urllib2.urlopen(url)) as html_page:
        soup = BeautifulSoup(html_page)
        anchors = soup.findAll('a', attrs={'href': ABSOLUTE_LINK})
        return [anchor.get('href') for anchor in anchors]


if __name__ == "__main__":
    print(getLinks("https://arstechnica.com"))
Example 2: Get all href links from a website with BeautifulSoup in Python (inline script that prints each link)
import re
import urllib2
from contextlib import closing

from BeautifulSoup import BeautifulSoup

# Download a page and print the href of every absolute link it contains.
#
# NOTE: `urllib2` and the `BeautifulSoup` module (BeautifulSoup 3) exist only
# on Python 2. On Python 3 the equivalents are `urllib.request` and `bs4`.

# urllib2 responses are not context managers, so `closing` is what guarantees
# the connection is released even if parsing fails.
with closing(urllib2.urlopen("https://arstechnica.com")) as html_page:
    soup = BeautifulSoup(html_page)
    # `https?` keeps https:// links as well as http:// ones; matching only
    # "^http://" would silently skip every secure link on the page.
    for link in soup.findAll('a', attrs={'href': re.compile(r"^https?://")}):
        # Single-argument call form prints the same as the Python 2 statement.
        print(link.get('href'))