Scrapy TakeFirst code example
Example 1: Scrapy ItemLoader example
def parse_song_list(self, response):
    """Yield one follow-up request per song link found in the response.

    Every ``<a>`` under ``//body//ul[@class="f-hide"]/li`` is treated as one
    song. For each of them an ``ItemLoader`` wrapping a fresh ``PlayListItem``
    receives the anchor text as ``song_name`` and the page ``<title>`` text as
    ``title``. The loader travels to ``self.parse_single_song`` through the
    request ``meta`` together with ``song_id``.

    Args:
        response: The response whose body contains the song list.

    Yields:
        scrapy.FormRequest: A GET request to ``self.BASE_URL + href`` sent
        with ``self.headers``.
    """
    selector = Selector(response)
    title = selector.xpath('//title/text()').extract()

    # Read the name and the href from the SAME <a> node. Extracting two
    # independent lists and pairing them by index misaligns the pairs (or
    # raises IndexError) as soon as one anchor lacks a text node or an href.
    for anchor in selector.xpath('//body//ul[@class="f-hide"]/li/a'):
        hrefs = anchor.xpath('@href').extract()
        if not hrefs:
            # Without a link target there is no URL to request.
            continue
        id_ = hrefs[0]
        names = anchor.xpath('text()').extract()

        loader = ItemLoader(item=PlayListItem())
        if names:
            loader.add_value('song_name', names[0])
        loader.add_value('title', title)

        yield scrapy.FormRequest(
            url=self.BASE_URL + id_,
            # NOTE(review): id_[9:] presumably drops a fixed 9-character
            # prefix such as "/song?id=" from the href -- confirm.
            meta={'song_id': id_[9:], 'loader': loader},
            method='GET',
            headers=self.headers,
            callback=self.parse_single_song,
        )
Example 2: Scrapy ItemLoader example
def parse(self, response):
    """Build a ``game`` object from the response and hand it to the Mongo helper.

    Rows are read from the ``J_away_content`` and ``J_home_content`` tables.
    For every row that has link text under ``td/a``, that text is concatenated
    with the row's plain ``td`` text and appended to the player list. The
    time, team and score fields are read from the response as well. The
    resulting ``game.__dict__`` is printed and then passed to
    ``mongoClient.insert_one`` with the database handle obtained for
    ``"hupu_data"`` and the name ``"games"``.

    Args:
        response: The response to extract the game data from.

    Returns:
        None.
    """
    mongoClient = mongodb_client('localhost', 27017)
    # Single-argument print(...) calls produce the same output on Python 2
    # and are valid syntax on Python 3, unlike the print statement.
    print("************************")
    player_away = response.xpath('//table[@id="J_away_content"]/tbody/tr')
    player_home = response.xpath('//table[@id="J_home_content"]/tbody/tr')

    # NOTE(review): the indentation of the original snippet was lost. This
    # assumes the `if player_away:` guard covered the rest of the method, so
    # nothing is built or stored when no away rows are found -- confirm.
    if not player_away:
        return

    # Both tables are processed identically, so one loop handles them in the
    # original order (away rows first, then home rows).
    player_rows = []
    for table_rows in (player_away, player_home):
        for player in table_rows:
            player_name = player.xpath('td/a/text()').extract()
            if player_name:
                player_rows.append(player_name + player.xpath('td/text()').extract())
    print("************************")

    # Named game_time / player_rows so the `time` module name and the `list`
    # builtin are not shadowed.
    game_time = response.xpath(
        '//div[@class="about_fonts clearfix"]/p[@class="time_f"]/text()'
    ).extract()
    team = response.xpath('//div[@class="message"]/p/a/text()').extract()
    score = response.xpath('//div[@class="message"]/h2/text()').extract()
    url = response.url

    g = game(game_time, team, score, player_rows, url)
    print(g.__dict__)
    client = mongoClient.connect()
    db = mongoClient.useDB(client, "hupu_data")
    print(mongoClient.insert_one(db, "games", g.__dict__))