scrapy item loader replace code example
Example 1: Scrapy ItemLoader example
def parse_question(self, response):
    """Parse a Zhihu question page and request the first page of its answers.

    The numeric question id is taken from ``response.url``; the remaining
    fields are collected from the page through an ``ItemLoader`` using CSS
    selectors.

    Args:
        response: Response for the question page. ``response.url`` is matched
            against the question URL pattern.

    Yields:
        The loaded ``ZhihuQuestionItem``, followed by a ``scrapy.Request``
        built from ``self.start_answer_url`` (offset 0, limit 20) whose
        callback is ``self.parse_answer``. Nothing is yielded when the URL
        does not contain a question id.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    question_pattern = re.compile(r'(.*zhihu.com/question/(\d+))(/|$).*')
    match_object = question_pattern.match(response.url)
    if match_object is None:
        # Not a question URL: there is no id to load or to fetch answers for.
        return
    question_id = match_object.group(2)

    item_loader = ItemLoader(item=ZhihuQuestionItem(), response=response)
    item_loader.add_value('zhihu_id', question_id)
    item_loader.add_css('title', 'h1.QuestionHeader-title::text')
    item_loader.add_css('topics', '.TopicLink .Popover div::text')
    item_loader.add_value('url', response.url)
    item_loader.add_css('content', '.QuestionHeader-detail div div span::text')
    item_loader.add_css('answer_num', '.List-headerText span::text')
    item_loader.add_css('comments_num', '.QuestionHeader-Comment button::text')
    item_loader.add_css('watch_user_num', '.NumberBoard-value::text')
    yield item_loader.load_item()

    # Start paging through the answers of this question.
    answers_url = self.start_answer_url.format(
        question_id=question_id, offset=0, limit=20)
    yield scrapy.Request(answers_url, headers=self.headers,
                         callback=self.parse_answer)
Example 2: Scrapy parse callback example (XPath extraction saved to MongoDB; no ItemLoader)
def parse(self, response):
    """Scrape one game page and insert the result into MongoDB.

    Collects one entry per player row from the away and home tables, reads
    the time, team and score texts, wraps everything in a ``game`` object and
    inserts that object's ``__dict__`` through the ``mongodb_client`` helper.

    Args:
        response: Response for the game page; queried with XPath and used
            for its ``url``.

    Returns:
        None. The extracted data and the insert result are only printed.
    """
    # NOTE(review): a new client is created for every response and is never
    # closed here -- confirm whether mongodb_client needs explicit cleanup.
    mongo = mongodb_client('localhost', 27017)
    player_stats = []  # was named `list`, which shadowed the builtin

    # Single-argument print(...) prints the same text whether it is parsed as
    # a Python 2 statement or called as the Python 3 function.
    print("************************")
    player_away = response.xpath('//table[@id="J_away_content"]/tbody/tr')
    player_home = response.xpath('//table[@id="J_home_content"]/tbody/tr')
    # Both tables are processed the same way. Looping over an empty selection
    # is a no-op, so no emptiness guard is needed.
    # NOTE(review): assumes the home loop was not nested under the original
    # `if player_away:` guard -- confirm against the upstream source.
    for rows in (player_away, player_home):
        for player in rows:
            player_name = player.xpath('td/a/text()').extract()
            if player_name:
                # Rows without a linked name are skipped.
                player_stats.append(
                    player_name + player.xpath('td/text()').extract())
    print("************************")

    game_time = response.xpath(
        '//div[@class="about_fonts clearfix"]/p[@class="time_f"]/text()').extract()
    team = response.xpath('//div[@class="message"]/p/a/text()').extract()
    score = response.xpath('//div[@class="message"]/h2/text()').extract()
    url = response.url

    g = game(game_time, team, score, player_stats, url)
    print(g.__dict__)

    client = mongo.connect()
    db = mongo.useDB(client, "hupu_data")
    # presumably "games" is the target collection name; verify against
    # mongodb_client.insert_one.
    print(mongo.insert_one(db, "games", g.__dict__))