Renaming downloaded images in Scrapy 0.24 with content from an item field while avoiding filename conflicts?
The pipelines.py:
from scrapy.pipelines.images import ImagesPipeline
from scrapy.http import Request
from scrapy.exceptions import DropItem
from scrapy import log
class MyImagesPipeline(ImagesPipeline):
    """Images pipeline that names stored files after the item's ``model`` field.

    Full-size images are stored as ``full/<model>`` and thumbnails under
    ``thumbs/<thumb_id>/``. When an item carries several image URLs, every
    image after the first gets a ``_<position>`` suffix so that images of the
    same item do not overwrite each other.

    NOTE(review): the full-size path carries no file extension, exactly as in
    the previous version -- confirm whether an extension should be appended.
    """

    def get_media_requests(self, item, info):
        """Yield one download request for each URL in ``item['image_urls']``.

        Args:
            item: scraped item; must provide ``model`` and may provide
                ``image_urls``.
            info: pipeline bookkeeping object supplied by Scrapy (unused).

        Yields:
            Request: one per image URL. ``request.meta`` carries ``model``
            (used by ``file_path``) and ``image_index``, the position of the
            URL inside ``image_urls``.
        """
        # A missing or empty ``image_urls`` yields nothing instead of raising
        # IndexError.
        for position, url in enumerate(item.get('image_urls') or []):
            # Only forward what file_path needs. Copying the whole item into
            # meta would let item fields shadow the meta keys that Scrapy
            # itself interprets.
            naming_meta = {'model': item['model'], 'image_index': position}
            yield Request(url, meta=naming_meta)

    def file_path(self, request, response=None, info=None):
        """Return the storage path of the full-size image for ``request``.

        Args:
            request: download request; ``request.meta['model']`` must be a
                non-empty sequence whose first element is used as the name.
            response: unused.
            info: unused.

        Returns:
            str: ``full/<model>`` for the first image of an item and
            ``full/<model>_<position>`` for any further image.
        """
        image_name = request.meta['model'][0]
        # Requests that carry no index are treated as the first image, which
        # keeps the original ``full/<model>`` path.
        position = request.meta.get('image_index', 0)
        if position:
            image_name = '%s_%d' % (image_name, position)
        log.msg(image_name, level=log.DEBUG)
        return 'full/%s' % image_name

    def thumb_path(self, request, thumb_id, response=None, info=None):
        """Return the storage path of the ``thumb_id`` thumbnail for ``request``.

        The name is ``thumb_id`` followed by the last ``/``-separated segment
        of the request URL, with ``.jpg`` appended. A URL that already ends in
        ``.jpg`` therefore produces a doubled extension.

        Args:
            request: download request whose URL supplies the file name.
            thumb_id: thumbnail identifier, used as directory and name prefix.
            response: unused.
            info: unused.

        Returns:
            str: ``thumbs/<thumb_id>/<thumb_id><last URL segment>.jpg``.
        """
        last_segment = request.url.split('/')[-1]
        thumb_name = thumb_id + last_segment
        log.msg(thumb_name, level=log.DEBUG)
        return 'thumbs/%s/%s.jpg' % (thumb_id, thumb_name)
Your settings.py is configured incorrectly. You should use this:
# Enable the custom images pipeline by its import path; the integer is the
# pipeline's order value among the enabled item pipelines.
ITEM_PIPELINES = {'allenheath.pipelines.MyImagesPipeline': 1}
For thumbnails to work, add this to settings.py:
# Thumbnails to generate for every downloaded image: each key is the thumbnail
# name (passed to thumb_path as thumb_id), each value its (width, height) size.
IMAGES_THUMBS = {
'small': (50, 50),
'big': (100, 100),
}