# -*- coding:utf-8 -*-
# @Time    : 2018/4/27 10:50 AM
# @Author  : Swing
import logging
import traceback

import scrapy

from elabSpider.items import ResoldHouseItem
from elabSpider.email_util import send_email


class RentalHouseSpider(scrapy.Spider):
    """Crawl resold-house listings of selected communities on nb.anjuke.com.

    Each URL in ``start_urls`` is a community listing page. ``parse`` follows
    every listing link on the page to ``parse_item`` and then follows the
    "next page" link back into ``parse``. ``parse_item`` yields whatever
    ``ResoldHouseItem.handle_response`` builds from the detail page.

    NOTE: the class is called ``RentalHouseSpider`` although the spider name
    and the item type refer to resold houses; the class name is kept unchanged
    so that existing references to it keep working.
    """

    name = 'sjkresoldHouse'
    allowed_domains = [
        'nb.anjuke.com'
    ]
    start_urls = [
        'https://nb.anjuke.com/community/props/sale/275642/',
        'https://nb.anjuke.com/community/props/sale/1003094/',
        'https://nb.anjuke.com/community/props/sale/275869/',
        'https://nb.anjuke.com/community/props/sale/973807/',
        'https://nb.anjuke.com/community/props/sale/973808/',
        'https://nb.anjuke.com/community/props/sale/275517/',
        'https://nb.anjuke.com/community/props/sale/1000067/',
        'https://nb.anjuke.com/community/props/sale/406899/',
        'https://nb.anjuke.com/community/props/sale/1016525/',
        'https://nb.anjuke.com/community/props/sale/275936/',
        'https://nb.anjuke.com/community/props/sale/1017728/',
        'https://nb.anjuke.com/community/props/sale/275274/',
        'https://nb.anjuke.com/community/props/sale/275658/',
        'https://nb.anjuke.com/community/props/sale/275386/',
        'https://nb.anjuke.com/community/props/sale/1006982/',
        'https://nb.anjuke.com/community/props/sale/275764/',
        'https://nb.anjuke.com/community/props/sale/792725/',
        'https://nb.anjuke.com/community/props/sale/1022250/'
        # Single detail-page URL, disabled (presumably kept for debugging parse_item):
        # 'https://nb.anjuke.com/prop/view/A1237992888?from=filter&spread=filtersearch_p&position=117&kwtype=filter&now_time=1526637680'
    ]

    def _report_error(self, what, response, err):
        """Log a crawl failure and send a notification email about it.

        Must be called from inside an ``except`` block so that
        ``traceback.format_exc()`` can pick up the active exception.

        :param what: short description, e.g. ``'get detail url error'``; it is
            used both in the log line and (prefixed with the spider name) as
            the email subject.
        :param response: the response that was being processed.
        :param err: the caught exception.
        """
        url = response.url
        # Exception args are not guaranteed to be strings (e.g. KeyError(1),
        # OSError(errno, msg)); stringify each one so building the message
        # cannot itself raise TypeError and hide the original failure.
        reason = '-'.join(str(arg) for arg in err.args)
        # Log before mailing: if sending the email fails, the error is still
        # recorded in the log.
        logging.error('%s ! url: %s reason: %s', what, url, reason)
        send_email('%s %s' % (self.name, what), url + '\n' + traceback.format_exc())

    def parse(self, response):
        """Yield a request per listing on the page, then one for the next page.

        :param response: response for a community listing page.
        :return: generator of ``scrapy.Request`` objects; listing links are
            handled by ``parse_item`` and the next-page link by ``parse``.
        """
        try:
            detail_urls = response.xpath('//ul[@class="m-house-list"]/li/a/@href').extract()
            for detail_url in detail_urls:
                # urljoin leaves absolute URLs untouched and resolves relative
                # hrefs, which scrapy.Request would otherwise reject.
                yield scrapy.Request(response.urljoin(detail_url), callback=self.parse_item)
        except Exception as err:
            self._report_error('get detail url error', response, err)

        try:
            next_page = response.xpath(
                r'//div[@class="m-page"]/div[@class="multi-page"]/a[@class="aNxt"]/@href'
            ).extract_first()
            # extract_first() returns None when there is no next-page link.
            if next_page:
                yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
        except Exception as err:
            self._report_error('get next page url error', response, err)

    def parse_item(self, response):
        """Yield the item built from a listing detail page.

        :param response: response for a single listing's detail page.
        :return: generator yielding the result of
            ``ResoldHouseItem.handle_response(response)``.
        """
        try:
            yield ResoldHouseItem.handle_response(response)
        except Exception as err:
            self._report_error('parse response error', response, err)