# -*- coding: utf-8 -*-
 
- import scrapy
 
- from elabSpider.items import *
 
- import traceback
 
- from elabSpider.email_util import send_email
 
class ExampleSpider(scrapy.Spider):
    """Crawl 58.com listing pages and emit one item per linked detail page.

    ``parse`` handles listing pages: it schedules a ``parse_item`` request for
    every detail link it finds and then follows the pager's "next" link.
    ``parse_item`` delegates item extraction to
    ``ResoldApartmentItem.handle_response``.

    Errors raised inside either callback are reported through ``send_email``
    and the spider logger rather than being re-raised.
    """

    name = '58ershoufang'
    allowed_domains = ['58.com']
    start_urls = ['http://nb.58.com/haishu/ershoufang/']

    # XPath selecting the detail-page link of every entry on a listing page.
    _detail_href_xpath = r'//ul[@class="house-list-wrap"]/li/div[@class="list-info"]/h2[@class="title"]/a/@href'
    # XPath selecting the pager's "next" link on a listing page.
    _next_page_xpath = r'//div[@class="pager"]/a[@class="next"]/@href'

    def _report_error(self, subject, response):
        """Log and email the exception that is currently being handled.

        Must be called from inside an ``except`` block, because the traceback
        is obtained with ``traceback.format_exc()``.

        :param subject: email subject; also used as the log message prefix.
        :param response: the response whose processing failed; its URL is
            included in both the log record and the email body.
        """
        # Log first so the failure is recorded even if sending the email fails.
        self.logger.exception('%s: %s', subject, response.url)
        send_email(subject, response.url + '\n' + traceback.format_exc())

    def parse(self, response):
        """Yield requests for each detail link and for the next listing page.

        :param response: a listing-page response.
        :returns: generator of ``scrapy.Request`` objects; detail links use
            ``parse_item`` as callback, the next-page link uses ``parse``.
        """
        # NOTE: ``except Exception`` (not a bare ``except``) is deliberate.
        # These ``try`` blocks contain ``yield``; a bare ``except`` would also
        # catch the GeneratorExit raised when the generator is closed, and the
        # generator would then keep yielding, which is a RuntimeError.
        try:
            for href in response.xpath(self._detail_href_xpath).extract():
                # Resolve against the page URL: scrapy.Request rejects URLs
                # without a scheme (relative or ``//host/...`` hrefs).
                yield scrapy.Request(response.urljoin(href), callback=self.parse_item)
        except Exception:
            self._report_error('58ershoufang lv 1 url parse error', response)

        try:
            next_page = response.xpath(self._next_page_xpath).extract_first()
            if next_page:
                yield scrapy.Request(response.urljoin(next_page), callback=self.parse)
        except Exception:
            self._report_error('58ershoufang get next url error', response)

    def parse_item(self, response):
        """Yield the item built from a detail page.

        :param response: a detail-page response.
        :returns: generator yielding the value returned by
            ``ResoldApartmentItem.handle_response(response)``; yields nothing
            if that call raises (the error is reported instead).
        """
        try:
            item = ResoldApartmentItem.handle_response(response)
        except Exception:
            self._report_error('58ershoufang get item parse error', response)
            return
        yield item
 
 
  |