
Commit

update
iamsk committed Nov 6, 2016
1 parent cd0adb1 commit b09903b
Showing 1 changed file with 16 additions and 9 deletions.
dianping/dianping/spiders/spider.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 import requests
+from json import loads
 from scrapy.http import Request
 from scrapy.selector import Selector

@@ -33,8 +34,6 @@ def clean_string(string):

 def address_to_geo(address):
     data = requests.get(BAIDU_GEO.format(address)).json()
-    if 'result' in data:
-        return {}
     longitude = data['result']['location']['lng'] if 'result' in data else 120.260569
     latitude = data['result']['location']['lat'] if 'result' in data else 30.242865
     return {'longitude': longitude, 'latitude': latitude}
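
Note on the hunk above: the removed check returned an empty dict exactly when Baidu did return a result, so real coordinates were never used; with it gone, the function geocodes the address and falls back to fixed coordinates (which appear to sit in Hangzhou) only when no result comes back. BAIDU_GEO itself is defined outside this diff; a minimal sketch of what it presumably looks like, assuming the Baidu Geocoding v2 web API and a placeholder ak key:

    import requests

    # Assumed definition -- the real BAIDU_GEO is not shown in this commit.
    BAIDU_GEO = 'http://api.map.baidu.com/geocoder/v2/?address={}&output=json&ak=<YOUR_BAIDU_AK>'

    def address_to_geo(address):
        data = requests.get(BAIDU_GEO.format(address)).json()
        # fall back to a fixed point (roughly Hangzhou) when geocoding fails
        longitude = data['result']['location']['lng'] if 'result' in data else 120.260569
        latitude = data['result']['location']['lat'] if 'result' in data else 30.242865
        return {'longitude': longitude, 'latitude': latitude}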
@@ -46,11 +45,9 @@ class dianpingSpider(CommonSpider):

     def start_requests(self):
         for k, v in start_url_dict.items():
-            for i in range(1, 20):
+            for i in range(1, 3):
                 url = base_category_url + v + 'p{}'.format(i)
                 yield Request(url, callback=self.parse, meta={'category': k})
-                break
-            break
 
     def parse(self, response):
         hxs = Selector(response)
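
Note: start_url_dict and base_category_url are defined elsewhere in the file. From the way they are used here, base_category_url is a city-level listing prefix, each value in start_url_dict is a category path fragment, and p1, p2, ... are pagination suffixes; the commit narrows the crawl from pages 1-19 to pages 1-2 per category and drops the break statements that previously cut the loops short. A hypothetical sketch (the city and slugs are made up for illustration):

    # Hypothetical values -- the real definitions are not part of this diff.
    base_category_url = 'http://www.dianping.com/hangzhou/'
    start_url_dict = {
        u'美食': 'ch10/',    # category label -> URL path fragment (assumed)
        u'丽人': 'ch50/',
    }
    # start_requests() above would then yield requests for
    # http://www.dianping.com/hangzhou/ch10/p1, .../ch10/p2, and so on,
    # tagging each request with its category via meta={'category': k}.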
@@ -85,10 +82,15 @@ def parse_shop(self, response):
 class dianpingDealSpider(CommonSpider):
     name = "dianping-deal"
     allowed_domains = ["dianping.com"]
-    start_urls = [
-        "http://t.dianping.com/deal/21481263",
 
-    ]
+    def start_requests(self):
+        with open('partner.json', 'rb') as f:
+            for line in f:
+                data = loads(line)
+                for url in data['deals']:
+                    yield Request(url, callback=self.parse, meta={'category': data['category'],
+                                                                  'partner': data['name']})
+                break
 
     def parse(self, response):
         deal = {}
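
Note: the hard-coded start_urls list is replaced by a start_requests that reads partner.json line by line, so the file is presumably JSON Lines: one object per line with at least a name, a category, and a deals list of URLs (the field names come from the code; everything else below is illustrative). The trailing break appears to stop after the first partner line, presumably a temporary limit while testing. A hypothetical partner.json line:

    {"name": "example-partner", "category": "美食", "deals": ["http://t.dianping.com/deal/21481263"]}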
@@ -100,4 +102,9 @@ def parse(self, response):
         deal['description'] = clean_string(description)
         price = bd.css('.price-display::text').extract_first()
         deal['price'] = clean_string(price)
-        print deal
+        # it's dynamic
+        # images = hxs.xpath('//div[@class="img-area"]//img/@src').extract()
+        # deal['images'] = ','.join(images[:2])
+        deal['category'] = response.request.meta['category']
+        deal['partner'] = response.request.meta['partner']
+        return deal
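
Note: the image extraction stays commented out because, per the "it's dynamic" remark, the image area is filled in client-side and is not present in the HTML Scrapy downloads. The Python 2 print statement gives way to returning the item, with category and partner carried over from the request meta set in start_requests. Roughly, parse() now returns a dict shaped like this (keys from the hunk above, plus whatever is set earlier in the method; values are illustrative only):

    # Illustrative item -- only the keys come from the diff.
    deal = {
        'description': u'...cleaned deal description...',
        'price': u'99',
        'category': u'美食',
        'partner': u'example-partner',
    }

Under the usual Scrapy workflow the spider would be run from the project root, where partner.json is expected to live, with something like scrapy crawl dianping-deal.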
