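# Create a project (the trailing argument is the project name):
#   scrapy startproject baidu
# Run a spider -- you must cd into the project directory first:
#   scrapy crawl baidu
# Note: the argument to `scrapy crawl` is the spider's `name` attribute, so
# the spider defined in this file is started with `scrapy crawl quotes`.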
import scrapy
from bs4 import BeautifulSoup as bs
#scrapy crawl quotes
class QuotesSpider(scrapy.Spider):
    """Scrape a Baidu ranking page and export its entries to a CSV file.

    Every ``td.keyword`` cell of the response is parsed with BeautifulSoup;
    the text and ``href`` of the first ``<a>`` in the cell become one
    ``(title, link)`` row of the output file ``output_path``.
    """

    name = "quotes"

    # Pages to crawl; every response is routed to ``parse``.
    ranking_urls = (
        'http://top.baidu.com/buzz?b=353&c=10&fr=topcategory_c10',
    )

    # Destination file, relative to the directory the crawl is started from.
    output_path = '小说排行榜.csv'

    def start_requests(self):
        """Yield one request per entry of ``ranking_urls``."""
        for url in self.ranking_urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Collect (title, link) pairs from ``response`` and write them as CSV.

        Cells that contain no ``<a>`` element are skipped with a warning
        rather than aborting the whole export, and an anchor without an
        ``href`` attribute is stored with an empty link.

        The file is written as real CSV text. Previously an XLSX workbook
        was saved under the ``.csv`` name, which CSV readers cannot open.
        """
        # Function-scope import: keeps the block self-contained without
        # touching the file-level import section.
        import csv

        rows = []
        for cell_html in response.css('td.keyword').getall():
            anchor = bs(cell_html, 'lxml').a
            if anchor is None:
                self.logger.warning('td.keyword cell without a link, skipped')
                continue
            title = anchor.text
            self.log(title)
            rows.append((title, anchor.attrs.get('href', '')))

        # newline='' is required by the csv module; utf-8-sig prepends a BOM
        # so spreadsheet applications detect the encoding of non-ASCII titles.
        with open(self.output_path, 'w', newline='', encoding='utf-8-sig') as fh:
            csv.writer(fh).writerows(rows)
        self.log('保存完成')