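# Scrapes data from the 10jqka (同花顺) website, four pages in total; a good practice project. The results are saved to a CSV file.
# It still has shortcomings; published as-is for discussion. qq:2385455226 - visitors welcome.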
import csv

import requests
from lxml import html
# HTTP request headers sent with every page request to data.10jqka.com.cn.
# NOTE(review): the Cookie and hexin-v values look session-bound and may need
# refreshing when requests start being rejected - confirm against the site.
headers = {
    'Accept': 'text/html, */*; q=0.01',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Cookie': '__utma=156575163.1547851450.1557741097.1557741097.1558270926.2; __utmz=156575163.1558270926.2.2.utmcsr=10jqka.com.cn|utmccn=(referral)|utmcmd=referral|utmcct=/; refreshStat=off; log=; Hm_lvt_78c58f01938e4d85eaf619eae71b4ed1=1557739152,1558270937,1558341509,1558417235; Hm_lvt_60bad21af9c824a4a0530d5dbf4357ca=1557749473,1558271070,1558341991,1558417357; Hm_lvt_f79b64788a4e377c608617fba4c736e2=1557749474,1558271070,1558341991,1558417357; Hm_lpvt_60bad21af9c824a4a0530d5dbf4357ca=1558417391; Hm_lpvt_78c58f01938e4d85eaf619eae71b4ed1=1558417391; Hm_lpvt_f79b64788a4e377c608617fba4c736e2=1558417391; v=Agqx8HNlYqY8be4KVLfuTfT9W_uv-49tgH0C-ZRDtNrptaStfIveZVAPUgpn',
    'hexin-v': 'Agqx8HNlYqY8be4KVLfuTfT9W_uv-49tgH0C-ZRDtNrptaStfIveZVAPUgpn',
    'Host': 'data.10jqka.com.cn',
    'Pragma': 'no-cache',
    'Referer': 'http://data.10jqka.com.cn/market/rzrq/board/sh/',
    # If this one causes problems, swap in a different User-Agent yourself.
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) ',
}
# Fetch every getRzrqPage AJAX page, parse the returned HTML table and append
# its rows to tonghua3.csv.
PAGE_COUNT = 4  # pages 1..4 are requested
COLUMNS_PER_ROW = 13  # the flat list of <td> texts is regrouped 13 cells per row
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the script

etree = html.etree  # loop-invariant, so bound once

# 'a' keeps rows written by earlier runs; newline='' lets csv.writer control
# the line endings itself, as the csv module requires.
with open('tonghua3.csv', 'a', encoding='gbk', newline='') as f:
    writer = csv.writer(f)
    for page in range(1, PAGE_COUNT + 1):
        url = 'http://data.10jqka.com.cn/market/rzrq/board/getRzrqPage/page/%d/ajax/1/' % page
        res = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail loudly on an HTTP error instead of writing an error page's cells.
        res.raise_for_status()
        cont = etree.HTML(res.text)

        # The first header row is only echoed for inspection; it is not written.
        print(cont.xpath("//thead/tr[1]/th/text()"))

        # Every page repeats the same table header, so it is written once per
        # run rather than once per page.
        if page == 1:
            header = cont.xpath("//thead/tr[2]/th/text()")
            header.insert(0, '交易日期')
            print(header)
            writer.writerow(header)

        # All <td> texts arrive as one flat list; slice it into fixed-width rows.
        cells = cont.xpath("//tbody/tr/td/text()")
        for start in range(0, len(cells), COLUMNS_PER_ROW):
            row = cells[start:start + COLUMNS_PER_ROW]
            print(row)
            # csv.writer handles quoting, so commas, quotes or brackets inside
            # a cell no longer corrupt the line.
            writer.writerow(row)