先上图,结果
【一】利用这个模块连接sql server
import pymssql #引入pymssql模块
安装方式:
conda install pymssql
【二】连接失败的可能原因
参考此博客,需要开启几个服务 https://blog.csdn.net/pennykoon/article/details/79155443
【三】测试代码
import pymssql #引入pymssql模块
def conn():
connect = pymssql.connect('Y_PIN-PC', 'zhaolong', 'zhaolong', 'test') #服务器名,账户,密码,数据库名
if connect:
print("连接成功!")
return connect
if __name__ == '__main__':
conn = conn()
【四】pymssql 使用教程
参考博客,随便看下就行,当然得会SQL server https://www.cnblogs.com/baiyangcao/p/pymssql_basic.html
- 使用
connect
创建连接对象
-
connect.cursor
创建游标对象,SQL语句的执行基本都在游标上进行
-
cursor.executeXXX
方法执行SQL语句,cursor.fetchXXX
获取查询结果等
- 调用
close
方法关闭游标cursor
和数据库连接
【五】全部代码调试 (1)
只能把代码里的内容存储到数据库
import pymssql #引入pymssql模块
def conn():
connect = pymssql.connect('Y_PIN-PC', 'zhaolong', 'zhaolong', 'test') #服务器名,账户,密码,数据库名
if connect:
print("连接成功!")
#----------------------------------使用connect创建连接对象-----------------------
cursor = connect.cursor()
# ---------------------------------新建、插入操作--------------------------------
cursor.execute("""
IF OBJECT_ID('persons', 'U') IS NOT NULL
DROP TABLE persons
CREATE TABLE persons (
id INT NOT NULL,
name VARCHAR(100),
salesrep VARCHAR(100),
PRIMARY KEY(id)
)
""")
cursor.executemany(
"INSERT INTO persons VALUES (%d, %s, %s)",
[(1, 'John Smith', 'John Doe'),
(2, 'Jane Doe', 'Joe Dog'),
(3, 'Mike T.', 'Sarah H.')])
# 如果没有指定autocommit属性为True的话就需要调用commit()方法
connect.commit()
# 查询操作
cursor.execute('SELECT * FROM persons ')
row = cursor.fetchone()
while row:
print("ID=%d, Name=%s" % (row[0], row[1]))
row = cursor.fetchone()
# 也可以使用for循环来迭代查询结果
# for row in cursor:
# print("ID=%d, Name=%s" % (row[0], row[1]))
# 关闭连接
connect.close()
return connect
if __name__ == '__main__':
conn = conn()
【六】把爬虫结果存储到数据库里啊
(报错1)SyntaxError: Non-UTF-8 code starting with '\x
(图6.1) 改变这个地方,由gbk改为 UTF-8即可哈
【七】改后的代码
import pymssql #引入pymssql模块
import re
from urllib import request
def _fetch_content():
r = request.urlopen(url)
htmls = r.read().decode('utf-8', 'ignore')
# htmls = str(htmls, encoding='utf-8')
# htmls = r.read().decode('utf-8')
return htmls
def refine(anchors):
l = lambda anchor: {'name': anchor['name'][0].strip(), 'number': anchor['number'][0]}
return map(l, anchors)
def sort__seed( anchor):
r = re.findall('\d*', anchor['number'])
number = float(r[0])
if '万' in anchor['number']:
number *= 10000
return number
def sort__rank(anchors):
return sorted(anchors, key=sort__seed, reverse=True)
def __analysis(htmls):
root_html = re.findall(root_pattern, htmls)
for html in root_html:
number = re.findall(number_pattern, html)
name = re.findall(name_pattern, html)
anchor = {'name': name, 'number': number}
anchors.append(anchor)
return anchors
def __show( anchors):
for anchor in anchors:
print('name : ' + anchor['name'] + ' number : ' + anchor['number'])
def __write(anchors):
fr = open("spider__write.txt", "w", encoding="utf-8")
# fr.write('id+^+name+^+viewer__number\n')
for anchor in anchors:
fr.write(anchor['name'] + '^' + anchor['number'] + '\n')
fr.close()
def conn():
#------------------------建立连接-------------
connect = pymssql.connect('Y_PIN-PC', 'zhaolong', 'zhaolong', 'test') #服务器名,账户,密码,数据库名
if connect:
print("连接成功!")
cursor = connect.cursor()
#-----------------------建表--------------
cursor.execute("""
IF OBJECT_ID('persons','U') IS NOT NULL
DROP TABLE persons
CREATE TABLE persons (
name VARCHAR(100),
number VARCHAR(100)
)
"""
)
#-----------------------------打开保存的TXT文件-----------------------
count = 0
fr = open("d:\spider__write.txt", "r")
print ("da kei wenjian cheng gong ")
for line in fr:
count += 1
if (count == 1):
continue
line = line.strip().split('^')
print ("",line)
#-----------------------------插入内容-------------------------
cursor.execute("INSERT INTO persons VALUES('%s','%s')" %(line[0], line[1]))
# cursor.executemany("INSERT INTO persons VALUES(name number)"%(line[0],line[1]))
# 如果没有指定autocommit属性为True的话就需要调用commit()方法
connect.commit()
# --------------------查询操作---------------
cursor.execute('SELECT * FROM persons ')
row = cursor.fetchone()
while row:
# print("ID=%s, Name=%s" % (row[0], row[1]))
print("ID=%s, Name=%s" % (row[0], row[1]))
row = cursor.fetchone()
# # # 也可以使用for循环来迭代查询结果
# for row in cursor:
# print("ID=%d, Name=%s" % (row[0], row[1]))
#
# ---------------关闭连接-----------
connect.close()
return connect
if __name__ == '__main__':
#--------------------为爬虫所准备---------------
url = 'https://www.huya.com/g/wzry'
root_pattern = '<span class="txt">([\s\S]*?)</li>'
name_pattern = '<i class="nick" title="([\s\S]*?)">'
number_pattern = '<i class="js-num">([\s\S]*?)</i>'
anchors = []
htmls = _fetch_content()
anchors =__analysis(htmls)
anchors = list(refine(anchors))
anchors = sort__rank(anchors)
# __show(anchors)
__write(anchors)
conn =conn()
【八】
部分参考文献:
https://blog.csdn.net/qq_40475529/article/details/82929401