import jieba
import xlrd
import jieba.analyse
def stopwordslist(filepath):
    """Load a stop-word list from *filepath*.

    Parameters
    ----------
    filepath : str
        Path to a UTF-8 text file containing one stop word per line.

    Returns
    -------
    list[str]
        Each line of the file with surrounding whitespace stripped
        (blank lines yield empty strings, matching the original behavior).
    """
    # Context manager guarantees the file handle is closed; iterating the
    # file object directly avoids materializing readlines() first.
    with open(filepath, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f]
def fenci(content, stopwords=None):
    """Segment column 0 of the workbook's first sheet with jieba.

    Parameters
    ----------
    content : xlrd.Book
        An opened workbook.  Rows 1..nrows-1 of column 0 of the first
        sheet are segmented (row 0 is skipped — presumably a header row;
        TODO confirm against the spreadsheet layout).
    stopwords : collection of str, optional
        Words to drop from the output.  Defaults to the module-level
        ``stopwords`` list, preserving the original global-based behavior.

    Returns
    -------
    str
        One line per data row; tokens are space-separated and each
        non-empty row keeps a trailing space before its newline
        (byte-identical to the original accumulation scheme).
    """
    if stopwords is None:
        # Backward-compatible fallback to the global loaded at script start.
        stopwords = globals()["stopwords"]
    sheet = content.sheets()[0]
    parts = []
    for row in range(1, sheet.nrows):
        text = sheet.cell(row, 0).value
        # NOTE: the original shadowed the function name with `fenci=jieba.cut(...)`;
        # use a distinct local name instead.
        tokens = [seg for seg in jieba.cut(text)
                  if seg not in stopwords and len(seg) > 0]
        # "tok1 tok2 ... tokN \n" per row — join at C speed instead of
        # quadratic `final += ...` concatenation.
        parts.append("".join(tok + " " for tok in tokens) + "\n")
    return "".join(parts)
# --- Script configuration ---------------------------------------------------
USER_DICT_PATH = "C:\\Users\\Administrator\\Desktop\\userdic.txt"
STOPWORDS_PATH = "C:\\Users\\Administrator\\Desktop\\stop.txt"
DATA_PATH = "C:\\Users\\Administrator\\Desktop\\zhaopin_data.xlsx"

# Teach jieba the domain-specific vocabulary before any segmentation.
jieba.load_userdict(USER_DICT_PATH)
stopwords = stopwordslist(STOPWORDS_PATH)
# NOTE(review): xlrd 2.x dropped .xlsx support — this requires xlrd<2.0
# (or a switch to openpyxl); confirm the pinned dependency version.
content = xlrd.open_workbook(DATA_PATH)
final = fenci(content)

# Extract the 200 highest-weighted keywords (TF-IDF) from the segmented
# corpus; allowPOS=() means no part-of-speech filtering.
keywords = jieba.analyse.extract_tags(final, topK=200, withWeight=True, allowPOS=())
for item in keywords:
    print(item[0], item[1])
# 本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)