Python 读取 20 万行数据的 Excel 文件
使用 pandas 结合 xlrd 读取 Excel 时,读取到的数据可能不全:最多只能读取到 65535 行左右,遇到超大的 Excel 文件就无法完整读取。
下面是解决读取数据不全问题的 Python 代码(改用 openpyxl 引擎读取):
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
# @Time : 2022/9/30 8:49
# @Author : Admin
# @Email : xxxxxxxxxxx@xxx.com
# @Phone : 199xx08xxxx/191xx93xxxx
# @File : 拆分.py
# @Software: PyCharm
import pandas as pd
import time
import datetime
# Input workbook, sheet and the column whose distinct values define the split.
SOURCE_FILE = "汇总:小区编码与名称对应列表-截至20220928-new.xlsx"
SOURCE_SHEET = "Sheet1"
SPLIT_COLUMN = "地市"


def split_by_column(data, column=SPLIT_COLUMN):
    """Split *data* into one DataFrame per distinct value of *column*.

    Args:
        data: The DataFrame to split.
        column: Name of the column whose values define the groups.

    Returns:
        A dict mapping each distinct value of *column* to the rows that
        carry it. Keys appear in order of first occurrence in *data*, and
        every sub-frame has a fresh 0..n-1 index. Rows whose *column* value
        is missing (NaN/None) are skipped.

    Raises:
        KeyError: If *column* is not a column of *data*.
    """
    # A single groupby pass replaces the original row-by-row pd.concat,
    # which rescanned the whole table once per distinct value.
    # sort=False keeps the groups in first-appearance order.
    return {
        key: group.reset_index(drop=True)
        for key, group in data.groupby(column, sort=False)
    }


def main():
    """Read the source workbook and write one .xlsx file per distinct city."""
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start = time.perf_counter()
    print('程序正在运行, 请稍等...')
    print("数据读取中...")
    today_now = datetime.datetime.now()
    print("现在时间是:", today_now)
    data = pd.read_excel(SOURCE_FILE, sheet_name=SOURCE_SHEET, engine='openpyxl')
    print(data)
    # info() prints its report itself and returns None, so it is not
    # wrapped in print() (that would emit a stray "None" line).
    data.info()
    for department, new_df in split_by_column(data).items():
        # The group key may not be a string, so it is converted for both
        # the file name and the sheet name.
        new_df.to_excel(
            str(department) + ".xlsx",
            sheet_name=str(department),
            index=False,
        )
    print("分表成功")
    print("成功!")
    today_now_end = datetime.datetime.now()
    print("现在时间是:", today_now_end)
    end = time.perf_counter()
    print('本次耗时:%0.0f秒' % (end - start))


if __name__ == "__main__":
    main()