经常需要使用 pandas 读取包含多个 sheet 的 Excel 文件。读取方式:先获得所有 sheet 的名字,再通过 sheet_name 参数逐个读取;写入时,将多个 sheet 写到同一个文件中。
import pandas as pd
# Read every sheet of an Excel workbook into a dict of DataFrames, then write
# the sheets back into a single output workbook, randomly down-sampling any
# sheet that has more than `max_rows` rows.
infile = "data/test.xlsx"
outfile = "ret.xlsx"
# Upper bound on the number of rows written per sheet.
max_rows = 1000

# Open the workbook once and reuse the handle for every sheet, instead of
# re-opening the file from its path per sheet; the `with` block closes it.
with pd.ExcelFile(infile) as raw_file:
    # Get the sheet names.
    sheet_names = raw_file.sheet_names
    print(sheet_names)
    sheet_dict = {
        sheet: pd.read_excel(raw_file, sheet_name=sheet)
        for sheet in sheet_names
    }

# Running totals of the ORIGINAL (pre-sampling) row counts. These must be
# initialised before the loop; otherwise the first `+=` raises NameError.
all_num = 0
sheet_num_dict = {}

# Write multiple sheets into one Excel file.
with pd.ExcelWriter(outfile, engine="openpyxl") as writer:
    for sheet, sheet_df in sheet_dict.items():
        # Add your own business logic here.
        row_count = sheet_df.shape[0]
        print(row_count)
        if row_count > max_rows:
            # Too many rows: keep a random sample of max_rows rows.
            random_sheet_df = sheet_df.sample(n=max_rows)
        else:
            random_sheet_df = sheet_df
        all_num += row_count
        sheet_num_dict[sheet] = row_count
        random_sheet_df.to_excel(writer, sheet_name=sheet, index=False)