一、代码中涉及 groupby\astype\str.split\等函数,也涉及对Excel设置格式、修改数值:
# -*- coding:utf-8 -*-
import pandas as pd # 需要安装 pandas
# from pandas import Series # 统计不重复个数
import os
import warnings
# python警告:Workbook contains no default style, apply openpyxl‘s default warn
# 法一:用Excel打开该文件重新保存一下,同时应该也保存了默认的excel样式。
# 法二:如下禁止显示提示
warnings.filterwarnings('ignore')
def xlsx_count(xlsxfile):
# xlsxfile = r'访问统计.xlsx'
# dtype指定读取格式; header=1等效于skiprows=[0] 指跳过第一行取出数据 sheet_name='数据源' 或 ['sheet1', 'sheet2']
df = pd.read_excel('./excel_files/'+xlsxfile, header=1)
month = "./count_files/" + df['月份'][0].astype(str)
# 学生登录次数为0的查询
students = df.query("身份=='学生' & 登陆次数==0")[['月份', '用户', '组织', '登陆次数']]
# 利用split进行分列,expand = True 返回dataframe;expand=False返回Series .str.split('级', expand=True)
students['年级'] = students['组织'].str.split('>', expand=True)[3]
students['班级'] = students['组织'].str.split('>', expand=True)[4]
students['未登陆人数'] = 1
# ******************************************************************************************
# 以下为各班未完成情况
classes = students.groupby(["年级", '班级'])["未登陆人数"].count()
# 注意格式为 Series 要加上 to_frame()
styler = classes.to_frame().style.set_properties(
**{'border': '1px solid black', 'text-align': 'center', 'font-name': u"宋体", 'font-size': '12pt'})
res_file = month + "班级.xlsx"
writer = pd.ExcelWriter(res_file, engine="xlsxwriter")
styler.to_excel(writer, 'sheet1', index=True,
header=True, startrow=1) # 需要索引
# 表格格式设置 ,
workbook = writer.book
worksheet = writer.sheets['sheet1']
worksheet.set_column('A:C', width=12)
# 设置字体样式
heading = workbook.add_format({'font_name': '黑体', 'font_size': 14, 'font_color': 'red', 'bold': True,
'bg_color': 'yellow', 'align': 'center', 'valign': 'vcenter'})
# 设置行高
worksheet.set_row(0, 30)
worksheet.merge_range('A1:C1', '云平台登录统计', heading)
# 写入标题和单元格内容
# worksheet.write('A1', '云平台登录统计', heading)
writer.close()
if __name__ == '__main__':
file_path = './excel_files/'
files = os.listdir(file_path)
# print(files)
for file in files:
xlsx_count(file)
二、其他好的教程:链接地址
三、一些功能记录
import pandas as pd
import numpy as np
df = pd.DataFrame(
np.random.randint(60, 100, (4, 3))
)
df.columns = ["语文", "数学", "英语"]
df.index = ["学生"+s for s in list("ABCD")]
df.T # 转置
# axis=1按列汇总
df["总分"] = df.sum(axis=1)
df["平均分"] = df.mean(axis=1)
df = pd.DataFrame(np.random.rand(200, 10))
# 批量设置标题名
df.columns = [
"SAMPLE_" + s for s in list("ABCDEFGHIJ")
]
四、数据替换replace:链接地址
六、行列转置并修改索引为普通列:链接地址
七、python将数字转为Excel字母
def convert_to_excel_letter(num): """ 将数字转换为 Excel 字母 """ result = "" while num > 0: num -= 1 remainder = num % 26 result = chr(65 + remainder) + result num //= 26 return result