# -*- coding: utf-8 -*-
"""Pukang Health: generate the direct-pay claims weekly report.

Processing plan:

1. Read the reconciliation statements of the earlier months of the current
   year (reconciliation period, merchant number, policy number, reconciled
   amount) from ``reconciliations.csv``.
   1.1 Group by reconciliation period and policy number and sum the amount;
       the period and the summed amount are renamed to the assessment period
       ("考核周期") and the consumption amount ("消费规模").
2. Read the policy deduction data of the current month (deduction period,
   merchant number, policy number, deducted amount) from
   ``reconciliations_month.csv``.
   2.1 Group by deduction period and policy number and sum the amount, with
       the same renaming as in 1.1.
3. Concatenate 1.1 and 2.1.
4. Read the policy-to-institution allocation data provided by Xu Dan (policy
   number, landing institution and its ratio, issuing institution and its
   ratio, headquarters ratio) from ``slips.csv``.
   4.1 Policies with a consumption amount above 0 that have no allocation are
       written to a workbook so that Xu Dan can supply the missing allocation
       schemes; the run is repeated until nothing is missing
       (--> process table).
5. Split each policy's consumption amount across institutions, then group by
   assessment period and institution and sum the allocated amount.
   5.1 Map every institution to its region.
   5.2 Read the institution targets (assessment period, institution, target)
       from ``targets.csv`` and attach them by period and institution.
   5.3 Compute the attainment rate (consumption / target) (--> base table).
6. Pivot the base table into per-institution targets, consumption and
   attainment rate for every period plus a grand total, and add regional
   subtotals.

Running this file as a script produces the report; importing it only defines
the helpers.
"""

import sys

import numpy
import pandas

# Institution used whenever a province or an institution is not recognised.
_DEFAULT_NAME = "总部"

# Province (as it appears in the allocation data) -> institution name.
_INSTITUTION_BY_PROVINCE = {
    "北京市": "京津",
    "天津市": "京津",
    "河北省": "河北",
    "山西省": "山西",
    "内蒙古自治区": "内蒙",
    "辽宁省": "辽宁",
    "吉林省": "黑吉",
    "黑龙江省": "黑吉",
    "上海市": "上海",
    "江苏省": "江苏",
    "浙江省": "浙江",
    "安徽省": "安徽",
    "福建省": "福建",
    "江西省": "江西",
    "山东省": "山东",
    "河南省": "河南",
    "湖北省": "湖北",
    "湖南省": "湖南",
    "广东省": "广东",
    "海南省": "广东",
    "广西壮族自治区": "广西",
    "重庆市": "四川",
    "四川省": "四川",
    "西藏自治区": "四川",
    "贵州省": "贵州",
    "云南省": "云南",
    "新疆维吾尔自治区": "新疆",
    "陕西省": "陕西",
    "青海省": "陕西",
    "甘肃省": "甘肃",
    "宁夏回族自治区": "宁夏",
}

# Institution name -> region name.
_REGION_BY_INSTITUTION = {
    "内蒙": "东北大区",
    "辽宁": "东北大区",
    "黑吉": "东北大区",
    "京津": "华北大区",
    "河北": "华北大区",
    "山西": "华北大区",
    "安徽": "华东大区",
    "山东": "华东大区",
    "河南": "华东大区",
    "江苏": "东南大区",
    "福建": "东南大区",
    "广东": "东南大区",
    "江西": "华中大区",
    "湖北": "华中大区",
    "湖南": "华中大区",
    "新疆": "西北大区",
    "陕西": "西北大区",
    "甘肃": "西北大区",
    "宁夏": "西北大区",
    "广西": "西南大区",
    "四川": "西南大区",
    "云南": "西南大区",
    "贵州": "西南大区",
    "上海": "上海",
    "浙江": "浙江",
}

# Row order of the regions in the final report.
_REGION_ORDER = (
    "东北大区",
    "华北大区",
    "华东大区",
    "华中大区",
    "东南大区",
    "西北大区",
    "西南大区",
    "上海",
    "浙江",
    "总部",
    "汇总",
)

# Row order of the institutions inside every region; "汇总" is the regional
# subtotal row and "" is the institution label of the grand-total row.
_INSTITUTION_ORDER = {
    "东北大区": ["汇总", "内蒙", "辽宁", "黑吉"],
    "华北大区": ["汇总", "京津", "河北", "山西"],
    "华东大区": ["汇总", "安徽", "山东", "河南"],
    "华中大区": ["汇总", "江西", "湖北", "湖南"],
    "东南大区": ["汇总", "江苏", "福建", "广东"],
    "西北大区": ["汇总", "新疆", "陕西", "甘肃", "宁夏"],
    "西南大区": ["汇总", "广西", "四川", "云南", "贵州"],
    "上海": ["上海"],
    "浙江": ["浙江"],
    "总部": ["总部"],
    "汇总": [""],
}

# Index values that never receive a regional subtotal row.
_REGIONS_WITHOUT_SUBTOTAL = ("上海", "浙江", "总部", "汇总")


def _lookup(table: dict, key: object) -> str:
    """Return ``table[key]``, or the headquarters name when there is no match."""
    try:
        return table.get(key, _DEFAULT_NAME)
    except TypeError:
        # Unhashable values can never be a key, so they count as "no match".
        return _DEFAULT_NAME


def match_institution_name(x: object) -> str:
    """Map the province an institution is located in to the institution name.

    Args:
        x: Province name, e.g. "北京市".

    Returns:
        The institution name; "总部" when the province is not recognised
        (including missing values).
    """
    return _lookup(_INSTITUTION_BY_PROVINCE, x)


def match_region_name(x: object) -> str:
    """Map an institution name to its region name.

    Args:
        x: Institution name as returned by ``match_institution_name``.

    Returns:
        The region name; "总部" when the institution is not recognised.
    """
    return _lookup(_REGION_BY_INSTITUTION, x)


def _aggregate_consumption(frame, period_column, amount_column):
    """Sum ``amount_column`` per (period, policy) and rename to the common schema.

    The result has the columns "考核周期", "保单编号" and "消费规模".
    """
    frame = frame.astype({amount_column: "float"})
    return (
        frame.groupby(by=[period_column, "保单编号"])[amount_column]
        .sum()
        .reset_index()
        .rename(columns={period_column: "考核周期", amount_column: "消费规模"})
    )


def _flag_unallocated(consumption, slips):
    """Left-join the allocation data and label policies lacking an allocation.

    Rows with a consumption amount above 0 whose issuing-institution ratio is
    missing after the join get "无分配方案" in the "异常标签" column.
    """
    process_table = consumption.merge(right=slips, on="保单编号", how="left")
    process_table.loc[
        (process_table["消费规模"] > 0) & (process_table["出单机构分配比例"].isna()),
        "异常标签",
    ] = "无分配方案"
    return process_table


def _explode_allocations(slips):
    """Turn every policy row into one row per institution with a positive ratio.

    The result has the columns "保单编号", "大区名称", "机构名称" and "分配比例".
    """
    allocations = (
        slips.assign(
            # Landing institution, issuing institution and headquarters ...
            机构名称=slips.apply(
                lambda row: [row["落地机构"], row["出单机构"], "总部"],
                axis="columns",
            ),
            # ... paired position by position with their allocation ratios.
            分配比例=slips.apply(
                lambda row: [
                    row["落地机构分配比例"],
                    row["出单机构分配比例"],
                    row["总部分配比例"],
                ],
                axis="columns",
            ),
        )
        .explode(["机构名称", "分配比例"])
        .reset_index(drop=True)
    )
    # Copy the filtered slice so the column assignments below act on an
    # independent frame.
    allocations = allocations.loc[
        allocations["分配比例"] > 0, ["保单编号", "机构名称", "分配比例"]
    ].copy()
    allocations["机构名称"] = allocations["机构名称"].apply(match_institution_name)
    # Place the region name directly before the institution name.
    allocations.insert(
        loc=allocations.shape[1] - 2,
        column="大区名称",
        value=allocations["机构名称"].apply(match_region_name),
    )
    return allocations


def _allocate_by_institution(process_table, allocations):
    """Split consumption by allocation ratio and total it per period/institution."""
    allocated = process_table.merge(right=allocations, on="保单编号", how="left")
    # Ratios are percentages; the exploded column has object dtype, hence the
    # explicit cast before the arithmetic.
    allocated["分配后消费规模"] = (
        allocated["消费规模"] * allocated["分配比例"].astype("float") / 100
    )
    return (
        allocated.groupby(by=["考核周期", "机构名称"])
        .agg(大区名称=("大区名称", "first"), 分配后消费规模=("分配后消费规模", "sum"))
        .reset_index()
    )


def _add_region_subtotals(pivot_table):
    """Append one "汇总" row per multi-institution region to the pivot table."""
    subtotals = []
    for region_name in pivot_table.index.get_level_values("大区名称").unique():
        if region_name in _REGIONS_WITHOUT_SUBTOTAL:
            continue
        # Column-wise sum over the institutions of this region.
        region_total = pivot_table.loc[region_name].sum()
        subtotals.append(
            pandas.DataFrame(
                data=[region_total],
                index=pandas.MultiIndex.from_tuples(
                    tuples=[(region_name, "汇总")], names=["大区名称", "机构名称"]
                ),
                columns=region_total.index,
            )
        )
    return pandas.concat(objs=[pivot_table, *subtotals])


def _add_attainment_rates(pivot_table):
    """Add a ("达成率", period) column for every period, including the total.

    The rate is consumption / target, and 0 where the target is 0.
    """
    for period in pivot_table.columns.get_level_values("考核周期").unique():
        target = pivot_table[("消费目标", period)]
        rate = pivot_table[("分配后消费规模", period)] / target
        pivot_table[("达成率", period)] = rate.where(target != 0, 0)
    return pivot_table


def _sort_rows(pivot_table):
    """Order the rows by the fixed region order, then the institution order.

    Raises:
        KeyError: If a (region, institution) pair of the index has no position
            in the ordering tables.
    """
    region_rank = {name: rank for rank, name in enumerate(_REGION_ORDER)}
    institution_rank = {
        (region_name, institution_name): rank
        for region_name, institution_names in _INSTITUTION_ORDER.items()
        for rank, institution_name in enumerate(institution_names)
    }
    region_keys = [region_rank[region] for region, _ in pivot_table.index]
    institution_keys = [institution_rank[pair] for pair in pivot_table.index]
    # lexsort treats the LAST key as the primary one: region first, then
    # institution within the region.
    return pivot_table.iloc[numpy.lexsort((institution_keys, region_keys))]


def _build_report(base_table):
    """Pivot the base table into the final weekly-report layout."""
    # One column index ("考核周期") plus several values makes pandas build
    # two-level columns: level one is the value name, level two the period.
    pivot_table = base_table.pivot_table(
        index=["大区名称", "机构名称"],
        columns="考核周期",
        values=["分配后消费规模", "消费目标"],
        aggfunc="sum",
        margins=True,
        margins_name="汇总",
    )
    pivot_table = _add_region_subtotals(pivot_table)
    pivot_table = _add_attainment_rates(pivot_table)
    # Put the period on the first column level, then sort the columns.
    pivot_table = pivot_table.swaplevel(axis="columns").sort_index(axis="columns")
    return _sort_rows(pivot_table).reset_index()


def main():
    """Read the input CSV files and write the weekly report workbook.

    When policies with a consumption amount above 0 have no allocation scheme,
    the process table is written to a separate workbook and the process exits
    before any report is produced.
    """
    # Imported here so that the pure mapping helpers above stay importable
    # without the project's I/O layer.
    from utils.pandas_extension import open_csv, save_as_workbook

    print("正在生成直付理赔周报...", end="")

    # Earlier months of the current year (period - merchant - policy level).
    reconciliations = open_csv(file_name="reconciliations.csv")
    # Per the original author, a row without a policy number has no amount
    # either, so such rows are dropped.
    reconciliations = reconciliations.dropna(subset=["保单编号"])
    reconciliations = _aggregate_consumption(reconciliations, "对账期", "对账金额")

    # Current month (period - merchant - policy level).
    reconciliations_month = open_csv(file_name="reconciliations_month.csv")
    reconciliations_month = _aggregate_consumption(
        reconciliations_month, "扣减期", "扣减金额"
    )

    reconciliations = pandas.concat(
        objs=[reconciliations, reconciliations_month], ignore_index=True
    )

    # Policy-to-institution allocation data provided by Xu Dan.
    slips = open_csv(file_name="slips.csv")
    slips = slips.astype(
        {"落地机构分配比例": "int", "出单机构分配比例": "int", "总部分配比例": "int"}
    )

    process_table = _flag_unallocated(reconciliations, slips)
    if (process_table["异常标签"] == "无分配方案").any():
        print("存在未分配机构的保单,请提请徐丹老师补录")
        print()
        save_as_workbook(
            worksheets=[("异常保单", process_table)],
            workbook_name="普康健康_需补录保单机构分配方案.xlsx",
        )
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist; the exit status is unchanged.
        sys.exit()

    process_table = _allocate_by_institution(
        process_table, _explode_allocations(slips)
    )

    # Institution targets (institution - period level).
    targets = open_csv(file_name="targets.csv")
    targets = targets.astype({"消费目标": "float"})
    process_table = process_table.merge(
        right=targets, on=["机构名称", "考核周期"], how="left"
    )

    save_as_workbook(
        worksheets=[("sheet1", _build_report(process_table))],
        workbook_name="普康健康_机构周报.xlsx",
    )
    print("生成成功")


if __name__ == "__main__":
    main()