475 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			475 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| 
 | ||
| """
 | ||
| 普康健康_生成直付理赔周报
 | ||
| """
 | ||
| 
 | ||
| # 加载模块
 | ||
| 
 | ||
| import pandas, numpy
 | ||
| 
 | ||
| from utils.pandas_extension import open_csv, save_as_workbook
 | ||
| 
 | ||
| 
 | ||
| # 根据机构所在省份匹配为机构名称
 | ||
| def match_institution_name(x):
 | ||
| 
 | ||
|     x_matched = "总部"
 | ||
| 
 | ||
|     match x:
 | ||
| 
 | ||
|         case "北京市" | "天津市":
 | ||
| 
 | ||
|             x_matched = "京津"
 | ||
| 
 | ||
|         case "河北省":
 | ||
| 
 | ||
|             x_matched = "河北"
 | ||
| 
 | ||
|         case "山西省":
 | ||
| 
 | ||
|             x_matched = "山西"
 | ||
| 
 | ||
|         case "内蒙古自治区":
 | ||
| 
 | ||
|             x_matched = "内蒙"
 | ||
| 
 | ||
|         case "辽宁省":
 | ||
| 
 | ||
|             x_matched = "辽宁"
 | ||
| 
 | ||
|         case "吉林省" | "黑龙江省":
 | ||
| 
 | ||
|             x_matched = "黑吉"
 | ||
| 
 | ||
|         case "上海市":
 | ||
| 
 | ||
|             x_matched = "上海"
 | ||
| 
 | ||
|         case "江苏省":
 | ||
| 
 | ||
|             x_matched = "江苏"
 | ||
| 
 | ||
|         case "浙江省":
 | ||
| 
 | ||
|             x_matched = "浙江"
 | ||
| 
 | ||
|         case "安徽省":
 | ||
| 
 | ||
|             x_matched = "安徽"
 | ||
| 
 | ||
|         case "福建省":
 | ||
| 
 | ||
|             x_matched = "福建"
 | ||
| 
 | ||
|         case "江西省":
 | ||
| 
 | ||
|             x_matched = "江西"
 | ||
| 
 | ||
|         case "山东省":
 | ||
| 
 | ||
|             x_matched = "山东"
 | ||
| 
 | ||
|         case "河南省":
 | ||
| 
 | ||
|             x_matched = "河南"
 | ||
| 
 | ||
|         case "湖北省":
 | ||
| 
 | ||
|             x_matched = "湖北"
 | ||
| 
 | ||
|         case "湖南省":
 | ||
| 
 | ||
|             x_matched = "湖南"
 | ||
| 
 | ||
|         case "广东省" | "海南省":
 | ||
| 
 | ||
|             x_matched = "广东"
 | ||
| 
 | ||
|         case "广西壮族自治区":
 | ||
| 
 | ||
|             x_matched = "广西"
 | ||
| 
 | ||
|         case "重庆市" | "四川省" | "西藏自治区":
 | ||
| 
 | ||
|             x_matched = "四川"
 | ||
| 
 | ||
|         case "贵州省":
 | ||
| 
 | ||
|             x_matched = "贵州"
 | ||
| 
 | ||
|         case "云南省":
 | ||
| 
 | ||
|             x_matched = "云南"
 | ||
| 
 | ||
|         case "新疆维吾尔自治区":
 | ||
| 
 | ||
|             x_matched = "新疆"
 | ||
| 
 | ||
|         case "陕西省" | "青海省":
 | ||
| 
 | ||
|             x_matched = "陕西"
 | ||
| 
 | ||
|         case "甘肃省":
 | ||
| 
 | ||
|             x_matched = "甘肃"
 | ||
| 
 | ||
|         case "宁夏回族自治区":
 | ||
| 
 | ||
|             x_matched = "宁夏"
 | ||
| 
 | ||
|     return x_matched
 | ||
| 
 | ||
| 
 | ||
| # 根据机构名称匹配为大区名称
 | ||
| def match_region_name(x):
 | ||
| 
 | ||
|     x_matched = "总部"
 | ||
| 
 | ||
|     match x:
 | ||
| 
 | ||
|         case "内蒙" | "辽宁" | "黑吉":
 | ||
| 
 | ||
|             x_matched = "东北大区"
 | ||
| 
 | ||
|         case "京津" | "河北" | "山西":
 | ||
| 
 | ||
|             x_matched = "华北大区"
 | ||
| 
 | ||
|         case "安徽" | "山东" | "河南":
 | ||
| 
 | ||
|             x_matched = "华东大区"
 | ||
| 
 | ||
|         case "江苏" | "福建" | "广东":
 | ||
| 
 | ||
|             x_matched = "东南大区"
 | ||
| 
 | ||
|         case "江西" | "湖北" | "湖南":
 | ||
| 
 | ||
|             x_matched = "华中大区"
 | ||
| 
 | ||
|         case "新疆" | "陕西" | "甘肃" | "宁夏":
 | ||
| 
 | ||
|             x_matched = "西北大区"
 | ||
| 
 | ||
|         case "广西" | "四川" | "云南" | "贵州":
 | ||
| 
 | ||
|             x_matched = "西南大区"
 | ||
| 
 | ||
|         case "上海":
 | ||
| 
 | ||
|             x_matched = "上海"
 | ||
| 
 | ||
|         case "浙江":
 | ||
| 
 | ||
|             x_matched = "浙江"
 | ||
| 
 | ||
|     return x_matched
 | ||
| 
 | ||
| 
 | ||
| """
 | ||
| 统计方案:
 | ||
| 
 | ||
| 1、读取当年往月对账单数据,包括对账期、商家编号、保单编号和对账金额,文件名为reconciliations.csv
 | ||
| 
 | ||
|     1.1 根据对账期和保单编号分组,就对账金额求和,其中对账期、对账金额之和重命名为考核周期、消费规模
 | ||
| 
 | ||
| 2、读取当年当月保单扣减数据,包括扣减期、商家编号、保单编号和扣减金额,文件名为reconciliations_month.csv
 | ||
| 
 | ||
|     2.1 根据扣减期和保单编号分组,就扣减金额求和,其中扣减期、扣减金额之和重命名为考核周期、消费规模
 | ||
| 
 | ||
| 3、合并1.1和2.1,即当年往月和当年当月考核周期、保单编号和消费规模
 | ||
| 
 | ||
| 4、读取徐丹老师提供的保单机构分配数据,包括保单编号、落地机构、落地机构分配比例、出单机构、出单机构分配比例、总部分配比例,文件名为slips.csv
 | ||
| 
 | ||
|     4.1 先查询3中消费规模大于0的保单编号,再和4中保单编号比较、就不在4中的保单编号提供徐丹老师,由其补录保单机构分配方案。补录后重复4.1至无需再提供徐丹老师 -->过程表
 | ||
| 
 | ||
| 5、就机构拆解保单消费规模,根据考核周期、机构分组,就消费规模求和
 | ||
| 
 | ||
|     5.1 根据机构名称匹配大区名称
 | ||
|     
 | ||
|     5.2 读取当年机构消费目标数据,包括考核周期、机构名称和消费目标,根据考核周期和机构名称匹配消费目标
 | ||
|     
 | ||
|     5.3 就算达成率(消费规模/消费目标)-->基表
 | ||
| 
 | ||
| 6、透视基表,生成各机构在当年各月消费目标、消费规模和转化率,当年消费目标、消费规模和转化率,并汇总
 | ||
| """
 | ||
| 
 | ||
| print("正在生成直付理赔周报...", end="")
 | ||
| 
 | ||
| # 当年往月对账单数据(维度为对账期-商家编号-保单编号)
 | ||
| reconciliations = open_csv(file_name="reconciliations.csv")
 | ||
| 
 | ||
| # 删除保单编号为空的行(若保单编号为空则对账金额必定为空,若对账金额为空则保单编号必定为空)
 | ||
| reconciliations.dropna(subset=["保单编号"], inplace=True)
 | ||
| 
 | ||
| # 数据类型转换
 | ||
| for variable_label in reconciliations.columns:
 | ||
| 
 | ||
|     match variable_label:
 | ||
| 
 | ||
|         case "对账金额":
 | ||
| 
 | ||
|             # 不可能出现缺失值,无需填补缺失值
 | ||
| 
 | ||
|             reconciliations[variable_label] = reconciliations[variable_label].astype(
 | ||
|                 "float"
 | ||
|             )
 | ||
| 
 | ||
| # 按照对账期和保单编号分组,就对账金额求和,重置索引,修改列名
 | ||
| reconciliations = (
 | ||
|     reconciliations.groupby(by=["对账期", "保单编号"])
 | ||
|     .agg(对账金额=("对账金额", "sum"))
 | ||
|     .reset_index()
 | ||
|     .rename(columns={"对账期": "考核周期", "对账金额": "消费规模"})
 | ||
| )
 | ||
| 
 | ||
| # 当年当月保单扣减数据(维度为扣减期-商家编号-保单编号)
 | ||
| reconciliations_month = open_csv(file_name="reconciliations_month.csv")
 | ||
| 
 | ||
| # 数据类型转换
 | ||
| for variable_label in reconciliations_month.columns:
 | ||
| 
 | ||
|     match variable_label:
 | ||
| 
 | ||
|         case "扣减金额":
 | ||
| 
 | ||
|             # 不可能出现缺失值,无需填补缺失值
 | ||
| 
 | ||
|             reconciliations_month[variable_label] = reconciliations_month[
 | ||
|                 variable_label
 | ||
|             ].astype("float")
 | ||
| 
 | ||
| # 按照扣减期和保单编号分组,就扣减金额求和,重置索引,修改列名
 | ||
| reconciliations_month = (
 | ||
|     reconciliations_month.groupby(by=["扣减期", "保单编号"])
 | ||
|     .agg(扣减金额=("扣减金额", "sum"))
 | ||
|     .reset_index()
 | ||
|     .rename(columns={"扣减期": "考核周期", "扣减金额": "消费规模"})
 | ||
| )
 | ||
| 
 | ||
| # 合并上述当年往月对账单数据和当年当月保单扣减数据
 | ||
| reconciliations = pandas.concat(
 | ||
|     objs=[reconciliations, reconciliations_month], ignore_index=True
 | ||
| )
 | ||
| 
 | ||
| # 徐丹老师提供的保单机构分配数据
 | ||
| slips = open_csv(file_name="slips.csv")
 | ||
| 
 | ||
| # 数据类型转换
 | ||
| for variable_label in slips.columns:
 | ||
| 
 | ||
|     match variable_label:
 | ||
| 
 | ||
|         # 不可能出现缺失值,无需填补缺失值
 | ||
| 
 | ||
|         case "落地机构分配比例" | "出单机构分配比例" | "总部分配比例":
 | ||
| 
 | ||
|             slips[variable_label] = slips[variable_label].astype("int")
 | ||
| 
 | ||
| # 过程表
 | ||
| process_table = reconciliations.merge(right=slips, on="保单编号", how="left")
 | ||
| 
 | ||
| # 统计消费规模大于0且出单机构分配比例为空的保单机构分配数据
 | ||
| process_table.loc[
 | ||
|     (process_table["消费规模"] > 0) & (process_table["出单机构分配比例"].isna()),
 | ||
|     "异常标签",
 | ||
| ] = "无分配方案"
 | ||
| 
 | ||
| if process_table.loc[process_table["异常标签"] == "无分配方案"].shape[0] > 0:
 | ||
| 
 | ||
|     print("存在未分配机构的保单,请提请徐丹老师补录")
 | ||
|     print()
 | ||
| 
 | ||
|     save_as_workbook(
 | ||
|         worksheets=[("异常保单", process_table)],
 | ||
|         workbook_name="普康健康_需补录保单机构分配方案.xlsx",
 | ||
|     )
 | ||
| 
 | ||
|     exit()
 | ||
| 
 | ||
| # 新增总部
 | ||
| slips.insert(loc=slips.shape[1] - 1, column="总部", value="总部")
 | ||
| 
 | ||
| # 先就落地机构、出单机构和总部新增机构名称列,落地机构分配比例、出单机构分配比例和总部分配比例新增分配比例列,再拆分为行
 | ||
| slips = (
 | ||
|     slips.assign(
 | ||
|         # 整合机构
 | ||
|         机构名称=slips.apply(
 | ||
|             lambda x: [x["落地机构"], x["出单机构"], x["总部"]], axis="columns"
 | ||
|         ),
 | ||
|         # 整合分配比例
 | ||
|         分配比例=slips.apply(
 | ||
|             lambda x: [x["落地机构分配比例"], x["出单机构分配比例"], x["总部分配比例"]],
 | ||
|             axis="columns",
 | ||
|         ),
 | ||
|     )
 | ||
|     # 拆分机构名称和分配比例并重置索引
 | ||
|     .explode(["机构名称", "分配比例"]).reset_index(drop=True)
 | ||
| )
 | ||
| 
 | ||
| # 保留分配比例大于0的保单机构分配数据
 | ||
| slips = slips.loc[slips["分配比例"] > 0, ["保单编号", "机构名称", "分配比例"]]
 | ||
| 
 | ||
| # 根据机构所在省份匹配为机构名称
 | ||
| slips["机构名称"] = slips["机构名称"].apply(lambda x: match_institution_name(x))
 | ||
| 
 | ||
| # 根据机构名称匹配为大区名称并插入至第二列
 | ||
| slips.insert(
 | ||
|     loc=slips.shape[1] - 2,
 | ||
|     column="大区名称",
 | ||
|     value=slips["机构名称"].apply(lambda x: match_region_name(x)),
 | ||
| )
 | ||
| 
 | ||
| # 左拼接保单机构分配数据(分配比例不可能出现缺失值,无需填补缺失值)
 | ||
| process_table = process_table.merge(right=slips, on="保单编号", how="left")
 | ||
| 
 | ||
| # 分配后消费规模
 | ||
| process_table["分配后消费规模"] = process_table.apply(
 | ||
|     lambda x: x["消费规模"] * x["分配比例"] / 100, axis="columns"
 | ||
| )
 | ||
| 
 | ||
| # 按照考核周期和机构名称分组,就分配后消费规模求和
 | ||
| process_table = (
 | ||
|     process_table.groupby(by=["考核周期", "机构名称"])
 | ||
|     .agg(大区名称=("大区名称", "first"), 分配后消费规模=("分配后消费规模", "sum"))
 | ||
|     .reset_index()
 | ||
| )
 | ||
| 
 | ||
| # 机构考核周期消费目标数据(维度为对机构名称-考核周期)
 | ||
| targets = open_csv(file_name="targets.csv")
 | ||
| 
 | ||
| # 数据类型转换
 | ||
| for variable_label in targets.columns:
 | ||
| 
 | ||
|     match variable_label:
 | ||
| 
 | ||
|         case "消费目标":
 | ||
| 
 | ||
|             # 消费目标不可能出现缺失值,无需填补缺失值
 | ||
| 
 | ||
|             targets[variable_label] = targets[variable_label].astype("float")
 | ||
| 
 | ||
| process_table = process_table.merge(
 | ||
|     right=targets, on=["机构名称", "考核周期"], how="left"
 | ||
| )
 | ||
| 
 | ||
| # 根据过程表透视(第一级行索引为大区名称,第二级行索引为机构名称,第一级列索引为考核周期,列索引值为分配后消费规模和消费目标,行和列汇总)
 | ||
| pivot_table = process_table.pivot_table(
 | ||
|     index=["大区名称", "机构名称"],
 | ||
|     columns="考核周期",
 | ||
|     values=[
 | ||
|         "分配后消费规模",
 | ||
|         "消费目标",
 | ||
|     ],  # 注意:若设置一个列索引和多个列索引值PANDAS将自动创建多级列索引,第一级列索引为VALUES,第二季列索引为COLUMNS
 | ||
|     aggfunc="sum",
 | ||
|     margins=True,
 | ||
|     margins_name="汇总",
 | ||
| )
 | ||
| 
 | ||
| # 添加大区汇总
 | ||
| for region_name in pivot_table.index.get_level_values("大区名称").unique():
 | ||
| 
 | ||
|     if region_name not in ["上海", "浙江", "总部", "汇总"]:
 | ||
| 
 | ||
|         # 汇总大区数据(就各机构的考核周期分配后消费规模和消费目标分别求和)
 | ||
|         region_summary = pivot_table.loc[region_name].sum()  # SERIES对象
 | ||
| 
 | ||
|         region_summary = pandas.DataFrame(
 | ||
|             data=[region_summary],  # SERIES列表
 | ||
|             # 创建多级行索引
 | ||
|             index=pandas.MultiIndex.from_tuples(
 | ||
|                 tuples=[(region_name, "汇总")], names=["大区名称", "机构名称"]
 | ||
|             ),
 | ||
|             columns=region_summary.index,
 | ||
|         )
 | ||
| 
 | ||
|         pivot_table = pandas.concat(objs=[pivot_table, region_summary])
 | ||
| 
 | ||
| # 计算各考核周期和汇总达成率
 | ||
| for period in pivot_table.columns.get_level_values("考核周期").unique():
 | ||
| 
 | ||
|     pivot_table[("达成率", period)] = pivot_table.apply(
 | ||
|         lambda x: (
 | ||
|             x[("分配后消费规模", period)] / x[("消费目标", period)]
 | ||
|             if x[("消费目标", period)] != 0
 | ||
|             else 0
 | ||
|         ),
 | ||
|         axis="columns",
 | ||
|     )
 | ||
| 
 | ||
| # 交换列索引层级,再就列索引排序
 | ||
| pivot_table = pivot_table.swaplevel(axis="columns").sort_index(axis="columns")
 | ||
| 
 | ||
| # 大区名称排序
 | ||
| regions_orders = [
 | ||
|     "东北大区",
 | ||
|     "华北大区",
 | ||
|     "华东大区",
 | ||
|     "华中大区",
 | ||
|     "东南大区",
 | ||
|     "西北大区",
 | ||
|     "西南大区",
 | ||
|     "上海",
 | ||
|     "浙江",
 | ||
|     "总部",
 | ||
|     "汇总",
 | ||
| ]
 | ||
| 
 | ||
| # 大区名称和排序映射器
 | ||
| region_mapper = {
 | ||
|     region_name: region_index for region_index, region_name in enumerate(regions_orders)
 | ||
| }
 | ||
| 
 | ||
| # 根据大区名称映射排序
 | ||
| regions_mapped = [
 | ||
|     region_mapper.get(region_name)
 | ||
|     for region_name in pivot_table.index.get_level_values("大区名称")
 | ||
| ]
 | ||
| 
 | ||
| # 机构排序
 | ||
| institutions_orders = {
 | ||
|     "东北大区": ["汇总", "内蒙", "辽宁", "黑吉"],
 | ||
|     "华北大区": ["汇总", "京津", "河北", "山西"],
 | ||
|     "华东大区": ["汇总", "安徽", "山东", "河南"],
 | ||
|     "华中大区": ["汇总", "江西", "湖北", "湖南"],
 | ||
|     "东南大区": ["汇总", "江苏", "福建", "广东"],
 | ||
|     "西北大区": ["汇总", "新疆", "陕西", "甘肃", "宁夏"],
 | ||
|     "西南大区": ["汇总", "广西", "四川", "云南", "贵州"],
 | ||
|     "上海": ["上海"],
 | ||
|     "浙江": ["浙江"],
 | ||
|     "总部": ["总部"],
 | ||
|     "汇总": [""],
 | ||
| }
 | ||
| 
 | ||
| # 机构名称和排序映射器
 | ||
| institution_mapper = {}
 | ||
| 
 | ||
| institution_mapper.update(
 | ||
|     {
 | ||
|         (region_name, institution_name): institution_index
 | ||
|         for region_name, institution_names in institutions_orders.items()
 | ||
|         for institution_index, institution_name in enumerate(institution_names)
 | ||
|     }
 | ||
| )
 | ||
| 
 | ||
| # 根据机构名称映射排序
 | ||
| institutions_mapped = [
 | ||
|     institution_mapper.get((region, institution))
 | ||
|     for region, institution in zip(
 | ||
|         pivot_table.index.get_level_values("大区名称"),
 | ||
|         pivot_table.index.get_level_values("机构名称"),
 | ||
|     )
 | ||
| ]
 | ||
| 
 | ||
| # 根据大区名称映射排序和机构名称映射排序多重排序
 | ||
| pivot_table = pivot_table.iloc[
 | ||
|     numpy.lexsort((institutions_mapped, regions_mapped))
 | ||
| ].reset_index()
 | ||
| 
 | ||
| save_as_workbook(
 | ||
|     worksheets=[("sheet1", pivot_table)], workbook_name="普康健康_机构周报.xlsx"
 | ||
| )
 | ||
| 
 | ||
| print("生成成功")
 |