251029更新
This commit is contained in:
		
							parent
							
								
									376d8448a7
								
							
						
					
					
						commit
						73511f73cc
					
				
							
								
								
									
										225
									
								
								rfm/main.py
								
								
								
								
							
							
						
						
									
										225
									
								
								rfm/main.py
								
								
								
								
							|  | @ -1,104 +1,84 @@ | ||||||
| # -*- coding: utf-8 -*- | # -*- coding: utf-8 -*- | ||||||
| 
 | 
 | ||||||
| if __name__ == "__main__": |  | ||||||
| 
 | 
 | ||||||
|     """ | """ | ||||||
|     基于RFM模型生成数据分析报告 | 基于RFM模型生成数据分析报告 | ||||||
|     """ | """ | ||||||
| 
 | 
 | ||||||
|     # 导入模块 | # 导入模块 | ||||||
| 
 | 
 | ||||||
|     import pandas | import statistics | ||||||
|  | from datetime import datetime | ||||||
|  | from decimal import Decimal, ROUND_HALF_UP | ||||||
| 
 | 
 | ||||||
|     from datetime import datetime | import pandas | ||||||
|  | from jinja2 import Environment, FileSystemLoader | ||||||
| 
 | 
 | ||||||
|     from decimal import Decimal, ROUND_HALF_UP | from utils.client import MySQLClient | ||||||
|  | from utils.pandas_extension import DrawAsHTML | ||||||
| 
 | 
 | ||||||
|     import statistics |  | ||||||
| 
 |  | ||||||
|     from jinja2 import Environment, FileSystemLoader |  | ||||||
| 
 |  | ||||||
|     from utils.client import MySQLClient |  | ||||||
| 
 |  | ||||||
|     from utils.pandas_extension import DrawAsHTML |  | ||||||
| 
 |  | ||||||
|     # 函数说明:根据RFM编码映射为客户分类 |  | ||||||
|     def map_classification(r_encoded, f_encoded, m_encoded): |  | ||||||
| 
 | 
 | ||||||
|  | # 函数说明:根据RFM编码映射为客户分类 | ||||||
|  | def map_classification(r_encoded, f_encoded, m_encoded): | ||||||
|     # 就R、F、M指标构建独热编码并匹配客户分类 |     # 就R、F、M指标构建独热编码并匹配客户分类 | ||||||
|     match f"{r_encoded}{f_encoded}{m_encoded}": |     match f"{r_encoded}{f_encoded}{m_encoded}": | ||||||
| 
 |  | ||||||
|         case "000": |         case "000": | ||||||
| 
 |  | ||||||
|             classification = "流失客户" |             classification = "流失客户" | ||||||
| 
 |  | ||||||
|         case "010": |         case "010": | ||||||
| 
 |  | ||||||
|             classification = "一般维持客户" |             classification = "一般维持客户" | ||||||
| 
 |  | ||||||
|         case "100": |         case "100": | ||||||
| 
 |  | ||||||
|             classification = "新客户" |             classification = "新客户" | ||||||
| 
 |  | ||||||
|         case "110": |         case "110": | ||||||
| 
 |  | ||||||
|             classification = "潜力客户" |             classification = "潜力客户" | ||||||
| 
 |  | ||||||
|         case "001": |         case "001": | ||||||
| 
 |  | ||||||
|             classification = "重要挽留客户" |             classification = "重要挽留客户" | ||||||
| 
 |  | ||||||
|         case "101": |         case "101": | ||||||
| 
 |  | ||||||
|             classification = "重要深耕客户" |             classification = "重要深耕客户" | ||||||
| 
 |  | ||||||
|         case "011": |         case "011": | ||||||
| 
 |  | ||||||
|             classification = "重要唤回客户" |             classification = "重要唤回客户" | ||||||
| 
 |  | ||||||
|         case "111": |         case "111": | ||||||
| 
 |  | ||||||
|             classification = "重要价值客户" |             classification = "重要价值客户" | ||||||
| 
 | 
 | ||||||
|     # noinspection PyUnboundLocalVariable |     # noinspection PyUnboundLocalVariable | ||||||
|     return classification |     return classification | ||||||
| 
 | 
 | ||||||
|     print("1 加载数据集...", end="") |  | ||||||
| 
 | 
 | ||||||
|     client = MySQLClient(database="data_analysis") | print("1 加载数据集...", end="") | ||||||
| 
 | 
 | ||||||
|     dataframe = client.execute_query( | client = MySQLClient(database="data_analysis") | ||||||
|  | 
 | ||||||
|  | dataframe = client.execute_query( | ||||||
|     sql="select 客户ID, 交易金额, 交易日期 from rfm_dataset" |     sql="select 客户ID, 交易金额, 交易日期 from rfm_dataset" | ||||||
|     )  # customer_id 客户ID STRING,trade_date 交易日期 DATETIME.DATE,trade_amount 交易金额 DECIMAL | )  # customer_id 客户ID STRING,trade_date 交易日期 DATETIME.DATE,trade_amount 交易金额 DECIMAL | ||||||
| 
 | 
 | ||||||
|     print("已完成") | print("已完成") | ||||||
| 
 | 
 | ||||||
|     print("2 预处理,删除包含缺失值的样本和重复样本...", end="") | print("2 预处理,删除包含缺失值的样本和重复样本...", end="") | ||||||
| 
 | 
 | ||||||
|     # 删除包含缺失值的样本 | # 删除包含缺失值的样本 | ||||||
|     dataframe.dropna(inplace=True) | dataframe.dropna(inplace=True) | ||||||
| 
 | 
 | ||||||
|     # 删除重复样本(保留第一例重复样本、重置索引) | # 删除重复样本(保留第一例重复样本、重置索引) | ||||||
|     dataframe = dataframe.drop_duplicates(ignore_index=True, inplace=False) | dataframe = dataframe.drop_duplicates(ignore_index=True, inplace=False) | ||||||
| 
 | 
 | ||||||
|     # 仅保留交易日期为2012和2013年的样本 | # 仅保留交易日期为2012和2013年的样本 | ||||||
|     dataframe = dataframe[ | dataframe = dataframe[ | ||||||
|     dataframe["交易日期"].apply(lambda x: x.year in [2012, 2013]) |     dataframe["交易日期"].apply(lambda x: x.year in [2012, 2013]) | ||||||
|     ].reset_index( | ].reset_index( | ||||||
|     drop=True |     drop=True | ||||||
|     )  # 因交易日期数据类型为DATETIME.DATE非DATETIME64,故无法使用SERIES.DT.YEAR方法 | )  # 因交易日期数据类型为DATETIME.DATE非DATETIME64,故无法使用SERIES.DT.YEAR方法 | ||||||
| 
 | 
 | ||||||
|     sample_size = Decimal(dataframe.shape[0]).quantize(Decimal("0")) | sample_size = Decimal(dataframe.shape[0]).quantize(Decimal("0")) | ||||||
| 
 | 
 | ||||||
|     print("已完成") | print("已完成") | ||||||
| 
 | 
 | ||||||
|     print("3 构建RFM...", end="") | print("3 构建RFM...", end="") | ||||||
| 
 | 
 | ||||||
|     # 最远交易日期 | # 最远交易日期 | ||||||
|     min_trade_date = dataframe["交易日期"].min() | min_trade_date = dataframe["交易日期"].min() | ||||||
| 
 | 
 | ||||||
|     # R为最近一次交易日期距离样本中最远一次交易日期的天数(单位:日),DECIMAL;F为交易频率(单位:次),DECIMAL;M为交易金额(单位:元),DECIMAL。均正向化 | # R为最近一次交易日期距离样本中最远一次交易日期的天数(单位:日),DECIMAL;F为交易频率(单位:次),DECIMAL;M为交易金额(单位:元),DECIMAL。均正向化 | ||||||
|     rfm = ( | rfm = ( | ||||||
|     dataframe.groupby(by="客户ID") |     dataframe.groupby(by="客户ID") | ||||||
|     .agg( |     .agg( | ||||||
|         R=( |         R=( | ||||||
|  | @ -109,9 +89,7 @@ if __name__ == "__main__": | ||||||
|         ), |         ), | ||||||
|         F=( |         F=( | ||||||
|             "客户ID", |             "客户ID", | ||||||
|                 lambda x: Decimal(len(x)).quantize( |             lambda x: Decimal(len(x)).quantize(Decimal("0"), rounding=ROUND_HALF_UP), | ||||||
|                     Decimal("0"), rounding=ROUND_HALF_UP |  | ||||||
|                 ), |  | ||||||
|         ), |         ), | ||||||
|         M=( |         M=( | ||||||
|             "交易金额", |             "交易金额", | ||||||
|  | @ -121,33 +99,27 @@ if __name__ == "__main__": | ||||||
|         ), |         ), | ||||||
|     ) |     ) | ||||||
|     .reset_index() |     .reset_index() | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 客户数 | # 客户数 | ||||||
|     customer_counts = Decimal(rfm.shape[0]).quantize(Decimal("0")) | customer_counts = Decimal(rfm.shape[0]).quantize(Decimal("0")) | ||||||
| 
 | 
 | ||||||
|     # 总交易金额 | # 总交易金额 | ||||||
|     trade_amounts = sum(rfm["M"], Decimal("0.00")).quantize(Decimal("0.00")) | trade_amounts = sum(rfm["M"], Decimal("0.00")).quantize(Decimal("0.00")) | ||||||
| 
 | 
 | ||||||
|     print("已完成") | print("已完成") | ||||||
| 
 | 
 | ||||||
|     print("4 基于平均数将R、F和M分为低、高两个等级并组合为八种客户分类...", end="") | print("4 基于平均数将R、F和M分为低、高两个等级并组合为八种客户分类...", end="") | ||||||
| 
 | 
 | ||||||
|     # R、F和M的平均数,使用STATISTICS.MEAN统计平均值,保证精度 | # R、F和M的平均数,使用STATISTICS.MEAN统计平均值,保证精度 | ||||||
|     # noinspection PyUnresolvedReferences | # noinspection PyUnresolvedReferences | ||||||
|     means = { | means = { | ||||||
|         "R": statistics.mean(rfm["R"]).quantize( |     "R": statistics.mean(rfm["R"]).quantize(Decimal("0.00"), rounding=ROUND_HALF_UP), | ||||||
|             Decimal("0.00"), rounding=ROUND_HALF_UP |     "F": statistics.mean(rfm["F"]).quantize(Decimal("0.00"), rounding=ROUND_HALF_UP), | ||||||
|         ), |     "M": statistics.mean(rfm["M"]).quantize(Decimal("0.00"), rounding=ROUND_HALF_UP), | ||||||
|         "F": statistics.mean(rfm["F"]).quantize( | } | ||||||
|             Decimal("0.00"), rounding=ROUND_HALF_UP |  | ||||||
|         ), |  | ||||||
|         "M": statistics.mean(rfm["M"]).quantize( |  | ||||||
|             Decimal("0.00"), rounding=ROUND_HALF_UP |  | ||||||
|         ), |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     rfm = rfm.assign( | rfm = rfm.assign( | ||||||
|     客户分类=lambda dataframe: dataframe.apply( |     客户分类=lambda dataframe: dataframe.apply( | ||||||
|         lambda row: map_classification( |         lambda row: map_classification( | ||||||
|             r_encoded=0 if row["R"] <= means["R"] else 1, |             r_encoded=0 if row["R"] <= means["R"] else 1, | ||||||
|  | @ -156,26 +128,24 @@ if __name__ == "__main__": | ||||||
|         ), |         ), | ||||||
|         axis="columns", |         axis="columns", | ||||||
|     ) |     ) | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     dataframe = dataframe.merge( | dataframe = dataframe.merge(right=rfm[["客户ID", "客户分类"]], on="客户ID", how="left") | ||||||
|         right=rfm[["客户ID", "客户分类"]], on="客户ID", how="left" |  | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
|     print("已完成") | print("已完成") | ||||||
| 
 | 
 | ||||||
|     print("5 生成分析报告...", end="") | print("5 生成分析报告...", end="") | ||||||
| 
 | 
 | ||||||
|     draw = DrawAsHTML() | draw = DrawAsHTML() | ||||||
| 
 | 
 | ||||||
|     # 生成数据预览 | # 生成数据预览 | ||||||
|     draw.table( | draw.table( | ||||||
|     dataframe=dataframe.sample(5), |     dataframe=dataframe.sample(5), | ||||||
|     file_name="数据预览.html", |     file_name="数据预览.html", | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 客户分类维度 | # 客户分类维度 | ||||||
|     customer_types = ( | customer_types = ( | ||||||
|     rfm.groupby(by="客户分类")  # 按照客户分类分组 |     rfm.groupby(by="客户分类")  # 按照客户分类分组 | ||||||
|     .agg( |     .agg( | ||||||
|         R=( |         R=( | ||||||
|  | @ -210,37 +180,35 @@ if __name__ == "__main__": | ||||||
|         ),  # 统计各客户分类的交易金额占比 |         ),  # 统计各客户分类的交易金额占比 | ||||||
|     ) |     ) | ||||||
|     .reset_index() |     .reset_index() | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 生成客户分类分布 | # 生成客户分类分布 | ||||||
|     draw.scatter( | draw.scatter( | ||||||
|     dataframe=customer_types[["客户分类", "R", "F", "M"]], |     dataframe=customer_types[["客户分类", "R", "F", "M"]], | ||||||
|     xaxis_opts_min=475, |     xaxis_opts_min=475, | ||||||
|     xaxis_opts_max=750, |     xaxis_opts_max=750, | ||||||
|     file_name="客户分类分布.html", |     file_name="客户分类分布.html", | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 生成客户占比 | # 生成客户占比 | ||||||
|     draw.pie( | draw.pie( | ||||||
|     dataframe=customer_types[["客户分类", "客户占比"]].sort_values( |     dataframe=customer_types[["客户分类", "客户占比"]].sort_values( | ||||||
|         by="客户占比", ascending=False |         by="客户占比", ascending=False | ||||||
|     ),  # 按照客户占比降序 |     ),  # 按照客户占比降序 | ||||||
|     file_name="客户占比.html", |     file_name="客户占比.html", | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 生成交易金额占比 | # 生成交易金额占比 | ||||||
|     draw.pie( | draw.pie( | ||||||
|     dataframe=customer_types[["客户分类", "交易金额占比"]].sort_values( |     dataframe=customer_types[["客户分类", "交易金额占比"]].sort_values( | ||||||
|         by="交易金额占比", ascending=False |         by="交易金额占比", ascending=False | ||||||
|     ),  # 按照交易金额占比降序 |     ),  # 按照交易金额占比降序 | ||||||
|     file_name="交易金额占比.html", |     file_name="交易金额占比.html", | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     report_backward = pandas.DataFrame( | report_backward = pandas.DataFrame(data=[], columns=["客户分类", "窗口期", "客户数"]) | ||||||
|         data=[], columns=["客户分类", "窗口期", "客户数"] |  | ||||||
|     ) |  | ||||||
| 
 | 
 | ||||||
|     for customer_type in customer_types["客户分类"]: | for customer_type in customer_types["客户分类"]: | ||||||
| 
 | 
 | ||||||
|     for month in range(1, 13): |     for month in range(1, 13): | ||||||
| 
 | 
 | ||||||
|  | @ -253,9 +221,7 @@ if __name__ == "__main__": | ||||||
|         ).start_time.date() |         ).start_time.date() | ||||||
| 
 | 
 | ||||||
|         # 窗口期止期 |         # 窗口期止期 | ||||||
|             period_end = pandas.Period( |         period_end = pandas.Period(value=f"2013-{month:02d}", freq="M").end_time.date() | ||||||
|                 value=f"2013-{month:02d}", freq="M" |  | ||||||
|             ).end_time.date() |  | ||||||
| 
 | 
 | ||||||
|         # 指定客户分类窗口期内客户数 |         # 指定客户分类窗口期内客户数 | ||||||
|         customer_counts = dataframe.loc[ |         customer_counts = dataframe.loc[ | ||||||
|  | @ -271,20 +237,20 @@ if __name__ == "__main__": | ||||||
|             customer_counts, |             customer_counts, | ||||||
|         ] |         ] | ||||||
| 
 | 
 | ||||||
|     # 生成近十二个自然月客户数趋势 | # 生成近十二个自然月客户数趋势 | ||||||
|     draw.area( | draw.area( | ||||||
|     dataframe=report_backward.groupby(by="窗口期", as_index=False).agg( |     dataframe=report_backward.groupby(by="窗口期", as_index=False).agg( | ||||||
|         客户数=("客户数", "sum") |         客户数=("客户数", "sum") | ||||||
|     ), |     ), | ||||||
|     file_name="近十二个自然月客户数趋势.html", |     file_name="近十二个自然月客户数趋势.html", | ||||||
|     yaxis_opts_min=1350, |     yaxis_opts_min=1350, | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     report_backward = report_backward.loc[ | report_backward = report_backward.loc[ | ||||||
|     report_backward["客户分类"].isin( |     report_backward["客户分类"].isin( | ||||||
|         ["新客户", "流失客户", "重要价值客户"] |         ["新客户", "流失客户", "重要价值客户"] | ||||||
|     )  # 仅考虑新客户、流失客户、重要价值客户 |     )  # 仅考虑新客户、流失客户、重要价值客户 | ||||||
|     ].assign( | ].assign( | ||||||
|     总客户数=lambda x: x.groupby(by="窗口期")["客户数"].transform( |     总客户数=lambda x: x.groupby(by="窗口期")["客户数"].transform( | ||||||
|         "sum" |         "sum" | ||||||
|     ),  # 统计窗口期总客户数并新增至各行 |     ),  # 统计窗口期总客户数并新增至各行 | ||||||
|  | @ -296,18 +262,18 @@ if __name__ == "__main__": | ||||||
|         ),  # 运算各项使用DECIMAL以控制精度 |         ),  # 运算各项使用DECIMAL以控制精度 | ||||||
|         axis="columns", |         axis="columns", | ||||||
|     ), |     ), | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 生成近十二个自然月客户占比趋势(仅考虑新客户、流失客户和重要价值客户) | # 生成近十二个自然月客户占比趋势(仅考虑新客户、流失客户和重要价值客户) | ||||||
|     draw.bar( | draw.bar( | ||||||
|     dataframe=report_backward[ |     dataframe=report_backward[ | ||||||
|         ["客户分类", "窗口期", "客户占比"] |         ["客户分类", "窗口期", "客户占比"] | ||||||
|     ],  # 仅保留客户分类、窗口期和占比 |     ],  # 仅保留客户分类、窗口期和占比 | ||||||
|     file_name="近十二个自然月客户占比趋势.html", |     file_name="近十二个自然月客户占比趋势.html", | ||||||
|     stack=True, |     stack=True, | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     report_forward = ( | report_forward = ( | ||||||
|     dataframe.assign( |     dataframe.assign( | ||||||
|         最早交易日期=lambda dataframe: dataframe.groupby(by="客户ID")[ |         最早交易日期=lambda dataframe: dataframe.groupby(by="客户ID")[ | ||||||
|             "交易日期" |             "交易日期" | ||||||
|  | @ -371,28 +337,27 @@ if __name__ == "__main__": | ||||||
|         ) |         ) | ||||||
|     ] |     ] | ||||||
|     .reset_index(drop=True) |     .reset_index(drop=True) | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 生成近十二个自然月留存率趋势 | # 生成近十二个自然月留存率趋势 | ||||||
|     draw.line( | draw.line( | ||||||
|     dataframe=report_forward[["客户分类", "周期", "留存率"]], |     dataframe=report_forward[["客户分类", "周期", "留存率"]], | ||||||
|     file_name="近十二个自然月留存率趋势.html", |     file_name="近十二个自然月留存率趋势.html", | ||||||
|     ) | ) | ||||||
| 
 | 
 | ||||||
|     # 获取报告模版 | # 获取报告模版 | ||||||
|     template = Environment(loader=FileSystemLoader(".")).get_template("template.html") | template = Environment(loader=FileSystemLoader(".")).get_template("template.html") | ||||||
| 
 | 
 | ||||||
|     # 渲染模版 | # 渲染模版 | ||||||
|     rfm_report = template.render( | rfm_report = template.render( | ||||||
|     { |     { | ||||||
|         # 报告日期 |         # 报告日期 | ||||||
|         "report_date": datetime.now().strftime("%Y-%m-%d"), |         "report_date": datetime.now().strftime("%Y-%m-%d"), | ||||||
|         "sample_size": sample_size, |         "sample_size": sample_size, | ||||||
|     } |     } | ||||||
|     ) | ) | ||||||
| 
 |  | ||||||
|     with open("rfm_report.html", "w", encoding="utf8") as file: |  | ||||||
| 
 | 
 | ||||||
|  | with open("rfm_report.html", "w", encoding="utf8") as file: | ||||||
|     file.write(rfm_report) |     file.write(rfm_report) | ||||||
| 
 | 
 | ||||||
|     print("已完成") | print("已完成") | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue