diff --git a/KANO/main.py b/KANO/main.py index 2c1a272..bfb536c 100644 --- a/KANO/main.py +++ b/KANO/main.py @@ -5,9 +5,12 @@ 本脚本用于KANO需求分析,能够处理问卷数据并输出需求分类结果 """ +import re + +import numpy import pandas -import prettytable +from utils.pandas_extension import save_as_workbook print("1 打开并读取Excel文件...", end="") @@ -37,9 +40,9 @@ try: raise Exception("样本数为0") # 统计列数 - columns_counts = dataset.shape[1] + columns = dataset.shape[1] # 若列数非奇数则抛出异常 - if columns_counts % 2 != 0: + if columns % 2 != 0: raise Exception("列数为奇数") print(f"已完成,样本数为{samples_size}") @@ -48,451 +51,133 @@ except Exception as exception: print(f"发生异常:{str(exception)}") exit() -# 读取问卷题目和答案(从第7列开始为题目或答案) -DataSet = DataSet.iloc[:, 6:] +print("2 生成KANO评价结果...", end="") -# 统计数据集中样本数量和题目数量 -Sample_Size, Question_Amount = DataSet.shape +# KANO评价结果 +kano = pandas.DataFrame() -# 判断题目数量是否为偶数,若为偶数则计算问卷中涉及需求数量,若为奇数则终止脚本 -if Question_Amount % 2 == 0: +for column in range(columns): + if column % 2 != 0: + continue - # 计算问卷中涉及需求数量 - Requirement_Amount = int(Question_Amount / 2) + # 功能名称 + feature = re.search( + pattern="【(?P.*?)】", string=dataset.columns[column] + ).group("feature") -else: + # 生成某功能的选项分布 + distribution = pandas.crosstab( + index=dataset.iloc[:, column], # 行索引为如果该产品有{{功能名称}}的选项 + columns=dataset.iloc[:, column + 1], # 列索引为如果该产品没有{{功能名称}}的选项 + rownames=[""], + colnames=[""], + normalize=True, # 转为占比 + ).reindex(index=alternatives, columns=alternatives, fill_value=0) - print("题目数量为奇数,请检查") - print("") - - exit() - -print( - "数据集中包含 %d 份样本, %d 个问题(涉及 %d 个需求)" - % (Sample_Size, Question_Amount, Requirement_Amount) -) -print("") - -print("*" * 100) -print("") - -print("2 数据预处理") -print("") - -print("2.1 检查并删除不规范样本") -print("") - -# 定义问卷中备选答案列表 -Alternatives = ["我很喜欢", "理所应当", "无所谓", "勉强接受", "我很不喜欢"] - -# 检查答案是否在指定范围,若否则将该答案定义为空值 -DataSet = DataSet.applymap(lambda x: x if x in Alternatives else None) - -# 删除包含缺失值的样本 -DataSet.dropna(axis="index", how="any", inplace=True) - -# 删除答案全部相同的样本 -DataSet = DataSet[DataSet.apply(pandas.Series.nunique, axis="columns") != 1] - -# 统计有效样本数量 -Sample_Size = DataSet.shape[0] - -print("处理后,有效样本数量为 %d 份" % (Sample_Size)) -print("") - -print("*" * 100) -print("") - -print("3 数据处理") -print("") - -Requirement_Labels = DataSet.columns.tolist() - -# 通过问题截取需求名称(截取'有'右侧、','左侧字符串) -Requirement_Labels = [ - x[x.find("有") + 1 : x.find(",")] - for x in Requirement_Labels - if isinstance(x, str) and "具有" in x -] - -print("3.1 绘制KANO评价结果分类对照表") -print("") - -for Question_Number in range(Requirement_Amount): - - # 创建KANO评价结果分类对照表 - KANO = pandas.DataFrame(data=[], index=Alternatives, columns=Alternatives) - - for Column_Label in Alternatives: - - for Index_Label in Alternatives: - - # 统计并赋值 - KANO.loc[Index_Label, Column_Label] = DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin([Index_Label])) - & (DataSet.iloc[:, Question_Number + 1].isin([Column_Label])), - :, - ].shape[0] - - # 修改行名 - KANO.index = [ - "Provide_Like", - "Provide_Should", - "Provide_Indifferent", - "Provide_Grudging", - "Provide_Hate", - ] - - # 重置索引 - KANO.reset_index(inplace=True) - - # 修改列名 - KANO.columns = [ - "", - "Not_Provide_Like", - "Not_Provide_Should", - "Not_Provide_Indifferent", - "Not_Provide_Grudging", - "Not_Provide_Hate", - ] - - # 打印表格 - - PrintTable = prettytable.PrettyTable() - - PrintTable.field_names = KANO.columns.tolist() - - for Index in KANO.index.tolist(): - - PrintTable.add_row(KANO.loc[Index]) - - PrintTable.align = "r" - - PrintTable.align[""] = "l" - - PrintTable.float_format = "." - - print( - "附表 需求%d:%s的KANO评价结果分类对照表:" - % (Question_Number + 1, Requirement_Labels[Question_Number]) - ) - - print(PrintTable) - - print("") - -print("字段说明:") - -print( - "1)Not_Provide_Like为不提供该需求、用户表示“我很喜欢”,Not_Provide_Should为不提供该需求、用户表示“理所应当”,Not_Provide_Indifferent为不提供该需求、用户表示“无所谓”,Not_Provide_Grudging为不提供该需求、用户表示“勉强接受”,Not_Provide_Hate为不提供该需求、用户表示“我很不喜欢”。" -) - -print( - "1)Provide_Like为提供该需求、用户表示“我很喜欢”,Provide_Should为提供该需求、用户表示“理所应当”,Provide_Indifferent为提供该需求、用户表示“无所谓”,Provide_Grudging为提供该需求、用户表示“勉强接受”,Provide_Hate为不提供该需求、用户表示“我很不喜欢”。" -) - -print("") - -print("3.2 计算KANO评价维度") -print("") - -# 创建KANO各维度分数表 -KANO_Report = pandas.DataFrame( - data=[], - columns=[ - "Requirement_Label", - "A_Score", - "O_Score", - "M_Score", - "I_Score", - "R_Score", - "Q_Score", - ], - dtype="float", -) - -KANO_Report["Requirement_Label"] = Requirement_Labels - -for Question_Number in range(Requirement_Amount): - - # 计算兴奋型需求分数 - A_Score = round( - DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["我很喜欢"])) - & ( - DataSet.iloc[:, Question_Number + 1].isin( - ["理所应当", "无所谓", "勉强接受"] - ) + kano = pandas.concat( + objs=[ + kano, + pandas.DataFrame( + [ + { + "功能名称": feature, + "魅力属性": distribution.loc[ + "非常满意", ["理应如此", "无所谓", "勉强接受"] + ].sum(), + "期望属性": distribution.loc["非常满意", "不满意"].sum(), + "必备属性": distribution.loc[ + ["理应如此", "无所谓", "勉强接受"], "不满意" + ].sum(), + "无差异属性": distribution.loc[ + ["理应如此", "无所谓", "勉强接受"], + ["理应如此", "无所谓", "勉强接受"], + ] + .sum() + .sum(), + "反向属性": distribution.loc[ + ["理应如此", "无所谓", "勉强接受", "不满意"], "非常满意" + ].sum() + + distribution.loc[ + ("不满意", ["理应如此", "无所谓", "勉强接受"]) + ].sum(), + "可疑结果": distribution.loc["非常满意", "非常满意"] + + distribution.loc["不满意", "不满意"], + } + ] ), - :, - ].shape[0] - / Sample_Size - * 100, - 2, + ], + ignore_index=True, ) - KANO_Report.loc[Question_Number, "A_Score"] = A_Score - - # 计算期望型需求分数 - O_Score = round( - DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["我很喜欢"])) - & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])), - :, - ].shape[0] - / Sample_Size - * 100, - 2, +# 计算better-worse分数,并贴标签 +kano = ( + kano.assign( + 满意系数=lambda dataframe: (dataframe["魅力属性"] + dataframe["期望属性"]) + / ( + dataframe["魅力属性"] + + dataframe["期望属性"] + + dataframe["必备属性"] + + dataframe["无差异属性"] + ), + 不满意系数=lambda dataframe: (dataframe["必备属性"] + dataframe["期望属性"]) + / ( + dataframe["魅力属性"] + + dataframe["期望属性"] + + dataframe["必备属性"] + + dataframe["无差异属性"] + ), ) - - KANO_Report.loc[Question_Number, "O_Score"] = O_Score - - # 计算必备型需求分数 - M_Score = round( - DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"])) - & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])), - :, - ].shape[0] - / Sample_Size - * 100, - 2, - ) - - KANO_Report.loc[Question_Number, "M_Score"] = M_Score - - # 计算无差型需求分数 - I_Score = round( - DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"])) - & ( - DataSet.iloc[:, Question_Number + 1].isin( - ["理所应当", "无所谓", "勉强接受"] - ) + .pipe( + lambda dataframe: dataframe.assign( + 需求标签=numpy.select( + condlist=[ + (dataframe["满意系数"] >= dataframe["满意系数"].mean()) + & (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()), + (dataframe["满意系数"] >= dataframe["满意系数"].mean()) + & (dataframe["不满意系数"] < dataframe["不满意系数"].mean()), + (dataframe["满意系数"] < dataframe["满意系数"].mean()) + & (dataframe["不满意系数"] < dataframe["不满意系数"].mean()), + (dataframe["满意系数"] < dataframe["满意系数"].mean()) + & (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()), + ], + choicelist=[ + "期望型需求(P2)", + "魅力型需求(P3)", + "无差异需求(P4)", + "必备型需求(P1)", + ], + default="未分类", ), - :, - ].shape[0] - / Sample_Size - * 100, - 2, + ) ) - - KANO_Report.loc[Question_Number, "I_Score"] = I_Score - - # 计算反向型需求分数 - R_Score = round( - DataSet.loc[ - ( - DataSet.iloc[:, Question_Number].isin( - ["理所应当", "无所谓", "勉强接受", "我很不喜欢"] - ) - ) - & ( - DataSet.iloc[:, Question_Number + 1].isin( - ["我很喜欢", "理所应当", "无所谓", "勉强接受"] - ) - ), - :, - ].shape[0] - / Sample_Size - * 100 - - I_Score, - 2, - ) - - KANO_Report.loc[Question_Number, "R_Score"] = R_Score - - # 计算可疑型需求分数 - Q_Score = round( - DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["我很喜欢"])) - & (DataSet.iloc[:, Question_Number + 1].isin(["我很喜欢"])), - :, - ].shape[0] - / Sample_Size - * 100 - + DataSet.loc[ - (DataSet.iloc[:, Question_Number].isin(["我很不喜欢"])) - & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])), - :, - ].shape[0] - / Sample_Size - * 100, - 2, - ) - - KANO_Report.loc[Question_Number, "Q_Score"] = Q_Score - -# 打印表格 - -PrintTable = prettytable.PrettyTable() - -PrintTable.field_names = KANO_Report.columns.tolist() - -for Index in KANO_Report.index.tolist(): - - PrintTable.add_row(KANO_Report.loc[Index]) - -PrintTable.align["Requirement_Label"] = "l" - -PrintTable.align["A_Score"] = "r" - -PrintTable.align["O_Score"] = "r" - -PrintTable.align["M_Score"] = "r" - -PrintTable.align["I_Score"] = "r" - -PrintTable.align["R_Score"] = "r" - -PrintTable.align["Q_Score"] = "r" - -PrintTable.align["Better_Score"] = "r" - -PrintTable.align["Worse_Score"] = "r" - -PrintTable.float_format["A_Score"] = ".2" - -PrintTable.float_format["O_Score"] = ".2" - -PrintTable.float_format["M_Score"] = ".2" - -PrintTable.float_format["I_Score"] = ".2" - -PrintTable.float_format["R_Score"] = ".2" - -PrintTable.float_format["Q_Score"] = ".2" - -PrintTable.float_format["Better_Score"] = ".2" - -PrintTable.float_format["Worse_Score"] = ".2" - -print("附表 各需求的KANO评价维度计算结果:") - -print( - PrintTable.get_string( - fields=[ - "Requirement_Label", - "A_Score", - "O_Score", - "M_Score", - "I_Score", - "R_Score", - "Q_Score", - ] + .assign( + 魅力属性=lambda dataframe: dataframe["魅力属性"].apply( + lambda cell: f"{cell:.2f}" + ), + 期望属性=lambda dataframe: dataframe["期望属性"].apply( + lambda cell: f"{cell:.2f}" + ), + 必备属性=lambda dataframe: dataframe["必备属性"].apply( + lambda cell: f"{cell:.2f}" + ), + 无差异属性=lambda dataframe: dataframe["无差异属性"].apply( + lambda cell: f"{cell:.2f}" + ), + 反向属性=lambda dataframe: dataframe["反向属性"].apply( + lambda cell: f"{cell:.2f}" + ), + 可疑结果=lambda dataframe: dataframe["可疑结果"].apply( + lambda cell: f"{cell:.2f}" + ), + 满意系数=lambda dataframe: dataframe["满意系数"].apply( + lambda cell: f"{cell:.2f}" + ), + 不满意系数=lambda dataframe: dataframe["不满意系数"].apply( + lambda cell: f"{cell:.2f}" + ), ) ) -print("字段说明:") +save_as_workbook(worksheets=[("Sheet1", kano)], workbook_name="result.xlsx") -print( - "1)Requirement_Label为需求名称,A_Score为兴奋型需求分数,O_Score为期望型需求分数,M_Score为必备型需求分数,I_Score为无差型需求分数,R_Score为反向型需求分数,Q_Score为可疑型需求分数。" -) - -print("") - -print("3.3 定义需求类型和Better-Worse分数") -print("") - -# 以KANO评价维度中最高分定义需求类型 -Requirement_Types = list( - KANO_Report[ - ["A_Score", "O_Score", "M_Score", "I_Score", "R_Score", "Q_Score"] - ].idxmax(axis="columns") -) - -# 通过列名截取需求类型(第一位、'_'左侧字符串) -Requirement_Types = [ - x[0 : x.find("_")] for x in Requirement_Types if isinstance(x, str) -] - -KANO_Report["Requirement_Type"] = Requirement_Types - -# 计算Better分数 -KANO_Report["Better_Score"] = ( - (KANO_Report["A_Score"] + KANO_Report["O_Score"]) - / ( - KANO_Report["A_Score"] - + KANO_Report["O_Score"] - + KANO_Report["M_Score"] - + KANO_Report["I_Score"] - ) - * 100 -) - -# 计算Worse分数 -KANO_Report["Worse_Score"] = ( - -1 - * (KANO_Report["O_Score"] + KANO_Report["M_Score"]) - / ( - KANO_Report["A_Score"] - + KANO_Report["O_Score"] - + KANO_Report["M_Score"] - + KANO_Report["I_Score"] - ) - * 100 -) - -# 打印表格 - -PrintTable = prettytable.PrettyTable() - -PrintTable.field_names = KANO_Report.columns.tolist() - -for Index in KANO_Report.index.tolist(): - - PrintTable.add_row(KANO_Report.loc[Index]) - -PrintTable.align["Requirement_Label"] = "l" - -PrintTable.align["Requirement_Type"] = "r" - -PrintTable.align["Better_Score"] = "r" - -PrintTable.align["Worse_Score"] = "r" - -PrintTable.float_format["Better_Score"] = ".2" - -PrintTable.float_format["Worse_Score"] = ".2" - -print("附表 各需求的KANO评价维度计算结果:") - -print( - PrintTable.get_string( - fields=["Requirement_Label", "Requirement_Type", "Better_Score", "Worse_Score"] - ) -) - -print("字段说明:") - -print("1)Requirement_Label为需求名称,Requirement_Type为需求类型。") - -print( - "2)A为兴奋型需求:表示产品具有该种需求,则用户满意度会提高;没有该种需求,则用户满意度不会降低。建议给予P3关注。" -) - -print( - "3)O为期望型需求:表示产品具有该种需求,则用户满意度会提高;没有该种需求,则用户满意度会降低。建议给予P1关注。" -) - -print( - "4)M为必备型需求:表示产品具有该种需求,则用户满意度不会提高;没有该种需求,则用户满意度会降低。建议给予P2关注。" -) - -print( - "5)I为无差型需求:表示产品具有该种需求,则用户满意度不会提高;没有该种需求,则用户满意度不会降低。建议给予P4关注。" -) - -print("6)R为反向型需求:建议给予关注。") - -print("7)Q为可疑型需求:建议给予关注。") - -print( - "8)Better_Score为Better分数。表示如果产品具有某种需求,则用户满意度会提高,数值越大提高越大。" -) - -print( - "9)Worse_Score为Worse分数。表示如果产品没有某种需求,则用户满意度会下降,数值越小下降越大。" -) - -print("") +print("已完成") diff --git a/KANO/result.xlsx b/KANO/result.xlsx new file mode 100644 index 0000000..6c91fbc Binary files /dev/null and b/KANO/result.xlsx differ