251111更新

2025-11-11 17:33:22 +08:00 · 2025-11-11 17:33:22 +08:00 · 16a0b4e7c5
parent 2fedbac2a5
commit 16a0b4e7c5
2 changed files with 121 additions and 436 deletions
--- a/KANO/main.py
+++ b/KANO/main.py
@@ -5,9 +5,12 @@
 本脚本用于KANO需求分析，能够处理问卷数据并输出需求分类结果
 """

+import re
+
+import numpy
 import pandas

-import prettytable
+from utils.pandas_extension import save_as_workbook


 print("1 打开并读取Excel文件...", end="")
@@ -37,9 +40,9 @@ try:
        raise Exception("样本数为0")

    # 统计列数
-    columns_counts = dataset.shape[1]
+    columns = dataset.shape[1]
    # 若列数非奇数则抛出异常
-    if columns_counts % 2 != 0:
+    if columns % 2 != 0:
        raise Exception("列数为奇数")

    print(f"已完成，样本数为{samples_size}")
@@ -48,451 +51,133 @@ except Exception as exception:
    print(f"发生异常：{str(exception)}")
    exit()

-# 读取问卷题目和答案（从第7列开始为题目或答案）
-DataSet = DataSet.iloc[:, 6:]
+print("2 生成KANO评价结果...", end="")

-# 统计数据集中样本数量和题目数量
-Sample_Size, Question_Amount = DataSet.shape
+# KANO评价结果
+kano = pandas.DataFrame()

-# 判断题目数量是否为偶数，若为偶数则计算问卷中涉及需求数量，若为奇数则终止脚本
-if Question_Amount % 2 == 0:
+for column in range(columns):
+    if column % 2 != 0:
+        continue

-    # 计算问卷中涉及需求数量
-    Requirement_Amount = int(Question_Amount / 2)
+    # 功能名称
+    feature = re.search(
+        pattern="【(?P<feature>.*?)】", string=dataset.columns[column]
+    ).group("feature")

-else:
+    # 生成某功能的选项分布
+    distribution = pandas.crosstab(
+        index=dataset.iloc[:, column],  # 行索引为如果该产品有{{功能名称}}的选项
+        columns=dataset.iloc[:, column + 1],  # 列索引为如果该产品没有{{功能名称}}的选项
+        rownames=[""],
+        colnames=[""],
+        normalize=True,  # 转为占比
+    ).reindex(index=alternatives, columns=alternatives, fill_value=0)

-    print("题目数量为奇数，请检查")
-    print("")
-
-    exit()
-
-print(
-    "数据集中包含 %d 份样本， %d 个问题（涉及 %d 个需求）"
-    % (Sample_Size, Question_Amount, Requirement_Amount)
-)
-print("")
-
-print("*" * 100)
-print("")
-
-print("2 数据预处理")
-print("")
-
-print("2.1 检查并删除不规范样本")
-print("")
-
-# 定义问卷中备选答案列表
-Alternatives = ["我很喜欢", "理所应当", "无所谓", "勉强接受", "我很不喜欢"]
-
-# 检查答案是否在指定范围，若否则将该答案定义为空值
-DataSet = DataSet.applymap(lambda x: x if x in Alternatives else None)
-
-# 删除包含缺失值的样本
-DataSet.dropna(axis="index", how="any", inplace=True)
-
-# 删除答案全部相同的样本
-DataSet = DataSet[DataSet.apply(pandas.Series.nunique, axis="columns") != 1]
-
-# 统计有效样本数量
-Sample_Size = DataSet.shape[0]
-
-print("处理后，有效样本数量为 %d 份" % (Sample_Size))
-print("")
-
-print("*" * 100)
-print("")
-
-print("3 数据处理")
-print("")
-
-Requirement_Labels = DataSet.columns.tolist()
-
-# 通过问题截取需求名称（截取'有'右侧、'，'左侧字符串）
-Requirement_Labels = [
-    x[x.find("有") + 1 : x.find("，")]
-    for x in Requirement_Labels
-    if isinstance(x, str) and "具有" in x
-]
-
-print("3.1 绘制KANO评价结果分类对照表")
-print("")
-
-for Question_Number in range(Requirement_Amount):
-
-    # 创建KANO评价结果分类对照表
-    KANO = pandas.DataFrame(data=[], index=Alternatives, columns=Alternatives)
-
-    for Column_Label in Alternatives:
-
-        for Index_Label in Alternatives:
-
-            # 统计并赋值
-            KANO.loc[Index_Label, Column_Label] = DataSet.loc[
-                (DataSet.iloc[:, Question_Number].isin([Index_Label]))
-                & (DataSet.iloc[:, Question_Number + 1].isin([Column_Label])),
-                :,
-            ].shape[0]
-
-    # 修改行名
-    KANO.index = [
-        "Provide_Like",
-        "Provide_Should",
-        "Provide_Indifferent",
-        "Provide_Grudging",
-        "Provide_Hate",
-    ]
-
-    # 重置索引
-    KANO.reset_index(inplace=True)
-
-    # 修改列名
-    KANO.columns = [
-        "",
-        "Not_Provide_Like",
-        "Not_Provide_Should",
-        "Not_Provide_Indifferent",
-        "Not_Provide_Grudging",
-        "Not_Provide_Hate",
-    ]
-
-    # 打印表格
-
-    PrintTable = prettytable.PrettyTable()
-
-    PrintTable.field_names = KANO.columns.tolist()
-
-    for Index in KANO.index.tolist():
-
-        PrintTable.add_row(KANO.loc[Index])
-
-    PrintTable.align = "r"
-
-    PrintTable.align[""] = "l"
-
-    PrintTable.float_format = "."
-
-    print(
-        "附表 需求%d：%s的KANO评价结果分类对照表："
-        % (Question_Number + 1, Requirement_Labels[Question_Number])
-    )
-
-    print(PrintTable)
-
-    print("")
-
-print("字段说明：")
-
-print(
-    "1）Not_Provide_Like为不提供该需求、用户表示“我很喜欢”，Not_Provide_Should为不提供该需求、用户表示“理所应当”，Not_Provide_Indifferent为不提供该需求、用户表示“无所谓”，Not_Provide_Grudging为不提供该需求、用户表示“勉强接受”，Not_Provide_Hate为不提供该需求、用户表示“我很不喜欢”。"
-)
-
-print(
-    "1）Provide_Like为提供该需求、用户表示“我很喜欢”，Provide_Should为提供该需求、用户表示“理所应当”，Provide_Indifferent为提供该需求、用户表示“无所谓”，Provide_Grudging为提供该需求、用户表示“勉强接受”，Provide_Hate为不提供该需求、用户表示“我很不喜欢”。"
-)
-
-print("")
-
-print("3.2 计算KANO评价维度")
-print("")
-
-# 创建KANO各维度分数表
-KANO_Report = pandas.DataFrame(
-    data=[],
-    columns=[
-        "Requirement_Label",
-        "A_Score",
-        "O_Score",
-        "M_Score",
-        "I_Score",
-        "R_Score",
-        "Q_Score",
-    ],
-    dtype="float",
-)
-
-KANO_Report["Requirement_Label"] = Requirement_Labels
-
-for Question_Number in range(Requirement_Amount):
-
-    # 计算兴奋型需求分数
-    A_Score = round(
-        DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
-            & (
-                DataSet.iloc[:, Question_Number + 1].isin(
-                    ["理所应当", "无所谓", "勉强接受"]
-                )
+    kano = pandas.concat(
+        objs=[
+            kano,
+            pandas.DataFrame(
+                [
+                    {
+                        "功能名称": feature,
+                        "魅力属性": distribution.loc[
+                            "非常满意", ["理应如此", "无所谓", "勉强接受"]
+                        ].sum(),
+                        "期望属性": distribution.loc["非常满意", "不满意"].sum(),
+                        "必备属性": distribution.loc[
+                            ["理应如此", "无所谓", "勉强接受"], "不满意"
+                        ].sum(),
+                        "无差异属性": distribution.loc[
+                            ["理应如此", "无所谓", "勉强接受"],
+                            ["理应如此", "无所谓", "勉强接受"],
+                        ]
+                        .sum()
+                        .sum(),
+                        "反向属性": distribution.loc[
+                            ["理应如此", "无所谓", "勉强接受", "不满意"], "非常满意"
+                        ].sum()
+                        + distribution.loc[
+                            ("不满意", ["理应如此", "无所谓", "勉强接受"])
+                        ].sum(),
+                        "可疑结果": distribution.loc["非常满意", "非常满意"]
+                        + distribution.loc["不满意", "不满意"],
+                    }
+                ]
            ),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100,
-        2,
+        ],
+        ignore_index=True,
    )

-    KANO_Report.loc[Question_Number, "A_Score"] = A_Score
-
-    # 计算期望型需求分数
-    O_Score = round(
-        DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
-            & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100,
-        2,
+# 计算better-worse分数，并贴标签
+kano = (
+    kano.assign(
+        满意系数=lambda dataframe: (dataframe["魅力属性"] + dataframe["期望属性"])
+        / (
+            dataframe["魅力属性"]
+            + dataframe["期望属性"]
+            + dataframe["必备属性"]
+            + dataframe["无差异属性"]
+        ),
+        不满意系数=lambda dataframe: (dataframe["必备属性"] + dataframe["期望属性"])
+        / (
+            dataframe["魅力属性"]
+            + dataframe["期望属性"]
+            + dataframe["必备属性"]
+            + dataframe["无差异属性"]
+        ),
    )
-
-    KANO_Report.loc[Question_Number, "O_Score"] = O_Score
-
-    # 计算必备型需求分数
-    M_Score = round(
-        DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"]))
-            & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100,
-        2,
-    )
-
-    KANO_Report.loc[Question_Number, "M_Score"] = M_Score
-
-    # 计算无差型需求分数
-    I_Score = round(
-        DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"]))
-            & (
-                DataSet.iloc[:, Question_Number + 1].isin(
-                    ["理所应当", "无所谓", "勉强接受"]
-                )
+    .pipe(
+        lambda dataframe: dataframe.assign(
+            需求标签=numpy.select(
+                condlist=[
+                    (dataframe["满意系数"] >= dataframe["满意系数"].mean())
+                    & (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()),
+                    (dataframe["满意系数"] >= dataframe["满意系数"].mean())
+                    & (dataframe["不满意系数"] < dataframe["不满意系数"].mean()),
+                    (dataframe["满意系数"] < dataframe["满意系数"].mean())
+                    & (dataframe["不满意系数"] < dataframe["不满意系数"].mean()),
+                    (dataframe["满意系数"] < dataframe["满意系数"].mean())
+                    & (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()),
+                ],
+                choicelist=[
+                    "期望型需求(P2)",
+                    "魅力型需求(P3)",
+                    "无差异需求(P4)",
+                    "必备型需求(P1)",
+                ],
+                default="未分类",
            ),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100,
-        2,
+        )
    )
-
-    KANO_Report.loc[Question_Number, "I_Score"] = I_Score
-
-    # 计算反向型需求分数
-    R_Score = round(
-        DataSet.loc[
-            (
-                DataSet.iloc[:, Question_Number].isin(
-                    ["理所应当", "无所谓", "勉强接受", "我很不喜欢"]
-                )
-            )
-            & (
-                DataSet.iloc[:, Question_Number + 1].isin(
-                    ["我很喜欢", "理所应当", "无所谓", "勉强接受"]
-                )
-            ),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100
-        - I_Score,
-        2,
-    )
-
-    KANO_Report.loc[Question_Number, "R_Score"] = R_Score
-
-    # 计算可疑型需求分数
-    Q_Score = round(
-        DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
-            & (DataSet.iloc[:, Question_Number + 1].isin(["我很喜欢"])),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100
-        + DataSet.loc[
-            (DataSet.iloc[:, Question_Number].isin(["我很不喜欢"]))
-            & (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
-            :,
-        ].shape[0]
-        / Sample_Size
-        * 100,
-        2,
-    )
-
-    KANO_Report.loc[Question_Number, "Q_Score"] = Q_Score
-
-# 打印表格
-
-PrintTable = prettytable.PrettyTable()
-
-PrintTable.field_names = KANO_Report.columns.tolist()
-
-for Index in KANO_Report.index.tolist():
-
-    PrintTable.add_row(KANO_Report.loc[Index])
-
-PrintTable.align["Requirement_Label"] = "l"
-
-PrintTable.align["A_Score"] = "r"
-
-PrintTable.align["O_Score"] = "r"
-
-PrintTable.align["M_Score"] = "r"
-
-PrintTable.align["I_Score"] = "r"
-
-PrintTable.align["R_Score"] = "r"
-
-PrintTable.align["Q_Score"] = "r"
-
-PrintTable.align["Better_Score"] = "r"
-
-PrintTable.align["Worse_Score"] = "r"
-
-PrintTable.float_format["A_Score"] = ".2"
-
-PrintTable.float_format["O_Score"] = ".2"
-
-PrintTable.float_format["M_Score"] = ".2"
-
-PrintTable.float_format["I_Score"] = ".2"
-
-PrintTable.float_format["R_Score"] = ".2"
-
-PrintTable.float_format["Q_Score"] = ".2"
-
-PrintTable.float_format["Better_Score"] = ".2"
-
-PrintTable.float_format["Worse_Score"] = ".2"
-
-print("附表 各需求的KANO评价维度计算结果：")
-
-print(
-    PrintTable.get_string(
-        fields=[
-            "Requirement_Label",
-            "A_Score",
-            "O_Score",
-            "M_Score",
-            "I_Score",
-            "R_Score",
-            "Q_Score",
-        ]
+    .assign(
+        魅力属性=lambda dataframe: dataframe["魅力属性"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        期望属性=lambda dataframe: dataframe["期望属性"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        必备属性=lambda dataframe: dataframe["必备属性"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        无差异属性=lambda dataframe: dataframe["无差异属性"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        反向属性=lambda dataframe: dataframe["反向属性"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        可疑结果=lambda dataframe: dataframe["可疑结果"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        满意系数=lambda dataframe: dataframe["满意系数"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
+        不满意系数=lambda dataframe: dataframe["不满意系数"].apply(
+            lambda cell: f"{cell:.2f}"
+        ),
    )
 )

-print("字段说明：")
+save_as_workbook(worksheets=[("Sheet1", kano)], workbook_name="result.xlsx")

-print(
-    "1）Requirement_Label为需求名称，A_Score为兴奋型需求分数，O_Score为期望型需求分数，M_Score为必备型需求分数，I_Score为无差型需求分数，R_Score为反向型需求分数，Q_Score为可疑型需求分数。"
-)
-
-print("")
-
-print("3.3 定义需求类型和Better-Worse分数")
-print("")
-
-# 以KANO评价维度中最高分定义需求类型
-Requirement_Types = list(
-    KANO_Report[
-        ["A_Score", "O_Score", "M_Score", "I_Score", "R_Score", "Q_Score"]
-    ].idxmax(axis="columns")
-)
-
-# 通过列名截取需求类型（第一位、'_'左侧字符串）
-Requirement_Types = [
-    x[0 : x.find("_")] for x in Requirement_Types if isinstance(x, str)
-]
-
-KANO_Report["Requirement_Type"] = Requirement_Types
-
-# 计算Better分数
-KANO_Report["Better_Score"] = (
-    (KANO_Report["A_Score"] + KANO_Report["O_Score"])
-    / (
-        KANO_Report["A_Score"]
-        + KANO_Report["O_Score"]
-        + KANO_Report["M_Score"]
-        + KANO_Report["I_Score"]
-    )
-    * 100
-)
-
-# 计算Worse分数
-KANO_Report["Worse_Score"] = (
-    -1
-    * (KANO_Report["O_Score"] + KANO_Report["M_Score"])
-    / (
-        KANO_Report["A_Score"]
-        + KANO_Report["O_Score"]
-        + KANO_Report["M_Score"]
-        + KANO_Report["I_Score"]
-    )
-    * 100
-)
-
-# 打印表格
-
-PrintTable = prettytable.PrettyTable()
-
-PrintTable.field_names = KANO_Report.columns.tolist()
-
-for Index in KANO_Report.index.tolist():
-
-    PrintTable.add_row(KANO_Report.loc[Index])
-
-PrintTable.align["Requirement_Label"] = "l"
-
-PrintTable.align["Requirement_Type"] = "r"
-
-PrintTable.align["Better_Score"] = "r"
-
-PrintTable.align["Worse_Score"] = "r"
-
-PrintTable.float_format["Better_Score"] = ".2"
-
-PrintTable.float_format["Worse_Score"] = ".2"
-
-print("附表 各需求的KANO评价维度计算结果：")
-
-print(
-    PrintTable.get_string(
-        fields=["Requirement_Label", "Requirement_Type", "Better_Score", "Worse_Score"]
-    )
-)
-
-print("字段说明：")
-
-print("1）Requirement_Label为需求名称，Requirement_Type为需求类型。")
-
-print(
-    "2）A为兴奋型需求：表示产品具有该种需求，则用户满意度会提高；没有该种需求，则用户满意度不会降低。建议给予P3关注。"
-)
-
-print(
-    "3）O为期望型需求：表示产品具有该种需求，则用户满意度会提高；没有该种需求，则用户满意度会降低。建议给予P1关注。"
-)
-
-print(
-    "4）M为必备型需求：表示产品具有该种需求，则用户满意度不会提高；没有该种需求，则用户满意度会降低。建议给予P2关注。"
-)
-
-print(
-    "5）I为无差型需求：表示产品具有该种需求，则用户满意度不会提高；没有该种需求，则用户满意度不会降低。建议给予P4关注。"
-)
-
-print("6）R为反向型需求：建议给予关注。")
-
-print("7）Q为可疑型需求：建议给予关注。")
-
-print(
-    "8）Better_Score为Better分数。表示如果产品具有某种需求，则用户满意度会提高，数值越大提高越大。"
-)
-
-print(
-    "9）Worse_Score为Worse分数。表示如果产品没有某种需求，则用户满意度会下降，数值越小下降越大。"
-)
-
-print("")
+print("已完成")
--- a/KANO/result.xlsx
+++ b/KANO/result.xlsx