251111更新

This commit is contained in:
marslbr 2025-11-11 17:33:22 +08:00
parent 2fedbac2a5
commit 16a0b4e7c5
2 changed files with 121 additions and 436 deletions

View File

@ -5,9 +5,12 @@
本脚本用于KANO需求分析能够处理问卷数据并输出需求分类结果
"""
import re
import numpy
import pandas
import prettytable
from utils.pandas_extension import save_as_workbook
print("1 打开并读取Excel文件...", end="")
@ -37,9 +40,9 @@ try:
raise Exception("样本数为0")
# 统计列数
columns_counts = dataset.shape[1]
columns = dataset.shape[1]
# 若列数非奇数则抛出异常
if columns_counts % 2 != 0:
if columns % 2 != 0:
raise Exception("列数为奇数")
print(f"已完成,样本数为{samples_size}")
@ -48,451 +51,133 @@ except Exception as exception:
print(f"发生异常:{str(exception)}")
exit()
# 读取问卷题目和答案从第7列开始为题目或答案
DataSet = DataSet.iloc[:, 6:]
print("2 生成KANO评价结果...", end="")
# 统计数据集中样本数量和题目数量
Sample_Size, Question_Amount = DataSet.shape
# KANO评价结果
kano = pandas.DataFrame()
# 判断题目数量是否为偶数,若为偶数则计算问卷中涉及需求数量,若为奇数则终止脚本
if Question_Amount % 2 == 0:
for column in range(columns):
if column % 2 != 0:
continue
# 计算问卷中涉及需求数量
Requirement_Amount = int(Question_Amount / 2)
# 功能名称
feature = re.search(
pattern="【(?P<feature>.*?)】", string=dataset.columns[column]
).group("feature")
else:
# 生成某功能的选项分布
distribution = pandas.crosstab(
index=dataset.iloc[:, column], # 行索引为如果该产品有{{功能名称}}的选项
columns=dataset.iloc[:, column + 1], # 列索引为如果该产品没有{{功能名称}}的选项
rownames=[""],
colnames=[""],
normalize=True, # 转为占比
).reindex(index=alternatives, columns=alternatives, fill_value=0)
print("题目数量为奇数,请检查")
print("")
exit()
print(
"数据集中包含 %d 份样本, %d 个问题(涉及 %d 个需求)"
% (Sample_Size, Question_Amount, Requirement_Amount)
)
print("")
print("*" * 100)
print("")
print("2 数据预处理")
print("")
print("2.1 检查并删除不规范样本")
print("")
# 定义问卷中备选答案列表
Alternatives = ["我很喜欢", "理所应当", "无所谓", "勉强接受", "我很不喜欢"]
# 检查答案是否在指定范围,若否则将该答案定义为空值
DataSet = DataSet.applymap(lambda x: x if x in Alternatives else None)
# 删除包含缺失值的样本
DataSet.dropna(axis="index", how="any", inplace=True)
# 删除答案全部相同的样本
DataSet = DataSet[DataSet.apply(pandas.Series.nunique, axis="columns") != 1]
# 统计有效样本数量
Sample_Size = DataSet.shape[0]
print("处理后,有效样本数量为 %d" % (Sample_Size))
print("")
print("*" * 100)
print("")
print("3 数据处理")
print("")
Requirement_Labels = DataSet.columns.tolist()
# 通过问题截取需求名称(截取'有'右侧、''左侧字符串)
Requirement_Labels = [
x[x.find("") + 1 : x.find("")]
for x in Requirement_Labels
if isinstance(x, str) and "具有" in x
]
print("3.1 绘制KANO评价结果分类对照表")
print("")
for Question_Number in range(Requirement_Amount):
# 创建KANO评价结果分类对照表
KANO = pandas.DataFrame(data=[], index=Alternatives, columns=Alternatives)
for Column_Label in Alternatives:
for Index_Label in Alternatives:
# 统计并赋值
KANO.loc[Index_Label, Column_Label] = DataSet.loc[
(DataSet.iloc[:, Question_Number].isin([Index_Label]))
& (DataSet.iloc[:, Question_Number + 1].isin([Column_Label])),
:,
].shape[0]
# 修改行名
KANO.index = [
"Provide_Like",
"Provide_Should",
"Provide_Indifferent",
"Provide_Grudging",
"Provide_Hate",
]
# 重置索引
KANO.reset_index(inplace=True)
# 修改列名
KANO.columns = [
"",
"Not_Provide_Like",
"Not_Provide_Should",
"Not_Provide_Indifferent",
"Not_Provide_Grudging",
"Not_Provide_Hate",
]
# 打印表格
PrintTable = prettytable.PrettyTable()
PrintTable.field_names = KANO.columns.tolist()
for Index in KANO.index.tolist():
PrintTable.add_row(KANO.loc[Index])
PrintTable.align = "r"
PrintTable.align[""] = "l"
PrintTable.float_format = "."
print(
"附表 需求%d%s的KANO评价结果分类对照表"
% (Question_Number + 1, Requirement_Labels[Question_Number])
)
print(PrintTable)
print("")
print("字段说明:")
print(
"1Not_Provide_Like为不提供该需求、用户表示“我很喜欢”Not_Provide_Should为不提供该需求、用户表示“理所应当”Not_Provide_Indifferent为不提供该需求、用户表示“无所谓”Not_Provide_Grudging为不提供该需求、用户表示“勉强接受”Not_Provide_Hate为不提供该需求、用户表示“我很不喜欢”。"
)
print(
"1Provide_Like为提供该需求、用户表示“我很喜欢”Provide_Should为提供该需求、用户表示“理所应当”Provide_Indifferent为提供该需求、用户表示“无所谓”Provide_Grudging为提供该需求、用户表示“勉强接受”Provide_Hate为不提供该需求、用户表示“我很不喜欢”。"
)
print("")
print("3.2 计算KANO评价维度")
print("")
# 创建KANO各维度分数表
KANO_Report = pandas.DataFrame(
data=[],
columns=[
"Requirement_Label",
"A_Score",
"O_Score",
"M_Score",
"I_Score",
"R_Score",
"Q_Score",
],
dtype="float",
)
KANO_Report["Requirement_Label"] = Requirement_Labels
for Question_Number in range(Requirement_Amount):
# 计算兴奋型需求分数
A_Score = round(
DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
& (
DataSet.iloc[:, Question_Number + 1].isin(
["理所应当", "无所谓", "勉强接受"]
)
kano = pandas.concat(
objs=[
kano,
pandas.DataFrame(
[
{
"功能名称": feature,
"魅力属性": distribution.loc[
"非常满意", ["理应如此", "无所谓", "勉强接受"]
].sum(),
"期望属性": distribution.loc["非常满意", "不满意"].sum(),
"必备属性": distribution.loc[
["理应如此", "无所谓", "勉强接受"], "不满意"
].sum(),
"无差异属性": distribution.loc[
["理应如此", "无所谓", "勉强接受"],
["理应如此", "无所谓", "勉强接受"],
]
.sum()
.sum(),
"反向属性": distribution.loc[
["理应如此", "无所谓", "勉强接受", "不满意"], "非常满意"
].sum()
+ distribution.loc[
("不满意", ["理应如此", "无所谓", "勉强接受"])
].sum(),
"可疑结果": distribution.loc["非常满意", "非常满意"]
+ distribution.loc["不满意", "不满意"],
}
]
),
:,
].shape[0]
/ Sample_Size
* 100,
2,
],
ignore_index=True,
)
KANO_Report.loc[Question_Number, "A_Score"] = A_Score
# 计算期望型需求分数
O_Score = round(
DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
& (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
:,
].shape[0]
/ Sample_Size
* 100,
2,
# 计算better-worse分数并贴标签
kano = (
kano.assign(
满意系数=lambda dataframe: (dataframe["魅力属性"] + dataframe["期望属性"])
/ (
dataframe["魅力属性"]
+ dataframe["期望属性"]
+ dataframe["必备属性"]
+ dataframe["无差异属性"]
),
不满意系数=lambda dataframe: (dataframe["必备属性"] + dataframe["期望属性"])
/ (
dataframe["魅力属性"]
+ dataframe["期望属性"]
+ dataframe["必备属性"]
+ dataframe["无差异属性"]
),
)
KANO_Report.loc[Question_Number, "O_Score"] = O_Score
# 计算必备型需求分数
M_Score = round(
DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"]))
& (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
:,
].shape[0]
/ Sample_Size
* 100,
2,
)
KANO_Report.loc[Question_Number, "M_Score"] = M_Score
# 计算无差型需求分数
I_Score = round(
DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["理所应当", "无所谓", "勉强接受"]))
& (
DataSet.iloc[:, Question_Number + 1].isin(
["理所应当", "无所谓", "勉强接受"]
)
.pipe(
lambda dataframe: dataframe.assign(
需求标签=numpy.select(
condlist=[
(dataframe["满意系数"] >= dataframe["满意系数"].mean())
& (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()),
(dataframe["满意系数"] >= dataframe["满意系数"].mean())
& (dataframe["不满意系数"] < dataframe["不满意系数"].mean()),
(dataframe["满意系数"] < dataframe["满意系数"].mean())
& (dataframe["不满意系数"] < dataframe["不满意系数"].mean()),
(dataframe["满意系数"] < dataframe["满意系数"].mean())
& (dataframe["不满意系数"] >= dataframe["不满意系数"].mean()),
],
choicelist=[
"期望型需求(P2)",
"魅力型需求(P3)",
"无差异需求(P4)",
"必备型需求(P1)",
],
default="未分类",
),
:,
].shape[0]
/ Sample_Size
* 100,
2,
)
)
KANO_Report.loc[Question_Number, "I_Score"] = I_Score
# 计算反向型需求分数
R_Score = round(
DataSet.loc[
(
DataSet.iloc[:, Question_Number].isin(
["理所应当", "无所谓", "勉强接受", "我很不喜欢"]
)
)
& (
DataSet.iloc[:, Question_Number + 1].isin(
["我很喜欢", "理所应当", "无所谓", "勉强接受"]
)
),
:,
].shape[0]
/ Sample_Size
* 100
- I_Score,
2,
)
KANO_Report.loc[Question_Number, "R_Score"] = R_Score
# 计算可疑型需求分数
Q_Score = round(
DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["我很喜欢"]))
& (DataSet.iloc[:, Question_Number + 1].isin(["我很喜欢"])),
:,
].shape[0]
/ Sample_Size
* 100
+ DataSet.loc[
(DataSet.iloc[:, Question_Number].isin(["我很不喜欢"]))
& (DataSet.iloc[:, Question_Number + 1].isin(["我很不喜欢"])),
:,
].shape[0]
/ Sample_Size
* 100,
2,
)
KANO_Report.loc[Question_Number, "Q_Score"] = Q_Score
# 打印表格
PrintTable = prettytable.PrettyTable()
PrintTable.field_names = KANO_Report.columns.tolist()
for Index in KANO_Report.index.tolist():
PrintTable.add_row(KANO_Report.loc[Index])
PrintTable.align["Requirement_Label"] = "l"
PrintTable.align["A_Score"] = "r"
PrintTable.align["O_Score"] = "r"
PrintTable.align["M_Score"] = "r"
PrintTable.align["I_Score"] = "r"
PrintTable.align["R_Score"] = "r"
PrintTable.align["Q_Score"] = "r"
PrintTable.align["Better_Score"] = "r"
PrintTable.align["Worse_Score"] = "r"
PrintTable.float_format["A_Score"] = ".2"
PrintTable.float_format["O_Score"] = ".2"
PrintTable.float_format["M_Score"] = ".2"
PrintTable.float_format["I_Score"] = ".2"
PrintTable.float_format["R_Score"] = ".2"
PrintTable.float_format["Q_Score"] = ".2"
PrintTable.float_format["Better_Score"] = ".2"
PrintTable.float_format["Worse_Score"] = ".2"
print("附表 各需求的KANO评价维度计算结果")
print(
PrintTable.get_string(
fields=[
"Requirement_Label",
"A_Score",
"O_Score",
"M_Score",
"I_Score",
"R_Score",
"Q_Score",
]
.assign(
魅力属性=lambda dataframe: dataframe["魅力属性"].apply(
lambda cell: f"{cell:.2f}"
),
期望属性=lambda dataframe: dataframe["期望属性"].apply(
lambda cell: f"{cell:.2f}"
),
必备属性=lambda dataframe: dataframe["必备属性"].apply(
lambda cell: f"{cell:.2f}"
),
无差异属性=lambda dataframe: dataframe["无差异属性"].apply(
lambda cell: f"{cell:.2f}"
),
反向属性=lambda dataframe: dataframe["反向属性"].apply(
lambda cell: f"{cell:.2f}"
),
可疑结果=lambda dataframe: dataframe["可疑结果"].apply(
lambda cell: f"{cell:.2f}"
),
满意系数=lambda dataframe: dataframe["满意系数"].apply(
lambda cell: f"{cell:.2f}"
),
不满意系数=lambda dataframe: dataframe["不满意系数"].apply(
lambda cell: f"{cell:.2f}"
),
)
)
print("字段说明:")
save_as_workbook(worksheets=[("Sheet1", kano)], workbook_name="result.xlsx")
print(
"1Requirement_Label为需求名称A_Score为兴奋型需求分数O_Score为期望型需求分数M_Score为必备型需求分数I_Score为无差型需求分数R_Score为反向型需求分数Q_Score为可疑型需求分数。"
)
print("")
print("3.3 定义需求类型和Better-Worse分数")
print("")
# 以KANO评价维度中最高分定义需求类型
Requirement_Types = list(
KANO_Report[
["A_Score", "O_Score", "M_Score", "I_Score", "R_Score", "Q_Score"]
].idxmax(axis="columns")
)
# 通过列名截取需求类型(第一位、'_'左侧字符串)
Requirement_Types = [
x[0 : x.find("_")] for x in Requirement_Types if isinstance(x, str)
]
KANO_Report["Requirement_Type"] = Requirement_Types
# 计算Better分数
KANO_Report["Better_Score"] = (
(KANO_Report["A_Score"] + KANO_Report["O_Score"])
/ (
KANO_Report["A_Score"]
+ KANO_Report["O_Score"]
+ KANO_Report["M_Score"]
+ KANO_Report["I_Score"]
)
* 100
)
# 计算Worse分数
KANO_Report["Worse_Score"] = (
-1
* (KANO_Report["O_Score"] + KANO_Report["M_Score"])
/ (
KANO_Report["A_Score"]
+ KANO_Report["O_Score"]
+ KANO_Report["M_Score"]
+ KANO_Report["I_Score"]
)
* 100
)
# 打印表格
PrintTable = prettytable.PrettyTable()
PrintTable.field_names = KANO_Report.columns.tolist()
for Index in KANO_Report.index.tolist():
PrintTable.add_row(KANO_Report.loc[Index])
PrintTable.align["Requirement_Label"] = "l"
PrintTable.align["Requirement_Type"] = "r"
PrintTable.align["Better_Score"] = "r"
PrintTable.align["Worse_Score"] = "r"
PrintTable.float_format["Better_Score"] = ".2"
PrintTable.float_format["Worse_Score"] = ".2"
print("附表 各需求的KANO评价维度计算结果")
print(
PrintTable.get_string(
fields=["Requirement_Label", "Requirement_Type", "Better_Score", "Worse_Score"]
)
)
print("字段说明:")
print("1Requirement_Label为需求名称Requirement_Type为需求类型。")
print(
"2A为兴奋型需求表示产品具有该种需求则用户满意度会提高没有该种需求则用户满意度不会降低。建议给予P3关注。"
)
print(
"3O为期望型需求表示产品具有该种需求则用户满意度会提高没有该种需求则用户满意度会降低。建议给予P1关注。"
)
print(
"4M为必备型需求表示产品具有该种需求则用户满意度不会提高没有该种需求则用户满意度会降低。建议给予P2关注。"
)
print(
"5I为无差型需求表示产品具有该种需求则用户满意度不会提高没有该种需求则用户满意度不会降低。建议给予P4关注。"
)
print("6R为反向型需求建议给予关注。")
print("7Q为可疑型需求建议给予关注。")
print(
"8Better_Score为Better分数。表示如果产品具有某种需求则用户满意度会提高数值越大提高越大。"
)
print(
"9Worse_Score为Worse分数。表示如果产品没有某种需求则用户满意度会下降数值越小下降越大。"
)
print("")
print("已完成")

BIN
KANO/result.xlsx Normal file

Binary file not shown.