Python/KANO/main.py

499 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
脚本说明:
本脚本用于KANO需求分析,能够处理问卷数据并输出需求分类结果
"""
import pandas
import prettytable
# Step 1: load the questionnaire sheet and clean it before any analysis.
# flush=True makes the progress text visible before the (potentially slow)
# Excel read, since the line is not terminated by a newline.
print("1 打开并读取Excel文件...", end="", flush=True)
try:
    dataset = pandas.read_excel(io="KANO模型客户调研问卷.xlsx", sheet_name="问卷结果")
    # Accepted answer options; any other cell value is treated as missing.
    alternatives = ["非常满意", "理应如此", "无所谓", "勉强接受", "不满意"]
    # Data cleaning pipeline.
    dataset = (
        # Per the original author's note: column 1 is the ID, column 2 the
        # submitter and column 3 the submission time; columns 4 onward hold
        # the answers, so the first three columns are dropped.
        dataset.iloc[:, 3:]
        # Drop samples whose answers are all identical.
        .loc[lambda dataframe: dataframe.nunique(axis=1) != 1]
        # Replace every value outside the accepted options with a missing value.
        .map(lambda cell: cell if cell in alternatives else pandas.NA)
        # Drop samples that contain any missing value.
        .dropna(axis="index", how="any")
    )
    # Number of remaining samples; an empty data set is an error.
    samples_size = dataset.shape[0]
    if samples_size == 0:
        raise ValueError("样本数为0")
    # Number of answer columns; it must be even, so an odd count is an error.
    columns_counts = dataset.shape[1]
    if columns_counts % 2 != 0:
        raise ValueError("列数为奇数")
    print(f"已完成,样本数为{samples_size}")
except Exception as exception:
    # Top-level boundary of the script: report the problem and stop.
    print(f"发生异常:{str(exception)}")
    # SystemExit works without the `site` module (unlike the exit() helper)
    # and the non-zero status tells the caller that the run failed.
    raise SystemExit(1)
# Keep only the question/answer columns; per the original note they start at
# the 7th column.
# NOTE(review): `DataSet` is not assigned anywhere earlier in the visible
# source (only the lowercase `dataset` is), so this line raises NameError as
# written — presumably the read_excel call that created it is missing; confirm
# against the upstream file.
DataSet = DataSet.iloc[:, 6:]
# Number of samples (rows) and number of questions (columns).
Sample_Size = DataSet.shape[0]
Question_Amount = DataSet.shape[1]
# An odd number of questions is reported and the script stops.
if Question_Amount % 2:
    print("题目数量为奇数,请检查")
    print()
    exit()
# The requirement count is half the number of questions.
Requirement_Amount = Question_Amount // 2
Summary_Values = (Sample_Size, Question_Amount, Requirement_Amount)
print("数据集中包含 %d 份样本, %d 个问题(涉及 %d 个需求)" % Summary_Values)
print()
print("*" * 100)
print()
# Step 2: remove samples that cannot be analysed.
print("2 数据预处理")
print("")
print("2.1 检查并删除不规范样本")
print("")
# Accepted answer options of the questionnaire.
Alternatives = ["我很喜欢", "理所应当", "无所谓", "勉强接受", "我很不喜欢"]
# Replace every answer outside the accepted options with a missing value.
# DataFrame.map is the element-wise successor of the deprecated
# DataFrame.applymap and is already used earlier in this file.
DataSet = DataSet.map(lambda x: x if x in Alternatives else None)
# Drop samples that contain any missing value. Reassigning instead of using
# inplace=True avoids mutating a frame that may be a slice of another one.
DataSet = DataSet.dropna(axis="index", how="any")
# Drop samples whose answers are all identical.
DataSet = DataSet[DataSet.nunique(axis="columns") != 1]
# Number of valid samples left after cleaning.
Sample_Size = DataSet.shape[0]
print("处理后,有效样本数量为 %d" % (Sample_Size))
print("")
print("*" * 100)
print("")
# Step 3: data processing.
print("3 数据处理")
print()
# Build the list of requirement names from the question texts (column labels).
# The original note describes taking the text to the right of '有' and to the
# left of a second delimiter.
# NOTE(review): both find() arguments are empty strings in the visible source,
# so find() returns 0 and the slice is always text[1:0] == "" — the delimiters
# appear to have been lost; confirm against the upstream file.
Requirement_Labels = []
for Question_Text in DataSet.columns.tolist():
    # Only string labels that contain "具有" contribute a requirement name.
    if not isinstance(Question_Text, str) or "具有" not in Question_Text:
        continue
    Label_Start = Question_Text.find("") + 1
    Label_End = Question_Text.find("")
    Requirement_Labels.append(Question_Text[Label_Start:Label_End])
# Step 3.1: print one KANO cross table per requirement.
print("3.1 绘制KANO评价结果分类对照表")
print("")
for Question_Number in range(Requirement_Amount):
    # Each requirement owns an adjacent pair of columns: the answer given when
    # the requirement is provided, followed by the answer given when it is
    # not. Requirement k therefore lives in columns 2k and 2k + 1. (Indexing
    # with k and k + 1 would mix columns of neighbouring requirements and
    # never read the second half of the questions.)
    Provide_Answers = DataSet.iloc[:, 2 * Question_Number]
    Not_Provide_Answers = DataSet.iloc[:, 2 * Question_Number + 1]
    # Cross table: rows are the "provided" answers, columns the "not provided"
    # answers, cells the number of samples with that answer pair.
    KANO = pandas.DataFrame(data=[], index=Alternatives, columns=Alternatives)
    for Column_Label in Alternatives:
        for Index_Label in Alternatives:
            KANO.loc[Index_Label, Column_Label] = int(
                (
                    (Provide_Answers == Index_Label)
                    & (Not_Provide_Answers == Column_Label)
                ).sum()
            )
    # Rename the rows (same order as Alternatives).
    KANO.index = [
        "Provide_Like",
        "Provide_Should",
        "Provide_Indifferent",
        "Provide_Grudging",
        "Provide_Hate",
    ]
    # Turn the row labels into a regular first column.
    KANO.reset_index(inplace=True)
    # Rename the columns; the first (row-label) column gets an empty header.
    KANO.columns = [
        "",
        "Not_Provide_Like",
        "Not_Provide_Should",
        "Not_Provide_Indifferent",
        "Not_Provide_Grudging",
        "Not_Provide_Hate",
    ]
    # Render the table: right-align the counts, left-align the row labels.
    PrintTable = prettytable.PrettyTable()
    PrintTable.field_names = KANO.columns.tolist()
    for Index in KANO.index.tolist():
        PrintTable.add_row(KANO.loc[Index].tolist())
    PrintTable.align = "r"
    PrintTable.align[""] = "l"
    PrintTable.float_format = "."
    print(
        "附表 需求%d%s的KANO评价结果分类对照表"
        % (Question_Number + 1, Requirement_Labels[Question_Number])
    )
    print(PrintTable)
    print("")
# Field legend, printed once after all tables.
# NOTE(review): the original indentation was lost, so it is not certain
# whether the legend was meant to be printed once or after every table.
print("字段说明:")
print(
    "1Not_Provide_Like为不提供该需求、用户表示“我很喜欢”Not_Provide_Should为不提供该需求、用户表示“理所应当”Not_Provide_Indifferent为不提供该需求、用户表示“无所谓”Not_Provide_Grudging为不提供该需求、用户表示“勉强接受”Not_Provide_Hate为不提供该需求、用户表示“我很不喜欢”。"
)
# Provide_Hate is a "provided" row, so its description must say the
# requirement is provided (the text previously said "not provided").
print(
    "1Provide_Like为提供该需求、用户表示“我很喜欢”Provide_Should为提供该需求、用户表示“理所应当”Provide_Indifferent为提供该需求、用户表示“无所谓”Provide_Grudging为提供该需求、用户表示“勉强接受”Provide_Hate为提供该需求、用户表示“我很不喜欢”。"
)
print("")
# Step 3.2: compute the six KANO dimension scores of every requirement.
print("3.2 计算KANO评价维度")
print("")
# Score columns of the report, in display order.
Score_Columns = ["A_Score", "O_Score", "M_Score", "I_Score", "R_Score", "Q_Score"]
# Report table: one row per requirement, one column per KANO dimension.
KANO_Report = pandas.DataFrame(
    data=[],
    columns=["Requirement_Label"] + Score_Columns,
    dtype="float",
)
KANO_Report["Requirement_Label"] = Requirement_Labels
# Answer groups used by the scoring rules below.
Like_Options = ["我很喜欢"]
Neutral_Options = ["理所应当", "无所谓", "勉强接受"]
Hate_Options = ["我很不喜欢"]


def _Answer_Share(Provide_Answers, Not_Provide_Answers, Provide_Options, Not_Provide_Options):
    """Return the percentage of valid samples matching both option sets.

    A sample matches when its "provided" answer is in Provide_Options and its
    "not provided" answer is in Not_Provide_Options. The result is the match
    count divided by Sample_Size, times 100.
    """
    Matched = int(
        (
            Provide_Answers.isin(Provide_Options)
            & Not_Provide_Answers.isin(Not_Provide_Options)
        ).sum()
    )
    return Matched / Sample_Size * 100


for Question_Number in range(Requirement_Amount):
    # Requirement k owns the adjacent column pair 2k ("provided" answer) and
    # 2k + 1 ("not provided" answer). Indexing with k and k + 1 would mix
    # columns of neighbouring requirements.
    Provide_Answers = DataSet.iloc[:, 2 * Question_Number]
    Not_Provide_Answers = DataSet.iloc[:, 2 * Question_Number + 1]
    # Attractive (A): liked when provided, neutral when not provided.
    A_Score = round(
        _Answer_Share(Provide_Answers, Not_Provide_Answers, Like_Options, Neutral_Options),
        2,
    )
    KANO_Report.loc[Question_Number, "A_Score"] = A_Score
    # One-dimensional (O): liked when provided, disliked when not provided.
    O_Score = round(
        _Answer_Share(Provide_Answers, Not_Provide_Answers, Like_Options, Hate_Options),
        2,
    )
    KANO_Report.loc[Question_Number, "O_Score"] = O_Score
    # Must-be (M): neutral when provided, disliked when not provided.
    M_Score = round(
        _Answer_Share(Provide_Answers, Not_Provide_Answers, Neutral_Options, Hate_Options),
        2,
    )
    KANO_Report.loc[Question_Number, "M_Score"] = M_Score
    # Indifferent (I): neutral in both cases.
    I_Score = round(
        _Answer_Share(Provide_Answers, Not_Provide_Answers, Neutral_Options, Neutral_Options),
        2,
    )
    KANO_Report.loc[Question_Number, "I_Score"] = I_Score
    # Reverse (R): anything but "like" when provided and anything but "hate"
    # when not provided, minus the (already rounded) indifferent share that
    # this selection also contains.
    R_Score = round(
        _Answer_Share(
            Provide_Answers,
            Not_Provide_Answers,
            Neutral_Options + Hate_Options,
            Like_Options + Neutral_Options,
        )
        - I_Score,
        2,
    )
    KANO_Report.loc[Question_Number, "R_Score"] = R_Score
    # Questionable (Q): the same extreme answer in both cases.
    Q_Score = round(
        _Answer_Share(Provide_Answers, Not_Provide_Answers, Like_Options, Like_Options)
        + _Answer_Share(Provide_Answers, Not_Provide_Answers, Hate_Options, Hate_Options),
        2,
    )
    KANO_Report.loc[Question_Number, "Q_Score"] = Q_Score
# Render the report: left-align the names, right-align the scores and show
# them with two decimals. Only columns that exist in this table are styled.
PrintTable = prettytable.PrettyTable()
PrintTable.field_names = KANO_Report.columns.tolist()
for Index in KANO_Report.index.tolist():
    PrintTable.add_row(KANO_Report.loc[Index].tolist())
PrintTable.align["Requirement_Label"] = "l"
for Score_Column in Score_Columns:
    PrintTable.align[Score_Column] = "r"
    PrintTable.float_format[Score_Column] = ".2"
print("附表 各需求的KANO评价维度计算结果")
print(PrintTable.get_string(fields=["Requirement_Label"] + Score_Columns))
print("字段说明:")
print(
    "1Requirement_Label为需求名称A_Score为兴奋型需求分数O_Score为期望型需求分数M_Score为必备型需求分数I_Score为无差型需求分数R_Score为反向型需求分数Q_Score为可疑型需求分数。"
)
print("")
# Step 3.3: derive the requirement type and the Better / Worse scores.
print("3.3 定义需求类型和Better-Worse分数")
print()
# The requirement type is the dimension with the highest score in each row.
Dimension_Columns = ["A_Score", "O_Score", "M_Score", "I_Score", "R_Score", "Q_Score"]
Top_Dimensions = KANO_Report[Dimension_Columns].idxmax(axis="columns").tolist()
# Keep only the part of the column name before the first underscore.
Requirement_Types = []
for Dimension_Name in Top_Dimensions:
    if isinstance(Dimension_Name, str):
        Requirement_Types.append(Dimension_Name[: Dimension_Name.find("_")])
KANO_Report["Requirement_Type"] = Requirement_Types
# Shared denominator of both scores: A + O + M + I.
Score_Total = (
    KANO_Report["A_Score"]
    + KANO_Report["O_Score"]
    + KANO_Report["M_Score"]
    + KANO_Report["I_Score"]
)
# Better score: (A + O) / (A + O + M + I) * 100.
Better_Numerator = KANO_Report["A_Score"] + KANO_Report["O_Score"]
KANO_Report["Better_Score"] = Better_Numerator / Score_Total * 100
# Worse score: -1 * (O + M) / (A + O + M + I) * 100.
Worse_Numerator = -1 * (KANO_Report["O_Score"] + KANO_Report["M_Score"])
KANO_Report["Worse_Score"] = Worse_Numerator / Score_Total * 100
# Render the table with every report column loaded; only four are displayed.
PrintTable = prettytable.PrettyTable()
PrintTable.field_names = KANO_Report.columns.tolist()
for Index in KANO_Report.index:
    PrintTable.add_row(KANO_Report.loc[Index])
PrintTable.align["Requirement_Label"] = "l"
for Right_Aligned in ("Requirement_Type", "Better_Score", "Worse_Score"):
    PrintTable.align[Right_Aligned] = "r"
for Two_Decimals in ("Better_Score", "Worse_Score"):
    PrintTable.float_format[Two_Decimals] = ".2"
print("附表 各需求的KANO评价维度计算结果")
Shown_Fields = ["Requirement_Label", "Requirement_Type", "Better_Score", "Worse_Score"]
print(PrintTable.get_string(fields=Shown_Fields))
# Field legend, one line per entry.
print("字段说明:")
for Legend_Line in (
    "1Requirement_Label为需求名称Requirement_Type为需求类型。",
    "2A为兴奋型需求表示产品具有该种需求则用户满意度会提高没有该种需求则用户满意度不会降低。建议给予P3关注。",
    "3O为期望型需求表示产品具有该种需求则用户满意度会提高没有该种需求则用户满意度会降低。建议给予P1关注。",
    "4M为必备型需求表示产品具有该种需求则用户满意度不会提高没有该种需求则用户满意度会降低。建议给予P2关注。",
    "5I为无差型需求表示产品具有该种需求则用户满意度不会提高没有该种需求则用户满意度不会降低。建议给予P4关注。",
    "6R为反向型需求建议给予关注。",
    "7Q为可疑型需求建议给予关注。",
    "8Better_Score为Better分数。表示如果产品具有某种需求则用户满意度会提高数值越大提高越大。",
    "9Worse_Score为Worse分数。表示如果产品没有某种需求则用户满意度会下降数值越小下降越大。",
):
    print(Legend_Line)
print()