Python/KANO/main.py

183 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
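Script description:
This script performs KANO requirements analysis: it processes questionnaire data and outputs the requirement classification results.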
"""
import re
import numpy
import pandas
from utils.pandas_extension import save_as_workbook
# Step 1: read the questionnaire workbook and clean the answers.
# On success this leaves four module-level names for the later steps:
#   dataset       cleaned answers, one row per respondent
#   alternatives  the five accepted answer texts, in scale order
#   samples_size  number of rows kept after cleaning
#   columns       number of answer columns (validated to be even and non-zero)
# On any failure the message is printed and the script stops with exit status 1.
print("1 打开并读取Excel文件...", end="")
try:
    dataset = pandas.read_excel(io="KANO模型客户调研问卷.xlsx", sheet_name="问卷结果")
    # Accepted answer options
    alternatives = ["非常满意", "理应如此", "无所谓", "勉强接受", "不满意"]
    # Data cleaning
    dataset = (
        # In the raw data column 1 is the id, column 2 the submitter and
        # column 3 the submit time; the answers start at column 4, so the
        # first three columns are dropped.
        dataset.iloc[:, 3:]
        # Drop respondents who gave the same answer to every question.
        .loc[lambda dataframe: dataframe.nunique(axis=1) != 1]
        # Anything outside the accepted options becomes a missing value ...
        .map(lambda cell: cell if cell in alternatives else pandas.NA)
        # ... and rows containing any missing value are removed.
        .dropna(axis="index", how="any")
    )
    # Number of usable samples; nothing can be analysed without any.
    samples_size = dataset.shape[0]
    if samples_size == 0:
        raise Exception("样本数为0")
    # Number of answer columns. The next step consumes them in pairs
    # (column, column + 1), so the count must be non-zero and even.
    columns = dataset.shape[1]
    if columns == 0:
        raise Exception("列数为0")
    if columns % 2 != 0:
        raise Exception("列数为奇数")
    print(f"已完成,样本数为{samples_size}")
except Exception as exception:
    print(f"发生异常:{str(exception)}")
    # `exit()` is an interactive helper injected by the `site` module and
    # reports success (status 0); raise SystemExit directly with a failure code.
    raise SystemExit(1)
# Step 2: build the KANO evaluation table, one row per feature.
# Answer columns come in pairs: the even-positioned column holds the answer to
# "if the product has the feature", the following column the answer to
# "if the product does not have the feature".
print("2 生成KANO评价结果...", end="")
# The three middle answers of the scale, which the KANO table treats alike.
neutral = ["理应如此", "无所谓", "勉强接受"]
# The feature name is the text between the first pair of 【】 in the header.
feature_pattern = re.compile("【(?P<feature>.*?)】")
# Collect one record per feature and build the frame once at the end, instead
# of concatenating onto a growing DataFrame (which re-copies every earlier row
# on each iteration).
records = []
for column in range(0, columns, 2):
    header = str(dataset.columns[column])
    matched = feature_pattern.search(header)
    if matched is None:
        # Fail with the offending header rather than an AttributeError on None.
        raise ValueError(f"无法从列名中提取功能名称:{header}")
    # Feature name
    feature = matched.group("feature")
    # Joint distribution of the two answers for this feature, as shares of all
    # samples; answers that never occur are added back with share 0 so every
    # cell of the 5x5 table can be addressed by label.
    distribution = pandas.crosstab(
        index=dataset.iloc[:, column],  # rows: answer when the feature is present
        columns=dataset.iloc[:, column + 1],  # columns: answer when it is absent
        rownames=[""],
        colnames=[""],
        normalize=True,  # convert counts to proportions
    ).reindex(index=alternatives, columns=alternatives, fill_value=0)
    records.append(
        {
            "功能名称": feature,
            # Attractive: delighted when present, neutral when absent.
            "魅力属性": distribution.loc["非常满意", neutral].sum(),
            # One-dimensional: delighted when present, dissatisfied when absent.
            "期望属性": distribution.loc["非常满意", "不满意"],
            # Must-be: neutral when present, dissatisfied when absent.
            "必备属性": distribution.loc[neutral, "不满意"].sum(),
            # Indifferent: neutral either way.
            "无差异属性": distribution.loc[neutral, neutral].sum().sum(),
            # Reverse: delighted when absent (unless also delighted when
            # present), or dissatisfied when present and neutral when absent.
            "反向属性": distribution.loc[neutral + ["不满意"], "非常满意"].sum()
            + distribution.loc["不满意", neutral].sum(),
            # Questionable: the same extreme answer to both questions.
            "可疑结果": distribution.loc["非常满意", "非常满意"]
            + distribution.loc["不满意", "不满意"],
        }
    )
# KANO evaluation result
kano = pandas.DataFrame(records)
# Compute the better-worse coefficients, label each feature, format and save.
# Both coefficients share one denominator: the summed shares of the attractive,
# one-dimensional, must-be and indifferent categories.
classified_share = (
    kano["魅力属性"] + kano["期望属性"] + kano["必备属性"] + kano["无差异属性"]
)
kano = kano.assign(
    # Better: (attractive + one-dimensional) / classified share
    满意系数=(kano["魅力属性"] + kano["期望属性"]) / classified_share,
    # Worse: (must-be + one-dimensional) / classified share
    不满意系数=(kano["必备属性"] + kano["期望属性"]) / classified_share,
)

# Split the features into four quadrants around the mean of each coefficient.
# The ">=" and "<" masks are computed separately (not as negations of each
# other) so that a NaN coefficient matches neither side and falls through to
# the default label.
better = kano["满意系数"]
worse = kano["不满意系数"]
better_mean = better.mean()
worse_mean = worse.mean()
better_high = better >= better_mean
better_low = better < better_mean
worse_high = worse >= worse_mean
worse_low = worse < worse_mean
kano["需求标签"] = numpy.select(
    condlist=[
        better_high & worse_high,
        better_high & worse_low,
        better_low & worse_low,
        better_low & worse_high,
    ],
    choicelist=[
        "期望型需求(P2)",
        "魅力型需求(P3)",
        "无差异需求(P4)",
        "必备型需求(P1)",
    ],
    default="未分类",
)

# Render every numeric column as text with two decimal places for the report.
for numeric_column in (
    "魅力属性",
    "期望属性",
    "必备属性",
    "无差异属性",
    "反向属性",
    "可疑结果",
    "满意系数",
    "不满意系数",
):
    kano[numeric_column] = kano[numeric_column].map("{:.2f}".format)

# Write the table to the result workbook and close the step-2 progress line.
result_sheets = [("Sheet1", kano)]
save_as_workbook(worksheets=result_sheets, workbook_name="result.xlsx")
print("已完成")