251029更新

This commit is contained in:
marslbr 2025-10-29 18:02:40 +08:00
parent af5b539529
commit 376d8448a7
1 changed file with 47 additions and 37 deletions

View File

@@ -12,52 +12,62 @@ import pandas
from utils.pandas_extension import save_as_workbook
print("正在清洗阿里云日志...", end="")
# 清洗后数据
data_processed = []
try:
print("正在清洗阿里云日志...", end="")
# 打开阿里云日志(将文件对象作为迭代器)
with open("log.csv", "r", encoding="utf-8") as file:
records = csv.DictReader(file)
for record in records:
# noinspection PyDictCreation
data = {}
# 清洗后数据
data_processed = []
# 解析时间戳
data["time"] = datetime.fromtimestamp(int(record["__time__"])).strftime(
"%Y-%m-%d %H:%M:%S"
)
# 打开阿里云日志(将文件对象作为迭代器)
with open("log.csv", "r", encoding="utf-8") as file:
records = csv.DictReader(file)
for record in records:
# noinspection PyDictCreation
data = {}
# 解析MESSAGE
message = record.get("message")
if message is None:
continue
# 解析时间戳
data["time"] = datetime.fromtimestamp(int(record["__time__"])).strftime(
"%Y-%m-%d %H:%M:%S"
)
for line in message.split("\n"):
# 解析用户标识
if line.startswith("user_id"):
if search := re.search(r"user_id\s*:\s*(?P<user_id>\d+)", line):
data["user_id"] = search.group("user_id")
# 解析MESSAGE
message = record.get("message")
if message is None:
continue
# 解析登录设备终端和版本号
if line.startswith("request params"):
# 解析staticTerminal
if search := re.search(
r"staticTerminal=(?P<static_terminal>[^&]+)", line
):
data["static_terminal"] = search.group("static_terminal")
# 解析staticVersion
if search := re.search(
r"staticVersion=(?P<static_version>[^&]+)", line
):
data["static_version"] = search.group("static_version")
for line in message.split("\n"):
# 解析用户标识
if line.startswith("user_id"):
if search := re.search(r"user_id\s*:\s*(?P<user_id>\d+)", line):
data["user_id"] = search.group("user_id")
data_processed.append(data)
# 解析登录设备终端和版本号
if line.startswith("request params"):
# 解析staticTerminal
if search := re.search(
r"staticTerminal=(?P<static_terminal>[^&]+)", line
):
data["static_terminal"] = search.group("static_terminal")
# 解析staticVersion
if search := re.search(
r"staticVersion=(?P<static_version>[^&]+)", line
):
data["static_version"] = search.group("static_version")
dataset = pandas.DataFrame(data=data_processed)
data_processed.append(data)
print("已完成")
dataset = pandas.DataFrame(data=data_processed)
print("已完成")
except FileNotFoundError:
print("未找到阿里云日志文件,脚本终止")
exit()
except Exception as exception:
print(f"发生异常 {exception},脚本终止")
exit()
print("正在保存为工作簿...", end="")