# -*- coding: utf-8 -*-

"""
Clean Alibaba Cloud logs (revised 25-06-06).

Reads ``log.csv``, extracts the timestamp, user id, login terminal and client
version from every record, and saves the result to ``data_processed.xlsx``.
"""
import csv
import os
import re
import sys
from datetime import datetime
from typing import Dict, Iterable, List, Mapping, Optional

import pandas

# Compiled once at import time instead of on every log line. Each pattern
# exposes a named group that matches the output key it fills.
USER_ID_PATTERN = re.compile(r"user_id\s*:\s*(?P<user_id>\d+)")
STATIC_TERMINAL_PATTERN = re.compile(r"staticTerminal=(?P<static_terminal>[^&]+)")
STATIC_VERSION_PATTERN = re.compile(r"staticVersion=(?P<static_version>[^&]+)")


def parse_record(record: Mapping[str, Optional[str]]) -> Optional[Dict[str, str]]:
    """Extract the cleaned fields from a single raw log record.

    Args:
        record: One row of the log CSV. ``__time__`` must hold a Unix
            timestamp in seconds; ``message`` holds the multi-line log text.

    Returns:
        A dict that always contains ``time`` and, when present in the message,
        ``user_id``, ``static_terminal`` and ``static_version``. ``None`` is
        returned when the record has no ``message`` value.

    Raises:
        KeyError: If ``__time__`` is missing from the record.
        ValueError: If ``__time__`` is not an integer string.
    """
    data = {
        # Rendered in the local timezone of the machine running the script.
        "time": datetime.fromtimestamp(int(record["__time__"])).strftime(
            "%Y-%m-%d %H:%M:%S"
        )
    }

    message = record.get("message")
    if message is None:
        return None

    for line in message.split("\n"):
        # User identifier
        if line.startswith("user_id"):
            if search := USER_ID_PATTERN.search(line):
                data["user_id"] = search.group("user_id")

        # Login device terminal and version number, taken from the query string
        if line.startswith("request params"):
            if search := STATIC_TERMINAL_PATTERN.search(line):
                data["static_terminal"] = search.group("static_terminal")
            if search := STATIC_VERSION_PATTERN.search(line):
                data["static_version"] = search.group("static_version")

    return data


def clean_records(
    records: Iterable[Mapping[str, Optional[str]]]
) -> List[Dict[str, str]]:
    """Parse every record, dropping the ones that have no message."""
    parsed = (parse_record(record) for record in records)
    return [data for data in parsed if data is not None]


def main() -> None:
    """Clean ``log.csv`` and save the result as ``data_processed.xlsx``."""
    # The shared ``utils`` package lives one directory above this script, so
    # the parent directory has to be on ``sys.path`` before importing it.
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    from utils.pandas_extension import save_as_workbook

    print("正在清洗阿里云日志...", end="")

    # Stream the log file row by row through the CSV reader
    with open("log.csv", "r", encoding="utf-8") as file:
        data_processed = clean_records(csv.DictReader(file))

    dataset = pandas.DataFrame(data=data_processed)

    print("已完成")

    print("正在保存为工作簿...", end="")

    save_as_workbook(
        worksheets=[("Sheet1", dataset)], workbook_name="data_processed.xlsx"
    )

    print("已完成")


if __name__ == "__main__":
    main()