# -*- coding: utf-8 -*-
"""
Clean an Alibaba Cloud log export (revised 2025-06-06).

Reads ``log.csv``, extracts the timestamp, user id, terminal and version from
each record's message, and hands the resulting table to ``save_as_workbook``
under the workbook name ``data_processed.xlsx``.
"""

import csv
import re
from datetime import datetime

import pandas

from utils.pandas_extension import save_as_workbook

# Compiled once at import time instead of on every log line. Each named group
# matches the key that the extracted value is stored under.
_USER_ID_PATTERN = re.compile(r"user_id\s*:\s*(?P<user_id>\d+)")
_STATIC_TERMINAL_PATTERN = re.compile(r"staticTerminal=(?P<static_terminal>[^&]+)")
_STATIC_VERSION_PATTERN = re.compile(r"staticVersion=(?P<static_version>[^&]+)")


def _parse_message(message: str) -> dict:
    """Extract the user id, terminal and version from one log message.

    Args:
        message: Message text of a log record; it is split on newline
            characters and every resulting line is inspected.

    Returns:
        A dict holding whichever of ``"user_id"``, ``"static_terminal"`` and
        ``"static_version"`` were found. When a field occurs on several
        lines, the last occurrence wins.
    """
    fields = {}
    for line in message.split("\n"):
        # User identifier, e.g. a line of the form "user_id: 123".
        if line.startswith("user_id"):
            if search := _USER_ID_PATTERN.search(line):
                fields["user_id"] = search.group("user_id")

        # Login terminal and version are taken from the request parameters,
        # where values run up to the next "&".
        if line.startswith("request params"):
            if search := _STATIC_TERMINAL_PATTERN.search(line):
                fields["static_terminal"] = search.group("static_terminal")
            if search := _STATIC_VERSION_PATTERN.search(line):
                fields["static_version"] = search.group("static_version")
    return fields


print("正在清洗阿里云日志...", end="")

# Cleaned rows: one dict per log record that carries a message.
data_processed = []

# Open the log export and iterate over it record by record.
with open("log.csv", "r", encoding="utf-8") as file:
    records = csv.DictReader(file)
    for record in records:
        # The timestamp is parsed first, so a malformed "__time__" value
        # raises even for records that are skipped below.
        data = {
            "time": datetime.fromtimestamp(int(record["__time__"])).strftime(
                "%Y-%m-%d %H:%M:%S"
            )
        }

        # Records without a message column value contribute no row.
        message = record.get("message")
        if message is None:
            continue

        data.update(_parse_message(message))
        data_processed.append(data)

dataset = pandas.DataFrame(data=data_processed)
print("已完成")

print("正在保存为工作簿...", end="")
save_as_workbook(worksheets=[("Sheet1", dataset)], workbook_name="data_processed.xlsx")
print("已完成")