Python/阿里云日志清洗/main.py

77 lines
2.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
"""
清洗阿里云日志25-06-06修订
"""
import csv
import re
from datetime import datetime
import pandas
from utils.pandas_extension import save_as_workbook
# Patterns are compiled once at import time instead of being handed to
# ``re.search`` as raw strings for every log line.
USER_ID_PATTERN = re.compile(r"user_id\s*:\s*(?P<user_id>\d+)")
STATIC_TERMINAL_PATTERN = re.compile(r"staticTerminal=(?P<static_terminal>[^&]+)")
STATIC_VERSION_PATTERN = re.compile(r"staticVersion=(?P<static_version>[^&]+)")
TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def parse_message(message: str) -> dict:
    """Extract the user id, terminal and version from one log message.

    The message is split on ``"\\n"`` and each line is inspected:

    * a line starting with ``user_id`` may yield ``"user_id"`` (digits only);
    * a line starting with ``request params`` may yield ``"static_terminal"``
      and ``"static_version"`` (everything after ``staticTerminal=`` /
      ``staticVersion=`` up to the next ``&``).

    Args:
        message: Raw multi-line message text.

    Returns:
        A dict holding only the fields that were found, as strings. When a
        field occurs on several lines, the last occurrence wins.
    """
    fields = {}
    for line in message.split("\n"):
        if line.startswith("user_id"):
            if found := USER_ID_PATTERN.search(line):
                fields["user_id"] = found.group("user_id")
        elif line.startswith("request params"):
            if found := STATIC_TERMINAL_PATTERN.search(line):
                fields["static_terminal"] = found.group("static_terminal")
            if found := STATIC_VERSION_PATTERN.search(line):
                fields["static_version"] = found.group("static_version")
    return fields


def clean_records(records) -> list:
    """Turn raw log rows into cleaned rows.

    Args:
        records: Iterable of mappings (e.g. a ``csv.DictReader``). Each row
            must provide ``"__time__"`` (a value accepted by ``int``, used as
            a POSIX timestamp); ``"message"`` is optional.

    Returns:
        A list of dicts, one per row that has a message. Each dict holds
        ``"time"`` (local time formatted as ``YYYY-MM-DD HH:MM:SS``) followed
        by whatever ``parse_message`` extracted.

    Raises:
        KeyError: If a row has no ``"__time__"`` key.
        ValueError: If ``"__time__"`` cannot be converted to ``int``.
    """
    cleaned = []
    for record in records:
        # The timestamp is parsed before the message check on purpose: a
        # malformed "__time__" aborts the run even for rows that would be
        # skipped, exactly as the script has always behaved.
        row = {
            "time": datetime.fromtimestamp(int(record["__time__"])).strftime(
                TIME_FORMAT
            )
        }
        message = record.get("message")
        if message is None:
            # Rows without a message carry nothing to extract; drop them.
            continue
        row.update(parse_message(message))
        cleaned.append(row)
    return cleaned


def main() -> None:
    """Clean ``log.csv`` and save the result to ``data_processed.xlsx``.

    Progress and error messages are printed to stdout. If the log file is
    missing, or any other error occurs while reading/cleaning it, a message
    is printed and the process terminates.

    Raises:
        SystemExit: With status 1 when cleaning fails. The status is non-zero
            so that shells and schedulers can detect the failure; the builtin
            ``exit()`` used previously is injected by the ``site`` module for
            interactive use, is not guaranteed to exist, and reported
            success (status 0).
    """
    try:
        print("正在清洗阿里云日志...", end="")
        # "utf-8-sig" decodes plain UTF-8 identically but also strips a
        # leading BOM, which would otherwise become part of the first
        # column name and break the header lookup.
        with open("log.csv", "r", encoding="utf-8-sig") as file:
            # DictReader is consumed lazily, so it must be drained while the
            # file is still open.
            data_processed = clean_records(csv.DictReader(file))
        dataset = pandas.DataFrame(data=data_processed)
        print("已完成")
    except FileNotFoundError:
        print("未找到阿里云日志文件,脚本终止")
        raise SystemExit(1) from None
    except Exception as exception:
        # Top-level boundary of the script: report the problem and stop.
        print(f"发生异常 {exception},脚本终止")
        raise SystemExit(1) from None

    print("正在保存为工作簿...", end="")
    save_as_workbook(
        worksheets=[("Sheet1", dataset)], workbook_name="data_processed.xlsx"
    )
    print("已完成")


if __name__ == "__main__":
    main()