70 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| 
 | ||
| """
 | ||
| 清洗阿里云日志,25-06-06修订
 | ||
| """
 | ||
| import csv
 | ||
| import os
 | ||
| import re
 | ||
| import sys
 | ||
| from datetime import datetime
 | ||
| 
 | ||
| import pandas
 | ||
| 
 | ||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 | ||
| 
 | ||
| from utils.pandas_extension import save_as_workbook
 | ||
| 
 | ||
# Patterns are compiled once here instead of being rebuilt by name on every
# log line inside the parsing loop.
_USER_ID_PATTERN = re.compile(r"user_id\s*:\s*(?P<user_id>\d+)")
_STATIC_TERMINAL_PATTERN = re.compile(r"staticTerminal=(?P<static_terminal>[^&]+)")
_STATIC_VERSION_PATTERN = re.compile(r"staticVersion=(?P<static_version>[^&]+)")


def parse_message(message: str) -> dict:
    """Extract the user id, terminal and version from one log message.

    The message is scanned line by line:

    * a line starting with ``user_id`` contributes ``user_id`` (digits only);
    * a line starting with ``request params`` contributes ``static_terminal``
      and ``static_version``, each captured up to the next ``&``.

    Args:
        message: Raw, possibly multi-line, text of the ``message`` column.

    Returns:
        A dict holding only the fields that were found, in the order they
        were first encountered. When a field occurs more than once, the last
        occurrence wins.
    """
    fields = {}

    # splitlines() also copes with "\r\n" line endings, so a captured value
    # at the end of a line never carries a stray "\r".
    for line in message.splitlines():
        # User identifier.
        if line.startswith("user_id"):
            if found := _USER_ID_PATTERN.search(line):
                fields["user_id"] = found.group("user_id")

        # Login device terminal and client version.
        if line.startswith("request params"):
            if found := _STATIC_TERMINAL_PATTERN.search(line):
                fields["static_terminal"] = found.group("static_terminal")
            if found := _STATIC_VERSION_PATTERN.search(line):
                fields["static_version"] = found.group("static_version")

    return fields


def clean_records(records) -> list:
    """Turn raw log records into flat dicts ready for a DataFrame.

    Args:
        records: Iterable of mappings (e.g. rows from ``csv.DictReader``).
            Each one must provide ``__time__`` and may provide ``message``.

    Returns:
        One dict per record that has a ``message``; records without one are
        skipped. Every dict starts with ``time`` followed by whatever
        ``parse_message`` extracted.

    Raises:
        KeyError: If a record has no ``__time__`` key.
        ValueError: If ``__time__`` is not an integer string.
    """
    cleaned = []

    for record in records:
        # NOTE(review): assumes ``__time__`` is a POSIX timestamp in seconds.
        # fromtimestamp() renders it in the machine's local time zone.
        time_text = datetime.fromtimestamp(int(record["__time__"])).strftime(
            "%Y-%m-%d %H:%M:%S"
        )

        message = record.get("message")
        if message is None:
            continue

        # "time" goes first so it stays the leading column of the DataFrame.
        cleaned.append({"time": time_text, **parse_message(message)})

    return cleaned


def main() -> None:
    """Clean ``log.csv`` and save the result as ``data_processed.xlsx``."""
    # flush=True: the text has no trailing newline, so without a flush it
    # could stay buffered until the step has already finished.
    print("正在清洗阿里云日志...", end="", flush=True)

    # newline="" is what the csv module asks for, so that line breaks embedded
    # in quoted fields (the multi-line message column) are passed through
    # untouched.
    with open("log.csv", "r", encoding="utf-8", newline="") as file:
        data_processed = clean_records(csv.DictReader(file))

    dataset = pandas.DataFrame(data=data_processed)

    print("已完成")

    print("正在保存为工作簿...", end="", flush=True)

    # Presumably writes the frame to an .xlsx workbook; the helper lives in
    # utils.pandas_extension and is not visible from this file.
    save_as_workbook(worksheets=[("Sheet1", dataset)], workbook_name="data_processed.xlsx")

    print("已完成")


if __name__ == "__main__":
    main()
 |