# -*- coding: utf-8 -*-
"""
Convert a working dataset using administrative-division reference data.

Workflow:
    1. Read the working dataset (``Sheet1``) and the administrative-division
       reference table (``Sheet2``) from ``dataset.xlsx``.
    2. For every row, look up the reference code of the province, city and
       county by name, normalise the row's own codes to text, and flag rows
       that cannot be matched.
    3. Save the result to ``results.xlsx``.

Note:
    The administrative-division reference data comes from
    https://lbsyun.baidu.com/faq/api?title=webapi/download
"""

import os
import sys

import numpy
import pandas

sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

from utils.pandas_extension import save_as_workbook

# Column that receives the comparison verdict, and the verdict used whenever a
# row cannot be matched automatically.
RESULT_COLUMN = "与区划编码对比结果"
PENDING = "待确定"
# Column that receives the "belongs to province/city" hint for counties.
NOTE_COLUMN = "与区划编码对比说明"

# Every column the conversion loop may write text into.
OUTPUT_COLUMNS = [
    "省名称",
    "省编码",
    "省区划编码",
    "市名称",
    "市编码",
    "市区划编码",
    "区县名称",
    "区县编码",
    "区县区划编码",
    RESULT_COLUMN,
    NOTE_COLUMN,
]


def _first_match_lookup(frame, key_column, value_columns):
    """Map each distinct key to the value tuple of its first row in ``frame``.

    Equivalent to ``frame.loc[frame[key_column] == key, value_columns].iloc[0]``
    for every key, but built once instead of scanning ``frame`` per lookup.
    """
    first_rows = frame.drop_duplicates(subset=key_column, keep="first")
    return dict(
        zip(
            first_rows[key_column],
            first_rows[value_columns].itertuples(index=False, name=None),
        )
    )


def _code_as_text(value):
    """Return a numeric code as an integer string; leave missing values alone."""
    if pandas.isna(value):
        # A name without a code stays missing and is blanked by the final
        # fillna instead of aborting the whole run.
        return value
    return str(int(value))


def _fill_level(row, label, lookup):
    """Fill the reference code for one level (``省``, ``市`` or ``区县``).

    The level's columns are ``<label>名称``, ``<label>编码`` and
    ``<label>区划编码``. Returns the matched reference tuple, or ``None`` when
    the name is missing or not found in ``lookup``; in both of those cases the
    row is flagged as pending.
    """
    name_column = label + "名称"
    code_column = label + "编码"
    reference_column = label + "区划编码"

    name = row[name_column]
    if pandas.isna(name):
        placeholder = label + "名称为空"
        row[reference_column] = placeholder
        row[code_column] = placeholder
        row[name_column] = placeholder
        row[RESULT_COLUMN] = PENDING
        return None

    match = lookup.get(name)
    if match is None:
        row[reference_column] = "未查询到" + label + "区划编码"
        row[RESULT_COLUMN] = PENDING
    else:
        row[reference_column] = str(match[0])
    # The row's own code is normalised to text whether or not a match exists.
    row[code_column] = _code_as_text(row[code_column])
    return match


def _clear_level(row, label):
    """Blank out a level that does not apply to the row's division type."""
    row[label + "区划编码"] = ""
    row[label + "编码"] = ""
    row[label + "名称"] = ""


print("1、读取Excel并创建数据集...", end="")

try:
    # Working dataset to be converted.
    dataset = pandas.read_excel(io="dataset.xlsx", sheet_name="Sheet1")
    # Administrative-division reference table.
    dataset_regions = pandas.read_excel(io="dataset.xlsx", sheet_name="Sheet2")
except Exception:
    # Exception (not a bare except) so Ctrl-C still interrupts the script.
    print("读取Excel或创建数据集发生异常,脚本终止")
    print()
    sys.exit()

print("已完成")
print()

print("2、转化数据")
print()

print("2.1 基于行政区划数据集生成省级、地级和县级行政区字典", end="")

# Distinct province / city / county names with their codes.
regions = dataset_regions[
    ["省级名称", "省级编码", "地级名称", "地级编码", "县级名称", "县级编码"]
].drop_duplicates()

# name -> (code,) for provinces and cities; name -> (code, province, city)
# for counties. The first matching reference row wins.
province_lookup = _first_match_lookup(dataset_regions, "省级名称", ["省级编码"])
city_lookup = _first_match_lookup(dataset_regions, "地级名称", ["地级编码"])
county_lookup = _first_match_lookup(
    regions, "县级名称", ["县级编码", "省级名称", "地级名称"]
)

print("已完成")
print()

print("2.2 遍历并转化数据...", end="")

# The loop writes text into columns that may have been read as numeric (codes)
# or all-NaN floats (empty columns); cast them to object up front so the row
# write-back never has to upcast a column implicitly.
text_columns = [column for column in OUTPUT_COLUMNS if column in dataset.columns]
dataset = dataset.astype({column: object for column in text_columns})

dataset.replace(
    to_replace={numpy.nan: pandas.NA, None: pandas.NA, "": pandas.NA}, inplace=True
)

# iterrows yields index labels; enumerate supplies the position iloc needs.
for position, (_, row) in enumerate(dataset.iterrows()):
    _fill_level(row, "省", province_lookup)

    division_type = row["区划类型"]

    if division_type != "省":
        _fill_level(row, "市", city_lookup)
    else:
        _clear_level(row, "市")

    if division_type == "区县":
        county_match = _fill_level(row, "区县", county_lookup)
        if county_match is not None and (
            row["省名称"] == "省名称为空" or row["市名称"] == "市名称为空"
        ):
            # Province or city name was missing: record where the reference
            # table says this county belongs.
            row[NOTE_COLUMN] = "该区县所属{}/{}".format(
                str(county_match[1]), str(county_match[2])
            )
    else:
        _clear_level(row, "区县")

    dataset.iloc[position] = row

dataset.fillna(value="", inplace=True)

print("已完成")
print()

print("正在保存为EXCEL...", end="")

save_as_workbook(workbook_name="results.xlsx", worksheets=[("Sheet1", dataset)])

print("已完成")
print()