137 lines
5.0 KiB
Python
137 lines
5.0 KiB
Python
# -*- coding: utf-8 -*-
|
||
"""
|
||
脚本说明:
|
||
根据行政区划数据就待转化数据集进行转化
|
||
备注:
|
||
行政区划数据集来源于 https://lbsyun.baidu.com/faq/api?title=webapi/download
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
|
||
import numpy
|
||
import pandas
|
||
|
||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||
from utils.pandas_extension import save_as_workbook
|
||
|
||
print("1、读取Excel并创建数据集...", end="")
try:
    # Open the workbook once and pull both worksheets out of it; passing a
    # list as sheet_name returns a dict keyed by sheet name.
    worksheets = pandas.read_excel(
        io="dataset.xlsx", sheet_name=["Sheet1", "Sheet2"]
    )
except Exception as error:
    # Catch Exception rather than everything so Ctrl-C and SystemExit still
    # propagate normally.
    print("读取Excel或创建数据集发生异常,脚本终止")
    print()
    # Report the underlying cause on stderr so the failure can be diagnosed
    # without changing what is written to stdout.
    print(repr(error), file=sys.stderr)
    # sys.exit() instead of the site-provided exit() helper, which is meant
    # for interactive sessions and may be absent.
    # NOTE(review): exit status is left at 0 to match the previous behaviour;
    # consider a non-zero status if callers should detect this failure.
    sys.exit()
# Dataset to be converted.
dataset = worksheets["Sheet1"]
# Administrative-division reference dataset.
dataset_regions = worksheets["Sheet2"]
print("已完成")
print()
|
||
|
||
print("2、转化数据")
print()
print("2.1 基于行政区划数据集生成省级、地级和县级行政区字典", end="")
# Reduce the reference dataset to the name/code columns of the three
# administrative levels and drop repeated rows.
region_columns = [
    "省级名称",
    "省级编码",
    "地级名称",
    "地级编码",
    "县级名称",
    "县级编码",
]
regions = dataset_regions[region_columns].drop_duplicates()
print("已完成")
print()
|
||
|
||
print("2.2 遍历并转化数据...", end="")


def _build_lookup(frame, key_column, value_columns):
    """Map each non-missing key to the values of the first row carrying it.

    Args:
        frame: DataFrame holding the reference rows.
        key_column: Column whose values become the dictionary keys.
        value_columns: Columns whose values are stored, in order, as a tuple.

    Returns:
        dict mapping each key to a tuple of values from its first row.

    Raises:
        KeyError: If a requested column is absent from ``frame``. Failing
            here is intentional: a missing reference column should stop the
            script instead of marking every row as "not found".
    """
    lookup = {}
    selected = frame[[key_column, *value_columns]]
    for key, *values in selected.itertuples(index=False, name=None):
        # A missing key can never match a looked-up name, so leave it out.
        if pandas.isna(key):
            continue
        # Keep the first occurrence when a name appears in several rows.
        lookup.setdefault(key, tuple(values))
    return lookup


def _code_as_text(value):
    """Return an integer-like code as text, leaving missing values untouched.

    A missing code is returned unchanged (it becomes "" in the final fillna)
    rather than crashing on ``int()``.
    """
    if pandas.isna(value):
        return value
    return str(int(value))


# Build the name -> code lookups once instead of scanning the reference
# frames again for every row.
province_codes = _build_lookup(dataset_regions, "省级名称", ["省级编码"])
city_codes = _build_lookup(dataset_regions, "地级名称", ["地级编码"])
county_records = _build_lookup(
    regions, "县级名称", ["县级编码", "省级名称", "地级名称"]
)

# Normalise every flavour of "empty" cell to pandas.NA.
dataset.replace(
    to_replace={numpy.nan: pandas.NA, None: pandas.NA, "": pandas.NA}, inplace=True
)

# iterrows() yields index *labels*; enumerate supplies the position that
# iloc needs when the row is written back.
for position, (_, row) in enumerate(dataset.iterrows()):
    # ---- Province level ----
    province_name = row["省名称"]
    if pandas.isna(province_name):
        row["省区划编码"] = "省名称为空"
        row["省编码"] = "省名称为空"
        row["省名称"] = "省名称为空"
        row["与区划编码对比结果"] = "待确定"
    else:
        # Fill the province division code from the province lookup.
        province_match = province_codes.get(province_name)
        if province_match is None:
            row["省区划编码"] = "未查询到省区划编码"
            row["与区划编码对比结果"] = "待确定"
        else:
            row["省区划编码"] = str(province_match[0])
        # Store the province code as text.
        row["省编码"] = _code_as_text(row["省编码"])

    # ---- Prefecture (city) level ----
    city_name = row["市名称"]
    if row["区划类型"] != "省":
        if pandas.isna(city_name):
            row["市区划编码"] = "市名称为空"
            row["市编码"] = "市名称为空"
            row["市名称"] = "市名称为空"
            row["与区划编码对比结果"] = "待确定"
        else:
            # Fill the city division code from the prefecture lookup.
            city_match = city_codes.get(city_name)
            if city_match is None:
                row["市区划编码"] = "未查询到市区划编码"
                row["与区划编码对比结果"] = "待确定"
            else:
                row["市区划编码"] = str(city_match[0])
            # Store the city code as text.
            row["市编码"] = _code_as_text(row["市编码"])
    else:
        # Province-type rows carry no city information.
        row["市区划编码"] = ""
        row["市编码"] = ""
        row["市名称"] = ""

    # ---- County (district) level ----
    region_name = row["区县名称"]
    if row["区划类型"] == "区县":
        if pandas.isna(region_name):
            row["区县区划编码"] = "区县名称为空"
            row["区县编码"] = "区县名称为空"
            row["区县名称"] = "区县名称为空"
            row["与区划编码对比结果"] = "待确定"
        else:
            # Fill the county division code from the county lookup.
            county_match = county_records.get(region_name)
            if county_match is None:
                row["区县区划编码"] = "未查询到区县区划编码"
                row["与区划编码对比结果"] = "待确定"
            else:
                county_code, parent_province, parent_city = county_match
                row["区县区划编码"] = str(county_code)
                if row["省名称"] == "省名称为空" or row["市名称"] == "市名称为空":
                    # Province or city name was empty: record which
                    # province/city the reference data assigns this county to.
                    row["与区划编码对比说明"] = "该区县所属{}/{}".format(
                        str(parent_province), str(parent_city)
                    )
            # Store the county code as text.
            row["区县编码"] = _code_as_text(row["区县编码"])
    else:
        # Rows that are not county-type carry no county information.
        row["区县区划编码"] = ""
        row["区县编码"] = ""
        row["区县名称"] = ""

    # Write the modified row back into the dataset.
    dataset.iloc[position] = row

# Any value still missing is written out as an empty string.
dataset.fillna(value="", inplace=True)

print("已完成")
print()
|
||
|
||
print("正在保存为EXCEL...", end="")

# Hand the converted dataset to the project helper as a single worksheet.
output_worksheets = [("Sheet1", dataset)]
save_as_workbook(workbook_name="results.xlsx", worksheets=output_worksheets)

print("已完成")
print()
|