197 lines
4.2 KiB
Python
197 lines
4.2 KiB
Python
# -*- coding: utf-8 -*-
|
||
|
||
'''
Script description:
Convert the target dataset according to the administrative division data.
Note:
The administrative division dataset comes from https://lbsyun.baidu.com/faq/api?title=webapi/download
'''
|
||
|
||
import re
|
||
|
||
import json
|
||
|
||
import numpy
|
||
|
||
import pandas
|
||
|
||
import time
|
||
|
||
import os
|
||
|
||
import sys
|
||
|
||
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
||
|
||
from utils.pandas_extension import SaveAsExcel
|
||
|
||
# Columns of the administrative division sheet used to build the lookups.
REGION_COLUMNS = ['省级名称', '省级编码', '地级名称', '地级编码', '县级名称', '县级编码']

# Sentinel meaning "no entry in the lookup table" (a stored code may itself be NaN).
_NO_MATCH = object()


def _is_missing(value):
    """Return True when *value* is an empty string, None, NaN or pandas.NA."""
    if isinstance(value, str):
        return value == ''
    return bool(pandas.isna(value))


def _code_to_text(value):
    """Render a region code cell as text.

    Blank cells become '' instead of raising; numeric cells lose a trailing
    '.0' (Excel often yields floats); anything else falls back to str().
    """
    if _is_missing(value):
        return ''
    try:
        return str(int(value))
    except (TypeError, ValueError, OverflowError):
        return str(value)


def _first_hit(table, keys):
    """Return the value of the first key of *keys* found in *table*, else _NO_MATCH."""
    for key in keys:
        if key in table:
            return table[key]
    return _NO_MATCH


def load_datasets(path='dataset.xlsx'):
    """Read the dataset to convert (Sheet1) and the division data (Sheet2).

    The workbook is opened once; passing a list as ``sheet_name`` makes
    pandas return a dict keyed by sheet name.
    """
    sheets = pandas.read_excel(io=path, sheet_name=['Sheet1', 'Sheet2'])
    return sheets['Sheet1'], sheets['Sheet2']


def build_region_lookups(dataset_regions):
    """Build province, city and county lookup tables from the division data.

    Returns a tuple ``(provinces, cities, counties)``:

    * ``provinces`` maps province name -> province code.
    * ``cities`` maps ``(province name, city name)`` and ``(None, city name)``
      -> city code.
    * ``counties`` maps ``(province, city, county)``, ``(None, city, county)``
      and ``(None, None, county)`` -> ``(county code, province name, city name)``.

    Every key keeps the FIRST row that produced it, so the name-only keys
    reproduce a "first matching row" lookup while the qualified keys allow
    duplicate names (e.g. several cities owning a '鼓楼区') to be told apart.

    Raises KeyError when one of REGION_COLUMNS is absent from the sheet.
    """
    regions = dataset_regions[REGION_COLUMNS].drop_duplicates()
    provinces, cities, counties = {}, {}, {}
    for record in regions.itertuples(index=False, name=None):
        raw_province, province_code, raw_city, city_code, raw_county, county_code = record
        # Blank names are normalised to None so they never act as real keys.
        province_name = None if _is_missing(raw_province) else raw_province
        city_name = None if _is_missing(raw_city) else raw_city
        county_name = None if _is_missing(raw_county) else raw_county
        if province_name is not None:
            provinces.setdefault(province_name, province_code)
        if city_name is not None:
            cities.setdefault((province_name, city_name), city_code)
            cities.setdefault((None, city_name), city_code)
        if county_name is not None:
            owner = (county_code, raw_province, raw_city)
            counties.setdefault((province_name, city_name, county_name), owner)
            counties.setdefault((None, city_name, county_name), owner)
            counties.setdefault((None, None, county_name), owner)
    return provinces, cities, counties


def convert_row(record, provinces, cities, counties):
    """Fill the division-code columns of one row (a dict) and return it.

    *record* is modified in place. Keys written: the three '…区划编码'
    columns, the three '…编码' columns (converted to text), the three name
    columns (replaced by a marker when blank or not applicable),
    '与区划编码对比结果' and '与区划编码对比说明'.

    Raises KeyError when an input column that is read is absent.
    """
    province_name = record['省名称']
    city_name = record['市名称']
    county_name = record['区县名称']
    region_type = record['区划类型']
    # A blank region type is treated as neither '省' nor '区县'.
    type_known = not _is_missing(region_type)
    is_province = type_known and region_type == '省'
    is_county = type_known and region_type == '区县'

    # Province level: always processed.
    if _is_missing(province_name):
        province_key = None
        record['省区划编码'] = '省名称为空'
        record['省编码'] = '省名称为空'
        record['省名称'] = '省名称为空'
        record['与区划编码对比结果'] = '待确定'
    else:
        province_key = province_name
        if province_name in provinces:
            record['省区划编码'] = str(provinces[province_name])
        else:
            record['省区划编码'] = '未查询到省区划编码'
            record['与区划编码对比结果'] = '待确定'
        record['省编码'] = _code_to_text(record['省编码'])

    # City level: not applicable to province rows.
    city_key = None
    if is_province:
        record['市区划编码'] = ''
        record['市编码'] = ''
        record['市名称'] = ''
    elif _is_missing(city_name):
        record['市区划编码'] = '市名称为空'
        record['市编码'] = '市名称为空'
        record['市名称'] = '市名称为空'
        record['与区划编码对比结果'] = '待确定'
    else:
        city_key = city_name
        # Prefer the city inside the stated province, else first city with that name.
        city_code = _first_hit(cities, ((province_key, city_name), (None, city_name)))
        if city_code is _NO_MATCH:
            record['市区划编码'] = '未查询到市区划编码'
            record['与区划编码对比结果'] = '待确定'
        else:
            record['市区划编码'] = str(city_code)
        record['市编码'] = _code_to_text(record['市编码'])

    # County level: only for county rows.
    if not is_county:
        record['区县区划编码'] = ''
        record['区县编码'] = ''
        record['区县名称'] = ''
    elif _is_missing(county_name):
        record['区县区划编码'] = '区县名称为空'
        record['区县编码'] = '区县名称为空'
        record['区县名称'] = '区县名称为空'
        record['与区划编码对比结果'] = '待确定'
    else:
        # Most specific key first; the last key is the name-only first match.
        owner = _first_hit(counties, (
            (province_key, city_key, county_name),
            (None, city_key, county_name),
            (None, None, county_name),
        ))
        if owner is _NO_MATCH:
            record['区县区划编码'] = '未查询到区县区划编码'
            record['与区划编码对比结果'] = '待确定'
        else:
            county_code, owner_province, owner_city = owner
            record['区县区划编码'] = str(county_code)
            if record['省名称'] == '省名称为空' or record['市名称'] == '市名称为空':
                # Province or city name was blank: say where the county belongs.
                record['与区划编码对比说明'] = '该区县所属{}/{}'.format(str(owner_province), str(owner_city))
        record['区县编码'] = _code_to_text(record['区县编码'])

    return record


def convert_dataset(dataset, provinces, cities, counties):
    """Convert every row of *dataset* and return a new DataFrame.

    The result keeps the index and the columns of *dataset* (keys written
    for columns the dataset does not have are dropped) and every remaining
    missing value is replaced by ''. Rows are rebuilt from plain dicts, so
    the result does not depend on the index being 0..n-1 and no string is
    written into an existing numeric column.
    """
    converted = []
    for record in dataset.to_dict(orient='records'):
        record = convert_row(record, provinces, cities, counties)
        converted.append({
            column: '' if _is_missing(value) else value
            for column, value in record.items()
        })
    return pandas.DataFrame(converted, index=dataset.index, columns=dataset.columns)


def main():
    """Run the conversion: read dataset.xlsx, convert it, write results.xlsx."""
    print('1、读取Excel并创建数据集...', end='')
    try:
        dataset, dataset_regions = load_datasets()
    except Exception as error:
        print('读取Excel或创建数据集发生异常,脚本终止')
        print()
        # Report the cause and signal failure to the caller.
        print('{}: {}'.format(type(error).__name__, error), file=sys.stderr)
        sys.exit(1)
    print('已完成')
    print()

    print('2、转化数据')
    print()

    print('2.1 基于行政区划数据集生成省级、地级和县级行政区字典', end='')
    provinces, cities, counties = build_region_lookups(dataset_regions)
    print('已完成')
    print()

    print('2.2 遍历并转化数据...', end='')
    dataset = convert_dataset(dataset, provinces, cities, counties)
    print('已完成')
    print()

    print('正在保存为EXCEL...', end='')
    SaveAsExcel(worksheets=[('Sheet1', dataset)], save_path='results.xlsx')
    print('已完成')
    print()


if __name__ == '__main__':
    main()
|
||
|
||
'''
Change log
'''