# -*- coding: utf-8 -*-
'''
Convert a dataset against an administrative-division reference table.

Sheet1 of dataset.xlsx holds the rows to convert and Sheet2 holds the
administrative-division table. For every row the province, city and county
names are looked up in the reference table, the matching division codes are
filled in, the row's own codes are normalised to integer text, and the
result is saved to results.xlsx.

Note: the administrative-division table comes from
https://lbsyun.baidu.com/faq/api?title=webapi/download
'''

import re
import json
import numpy
import pandas
import time
import os
import sys

# Make the project root importable so that main() can load utils.pandas_extension.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))

# Columns of the reference table that the lookups are built from.
REGION_COLUMNS = ['省级名称', '省级编码', '地级名称', '地级编码', '县级名称', '县级编码']
# Column that receives the comparison verdict, and the only verdict this script writes.
RESULT_COLUMN = '与区划编码对比结果'
PENDING = '待确定'
# Column that receives the "this county belongs to province/city" remark.
NOTE_COLUMN = '与区划编码对比说明'


def _is_missing(value):
    '''Return True when a cell is empty: None, pandas.NA, NaN/NaT or ''.'''
    if value is None or value is pandas.NA:
        return True
    if isinstance(value, str):
        return value == ''
    try:
        return bool(pandas.isna(value))
    except (TypeError, ValueError):
        # Array-like or otherwise exotic cell content: not a missing marker.
        return False


def _code_to_text(value):
    '''
    Normalise a division code to integer text, e.g. 110000.0 -> '110000'.

    An empty cell (None) is returned unchanged so that one blank code does
    not abort the whole run. Anything int() cannot convert still raises
    ValueError/TypeError.
    NOTE(review): confirm that leaving an empty code blank is the wanted
    outcome rather than flagging the row.
    '''
    if value is None:
        return None
    return str(int(value))


def build_region_lookups(dataset_regions: pandas.DataFrame) -> tuple:
    '''
    Build name -> value dictionaries from the administrative-division table.

    When a name occurs on several rows the first occurrence wins. Rows whose
    name cell is empty are skipped for that level.

    Returns a 4-tuple of dicts:
        province name -> province code (as text),
        city name     -> city code (as text),
        county name   -> county code (as text),
        county name   -> (province name, city name) as text.

    Raises KeyError when one of REGION_COLUMNS is absent from the table.
    '''
    province_codes = {}
    city_codes = {}
    county_codes = {}
    county_parents = {}
    rows = dataset_regions[REGION_COLUMNS].itertuples(index=False, name=None)
    for province, province_code, city, city_code, county, county_code in rows:
        # setdefault keeps the first row seen for each name.
        if not _is_missing(province):
            province_codes.setdefault(province, str(province_code))
        if not _is_missing(city):
            city_codes.setdefault(city, str(city_code))
        if not _is_missing(county):
            county_codes.setdefault(county, str(county_code))
            county_parents.setdefault(county, (str(province), str(city)))
    return province_codes, city_codes, county_codes, county_parents


def _fill_level(row, prefix, official_codes):
    '''
    Fill the reference code for one level ('省', '市' or '区县') of a row.

    The columns touched are f'{prefix}名称', f'{prefix}编码' and
    f'{prefix}区划编码'. An empty name writes the placeholder
    f'{prefix}名称为空' into all three; an unknown name writes
    f'未查询到{prefix}区划编码' into the reference-code column. Both cases
    mark the row as pending.

    Returns the name, or None when the name cell was empty.
    '''
    name_column = f'{prefix}名称'
    code_column = f'{prefix}编码'
    official_column = f'{prefix}区划编码'

    name = row[name_column]
    if name is None:
        placeholder = f'{prefix}名称为空'
        row[official_column] = placeholder
        row[code_column] = placeholder
        row[name_column] = placeholder
        row[RESULT_COLUMN] = PENDING
        return None

    official_code = official_codes.get(name)
    if official_code is None:
        row[official_column] = f'未查询到{prefix}区划编码'
        row[RESULT_COLUMN] = PENDING
    else:
        row[official_column] = official_code
    # The row's own code is normalised whether or not the lookup succeeded.
    row[code_column] = _code_to_text(row[code_column])
    return name


def _clear_level(row, prefix):
    '''Blank the three columns of a level that does not apply to the row.'''
    for column in (f'{prefix}区划编码', f'{prefix}编码', f'{prefix}名称'):
        row[column] = ''


def _convert_row(row, lookups):
    '''
    Convert one row in place.

    `row` is a dict whose empty cells are already normalised to None.
    The province level is always processed, the city level unless the row's
    '区划类型' is '省', and the county level only when it is '区县'. An empty
    '区划类型' therefore behaves like a city-level row.
    '''
    province_codes, city_codes, county_codes, county_parents = lookups

    _fill_level(row, '省', province_codes)

    region_type = row['区划类型']
    if region_type != '省':
        _fill_level(row, '市', city_codes)
    else:
        _clear_level(row, '市')

    if region_type == '区县':
        county = _fill_level(row, '区县', county_codes)
        parent_unknown = row['省名称'] == '省名称为空' or row['市名称'] == '市名称为空'
        if county is not None and county in county_parents and parent_unknown:
            # The province or city name was empty: record where the
            # reference table says this county belongs.
            row[NOTE_COLUMN] = '该区县所属{}/{}'.format(*county_parents[county])
    else:
        _clear_level(row, '区县')


def convert_dataset(dataset: pandas.DataFrame, lookups: tuple) -> pandas.DataFrame:
    '''
    Return a converted copy of `dataset`.

    `lookups` is the tuple produced by build_region_lookups(). The result
    keeps the index and column order of `dataset`; any column the conversion
    writes that the input lacks is appended on the right. Every empty cell
    comes back as ''.

    Raises KeyError when `dataset` lacks one of the name, code or '区划类型'
    columns.
    '''
    columns = list(dataset.columns)
    known_columns = set(columns)
    converted = []
    for record in dataset.to_dict(orient='records'):
        # Normalise every kind of empty marker so the row logic only has to
        # test for None.
        row = {key: (None if _is_missing(value) else value)
               for key, value in record.items()}
        _convert_row(row, lookups)
        for key in row:
            if key not in known_columns:
                known_columns.add(key)
                columns.append(key)
        converted.append(row)

    cells = []
    for row in converted:
        values = (row.get(column) for column in columns)
        cells.append(['' if value is None else value for value in values])
    # Rows are written back by position, so any index of `dataset` is kept.
    return pandas.DataFrame(cells, index=dataset.index, columns=columns)


def main():
    '''Read dataset.xlsx, convert Sheet1 against Sheet2, save results.xlsx.'''
    # Imported here, before any work starts, so that the conversion helpers
    # stay importable without the project package.
    from utils.pandas_extension import SaveAsExcel

    print('1、读取Excel并创建数据集...', end='')
    try:
        # Rows to convert.
        dataset = pandas.read_excel(io='dataset.xlsx', sheet_name='Sheet1')
        # Administrative-division reference table.
        dataset_regions = pandas.read_excel(io='dataset.xlsx', sheet_name='Sheet2')
    except Exception as error:
        print('读取Excel或创建数据集发生异常,脚本终止')
        print()
        print(repr(error), file=sys.stderr)
        # A non-zero status lets calling shells and schedulers see the failure.
        sys.exit(1)
    print('已完成')
    print()

    print('2、转化数据')
    print()

    print('2.1 基于行政区划数据集生成省级、地级和县级行政区字典', end='')
    lookups = build_region_lookups(dataset_regions)
    print('已完成')
    print()

    print('2.2 遍历并转化数据...', end='')
    dataset = convert_dataset(dataset, lookups)
    print('已完成')
    print()

    print('正在保存为EXCEL...', end='')
    SaveAsExcel(worksheets=[('Sheet1', dataset)], save_path='results.xlsx')
    print('已完成')
    print()


if __name__ == '__main__':
    main()

# Change log