197 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			197 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
# -*- coding: utf-8 -*-

'''
Script description:

Convert the pending dataset using the administrative-division reference data.

Notes:

The administrative-division dataset comes from https://lbsyun.baidu.com/faq/api?title=webapi/download
'''
 | ||
| 
 | ||
| import re
 | ||
| 
 | ||
| import json
 | ||
| 
 | ||
| import numpy
 | ||
| 
 | ||
| import pandas
 | ||
| 
 | ||
| import time
 | ||
| 
 | ||
| import os
 | ||
| 
 | ||
| import sys
 | ||
| 
 | ||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 | ||
| 
 | ||
| from utils.pandas_extension import SaveAsExcel
 | ||
| 
 | ||
# Column names, sheet names and placeholder texts below are runtime data read
# from / written to the workbooks, so they are kept verbatim.

# Columns this script fills in on the converted sheet.
_RESULT_COLUMNS = (
    '省区划编码',
    '市区划编码',
    '区县区划编码',
    '与区划编码对比结果',
    '与区划编码对比说明',
)


def _first_match_by_name(table, name_column, value_columns):
    """Index *table* by *name_column*, keeping the first row seen per name.

    Returns a dict mapping each non-missing name to a tuple holding the
    ``str()`` of every column listed in *value_columns*, in that order.
    Building this once replaces a full boolean-mask scan per input row.
    """
    lookup = {}
    columns = [name_column, *value_columns]
    for name, *values in table[columns].itertuples(index=False, name=None):
        # A missing name can never equal a real one, and the first
        # occurrence wins, exactly like taking element 0 of a filtered column.
        if pandas.isna(name) or name in lookup:
            continue
        lookup[name] = tuple(str(value) for value in values)
    return lookup


def build_region_lookups(dataset_regions):
    """Build the province, city and county lookup tables.

    Args:
        dataset_regions: DataFrame holding the six reference columns
            省级名称/省级编码/地级名称/地级编码/县级名称/县级编码.

    Returns:
        A ``(provinces, cities, counties)`` tuple of dicts keyed by name.
        Province and city entries are ``(code,)``; county entries are
        ``(code, province name, city name)``. All values are strings.

    Raises:
        KeyError: if any of the six reference columns is missing.
    """
    regions = dataset_regions[
        ['省级名称', '省级编码', '地级名称', '地级编码', '县级名称', '县级编码']
    ].drop_duplicates()
    provinces = _first_match_by_name(regions, '省级名称', ['省级编码'])
    cities = _first_match_by_name(regions, '地级名称', ['地级编码'])
    counties = _first_match_by_name(
        regions, '县级名称', ['县级编码', '省级名称', '地级名称']
    )
    return provinces, cities, counties


def _code_to_text(value):
    """Render a numeric code cell as digit text, e.g. ``110000.0`` -> ``'110000'``.

    A missing cell is returned unchanged (it is blanked later together with
    every other missing cell) and a non-numeric cell is returned as plain
    text, so one bad cell no longer aborts the whole run.
    """
    if pandas.isna(value):
        return value
    try:
        return str(int(value))
    except (TypeError, ValueError, OverflowError):
        return str(value)


def _resolve_level(record, prefix, lookup):
    """Fill the division code of one level ('省', '市' or '区县') in *record*.

    The columns touched are ``prefix + '名称'``, ``prefix + '编码'`` and
    ``prefix + '区划编码'``. Returns the matched lookup entry, or ``None``
    when the name is missing or unknown; in both of those cases the row is
    flagged as '待确定' in 与区划编码对比结果.
    """
    name_column = prefix + '名称'
    code_column = prefix + '编码'
    division_column = prefix + '区划编码'

    name = record[name_column]
    if pandas.isna(name):
        blank_marker = name_column + '为空'
        record[division_column] = blank_marker
        record[code_column] = blank_marker
        record[name_column] = blank_marker
        record['与区划编码对比结果'] = '待确定'
        return None

    match = lookup.get(name)
    if match is None:
        record[division_column] = '未查询到' + division_column
        record['与区划编码对比结果'] = '待确定'
    else:
        record[division_column] = match[0]
    record[code_column] = _code_to_text(record[code_column])
    return match


def _clear_level(record, prefix):
    """Blank the three columns of a level that does not apply to this row."""
    for suffix in ('区划编码', '编码', '名称'):
        record[prefix + suffix] = ''


def _convert_record(record, provinces, cities, counties):
    """Convert one row (a dict keyed by column name) in place and return it."""
    _resolve_level(record, '省', provinces)

    # NOTE(review): a blank 区划类型 is not given a placeholder here; if it
    # reaches this point as pandas.NA the comparisons below raise TypeError.
    level = record['区划类型']

    if level != '省':
        _resolve_level(record, '市', cities)
    else:
        _clear_level(record, '市')

    if level == '区县':
        match = _resolve_level(record, '区县', counties)
        parent_missing = (
            record['省名称'] == '省名称为空' or record['市名称'] == '市名称为空'
        )
        if match is not None and parent_missing:
            # Tell the reader which province/city the county belongs to.
            record['与区划编码对比说明'] = '该区县所属{}/{}'.format(match[1], match[2])
    else:
        _clear_level(record, '区县')

    return record


def convert_dataset(dataset, lookups):
    """Return a converted copy of *dataset*; the input frame is not modified.

    Args:
        dataset: DataFrame with the columns 区划类型, 省名称/省编码,
            市名称/市编码 and 区县名称/区县编码. Result columns that are
            absent are created.
        lookups: the tuple returned by ``build_region_lookups``.

    Returns:
        A DataFrame with the same index and column order in which division
        codes are filled in, code cells are text, and every remaining
        missing cell is an empty string.

    Raises:
        KeyError: if a required input column is missing.
    """
    provinces, cities, counties = lookups

    # Treat NaN, None and empty strings uniformly as "missing".
    dataset = dataset.replace(
        to_replace={numpy.nan: pandas.NA, None: pandas.NA, '': pandas.NA}
    )

    absent = [column for column in _RESULT_COLUMNS if column not in dataset.columns]
    if absent:
        dataset = dataset.reindex(columns=[*dataset.columns, *absent])

    # Rows are converted as plain dicts and the frame is rebuilt once, which
    # avoids writing strings into typed columns row by row through a
    # position-based indexer.
    records = [
        _convert_record(record, provinces, cities, counties)
        for record in dataset.to_dict(orient='records')
    ]
    converted = pandas.DataFrame(
        records, columns=dataset.columns, index=dataset.index
    )
    return converted.fillna(value='')


def main():
    """Read dataset.xlsx, convert Sheet1 against Sheet2 and save results.xlsx.

    Progress messages go to stdout. If the workbook cannot be read, the
    failure message is printed, the cause goes to stderr and the process
    exits with status 1.
    """
    print('1、读取Excel并创建数据集...', end = '')

    try:
        # Dataset to convert.
        dataset = pandas.read_excel(io = 'dataset.xlsx', sheet_name = 'Sheet1')
        # Administrative-division reference data.
        dataset_regions = pandas.read_excel(io = 'dataset.xlsx', sheet_name = 'Sheet2')
    except Exception as error:
        print('读取Excel或创建数据集发生异常,脚本终止')
        print()
        print(repr(error), file = sys.stderr)
        sys.exit(1)

    print('已完成')
    print()

    print('2、转化数据')
    print()

    print('2.1 基于行政区划数据集生成省级、地级和县级行政区字典', end = '')
    lookups = build_region_lookups(dataset_regions)
    print('已完成')
    print()

    print('2.2 遍历并转化数据...', end = '')
    dataset = convert_dataset(dataset, lookups)
    print('已完成')
    print()

    print('正在保存为EXCEL...', end = '')
    SaveAsExcel(worksheets = [('Sheet1', dataset)], save_path = 'results.xlsx')
    print('已完成')
    print()


if __name__ == '__main__':
    main()
 | ||
| 
 | ||
'''
Change log
'''