Python/regions/main.py

197 lines
4.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
'''
脚本说明:
根据行政区划数据就待转化数据集进行转化
备注:
行政区划数据集来源于 https://lbsyun.baidu.com/faq/api?title=webapi/download
'''
import re
import json
import numpy
import pandas
import time
import os
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from utils.pandas_extension import SaveAsExcel
# Step 1: read both worksheets of dataset.xlsx into DataFrames.
print('1、读取Excel并创建数据集...', end='')
try:
    # Dataset to be converted (worksheet "Sheet1").
    dataset = pandas.read_excel(io='dataset.xlsx', sheet_name='Sheet1')
    # Administrative-division reference dataset (worksheet "Sheet2").
    dataset_regions = pandas.read_excel(io='dataset.xlsx', sheet_name='Sheet2')
except Exception:
    # Catch ordinary failures only (missing file, missing sheet, parse
    # errors); KeyboardInterrupt and SystemExit are allowed to propagate.
    print('读取Excel或创建数据集发生异常脚本终止')
    print()
    # sys.exit() is used instead of the interactive helper exit(), which is
    # only injected by the site module; the exit status is unchanged.
    sys.exit()
print('已完成')
print()
# Step 2: data conversion.
print('2、转化数据')
print()
print('2.1 基于行政区划数据集生成省级、地级和县级行政区字典', end='')
# Keep only the name/code columns of the three administrative levels, then
# collapse repeated rows so each combination appears once.
regions = dataset_regions[[
    '省级名称', '省级编码',
    '地级名称', '地级编码',
    '县级名称', '县级编码',
]]
regions = regions.drop_duplicates()
print('已完成')
print()
def _first_match(frame, name_column, name, value_column):
    """Return the first ``value_column`` entry whose ``name_column`` equals ``name``.

    Args:
        frame: DataFrame to search.
        name_column: Column compared against ``name``.
        name: Value to look for.
        value_column: Column the result is taken from.

    Returns:
        The first matching value converted with ``str``, or ``None`` when no
        row matches.
    """
    matches = frame.loc[frame[name_column] == name, value_column]
    if matches.empty:
        return None
    return str(matches.iat[0])


# Step 2.2: walk every row and fill in the reference codes.
print('2.2 遍历并转化数据...', end='')
# Normalise every "empty" marker (NaN, None, empty string) to pandas.NA so
# that a single missing-value test works for all cells below.
dataset.replace(to_replace={numpy.nan: pandas.NA, None: pandas.NA, '': pandas.NA}, inplace=True)
# iterrows() yields index *labels*; enumerate() supplies the row *position*
# that the iloc write-back at the end of the loop body requires.
for _position, (_label, row) in enumerate(dataset.iterrows()):
    # ---- Province level ----
    province_name = row['省名称']
    if pandas.notna(province_name):
        # Look the province name up in the reference data.
        province_code = _first_match(dataset_regions, '省级名称', province_name, '省级编码')
        if province_code is None:
            row['省区划编码'] = '未查询到省区划编码'
            row['与区划编码对比结果'] = '待确定'
        else:
            row['省区划编码'] = province_code
        # Store the province code as text.
        # NOTE(review): int() will fail if the code cell itself is empty;
        # confirm whether such rows can occur in the input.
        row['省编码'] = str(int(row['省编码']))
    else:
        row['省区划编码'] = '省名称为空'
        row['省编码'] = '省名称为空'
        row['省名称'] = '省名称为空'
        row['与区划编码对比结果'] = '待确定'

    # Blank division types were turned into pandas.NA above, so "empty" has
    # to be tested explicitly: comparing NA with a string yields NA, which
    # cannot be used as an `if` condition.
    division_type = row['区划类型']
    has_division_type = pandas.notna(division_type)

    # ---- Prefecture (city) level ----
    city_name = row['市名称']
    if has_division_type:
        if pandas.notna(city_name):
            # Look the city name up in the reference data.
            city_code = _first_match(dataset_regions, '地级名称', city_name, '地级编码')
            if city_code is None:
                row['市区划编码'] = '未查询到市区划编码'
                row['与区划编码对比结果'] = '待确定'
            else:
                row['市区划编码'] = city_code
            # Store the city code as text.
            row['市编码'] = str(int(row['市编码']))
        else:
            row['市区划编码'] = '市名称为空'
            row['市编码'] = '市名称为空'
            row['市名称'] = '市名称为空'
            row['与区划编码对比结果'] = '待确定'
    else:
        row['市区划编码'] = ''
        row['市编码'] = ''
        row['市名称'] = ''

    # ---- County (district) level ----
    region_name = row['区县名称']
    if has_division_type and division_type == '区县':
        if pandas.notna(region_name):
            # Look the county name up in the de-duplicated reference data.
            county_code = _first_match(regions, '县级名称', region_name, '县级编码')
            if county_code is None:
                row['区县区划编码'] = '未查询到区县区划编码'
                row['与区划编码对比结果'] = '待确定'
            else:
                row['区县区划编码'] = county_code
                if row['省名称'] == '省名称为空' or row['市名称'] == '市名称为空':
                    # Province or city name was missing: record which
                    # province/city the reference data lists for the county.
                    row['与区划编码对比说明'] = '该区县所属{}/{}'.format(
                        _first_match(regions, '县级名称', region_name, '省级名称'),
                        _first_match(regions, '县级名称', region_name, '地级名称'),
                    )
            # Store the county code as text.
            row['区县编码'] = str(int(row['区县编码']))
        else:
            row['区县区划编码'] = '区县名称为空'
            row['区县编码'] = '区县名称为空'
            row['区县名称'] = '区县名称为空'
            row['与区划编码对比结果'] = '待确定'
    else:
        row['区县区划编码'] = ''
        row['区县编码'] = ''
        row['区县名称'] = ''

    # iterrows() hands out a copy of the row, so write it back by position.
    dataset.iloc[_position] = row
# Render any remaining missing values as empty strings.
dataset.fillna(value='', inplace=True)
print('已完成')
print()
# Final step: pass the converted dataset to the project helper SaveAsExcel,
# as a single ('Sheet1', dataset) worksheet entry targeting results.xlsx.
print('正在保存为EXCEL...', end='')
SaveAsExcel(
    save_path='results.xlsx',
    worksheets=[('Sheet1', dataset)],
)
print('已完成')
print()
'''
修改记录
'''