# -*- coding:utf-8 -*-
import openpyxl
import xlrd
import csv
import os
import time
import sys
import datetime
def show_message(message=''):
print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), str(message))
def explain_excel(path, index=0):
file_data = []
if path[-3:] == 'xls':
workbook = xlrd.open_workbook(path)
workbook_sheet = workbook.sheet_by_index(index)
workbook_sheet_rows = workbook_sheet.nrows
if workbook_sheet_rows > 0:
for row_i in range(0, workbook_sheet_rows):
if len(workbook_sheet.row_values(row_i)) > 0:
file_data.append(workbook_sheet.row_values(row_i))
if path[-4:] == 'xlsx':
workbook = openpyxl.load_workbook(path)
for row_data in workbook.worksheets[index].values:
if len(row_data) > 0:
file_data.append(row_data)
return file_data
def explain_csv(path):
file_data = []
if path[-3:] == 'csv':
csv_reader = csv.reader(open(path))
for text in csv_reader:
if len(text) > 0:
file_data.append(text)
return file_data
# 解析并导出文件内容
def explain_file(path):
explain_filename = path.replace('\\', '/').split('/')
explain_filename = explain_filename[-1].split('.')
explain_filename = explain_filename[0]
file_data = []
path_data = path.split('.')
if path_data[-1] == 'xls' or path_data[-1] == 'xlsx':
file_data = explain_excel(path, int(sys.argv[4]))
if path_data[-1] == 'csv':
file_data = explain_csv(path)
return file_data
def split_mobile(mobile):
province = ['上海市', '云南省', '内蒙古自治区', '北京市', '吉林省', '四川省', '天津市', '宁夏回族自治区', '安徽省', '山东省', '山西省', '广东省','广西壮族自治区', '广西省', '新疆维吾尔自治区', '江苏省',
'江西省', '河北省', '河南省', '浙江省', '海南省', '湖北省', '湖南省', '甘肃省', '福建省', '西藏自治区', '贵州省', '辽宁省', '重庆市', '陕西省', '青海省', '黑龙江省',
'上海', '云南', '内蒙古', '北京', '吉林', '四川', '天津', '宁夏', '安徽', '山东', '山西', '广东', '广西', '新疆', '江苏',
'江西', '河北', '河南', '浙江', '海南', '湖北', '湖南', '甘肃', '福建', '西藏', '贵州', '辽宁', '重庆', '陕西', '青海', '黑龙江']
for p in province:
mobile = mobile.replace(p," ")
addressData = mobile.split(" ")
if len(addressData)>1:
mobile = addressData[0]
return mobile
if __name__ == '__main__':
os.environ['REQUESTS_CA_BUNDLE'] = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'cacert.pem')
# 记录 excel 内容
excel_rows = []
tmp_file_data = []
# excel 行数记数
excel_i = 0
# 当前路径
current_dir = os.path.abspath('.')
current_path = current_dir + "\\" + sys.argv[1]
excel_data = explain_file(current_path)
if len(excel_data) < 2:
print('文件: ' + sys.argv[1] + ' 没有内容')
else:
wb = openpyxl.Workbook()
ws = wb.create_sheet('Sheet1', 0)
ws.append(['快递单号', '姓名', '电话', '地址'])
ed_i = 0
for ed in excel_data:
has_ed = False
if ed_i >= 0:
tracking_i = int(sys.argv[2])
address_i = int(sys.argv[3])
if ed[tracking_i] != None and ed[address_i] != None:
tmp_address = str(ed[address_i])
addressData = tmp_address.split(' ')
if len(addressData) == 3:
name = addressData[0]
mobile = addressData[1]
address = addressData[2]
ws.append([ed[tracking_i], name, mobile, address, ed_i])
excel_i = excel_i + 1
continue
tmp_address = tmp_address.replace(' ', '')
tmp_address = tmp_address.lstrip()
tmp_address = tmp_address.replace(',', '')
addressData = tmp_address.split(' ')
if len(addressData) > 3:
for a in addressData:
try:
if a == str(int(a)) and len(a) < 11:
tmp_address = tmp_address.replace(' ', '')
addressData = tmp_address.split(' ')
break
except:
pass
if len(addressData) == 2:
tmp_address = str(ed[address_i]).replace(' ', '')
addressData = tmp_address.split(' ')
if len(addressData) > 3:
name = addressData[0]
mobile = addressData[1]
if mobile[0:1] == '\n':
print('第 ' + str(ed_i + 1) + ' 行报错', addressData)
os._exit(0)
if '\n' in mobile:
mobileData = mobile.split('\n')
try:
mobile = mobileData[0]
mobile_find = mobile.find(':')
if mobile_find > -1:
#mobileData[0] = mobileData[0][0:mobile_find]
name = mobile.split(':')[1]
# tmp_mobile = int(mobileData[0])
except:
print('第 ' + str(ed_i + 1) + ' 行手机号错误', mobileData[0])
mobile = '\t' + mobileData[0]
address = ''.join(mobileData[1:] + addressData[2:]).replace(' ', '')
else:
mobile = str("\t" + addressData[1])
address = ''.join(addressData[2:]).replace(' ', '')
has_ed = True
elif len(addressData) == 1:
province = ['上海市', '云南省', '内蒙古自治区', '北京市', '吉林省', '四川省', '天津市', '宁夏回族自治区', '安徽省', '山东省', '山西省', '广东省','广西壮族自治区', '广西省', '新疆维吾尔自治区', '江苏省',
'江西省', '河北省', '河南省', '浙江省', '海南省', '湖北省', '湖南省', '甘肃省', '福建省', '西藏自治区', '贵州省', '辽宁省', '重庆市', '陕西省', '青海省', '黑龙江省',
'上海', '云南', '内蒙古', '北京', '吉林', '四川', '天津', '宁夏', '安徽', '山东', '山西', '广东', '广西', '新疆', '江苏',
'江西', '河北', '河南', '浙江', '海南', '湖北', '湖南', '甘肃', '福建', '西藏', '贵州', '辽宁', '重庆', '陕西', '青海', '黑龙江']
address = tmp_address
for p in province:
if p in address:
addressData = address.split(p)
if len(addressData) > 1:
address = p + p.join(addressData[1:])
name = tmp_address.replace(address, '')
nameData = name.split('1')
if len(nameData) > 1:
name = nameData[0]
mobile = '\t1' + '1'.join(nameData[1:])
mobile = split_mobile(mobile)
has_ed = True
namefind = mobile.find(':')
if namefind > -1:
namefind = mobile.split(":")
name = namefind[1]
mobile = namefind[0]
break
else:
print('没有找到名字'+str(nameData))
else:
print('没有找到地址'+str(addressData))
if has_ed == True:
ws.append([ed[tracking_i], name, mobile, address, ed_i])
excel_i = excel_i + 1
ed_i = ed_i + 1
if excel_i > 0:
current_path = current_dir + '\\' + datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + '.xlsx'
wb.save(current_path)
wb.close()
print('file export finished: ' + current_path)
else:
wb.close()
print('no file to export')
# print(os.path.abspath(os.path.dirname(__file__)))
# print(os.getcwd())