# -*- coding:utf-8 -*- import openpyxl import xlrd import csv import os import time import sys import datetime def show_message(message=''): print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), str(message)) def explain_excel(path, index=0): file_data = [] if path[-3:] == 'xls': workbook = xlrd.open_workbook(path) workbook_sheet = workbook.sheet_by_index(index) workbook_sheet_rows = workbook_sheet.nrows if workbook_sheet_rows > 0: for row_i in range(0, workbook_sheet_rows): if len(workbook_sheet.row_values(row_i)) > 0: file_data.append(workbook_sheet.row_values(row_i)) if path[-4:] == 'xlsx': workbook = openpyxl.load_workbook(path) for row_data in workbook.worksheets[index].values: if len(row_data) > 0: file_data.append(row_data) return file_data def explain_csv(path): file_data = [] if path[-3:] == 'csv': csv_reader = csv.reader(open(path)) for text in csv_reader: if len(text) > 0: file_data.append(text) return file_data # 解析并导出文件内容 def explain_file(path): explain_filename = path.replace('\\', '/').split('/') explain_filename = explain_filename[-1].split('.') explain_filename = explain_filename[0] file_data = [] path_data = path.split('.') if path_data[-1] == 'xls' or path_data[-1] == 'xlsx': file_data = explain_excel(path, int(sys.argv[4])) if path_data[-1] == 'csv': file_data = explain_csv(path) return file_data def split_mobile(mobile): province = ['上海市', '云南省', '内蒙古自治区', '北京市', '吉林省', '四川省', '天津市', '宁夏回族自治区', '安徽省', '山东省', '山西省', '广东省','广西壮族自治区', '广西省', '新疆维吾尔自治区', '江苏省', '江西省', '河北省', '河南省', '浙江省', '海南省', '湖北省', '湖南省', '甘肃省', '福建省', '西藏自治区', '贵州省', '辽宁省', '重庆市', '陕西省', '青海省', '黑龙江省', '上海', '云南', '内蒙古', '北京', '吉林', '四川', '天津', '宁夏', '安徽', '山东', '山西', '广东', '广西', '新疆', '江苏', '江西', '河北', '河南', '浙江', '海南', '湖北', '湖南', '甘肃', '福建', '西藏', '贵州', '辽宁', '重庆', '陕西', '青海', '黑龙江'] for p in province: mobile = mobile.replace(p," ") addressData = mobile.split(" ") if len(addressData)>1: mobile = addressData[0] return mobile if __name__ == '__main__': os.environ['REQUESTS_CA_BUNDLE'] = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'cacert.pem') # 记录 excel 内容 excel_rows = [] tmp_file_data = [] # excel 行数记数 excel_i = 0 # 当前路径 current_dir = os.path.abspath('.') current_path = current_dir + "\\" + sys.argv[1] excel_data = explain_file(current_path) if len(excel_data) < 2: print('文件: ' + sys.argv[1] + ' 没有内容') else: wb = openpyxl.Workbook() ws = wb.create_sheet('Sheet1', 0) ws.append(['快递单号', '姓名', '电话', '地址']) ed_i = 0 for ed in excel_data: has_ed = False if ed_i >= 0: tracking_i = int(sys.argv[2]) address_i = int(sys.argv[3]) if ed[tracking_i] != None and ed[address_i] != None: tmp_address = str(ed[address_i]) addressData = tmp_address.split(' ') if len(addressData) == 3: name = addressData[0] mobile = addressData[1] address = addressData[2] ws.append([ed[tracking_i], name, mobile, address, ed_i]) excel_i = excel_i + 1 continue tmp_address = tmp_address.replace(' ', '') tmp_address = tmp_address.lstrip() tmp_address = tmp_address.replace(',', '') addressData = tmp_address.split(' ') if len(addressData) > 3: for a in addressData: try: if a == str(int(a)) and len(a) < 11: tmp_address = tmp_address.replace(' ', '') addressData = tmp_address.split(' ') break except: pass if len(addressData) == 2: tmp_address = str(ed[address_i]).replace(' ', '') addressData = tmp_address.split(' ') if len(addressData) > 3: name = addressData[0] mobile = addressData[1] if mobile[0:1] == '\n': print('第 ' + str(ed_i + 1) + ' 行报错', addressData) os._exit(0) if '\n' in mobile: mobileData = mobile.split('\n') try: mobile = mobileData[0] mobile_find = mobile.find(':') if mobile_find > -1: #mobileData[0] = mobileData[0][0:mobile_find] name = mobile.split(':')[1] # tmp_mobile = int(mobileData[0]) except: print('第 ' + str(ed_i + 1) + ' 行手机号错误', mobileData[0]) mobile = '\t' + mobileData[0] address = ''.join(mobileData[1:] + addressData[2:]).replace(' ', '') else: mobile = str("\t" + addressData[1]) address = ''.join(addressData[2:]).replace(' ', '') has_ed = True elif len(addressData) == 1: province = ['上海市', '云南省', '内蒙古自治区', '北京市', '吉林省', '四川省', '天津市', '宁夏回族自治区', '安徽省', '山东省', '山西省', '广东省','广西壮族自治区', '广西省', '新疆维吾尔自治区', '江苏省', '江西省', '河北省', '河南省', '浙江省', '海南省', '湖北省', '湖南省', '甘肃省', '福建省', '西藏自治区', '贵州省', '辽宁省', '重庆市', '陕西省', '青海省', '黑龙江省', '上海', '云南', '内蒙古', '北京', '吉林', '四川', '天津', '宁夏', '安徽', '山东', '山西', '广东', '广西', '新疆', '江苏', '江西', '河北', '河南', '浙江', '海南', '湖北', '湖南', '甘肃', '福建', '西藏', '贵州', '辽宁', '重庆', '陕西', '青海', '黑龙江'] address = tmp_address for p in province: if p in address: addressData = address.split(p) if len(addressData) > 1: address = p + p.join(addressData[1:]) name = tmp_address.replace(address, '') nameData = name.split('1') if len(nameData) > 1: name = nameData[0] mobile = '\t1' + '1'.join(nameData[1:]) mobile = split_mobile(mobile) has_ed = True namefind = mobile.find(':') if namefind > -1: namefind = mobile.split(":") name = namefind[1] mobile = namefind[0] break else: print('没有找到名字'+str(nameData)) else: print('没有找到地址'+str(addressData)) if has_ed == True: ws.append([ed[tracking_i], name, mobile, address, ed_i]) excel_i = excel_i + 1 ed_i = ed_i + 1 if excel_i > 0: current_path = current_dir + '\\' + datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S') + '.xlsx' wb.save(current_path) wb.close() print('file export finished: ' + current_path) else: wb.close() print('no file to export') # print(os.path.abspath(os.path.dirname(__file__))) # print(os.getcwd())