分割xlsx文件教程
- # -*- coding: utf-8 -*-
- # Split the phone numbers found in an Excel sheet into CSV files, starting a new file every 100 source rows
- import xlrd
- import xlsxwriter
- import code
- import glob
- import csv
- import time
- import redis
- import re
- import pymysql
- # 打开一个excel文件
- #db =pymysql.connect(host='',port=3306,user='root',passwd='123456',db='微店店主',charset='utf8')
- #r_conn = redis.Redis('','6666')
def open_xls(file):
    """Open a workbook with xlrd and return it.

    Args:
        file: Path of the workbook; passed unchanged to
            ``xlrd.open_workbook``.

    Returns:
        The workbook object returned by ``xlrd.open_workbook``.
    """
    return xlrd.open_workbook(file)
-
- # 获取excel中所有的sheet表
def getsheet(fh):
    """Return all worksheets of an opened workbook.

    Args:
        fh: Workbook object that provides a ``sheets()`` method.

    Returns:
        The value of ``fh.sheets()``, unchanged.
    """
    return fh.sheets()
-
- # 获取sheet表的行数
def getnrows(fh, sheet):
    """Return the row count of one worksheet.

    Args:
        fh: Workbook object that provides a ``sheets()`` method.
        sheet: Index into ``fh.sheets()`` selecting the worksheet.

    Returns:
        The ``nrows`` attribute of the selected worksheet.

    Raises:
        IndexError: If ``sheet`` is out of range for ``fh.sheets()``.
    """
    return fh.sheets()[sheet].nrows
-
- # 读取文件内容并返回行内容
# An 11-digit run starting with "1" -- the phone-number shape this script
# extracts. Compiled once instead of on every row.
_PHONE_PATTERN = re.compile(r'1\d{10}')

# A new output file is started each time the running row count reaches a
# multiple of this value.
_ROWS_PER_FILE = 100


def getFilect(file, shnum, filename, gg):
    """Split the phone numbers of one worksheet into CSV files.

    Every row of the sheet increments the running counter ``gg``. Each time
    the counter reaches a multiple of 100, output switches to the file
    ``<gg // 100>.csv`` and a header row is written before the first data
    row of that file. Rows seen before the first boundary go to
    ``<filename>.csv`` and get no header. Files are opened in append mode,
    so re-running adds to existing files. No ``encoding`` is passed to
    ``open``, so the CSV encoding is the platform default.

    A row is written only if it has at least two cells and its second cell,
    converted to ``str``, contains a "1" followed by ten digits; the first
    such match is the value written.

    Args:
        file: Path of the workbook to read.
        shnum: Index of the worksheet inside the workbook.
        filename: Base name (without ``.csv``) of the file used until the
            first 100-row boundary is reached.
        gg: Starting value of the running row counter. The updated count is
            not returned to the caller.

    Returns:
        None.

    Side effects:
        Appends every row with at least two cells to the module-level list
        ``datavalue``, which must already exist. Prints the counter and the
        raw second-cell text for each row, plus any write error.
    """
    table = open_xls(file).sheets()[shnum]
    header_pending = False

    for row in range(table.nrows):
        gg += 1
        print(gg)

        # Decide on rotation from the counter itself, before any filtering,
        # so a boundary that falls on an unusable row is not skipped.
        on_boundary = gg % _ROWS_PER_FILE == 0
        if on_boundary:
            filename = str(gg // _ROWS_PER_FILE)
            header_pending = True

        rdata = table.row_values(row)
        if len(rdata) < 2:
            continue
        datavalue.append(rdata)

        # Cells may arrive as floats (e.g. 13800000000.0); the pattern pulls
        # the digits out of the stringified value.
        phone_text = str(rdata[1])
        print(phone_text)
        found = _PHONE_PATTERN.search(phone_text)
        if found is None:
            continue

        if not on_boundary:
            # Debug trace kept from the original implementation.
            print('it not the int')

        rows = [[found.group(0), '股票投资']]
        if header_pending:
            rows.insert(0, ['手机号码', '标签'])

        # Writing stays best-effort: report the failure and keep going.
        try:
            with open(str(filename) + '.csv', 'a', newline='') as out:
                csv.writer(out).writerows(rows)
        except (OSError, UnicodeError, csv.Error) as err:
            print(err)
        else:
            header_pending = False

    print('\n数据条信息解析完成:')
-
- # 获取sheet表的个数
def getshnum(fh):
    """Return the number of worksheets in an opened workbook.

    Args:
        fh: Workbook object whose ``sheets()`` method returns a sized
            collection (xlrd returns a list).

    Returns:
        The number of worksheets as an int.
    """
    return len(fh.sheets())
-
if __name__ == '__main__':
    # Base name of the CSV file that receives rows read before the first
    # 100-row boundary.
    filename = '微店号'

    # Must be a module-level name: getFilect() appends each usable row to it.
    datavalue = []

    # NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; opening this
    # .xlsx input needs xlrd < 2.0 -- confirm the installed version.
    allxls = glob.glob('100w查询.xlsx')
    print('\n总共发现%s个信息数据xlsx文件' % len(allxls))

    for fl in allxls:
        # Only the first worksheet of each workbook is processed, and the
        # row counter restarts for every workbook. The workbook is opened
        # inside getFilect(), so it is not parsed a second time here.
        shnum = 0
        gg = 0
        print("正在读取文件:" + str(fl) + "表的内容...")
        getFilect(fl, shnum, filename, gg)
复制代码
|