|
import os
from datetime import datetime

# Header row of the output sheet, in the same order as the tuple returned
# by parse_invoice_text.
INVOICE_HEADERS = ["开票号码", "开票日期", "买方信息", "卖方信息", "价税合计"]
# Column letter -> column width for the output sheet.
INVOICE_COLUMN_WIDTHS = {"A": 40, "B": 20, "C": 30, "D": 40, "E": 20}


def invoice_field(text, marker):
    """Return the first whitespace-delimited token that follows *marker*.

    Args:
        text: Text extracted from an invoice page.
        marker: Literal label that precedes the wanted value.

    Returns:
        The token immediately after the first occurrence of *marker*.

    Raises:
        ValueError: If *marker* does not occur in *text* or nothing
            follows it.
    """
    _, found, tail = text.partition(marker)
    tokens = tail.split()
    if not found or not tokens:
        raise ValueError(f"marker {marker!r} not found or has no value after it")
    return tokens[0]


def parse_invoice_text(text):
    """Extract the invoice fields from the text of an invoice page.

    Args:
        text: Text extracted from the first page of an invoice PDF.

    Returns:
        A tuple ``(number, issue_date, buyer, seller, total)`` where
        ``issue_date`` is a ``datetime`` and ``total`` is a ``float``.

    Raises:
        ValueError: If a label is missing, the date does not match
            ``%Y年%m月%d日``, or the amount is not a number.
    """
    number = invoice_field(text, '发票号码:')
    issue_date = datetime.strptime(invoice_field(text, '开票日期:'), "%Y年%m月%d日")
    buyer = invoice_field(text, '购 名称:')
    seller = invoice_field(text, '销 名称:')
    # Drop thousands separators so that "1,234.56" converts cleanly.
    total = float(invoice_field(text, '(小写)¥').replace(',', ''))
    return number, issue_date, buyer, seller, total


def export_invoices(folder):
    """Collect invoice data from every PDF in *folder* into an .xlsx file.

    The first page of each ``.pdf`` file is parsed and one row per invoice
    is written. PDFs that cannot be parsed are reported on stdout and
    skipped, so a single bad file no longer discards the rows already
    collected. The workbook is saved inside *folder* as
    ``发票-<HHMMSS>.xlsx``.

    Args:
        folder: Directory that contains the invoice PDFs.
    """
    # Imported here rather than at module level so the pure parsing helpers
    # above can be imported without the PDF/Excel packages installed.
    import pdfplumber
    from openpyxl import Workbook

    workbook = Workbook()  # workbook that stores the extracted invoice data
    sheet = workbook.active
    for column_letter, width in INVOICE_COLUMN_WIDTHS.items():
        sheet.column_dimensions[column_letter].width = width
    # Write the header row before any data rows.
    for column, header in enumerate(INVOICE_HEADERS, start=1):
        sheet.cell(1, column, header)

    # Time of day (HHMMSS) used as the output file-name suffix.
    suffix = datetime.now().strftime('%H%M%S')

    names = os.listdir(folder)
    print(names)
    next_row = 2
    for name in names:
        if not name.lower().endswith('.pdf'):
            continue
        with pdfplumber.open(os.path.join(folder, name)) as pdf:
            pages = pdf.pages
            # extract_text() may yield nothing for image-only pages.
            text = (pages[0].extract_text() or '') if pages else ''
        try:
            record = parse_invoice_text(text)
        except ValueError as exc:
            print(f'skipped {name}: {exc}')
            continue
        for column, value in enumerate(record, start=1):
            sheet.cell(next_row, column, value)
        next_row += 1

    workbook.save(os.path.join(folder, f'发票-{suffix}.xlsx'))


if __name__ == "__main__":
    export_invoices(input('file_path:'))
import os
from datetime import datetime

# Header row of the output sheet, in the same order as the tuple returned
# by parse_contract_text.
CONTRACT_HEADERS = ["甲方", "乙方", "金额"]
# Column letter -> column width for the output sheet.
CONTRACT_COLUMN_WIDTHS = {"A": 30, "B": 30, "C": 20, "D": 40, "E": 20}
# Zero-based index of the page the fields are read from (the second page).
CONTRACT_PAGE_INDEX = 1


def contract_field(text, marker):
    """Return the first whitespace-delimited token that follows *marker*.

    Args:
        text: Text extracted from a contract page.
        marker: Literal label that precedes the wanted value.

    Returns:
        The token immediately after the first occurrence of *marker*.

    Raises:
        ValueError: If *marker* does not occur in *text* or nothing
            follows it.
    """
    _, found, tail = text.partition(marker)
    tokens = tail.split()
    if not found or not tokens:
        raise ValueError(f"marker {marker!r} not found or has no value after it")
    return tokens[0]


def parse_contract_text(text):
    """Extract both parties and the total amount from contract page text.

    Args:
        text: Text extracted from the contract page being read.

    Returns:
        A tuple ``(party_a, party_b, total)`` where ``total`` is a ``float``.

    Raises:
        ValueError: If a label is missing or the amount is not a number.
    """
    party_a = contract_field(text, '下简称甲方):')
    party_b = contract_field(text, '简称乙方):')
    # Drop thousands separators so that "12,000.50" converts cleanly.
    total = float(contract_field(text, '合计 ').replace(',', ''))
    return party_a, party_b, total


def export_contracts(folder):
    """Collect contract data from every PDF in *folder* into an .xlsx file.

    The page at ``CONTRACT_PAGE_INDEX`` of each ``.pdf`` file is parsed and
    one row per contract is written. PDFs that are too short or cannot be
    parsed are reported on stdout and skipped, so a single bad file no
    longer discards the rows already collected. The workbook is saved
    inside *folder* as ``合同-<HHMMSS>.xlsx``.

    Args:
        folder: Directory that contains the contract PDFs.
    """
    # Imported here rather than at module level so the pure parsing helpers
    # above can be imported without the PDF/Excel packages installed.
    import pdfplumber
    from openpyxl import Workbook

    workbook = Workbook()  # workbook that stores the extracted data
    sheet = workbook.active
    for column_letter, width in CONTRACT_COLUMN_WIDTHS.items():
        sheet.column_dimensions[column_letter].width = width
    # Write the header row before any data rows.
    for column, header in enumerate(CONTRACT_HEADERS, start=1):
        sheet.cell(1, column, header)

    # Time of day (HHMMSS) used as the output file-name suffix.
    suffix = datetime.now().strftime('%H%M%S')

    next_row = 2
    for name in os.listdir(folder):
        if not name.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(folder, name)
        print(pdf_path)
        with pdfplumber.open(pdf_path) as pdf:
            pages = pdf.pages
            if len(pages) <= CONTRACT_PAGE_INDEX:
                print(f'skipped {name}: no page {CONTRACT_PAGE_INDEX + 1}')
                continue
            # extract_text() may yield nothing for image-only pages.
            text = pages[CONTRACT_PAGE_INDEX].extract_text() or ''
        print(text)
        print('---' * 20)
        try:
            record = parse_contract_text(text)
        except ValueError as exc:
            print(f'skipped {name}: {exc}')
            continue
        for value in record:
            print(value)
        for column, value in enumerate(record, start=1):
            sheet.cell(next_row, column, value)
        next_row += 1

    workbook.save(os.path.join(folder, f'合同-{suffix}.xlsx'))


if __name__ == "__main__":
    export_contracts(input('file_path:'))
复制代码
|
|