# 发票合同20. (post title: invoice and contract extraction scripts)
import os
from datetime import datetime

import pdfplumber
from openpyxl import Workbook
# Invoice script: read every PDF invoice in a folder and collect the key
# fields (number, date, buyer, seller, total) into one Excel workbook.

# Column headers of the output sheet, in the order the fields are written.
INVOICE_HEADERS = ["开票号码", "开票日期", "买方信息", "卖方信息", "价税合计"]
# Width of each output column, keyed by column letter.
INVOICE_COLUMN_WIDTHS = {'A': 40, 'B': 20, 'C': 30, 'D': 40, 'E': 20}


def _invoice_token(text, marker):
    """Return the first whitespace-delimited token that follows *marker*.

    Args:
        text: Text extracted from an invoice page.
        marker: Label that immediately precedes the wanted value.

    Raises:
        ValueError: If *marker* does not occur in *text* or nothing
            follows it.
    """
    pieces = text.split(marker)
    if len(pieces) < 2:
        raise ValueError(f'label {marker!r} not found')
    tokens = pieces[1].split()
    if not tokens:
        raise ValueError(f'no value after label {marker!r}')
    return tokens[0]


def parse_invoice(text):
    """Extract one output row from the text of an invoice page.

    Args:
        text: Text of the invoice page.

    Returns:
        A tuple ``(number, issue_date, buyer, seller, total)`` where
        ``issue_date`` is a ``datetime`` and ``total`` is a ``float``.

    Raises:
        ValueError: If a label is missing, or the date or amount cannot
            be parsed.
    """
    number = _invoice_token(text, '发票号码:')
    issue_date = datetime.strptime(
        _invoice_token(text, '开票日期:'), "%Y年%m月%d日")
    buyer = _invoice_token(text, '购 名称:')
    seller = _invoice_token(text, '销 名称:')
    # Drop thousands separators so amounts such as "1,234.50" still convert.
    total = float(_invoice_token(text, '(小写)¥').replace(',', ''))
    return number, issue_date, buyer, seller, total


def export_invoices(path):
    """Write the fields of every PDF invoice found in *path* to a workbook.

    Only the first page of each PDF is read. A file whose text cannot be
    parsed is reported on stdout and skipped, so one bad PDF does not abort
    the whole batch.

    Args:
        path: Folder that contains the invoice PDFs; the workbook is saved
            into the same folder.

    Returns:
        The path of the saved ``.xlsx`` file.
    """
    workbook = Workbook()
    sheet = workbook.active
    for column, width in INVOICE_COLUMN_WIDTHS.items():
        sheet.column_dimensions[column].width = width
    sheet.append(INVOICE_HEADERS)  # header row, one title per cell

    # Timestamp suffix for the output name; ':' is not allowed in Windows
    # file names, so it is removed.
    file_houzhui = str(datetime.now()).replace(':', '')

    files = os.listdir(path)
    print(files)
    for file in files:
        if not file.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(path, file)
        with pdfplumber.open(pdf_path) as pdf:
            if not pdf.pages:
                print(f'skipped {file}: PDF has no pages')
                continue
            # extract_text() may yield None for a page without a text layer.
            text = pdf.pages[0].extract_text() or ''
        try:
            row = parse_invoice(text)
        except ValueError as exc:
            print(f'skipped {file}: {exc}')
            continue
        sheet.append(row)

    output_path = os.path.join(path, f'发票-{file_houzhui}.xlsx')
    workbook.save(output_path)
    return output_path


if __name__ == '__main__':
    export_invoices(input('file_path:'))
import pdfplumber,os
from openpyxl import Workbook
from datetime import datetime
# Contract script: read every PDF contract in a folder and collect both
# parties and the total amount into one Excel workbook.

# Column headers of the output sheet, in the order the fields are written.
CONTRACT_HEADERS = ["甲方", "乙方", "金额"]
# Width of each output column, keyed by column letter.
CONTRACT_COLUMN_WIDTHS = {'A': 30, 'B': 30, 'C': 20, 'D': 40, 'E': 20}


def _contract_token(text, marker):
    """Return the first whitespace-delimited token that follows *marker*.

    Args:
        text: Text extracted from a contract page.
        marker: Label that immediately precedes the wanted value.

    Raises:
        ValueError: If *marker* does not occur in *text* or nothing
            follows it.
    """
    pieces = text.split(marker)
    if len(pieces) < 2:
        raise ValueError(f'label {marker!r} not found')
    tokens = pieces[1].split()
    if not tokens:
        raise ValueError(f'no value after label {marker!r}')
    return tokens[0]


def parse_contract(text):
    """Extract one output row from the text of a contract page.

    Args:
        text: Text of the contract page.

    Returns:
        A tuple ``(party_a, party_b, total)`` where ``total`` is a
        ``float``.

    Raises:
        ValueError: If a label is missing or the amount cannot be parsed.
    """
    party_a = _contract_token(text, '下简称甲方):')
    party_b = _contract_token(text, '简称乙方):')
    # Drop thousands separators so amounts such as "12,345.60" still convert.
    total = float(_contract_token(text, '合计 ').replace(',', ''))
    return party_a, party_b, total


def export_contracts(path):
    """Write the fields of every PDF contract found in *path* to a workbook.

    Only the first page of each PDF is read. The extracted text and the
    parsed fields are echoed to stdout. A file whose text cannot be parsed
    is reported and skipped, so one bad PDF does not abort the whole batch.

    Args:
        path: Folder that contains the contract PDFs; the workbook is saved
            into the same folder.

    Returns:
        The path of the saved ``.xlsx`` file.
    """
    workbook = Workbook()
    sheet = workbook.active
    for column, width in CONTRACT_COLUMN_WIDTHS.items():
        sheet.column_dimensions[column].width = width
    sheet.append(CONTRACT_HEADERS)  # header row, one title per cell

    # Timestamp suffix for the output name; ':' is not allowed in Windows
    # file names, so it is removed.
    file_houzhui = str(datetime.now()).replace(':', '')

    for file in os.listdir(path):
        if not file.lower().endswith('.pdf'):
            continue
        pdf_path = os.path.join(path, file)
        print(pdf_path)
        with pdfplumber.open(pdf_path) as pdf:
            if not pdf.pages:
                print(f'skipped {file}: PDF has no pages')
                continue
            # extract_text() may yield None for a page without a text layer.
            text = pdf.pages[0].extract_text() or ''
        print(text)
        print('---' * 20)
        try:
            row = parse_contract(text)
        except ValueError as exc:
            print(f'skipped {file}: {exc}')
            continue
        for value in row:
            print(value)
        sheet.append(row)

    output_path = os.path.join(path, f'合同-{file_houzhui}.xlsx')
    workbook.save(output_path)
    return output_path


if __name__ == '__main__':
    export_contracts(input('file_path:'))
# 页: [1]