# Word-to-TXT conversion script (written by DeepSeek)
import glob
import os

import pythoncom
import win32com.client  # only used for .doc files (requires Windows)
from docx import Document
def docx_to_txt(docx_path, txt_path):
    """Convert a .docx file into a UTF-8 plain-text file.

    The text of each paragraph exposed by ``doc.paragraphs`` is written to
    ``txt_path``, separated by newlines.

    Args:
        docx_path: Path of the .docx document to read.
        txt_path: Path of the .txt file to create or overwrite.

    Returns:
        True if the text file was written, False if any step failed (the
        error is printed instead of being raised).
    """
    try:
        doc = Document(docx_path)
        # str.join() requires an iterable of strings; calling it with no
        # argument raises TypeError, which made every conversion fail.
        # NOTE(review): doc.paragraphs presumably excludes text inside
        # tables -- confirm against python-docx if tables matter.
        text = '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        with open(txt_path, 'w', encoding='utf-8') as f:
            f.write(text)
        return True
    except Exception as e:
        # Best-effort batch conversion: report the failure and let the
        # caller continue with the next file.
        print(f"处理 {docx_path} 时出错: {str(e)}")
        return False
def doc_to_txt(doc_path, txt_path):
    """Convert a legacy .doc file into a .txt file through Word automation.

    Requires pywin32 and is only expected to work on Windows, where the
    "Word.Application" COM server is available.

    Args:
        doc_path: Path of the .doc document to open.
        txt_path: Path of the .txt file Word should write.

    Returns:
        True if Word saved the text file and all cleanup succeeded, False
        otherwise (the error is printed instead of being raised).
    """
    try:
        pythoncom.CoInitialize()  # initialise COM for this thread
        try:
            word = win32com.client.Dispatch("Word.Application")
            try:
                # Word does not resolve relative paths against Python's
                # working directory, so always hand it absolute paths.
                doc = word.Documents.Open(os.path.abspath(doc_path))
                try:
                    # FileFormat=2 means "save as plain text".
                    doc.SaveAs(os.path.abspath(txt_path), FileFormat=2)
                finally:
                    # Close without saving changes back to the source, even
                    # when SaveAs failed, so the document is not left open.
                    doc.Close(False)
            finally:
                # Always quit Word; otherwise a failed conversion leaves a
                # background Word process running.
                word.Quit()
        finally:
            pythoncom.CoUninitialize()
        return True
    except Exception as e:
        # Best-effort batch conversion: report the failure and let the
        # caller continue with the next file.
        print(f"处理 {doc_path} 时出错: {str(e)}")
        return False
def _iter_conversion_targets(input_folder, output_folder, extension):
    """Yield (source_path, txt_path) pairs for files ending in ``extension``."""
    for source_path in glob.glob(os.path.join(input_folder, f"*{extension}")):
        file_name = os.path.basename(source_path)
        # Word creates "~$name.docx" lock files while a document is open;
        # they are not real documents and would only produce errors.
        if file_name.startswith("~$"):
            continue
        stem = os.path.splitext(file_name)[0]
        yield source_path, os.path.join(output_folder, f"{stem}.txt")


def convert_folder(input_folder, output_folder):
    """Convert every Word document directly inside ``input_folder`` to .txt.

    .docx files are always processed; .doc files are processed only when
    running on Windows (``os.name == 'nt'``). Sub-folders are not searched.
    Each successful conversion is reported on standard output.

    Args:
        input_folder: Folder containing the Word documents.
        output_folder: Folder receiving the .txt files; created if missing.
    """
    # Make sure the output directory exists.
    os.makedirs(output_folder, exist_ok=True)

    # Handle .docx files.
    for docx_file, txt_file in _iter_conversion_targets(
            input_folder, output_folder, ".docx"):
        if docx_to_txt(docx_file, txt_file):
            print(f"转换成功: {docx_file} -> {txt_file}")

    # Handle .doc files (only effective on Windows).
    if os.name == 'nt':
        for doc_file, txt_file in _iter_conversion_targets(
                input_folder, output_folder, ".doc"):
            if doc_to_txt(doc_file, txt_file):
                print(f"转换成功: {doc_file} -> {txt_file}")
if __name__ == "__main__":
    # Input folder holding the Word documents.
    input_dir = r"C:\Users\Administrator\Documents\001"
    # Output folder receiving the TXT files.
    output_dir = "output_txt"

    convert_folder(input_dir, output_dir)

    # When .doc files are present but we are not on Windows, they were not
    # converted, so print a hint explaining what is required.
    legacy_docs = glob.glob(os.path.join(input_dir, "*.doc"))
    if legacy_docs and os.name != 'nt':
        hint_lines = (
            "\n发现.doc文件,请注意:",
            "1. .doc转换需要Windows操作系统并安装Microsoft Word",
            "2. 需要安装pywin32库:pip install pywin32",
            "3. 非Windows用户建议手动将.doc文件另存为.docx格式",
        )
        for hint in hint_lines:
            print(hint)
# Requires two libraries: python-docx and pywin32
页:
[1]