Mar-26-2023, 07:41 AM
Hello. I have a lots of ANSI docx files. I made this Python code, and I got an error of codec:
This is the error:
Can anyone update my code, so that it can read ANSI and save as UTF-8 into PDF?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import re import os from pathlib import Path from docx import Document from docx.shared import Inches import sys from docx2pdf import convert # The location where the files are located input_path = r 'c:\Folder7\input' # The location where we will write the PDF files output_path = r 'c:\Folder7\output' # Creeaza structura de foldere daca nu exista os.makedirs(output_path, exist_ok = True ) # Verifica existenta folder-ului directory_path = Path(input_path) if directory_path.exists() and directory_path.is_dir(): print (directory_path, "exists" ) else : print (directory_path, "is invalid" ) sys.exit( 1 ) for file_path in directory_path.glob( "*" ): # file_path is a Path object print ( "Procesez fisierul:" , file_path) document = Document() # file_path.name is the name of the file as str without the Path document.add_heading(file_path.name, 0 ) file_content = file_path.read_text(encoding = 'UTF-8' ) document.add_paragraph(file_content) # build the new path where we store the files output_file_path = os.path.join(output_path, file_path.name + ".pdf" ) document.save(output_file_path) print ( "Am convertit urmatorul fisier:" , file_path, "in: " , output_file_path) |
1 2 3 4 5 6 7 8 |
Traceback (most recent call last): File "D:\Convert docx to pdf.py" , line 32 , in <module> file_content = file_path.read_text(encoding = 'UTF-8' ) File "C:\Program Files\Python39\lib\pathlib.py" , line 1133 , in read_text return f.read() File "C:\Program Files\Python39\lib\codecs.py" , line 322 , in decode (result, consumed) = self ._buffer_decode(data, self .errors, final) UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd2 in position 16 : invalid continuation byte |