db_query / doctodocx.py
DavMelchi's picture
Add utility scripts for DOC to DOCX conversion and CSV file compression with win32com-based Word automation and batch ZIP archiving functionality
6f9c387
import os
import time
import win32com.client as win32
from win32com.client import constants
# Troubleshooting: if the error "module 'win32com.gen_py.00020905-0000-0000-C000-000000000046x0x8x7'
# has no attribute 'MinorVersion'" occurs, delete the contents of
# C:\Users\David\AppData\Local\Temp\gen_py and run the script again.
def convert_doc_to_docx(doc_path, docx_path=None):
    """
    Convert a .doc file to .docx format by automating Microsoft Word.

    Args:
        doc_path (str): Path to the input .doc file.
        docx_path (str, optional): Path to save the output .docx file.
            If None, saves to the same directory with a .docx extension.

    Returns:
        str | None: Absolute path to the converted file, or None if the
        conversion failed (the error is printed rather than raised).
    """
    # If output path not specified, create one with .docx extension
    if docx_path is None:
        docx_path = os.path.splitext(doc_path)[0] + ".docx"

    # Word is driven through COM, so hand it absolute paths rather than
    # paths relative to this script's working directory.
    doc_path = os.path.abspath(doc_path)
    docx_path = os.path.abspath(docx_path)

    word = None
    doc = None
    try:
        # Create Word application object
        word = win32.gencache.EnsureDispatch("Word.Application")
        word.Visible = False  # Run Word in background

        # Open the DOC file
        doc = word.Documents.Open(doc_path)

        # wdFormatXMLDocument is Word's Open XML (.docx) save format.
        doc.SaveAs(docx_path, FileFormat=constants.wdFormatXMLDocument)

        print(f"Successfully converted: {doc_path} -> {docx_path}")
        return docx_path
    except Exception as e:
        # Best-effort batch helper: report the failure and let the caller
        # carry on with the next file.
        print(f"Error converting {doc_path}: {str(e)}")
        return None
    finally:
        # Always release the document and the Word process, even when Open
        # or SaveAs failed; otherwise a hidden Word instance is left running
        # and keeps the source file locked.
        if doc is not None:
            try:
                doc.Close(False)  # False: do not save changes to the source
            except Exception as e:
                print(f"Warning: could not close {doc_path}: {str(e)}")
        if word is not None:
            try:
                word.Quit()
            except Exception as e:
                print(f"Warning: could not quit Word: {str(e)}")
def convert_folder(input_folder, output_folder=None):
    """
    Convert all DOC files in a folder to DOCX format.

    Only regular files directly inside ``input_folder`` whose name ends in
    ``.doc`` (case-insensitive) are converted. Word owner/lock files
    (names starting with ``~$``) are skipped because they are not documents.
    Progress and a final summary are printed.

    Args:
        input_folder (str): Path to folder containing .doc files.
        output_folder (str, optional): Folder to save converted files.
            Created if it does not exist. If None, each converted file is
            saved next to its source.

    Returns:
        None
    """
    # Validate input folder
    if not os.path.isdir(input_folder):
        print(f"Error: Input folder does not exist: {input_folder}")
        return

    # Create output folder if specified; exist_ok guards against the folder
    # appearing between the existence check and the creation call.
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder, exist_ok=True)

    # Get all DOC files in input folder, ignoring "~$name.doc" lock files
    # that Word leaves beside a document while it is open.
    doc_files = [
        f
        for f in os.listdir(input_folder)
        if f.lower().endswith(".doc")
        and not f.startswith("~$")
        and os.path.isfile(os.path.join(input_folder, f))
    ]

    if not doc_files:
        print("No DOC files found in the input folder.")
        return

    print(f"Found {len(doc_files)} DOC files to convert.")

    # Convert each file
    success_count = 0
    for doc_file in doc_files:
        input_path = os.path.join(input_folder, doc_file)

        if output_folder:
            output_path = os.path.join(
                output_folder, os.path.splitext(doc_file)[0] + ".docx"
            )
        else:
            output_path = None  # Let convert_doc_to_docx handle it

        if convert_doc_to_docx(input_path, output_path):
            success_count += 1

        # Pause one second between files before starting the next conversion.
        time.sleep(1)

    print(
        f"\nConversion complete. Successfully converted {success_count} of {len(doc_files)} files."
    )
# Example usage: edit the two paths below, then run this file directly.
if __name__ == "__main__":
    # Folder holding the .doc files - change this to your folder path.
    input_folder = r"C:\Users\David\Downloads\TR_ FN4B"
    # Destination for the .docx files. Optional - set to None to save each
    # converted file in the same folder as its source.
    output_folder = r"C:\Users\David\Downloads\TR_ FN4B\Nouveau dossier"
    convert_folder(input_folder=input_folder, output_folder=output_folder)