Python Forum

Full Version: Word documents merging
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
Hi,

I have a task: I have an XLSX document with data such as name, surname, class, school ID, etc. I also have a template for the Certificate of Appreciation. My goal is to read the data from the XLSX file, generate new documents using this data and the template, and merge all new documents into one.

Here is my current code. (Sorry, this is only my second Python script, so it might be messy.)

import os
import sys
import time
from pathlib import Path

import openpyxl
from docx import Document
from docx import Document as Document_compose
from docx.shared import Cm
from docxcompose.composer import Composer
from docxtpl import DocxTemplate

#############################################################################
# Input/output locations: the template and school list are read from ./tpl/,
# per-row documents are looked up in source_folder, and the merged document
# is written to destination_folder/final_doc_name.

docx_tpl = './tpl/tpl3.docx'
gbou_name = './tpl/gbou.txt'
source_folder = './result/'
destination_folder = './result/сводные/'
final_doc_name = 'сводный.docx'

# Margins in the docx template, in centimetres (they are wrapped in Cm()
# when applied to the master document's sections).
top, bottom = 0.75, 0.5
left = right = 1.27

###########################################################################

# Workbook to read: first command-line argument if present, else the default.
xls_name = sys.argv[1] if len(sys.argv) > 1 else "data.xlsx"

def chk_dir():
    """Ensure the directory for the merged document exists.

    Creates ``destination_folder`` together with any missing parent
    directories; does nothing if the directory is already there.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window between checking and creating in which the call could fail.
    os.makedirs(destination_folder, exist_ok=True)

def _school_token(school_name):
    """Return the first token of *school_name* that is all digits or 'Морская', else ''."""
    if not school_name:
        return ''
    for token in str(school_name).split():
        if token.isdigit() or token == 'Морская':
            return token
    return ''


def _find_gbou(school_token, schools):
    """Return field 1 of the first *schools* entry whose field 0 contains the token, else ''."""
    if not school_token:
        # '' is a substring of every string and would wrongly select the
        # first school in the list, so an unknown school stays empty.
        return ''
    for school in schools:
        # Entries with fewer than two fields (a line without ';') have no
        # name to return and are skipped instead of raising IndexError.
        if len(school) > 1 and school_token in school[0]:
            return school[1]
    return ''


def xls2doc():
    """Render one .docx per data row of the workbook from the template.

    Reads the active sheet of ``xls_name``. Row 1 is the header row; the
    columns are located by their header text, so column order does not
    matter. The 'Отчество' (middle name) column is optional; the other
    columns referenced below must be present, otherwise ``KeyError`` is
    raised. Each row is rendered through ``docx_tpl`` and saved into
    ``source_folder`` as ``<full name>_<discipline>.docx``.

    ``gbou_name`` is read as UTF-8 text with one ``key;name`` entry per
    line; the school name from the sheet is reduced to a number (or the
    word 'Морская') and matched as a substring against the entry keys.
    """
    workbook = openpyxl.load_workbook(xls_name)
    worksheet = workbook.active

    with open(gbou_name, encoding='utf-8') as f:
        # Blank lines carry no entry and are dropped up front.
        schools = [line.strip().split(';') for line in f if line.strip()]

    header_row = 1
    headers = {}
    for col in range(1, worksheet.max_column + 1):
        title = worksheet.cell(row=header_row, column=col).value
        if title:
            headers[title] = col

    def value(row, title):
        """Return the cell value in *row* under the column headed *title*."""
        return worksheet.cell(row=row, column=headers[title]).value

    # Optional column: None when the sheet has no middle-name header.
    middle_col = headers.get('Отчество')

    # Save where main() later looks for the files. The previous hard-coded
    # './Result/' differs from source_folder in case, which is a different
    # directory on case-sensitive file systems.
    os.makedirs(source_folder, exist_ok=True)

    for row in range(header_row + 1, worksheet.max_row + 1):
        last_name = value(row, 'Фамилия')
        first_name = value(row, 'Имя')
        if not last_name and not first_name:
            # A row without any name would otherwise be rendered as a
            # "None None" certificate.
            continue

        middle_name = None
        if middle_col:
            middle_name = worksheet.cell(row=row, column=middle_col).value

        full_name = f'{last_name} {first_name}'
        if middle_name:
            full_name += f' {middle_name}'

        # Only the first letter of the sex column is inspected, case-insensitively.
        sex_value = value(row, 'Пол')
        initial = sex_value[0].lower() if isinstance(sex_value, str) and sex_value else ''
        if initial == 'ж':
            sex = 'учащаяся'
        elif initial == 'м':
            sex = 'учащийся'
        else:
            sex = 'неизвестно'

        school_name = value(row, 'Полное название общеобразовательного учреждения')
        gbou = _find_gbou(_school_token(school_name), schools)

        discipline = value(row, 'Предмет')
        class_num = value(row, 'Класс обучения')
        status = value(row, 'Статус участника')
        teacher = value(row, 'Фамилия, Имя, Отчество учителя')

        # A fresh template object is loaded for every row, as before.
        tpl = DocxTemplate(docx_tpl)
        context = {
            'full_name': full_name,
            'sex': sex,
            'class_num': class_num,
            'gbou': gbou,
            'status': status,
            'discipline': discipline,
            'teacher': teacher,
        }
        tpl.render(context)

        tpl.save(os.path.join(source_folder, f'{full_name}_{discipline}.docx'))

def create_master_docx(path: Path):
    """Create the empty master document that other documents are merged into.

    A new blank document is given the configured margins (``top``,
    ``bottom``, ``left``, ``right``, in centimetres) and a 21.0 x 29.7 cm
    page, then saved as ``destination_folder/final_doc_name``.

    Args:
        path: Not used by this function; kept so existing calls keep
            working. The output location is always taken from
            ``destination_folder`` and ``final_doc_name``.
    """
    doc = Document()

    for section in doc.sections:
        section.top_margin = Cm(top)
        section.bottom_margin = Cm(bottom)
        section.left_margin = Cm(left)
        section.right_margin = Cm(right)

        section.page_height = Cm(29.7)
        section.page_width = Cm(21.0)

    # Save once, after every section is configured, rather than from inside
    # the loop.
    doc.save(os.path.join(destination_folder, final_doc_name))

def merge_docx(path_master: str, files: list):
    """Append every document in *files* to the master document and save it.

    Args:
        path_master: Path of an existing .docx file. It is opened, extended
            with the content of each file, and overwritten in place.
        files: Paths of the .docx files to append, in the given order.
    """
    composer = Composer(Document_compose(path_master))
    for file_path in files:
        composer.append(Document_compose(file_path))
    # Write the result once after all appends; saving inside the loop
    # rewrote the whole, growing file on every iteration.
    composer.save(path_master)


def main():
    """Generate the per-row documents, then merge them into a single file.

    Ensures the output directory exists, renders one document per
    spreadsheet row, collects every ``.docx`` file found directly in
    ``source_folder`` and appends them to a freshly created master
    document at ``destination_folder/final_doc_name``.
    """
    chk_dir()
    xls2doc()

    # os.listdir returns names in arbitrary order, so sort to make the page
    # order of the merged document reproducible. Names starting with "~$"
    # are Word's lock files for open documents, not real documents.
    files = sorted(
        Path(source_folder) / name
        for name in os.listdir(source_folder)
        if Path(name).suffix == ".docx" and not name.startswith("~$")
    )
    if files:
        merged_path = os.path.join(destination_folder, final_doc_name)
        create_master_docx(destination_folder)
        merge_docx(merged_path, files)
        print(f"Объединение завершено. Объединенный файл -> {merged_path} ")
    else:
        print("Файлов для объединения не найдено")

    # NOTE(review): presumably keeps the message visible when the script is
    # started from a window that closes on exit — confirm it is still needed.
    time.sleep(3)


if __name__ == "__main__":
    main()
As far as I can see, this code works fine for single pages. However, after merging the DOCX files, I encounter a strange bug. The first page is okay, but after the first page, all the text moves up. For example, I have attached screenshots from the bottom of the first page and the eighth page. Can someone please tell me what I did wrong?
What if you add a page break at the end of each page/file?