Python Forum
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Word documents merging
#1
Hi,

I have a task: I have an XLSX document with data such as name, surname, class, school ID, etc. I also have a template for the Certificate of Appreciation. My goal is to read the data from the XLSX file, generate new documents using this data and the template, and merge all new documents into one.

This is the code I have so far. (Sorry, this is only my second Python script, so it might be messy.)

import os
import sys
import time

from docxtpl import DocxTemplate
from docx import Document

from docx.shared import Cm
from docxcompose.composer import Composer
from docx import Document as Document_compose
from pathlib import Path

#############################################################################

# Template inputs: the certificate template and the ';'-separated school list.
docx_tpl = './tpl/tpl3.docx'
gbou_name = './tpl/gbou.txt'

# Folder scanned for .docx files to merge, and the folder / file name of the
# merged result.
source_folder = './result/'
destination_folder = './result/сводные/'
final_doc_name = 'сводный.docx'

# Page margins of the docx template; the values are passed to docx.shared.Cm
# when the master document is created.
top, bottom = 0.75, 0.5
left = right = 1.27

###########################################################################

# Workbook to read: the first command-line argument, or "data.xlsx" when the
# script is started without arguments.
xls_name = sys.argv[1] if len(sys.argv) > 1 else "data.xlsx"

def chk_dir():
    """Ensure the output folder for the merged document exists.

    Creates ``destination_folder`` together with any missing parent
    directories. ``exist_ok=True`` makes the call a no-op when the folder is
    already present and removes the check-then-create race of a separate
    ``os.path.exists`` test.
    """
    os.makedirs(destination_folder, exist_ok=True)

def xls2doc():
    """Render one certificate document per data row of the XLSX workbook.

    Reads the active sheet of ``xls_name``. Row 1 is treated as the header
    row and maps column titles to column numbers. For every following row
    that contains a name, the ``docx_tpl`` template is rendered with the
    participant's data and saved into ``source_folder`` as
    ``<full name>_<discipline>.docx``.

    ``gbou_name`` is read as a UTF-8 text file with ``;``-separated fields:
    the first field is searched for the school number taken from the
    spreadsheet, the second field is the text placed into the certificate.

    Raises:
        KeyError: if the sheet has data rows but a required column title is
            missing from the header row. The 'Отчество' column is optional.
    """
    # Imported locally: the module's import block does not bring openpyxl
    # into scope, so the original call failed with NameError.
    import openpyxl

    workbook = openpyxl.load_workbook(xls_name)
    worksheet = workbook.active

    with open(gbou_name, encoding='utf-8') as f:
        # Keep only lines with at least two fields so blank or truncated
        # lines cannot raise IndexError during the lookup below.
        schools = [
            fields
            for fields in (line.strip().split(';') for line in f)
            if len(fields) >= 2
        ]

    header_row = 1
    headers = {}
    for col in range(1, worksheet.max_column + 1):
        title = worksheet.cell(row=header_row, column=col).value
        if title:
            headers[title] = col

    def value_of(row, title):
        """Return the value of *row* in the column whose header is *title*."""
        return worksheet.cell(row=row, column=headers[title]).value

    # The patronymic column may be absent; looking it up with .get() and
    # passing None as the column number would make worksheet.cell() fail.
    has_middle_name = 'Отчество' in headers

    # Write into the same folder that is later scanned for files to merge.
    # The previous hard-coded './Result/' differs from source_folder in case,
    # which is a different (possibly missing) directory on case-sensitive
    # file systems.
    os.makedirs(source_folder, exist_ok=True)

    for row in range(header_row + 1, worksheet.max_row + 1):
        last_name = value_of(row, 'Фамилия')
        first_name = value_of(row, 'Имя')
        middle_name = value_of(row, 'Отчество') if has_middle_name else None

        # Rows without any name would otherwise produce a "None None"
        # certificate.
        if not last_name and not first_name:
            continue

        full_name = f'{last_name} {first_name}'
        if middle_name:
            full_name += f' {middle_name}'

        # Only the first letter of the cell decides the wording.
        sex_raw = value_of(row, 'Пол')
        sex_initial = str(sex_raw).strip()[:1].lower() if sex_raw else ''
        if sex_initial == 'ж':
            sex = 'учащаяся'
        elif sex_initial == 'м':
            sex = 'учащийся'
        else:
            sex = 'неизвестно'

        school_name = value_of(row, 'Полное название общеобразовательного учреждения')
        school_num = ''
        if school_name:
            # The school is identified by the first purely numeric word of
            # its full name, or by the word 'Морская'.
            candidates = [
                s for s in str(school_name).split()
                if s.isdigit() or s == 'Морская'
            ]
            if candidates:
                school_num = candidates[0]

        gbou = ''
        # Search only with a non-empty key: an empty string is a substring of
        # every entry (it always matched the first school), and the empty
        # list left behind by a failed search raised TypeError in the `in`
        # test.
        if school_num:
            for school in schools:
                if school_num in school[0]:
                    gbou = school[1]
                    break

        discipline = value_of(row, 'Предмет')
        class_num = value_of(row, 'Класс обучения')
        status = value_of(row, 'Статус участника')
        teacher = value_of(row, 'Фамилия, Имя, Отчество учителя')

        # A fresh template object is created for every row before rendering.
        tpl = DocxTemplate(docx_tpl)
        context = {
            'full_name': full_name,
            'sex': sex,
            'class_num': class_num,
            'gbou': gbou,
            'status': status,
            'discipline': discipline,
            'teacher': teacher,
        }
        tpl.render(context)

        tpl.save(os.path.join(source_folder, f'{full_name}_{discipline}.docx'))

def create_master_docx(path: Path):
    """Create the empty master document that the certificates are merged into.

    Builds a new document, applies the configured page margins (``top``,
    ``bottom``, ``left``, ``right``, in centimetres) and a 21.0 x 29.7 cm
    page size to its sections, and saves it as ``final_doc_name`` inside
    ``destination_folder``.

    Args:
        path: Accepted for backward compatibility. The output location is
            always built from ``destination_folder`` and ``final_doc_name``,
            as before; this argument is not used.
    """
    doc = Document()

    for section in doc.sections:
        section.top_margin = Cm(top)
        section.bottom_margin = Cm(bottom)
        section.left_margin = Cm(left)
        section.right_margin = Cm(right)
        # Previously the loop variable was rebound to sections[0] here, so
        # only the first section ever received the page size.
        section.page_height = Cm(29.7)
        section.page_width = Cm(21.0)

    # Save once, after every section is configured, instead of once per
    # loop iteration.
    doc.save(os.path.join(destination_folder, final_doc_name))

def merge_docx(path_master: str, files: list):
    """Append every document in *files* to the master document.

    Args:
        path_master: Path of the existing master .docx file. It is opened,
            extended and then overwritten in place.
        files: Paths of the .docx files to append, in the given order.
    """
    master = Document_compose(path_master)
    composer = Composer(master)
    for file in files:
        composer.append(Document_compose(file))

    # Write the result once after all documents are appended; saving inside
    # the loop rewrote the growing file on every iteration.
    composer.save(path_master)


def main():
    """Generate the certificates and merge them into a single document.

    Ensures the output folder exists, renders one .docx per spreadsheet row,
    then collects every .docx file found directly in ``source_folder`` and
    appends them to a freshly created master document. Prints the result
    location, or a notice when there is nothing to merge.
    """
    chk_dir()
    xls2doc()

    # os.listdir returns names in arbitrary order; sorting makes the order of
    # certificates in the merged document reproducible between runs.
    files = sorted(
        Path(source_folder) / name
        for name in os.listdir(source_folder)
        if Path(name).suffix == ".docx"
    )
    if files:
        master_path = os.path.join(destination_folder, final_doc_name)
        create_master_docx(destination_folder)
        merge_docx(master_path, files)
        print(f"Объединение завершено. Объединенный файл -> {master_path} ")
    else:
        print("Файлов для объединения не найдено")

    # NOTE(review): presumably keeps a console window open long enough to
    # read the message above - confirm before removing.
    time.sleep(3)

# Run the pipeline only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
As far as I can see, this code works fine for single pages. However, after merging the DOCX files, I encounter a strange bug. The first page is okay, but after the first page, all the text moves up. For example, I have attached screenshots from the bottom of the first page and the eighth page. Can someone please tell me what I did wrong?

Attached Files

Thumbnail(s)
       
Reply
#2
What if you add a page break at the end of each page/file?
If you can't explain it to a six year old, you don't understand it yourself, Albert Einstein
How to Ask Questions The Smart Way: link and another link
Create MCV example
Debug small programs

Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
Question Problem: Check if a list contains a word and then continue with the next word Mangono 2 2,518 Aug-12-2021, 04:25 PM
Last Post: palladium
  Сombine (Merge) word documents using python-docx Lancellot 1 11,575 May-12-2021, 11:07 AM
Last Post: toothedsword
  Copy documents to Teams using python SallySmith 0 2,390 Mar-23-2021, 04:27 AM
Last Post: SallySmith
Question Mouseover(Hover/Float) text in PDF documents ak52 1 2,574 Feb-24-2021, 06:13 PM
Last Post: nilamo
  Confused by 'break' in the official documents Chuck_Norwich 2 2,445 Apr-12-2020, 09:26 PM
Last Post: Chuck_Norwich
  Python Speech recognition, word by word AceScottie 6 16,028 Apr-12-2020, 09:50 AM
Last Post: vinayakdhage
  Extracting parts of paragraphs from word documents using python-docx library & lists Den0st 0 10,657 Nov-06-2019, 12:07 AM
Last Post: Den0st
  print a word after specific word search evilcode1 8 4,867 Oct-22-2019, 08:08 AM
Last Post: newbieAuggie2019
  I need help using Python to generate usernames and passwords with excel documents Jannejannesson 3 4,028 May-08-2019, 02:30 PM
Last Post: Jannejannesson
  Google documents arsenal58 2 3,682 Nov-20-2018, 05:13 PM
Last Post: arsenal58

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020