# docx_helper.py

import logging

from docx import Document

logger = logging.getLogger(__name__)


def _replace_in_container(container, placeholder, replacement):
    """
    Replaces every occurrence of ``placeholder`` with ``replacement`` in
    the paragraphs of ``container`` and, recursively, in the cells of
    its tables.

    ``container`` is any object exposing ``paragraphs`` and ``tables``,
    i.e. the document itself or a table cell.
    """
    for paragraph in container.paragraphs:
        text = paragraph.text
        if placeholder in text:
            # NOTE(review): per the python-docx documentation, assigning
            # ``paragraph.text`` replaces the paragraph content with a
            # single run, so run-level formatting (bold, italic, ...) of
            # a touched paragraph is lost - confirm this is acceptable.
            paragraph.text = text.replace(placeholder, replacement)

    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                # A cell is a container too: recursing covers its own
                # paragraphs as well as tables nested inside the cell,
                # which are not listed by the document-level ``tables``.
                _replace_in_container(cell, placeholder, replacement)


def process_file(path_to_docx, path_to_new_docx, changes_to_apply):
    """
    Opens the docx document found at ``path_to_docx``, replaces the
    template tags (in format of ##name##) with their values and saves
    the result to ``path_to_new_docx``.

    ``changes_to_apply`` is a mapping of tag -> replacement text. The
    tags are applied one after another, in the iteration order of the
    mapping, to the body paragraphs and to the paragraphs inside table
    cells (including tables nested in other tables).
    """
    doc = Document(path_to_docx)
    for placeholder, replacement in changes_to_apply.items():
        _replace_in_container(doc, placeholder, replacement)

    doc.save(path_to_new_docx)


def merge_files(files, path_to_new_docx):
    """
    Concatenates the given docx templates into one document and saves it
    to ``path_to_new_docx``.

    Combining the templates into a brand new Document object does not work
    well: python-docx does not properly copy the image information, so Word
    cannot locate the content referred by the XML tag (see #234 ). For that
    reason the first file is loaded as the base document and the body
    elements of the remaining templates are appended to it, so the original
    image content and rId match and the images are shown adequately.
    See issue #235

    A page break is inserted after every template except the last one.
    """
    base_path = files[0]
    remaining = files[1:]
    remaining_count = len(remaining)

    merged = Document(base_path)  # the first file is the base document
    if remaining_count:
        # More templates follow, so close the page of the base document.
        merged.add_page_break()

    target_body = merged.element.body
    for position, template_path in enumerate(remaining):
        template = Document(template_path)
        is_last = position == remaining_count - 1
        if not is_last:
            template.add_page_break()
        # Appending an element moves it out of the template body and into
        # the merged body.
        for node in template.element.body:
            target_body.append(node)

    merged.save(path_to_new_docx)