# docx_helper.py

import logging

from docx import Document

logger = logging.getLogger(__name__)


def _iter_paragraphs(container):
    """
    Yields every paragraph of ``container`` (a document or a table cell):
    first its own paragraphs, then the paragraphs of every cell of every
    table it holds, descending into tables nested inside cells.
    """
    for paragraph in container.paragraphs:
        yield paragraph
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in _iter_paragraphs(cell):
                    yield paragraph


def process_file(path_to_docx, path_to_new_docx, changes_to_apply):
    """
    Opens the docx document found at ``path_to_docx``, replaces template
    tags (in the format of ##name##) with their values and saves the
    result to ``path_to_new_docx``.

    ``changes_to_apply`` maps each placeholder string to the string that
    replaces it. Placeholders are applied in the mapping's iteration
    order, to body paragraphs and to paragraphs inside table cells
    (including tables nested in cells).

    Only paragraphs that actually contain a placeholder are rewritten.
    Per the python-docx documentation, assigning ``paragraph.text``
    replaces the paragraph content with a single run, so run-level
    formatting of a rewritten paragraph is lost; untouched paragraphs keep
    their formatting.
    """
    doc = Document(path_to_docx)

    # Walk the document structure once instead of once per placeholder.
    paragraphs = list(_iter_paragraphs(doc))

    for placeholder, replacement in changes_to_apply.items():
        for paragraph in paragraphs:
            current_text = paragraph.text
            # Skip paragraphs without the tag so their runs are not rebuilt.
            if placeholder in current_text:
                paragraph.text = current_text.replace(placeholder, replacement)

    doc.save(path_to_new_docx)

def merge_files(files, path_to_new_docx):
    """
    Concatenates several docx files into one and saves the result to
    ``path_to_new_docx``.

    The first entry of ``files`` is opened and used as the base document;
    the body elements of every following file are appended to it, with a
    page break between consecutive files.

    The first file is used as the base, rather than a new empty Document,
    because when templates were combined into a new Document the image
    information was not copied properly and Word could not locate the
    content referred to by the XML (see #234 and #235). Starting from the
    first file keeps its image content and rIds matching.
    """
    base_path = files[0]
    remaining = files[1:]
    remaining_count = len(remaining)

    merged_document = Document(base_path)
    target_body = merged_document.element.body

    # Separate the base document from whatever is appended after it.
    if remaining_count:
        merged_document.add_page_break()

    for position, template_path in enumerate(remaining):
        sub_doc = Document(template_path)
        # Every appended file except the last one ends with a page break.
        if position + 1 < remaining_count:
            sub_doc.add_page_break()
        for child in sub_doc.element.body:
            target_body.append(child)

    merged_document.save(path_to_new_docx)