Skip to content
Snippets Groups Projects
docx_helper.py 1.85 KiB
Newer Older
Jacek Lebioda's avatar
Jacek Lebioda committed
import logging

from docx import Document

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

Jacek Lebioda's avatar
Jacek Lebioda committed

def process_file(path_to_docx, path_to_new_docx, changes_to_apply):
    """
    Fill in the template tags of a docx document and save the result.

    Opens the document at ``path_to_docx``, replaces every placeholder
    (template tags in the format of ##name##) found in the body paragraphs
    and in the paragraphs of the document's table cells, then saves the
    document to ``path_to_new_docx``.

    Args:
        path_to_docx: location of the template, passed to ``Document(...)``.
        path_to_new_docx: destination, passed to ``doc.save(...)``.
        changes_to_apply: dict mapping each placeholder string to the
            text that should replace it.
    """
    doc = Document(path_to_docx)
    for placeholder, replacement in changes_to_apply.items():
        for paragraph in doc.paragraphs:
            _replace_in_paragraph(paragraph, placeholder, replacement)

        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        _replace_in_paragraph(paragraph, placeholder, replacement)

    doc.save(path_to_new_docx)


def _replace_in_paragraph(paragraph, placeholder, replacement):
    """Replace ``placeholder`` in ``paragraph``, only if it actually occurs."""
    current_text = paragraph.text
    # Assigning paragraph.text rebuilds the paragraph as a single run, which
    # drops run-level formatting. Paragraphs without the placeholder are
    # therefore left untouched (body and table paragraphs alike).
    if placeholder in current_text:
        paragraph.text = current_text.replace(placeholder, replacement)

def merge_files(files, path_to_new_docx):
    '''
    Concatenate several docx files into one document, separated by page breaks.

    When combining templates into a new Document object, python-docx does not
    properly copy the information of the images, so Word is not able to
    locate the original content referred to by the XML tag in the document.
    To fix this problem (see #234) the first file is loaded and the rest of
    the templates are concatenated to it. This way, the original image
    content and rId match and the images are shown adequately.
    See issue #235

    files: sequence of docx files; the first one is opened as the base
        document, so it must not be empty (files[0] fails on an empty list).
    path_to_new_docx: intended output location.
        NOTE(review): not referenced in the lines visible here, and nothing
        visible saves the merged document - confirm that a save follows.
    '''
    first_file = files[0]
    files = files[1:]
    merged_document = Document(first_file) # base document that receives the other files

    # Separate the first file from what follows, but only if something follows.
    if len(files) > 0:
        merged_document.add_page_break()

    for index, file in enumerate(files): # remaining files, if any
        sub_doc = Document(file)
        # Every appended file except the last one ends with a page break.
        if index < len(files) - 1:
            sub_doc.add_page_break()
        # Append each body-level element of this file to the merged body.
        for element in sub_doc.element.body:
            merged_document.element.body.append(element)