import logging

from docx import Document

logger = logging.getLogger(__name__)


def _iter_paragraphs(doc):
    """Yield the body paragraphs of *doc*, then every table-cell paragraph."""
    for paragraph in doc.paragraphs:
        yield paragraph
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    yield paragraph


def process_file(path_to_docx, path_to_new_docx, changes_to_apply):
    """Replace template placeholders in a docx file and save the result.

    Opens the document at ``path_to_docx``, replaces every occurrence of each
    key of ``changes_to_apply`` (template tags such as ``##name##``) with the
    corresponding value, and saves the document to ``path_to_new_docx``.

    Only the paragraphs reachable through ``doc.paragraphs`` and through the
    cells of ``doc.tables`` are visited.

    Args:
        path_to_docx: Path of the template document to open.
        path_to_new_docx: Path the modified document is saved to.
        changes_to_apply: Mapping of placeholder text to replacement text.
            Placeholders are applied in the mapping's iteration order.
    """
    doc = Document(path_to_docx)

    # Walk the document once instead of once per placeholder; replacing the
    # text of a paragraph does not add or remove paragraphs.
    paragraphs = list(_iter_paragraphs(doc))

    for placeholder, replacement in changes_to_apply.items():
        for paragraph in paragraphs:
            # Assigning ``paragraph.text`` rebuilds the paragraph content and
            # drops run-level formatting (see the python-docx documentation),
            # so only paragraphs that really contain the placeholder are
            # rewritten -- in table cells as well as in the body.
            if placeholder in paragraph.text:
                paragraph.text = paragraph.text.replace(placeholder, replacement)

    doc.save(path_to_new_docx)


def merge_files(files, path_to_new_docx):
    """Concatenate several docx files into one document and save it.

    When combining templates into a new Document object, python-docx does
    not properly copy the information of the images, so Word is not able to
    locate the original content referred to by the XML tag in the document.
    To fix this problem (see #234) the first file is loaded and the rest of
    the templates are concatenated to it. This way the original image content
    and rId match and the images are shown adequately. See issue #235.

    A page break is inserted between consecutive documents; none is added
    after the last one.

    Args:
        files: Sequence of docx files to merge, in order. Must contain at
            least one entry.
        path_to_new_docx: Path the merged document is saved to.

    Raises:
        IndexError: If ``files`` is empty.
    """
    if not files:
        raise IndexError("merge_files() needs at least one file to merge")

    first_file = files[0]
    remaining_files = files[1:]

    merged_document = Document(first_file)  # first file
    if remaining_files:
        merged_document.add_page_break()

    last_index = len(remaining_files) - 1
    for index, file in enumerate(remaining_files):  # rest of files, if any
        sub_doc = Document(file)
        if index < last_index:
            sub_doc.add_page_break()
        # Appending an element to another tree moves it out of ``sub_doc``,
        # so iterate over a snapshot rather than the live body.
        for element in list(sub_doc.element.body):
            merged_document.element.body.append(element)

    merged_document.save(path_to_new_docx)