Python Forum

Full Version: python-docx regex : Browse the found words in turn from top to bottom
You're currently viewing a stripped down version of our content. View the full version with proper formatting.
I'm trying to finalize my personal project and i'm having a new problem which is matching top to bottom positions in and out of the board to change word. here is my example docx file [Image: q2mR4.png] . I used the following code to change the word
import docx
import re
def iter_block_items(parent):
    if isinstance(parent, _Document):
        parent_elm = parent.element.body
        # print(parent_elm.xml)
    elif isinstance(parent, _Cell):
        parent_elm = parent._tc
    else:
        raise ValueError("something's not right")

    for child in parent_elm.iterchildren():
        if isinstance(child, CT_P):
            yield Paragraph(child, parent)
        elif isinstance(child, CT_Tbl):
            yield Table(child, parent)
def replace_string(key,value,NumberList,countKey,p):
    lenght = len(key)
    tmp_padding = len(key) - len (value)
    matchs = re.findall(key,p.text,re.IGNORECASE) 
    lines = p.runs 
    for j in range(len(lines)):
        padding = 0
        line = lines[j].text 
        for i in range(len(line)-lenght+1): 
            text = line[i - padding : i + lenght - padding] 
            if text in matchs:
                if countKey in NumberList:
                    text = line.replace(text, value) 
                    padding -= tmp_padding 
                    lines[j].text = text 
                countKey +=1 
    return countKey

def replace(filename,key,value,numberList,output_file):
    countKey = 1 
    doc = Document(filename)
    for block in iter_block_items(doc):
        if isinstance(block, Paragraph):
            if re.findall(key,block.text,re.IGNORECASE):
                countKey = replace_string(key,value,numberList,countKey,block)
        else:
            for table in doc.tables:
                for row in table.rows:
                    for cell in iter_unique_cells(row):
                        for p in cell.paragraphs:
                            if re.findall(key,p.text,re.IGNORECASE):
                                 countKey = replace_string(key,value,numberList,countKey,p)
    doc.save(output_file)
path = 'path of file docx'
tereplace(path,'collum','table',[1,3],'test2.docx')
here is the result: [Image: Xop7v.png]

Based on the results I see they match in the previous table.How can i position all the words in the text one by one?