【Python】尝试修改文档中所有的拼写错误

# Running total of words that were changed; incremented by
# write_correct_paragraph() each time a corrected token differs from the input.
count_correct = 0
# Input document whose paragraphs are read and spell-checked (relative path).
file = Document("ErrorDocument.docx")
# Punctuation characters that are copied through without correction.
# NOTE: this is a plain string, so `token in punkt_list` is a substring test.
# Because the literal is raw, the backslashes in it are literal characters too.
punkt_list = r",.?\"'!()/\\-<>:@#$%^&*~"
document = Document()  # Output Word document; corrected paragraphs are appended to it


def write_correct_paragraph(i):
    """Spell-correct paragraph ``i`` of the input and append it to the output.

    The paragraph text is split into sentences and then into tokens. Each
    token becomes its own run in a new paragraph of the module-level
    ``document``: tokens found in ``punkt_list`` are written unchanged, every
    other token is preceded by a single space and passed through
    ``correct_text_generic``. A token whose final text differs from the
    original is written in red and counted in the module-level
    ``count_correct``.

    Args:
        i: Index into ``file.paragraphs`` of the paragraph to process.
    """
    global count_correct
    # Text of the requested paragraph, without surrounding whitespace.
    paragraph = file.paragraphs[i].text.strip()
    # Sentence split first, then token split inside each sentence.
    sentences = sent_tokenize(text=paragraph)
    words_list = [word_tokenize(sentence) for sentence in sentences]
    # Output paragraph handle; it starts with seven spaces of leading text.
    p = document.add_paragraph(' ' * 7)
    for sentence_index, word_list in enumerate(words_list):
        for word_index, word in enumerate(word_list):
            # punkt_list is a string, so this is a substring test.
            if word in punkt_list:
                p.add_run(word)
                continue

            p.add_run(' ')
            # Per the original author's note, the corrector returns the word
            # unchanged when it is already spelled correctly.
            correct_word = correct_text_generic(word)

            # Capitalize the opening token of the paragraph's first sentence.
            # Positions come from enumerate() rather than list.index(): index()
            # returns the FIRST equal element, so a later repeat of the opening
            # word (or of the whole first sentence) was wrongly capitalized.
            # The emptiness check avoids an IndexError on an empty correction.
            # NOTE(review): the original comment said "every sentence", but the
            # code only ever targeted the first sentence; that rule is kept.
            if sentence_index == 0 and word_index == 0 and correct_word:
                correct_word = correct_word[0].upper() + correct_word[1:]

            run = p.add_run(correct_word)
            # Highlight and count tokens whose text changed.
            if correct_word != word:
                run.font.color.rgb = RGBColor(0xFF, 0x00, 0x00)
                count_correct += 1


if __name__ == '__main__':
    # Report the number of paragraphs in the input, then rewrite each one by
    # index into the output document.
    paragraph_total = len(file.paragraphs)
    print("段落数:" + str(paragraph_total))
    for paragraph_index in range(paragraph_total):
        write_correct_paragraph(paragraph_index)
    # Write the corrected document to disk and print the correction summary.
    document.save("CorrectDocument.docx")
    print("修改并保存文件完毕!")
    print("一共修改了%d处。" % count_correct)

评论列表,共 0 条评论
    暂无评论
© 2023 - good good study day day up - 角木蛟要学习

因为喜欢,可迎万难

© 慕容驹 - 苏ICP备2021030040号