File size: 2,099 Bytes
8b414b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
__all__ = ['SmartSpellChecker']

import re
from collections import Counter
from functools import lru_cache
from typing import Iterable

from spellchecker import SpellChecker


# Sentence punctuation treated as a word separator: each mark becomes a space.
_PUNCTUATION_TO_SPACE = str.maketrans({mark: " " for mark in ".,!?;:"})


def get_word_counter(text: str):
    """Count the whitespace-separated words of *text*.

    The punctuation marks ``. , ! ? ; :`` are replaced by spaces before
    splitting, so ``"end."`` and ``"end"`` are counted as the same word.
    Other characters (quotes, brackets, hyphens, ...) stay part of the word,
    and the case of the words is preserved.

    Returns:
        A ``collections.Counter`` mapping each word to its occurrence count.
    """
    without_punctuation = text.translate(_PUNCTUATION_TO_SPACE)
    words = without_punctuation.split()
    return Counter(words)


# Hand-picked corrections: misspelling -> replacement text.
# SmartSpellChecker.correct_word consults this table before any other rule,
# so an entry here always wins over the generic spell checker's suggestion.
custom_mappings = {
    "alot": "a lot",
    "classwork": "class work",
    "everytime": "every time",
    "loosing": "losing",
    "clases": "classes",
    "payed": "paid",
    "learnd": "learned",
    "ect": "etc",
    "wasnt": "wasn't",
    "wich": "which",
    "sol's": "souls",
    "thigs": "things",
    "activies": "activities",
    "oline": "online",
    "thru": "through",
    "inconclusion": "in conclusion",
}

# Plain substring replacements that SmartSpellChecker.correct_text applies to
# the raw text, in this order, before any word is spell checked.
# The spaces around " u " are part of the key, so only a stand-alone "u"
# surrounded by spaces is rewritten.
skipped_mappings = {
    " u ": " you ",
    "youll": "you will",
    "wont": "won't",
}

# Words for which correct_word returns None, i.e. they are left as written
# even when the spell checker reports them as unknown.
exclude_words_from_check = {
    "you're",
    "covid",
}

# Words for which correct_word returns an empty string, i.e. they are removed
# from the text instead of being corrected.
black_list = {
    "ther",
    "waldo's",
    "f's",
    "",
}


class SmartSpellChecker:
    """Spell checker that layers project-specific rules over ``SpellChecker``.

    Corrections are resolved in this order (see :meth:`correct_word`):
    ``custom_mappings``, then ``black_list``, then
    ``exclude_words_from_check``, then a minimum-length guard, and only then
    the generic ``SpellChecker.correction``.
    """

    # A token, as produced by ``get_word_counter``, is delimited by whitespace
    # or by the punctuation marks that function turns into spaces.  This
    # character class matches everything that is NOT such a delimiter.
    _NON_DELIMITER = r"[^\s.,!?;:]"

    def __init__(self):
        self.spellcheck = SpellChecker()
        # Per-instance memo of word -> correction.  A plain dict is used
        # instead of ``functools.lru_cache`` on the method, because a cached
        # method keeps a reference to every instance for the cache's lifetime.
        self._correction_cache = {}

    def correct_word(self, mismatch: str):
        """Return the replacement for a single misspelled word.

        Args:
            mismatch: The word reported as unknown.

        Returns:
            The replacement text, ``""`` when the word should be removed from
            the text, or ``None`` when the word should be left untouched.
            The generic checker's own result is returned unchanged, whatever
            it is.
        """
        try:
            cache = self._correction_cache
        except AttributeError:
            # Instance created without running __init__ (e.g. a subclass that
            # does not call super().__init__()): create the memo lazily.
            cache = self._correction_cache = {}

        if mismatch not in cache:
            cache[mismatch] = self._resolve_correction(mismatch)
        return cache[mismatch]

    def _resolve_correction(self, mismatch: str):
        """Compute the uncached correction for *mismatch*."""
        if mismatch in custom_mappings:
            return custom_mappings[mismatch]

        if mismatch in black_list:
            return ""

        if mismatch in exclude_words_from_check:
            return None

        # Tokens of one or two characters (such as 'b' or 'c') are sometimes
        # reported as misspelled; they are left untouched.
        if len(mismatch) <= 2:
            return None

        return self.spellcheck.correction(mismatch)

    def correct_text(self, text: str):
        """Return *text* with known misspellings corrected.

        First every ``skipped_mappings`` entry is applied as a plain substring
        replacement.  Then each word reported by :meth:`unknown` is looked up
        with :meth:`correct_word` and replaced wherever it occurs as a whole
        token.  Matching is case-sensitive.

        Only whole tokens are rewritten: a misspelling that happens to be a
        substring of another word (``ther`` inside ``there``) leaves that
        other word intact.  All replacements happen in one pass, so the
        result does not depend on the order in which the unknown words are
        visited and a replacement is never itself re-corrected.
        """
        for old, new in skipped_mappings.items():
            text = text.replace(old, new)

        replacements = {}
        for misspell in self.unknown(get_word_counter(text)):
            if not misspell:
                # An empty pattern would match at every position.
                continue
            correct = self.correct_word(misspell)
            if correct is not None and correct != misspell:
                replacements[misspell] = correct

        if not replacements:
            return text

        # sorted() only makes the generated pattern deterministic; because
        # both ends of a match must sit on a token boundary, at most one
        # alternative can match at any position.
        alternatives = "|".join(re.escape(word) for word in sorted(replacements))
        token_pattern = re.compile(
            "(?<!" + self._NON_DELIMITER + ")"
            "(?:" + alternatives + ")"
            "(?!" + self._NON_DELIMITER + ")"
        )
        return token_pattern.sub(lambda match: replacements[match.group(0)], text)

    def unknown(self, words: Iterable):
        """Return the subset of *words* that the spell checker does not know.

        Thin wrapper around ``SpellChecker.unknown``.
        """
        return self.spellcheck.unknown(words)