Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +10 -0
utils/mistral.py
CHANGED
@@ -39,6 +39,16 @@ def Data_Cleaner(text):
|
|
39 |
return text # Return the original text if cleaning goes wrong
|
40 |
'''
|
41 |
def Data_Cleaner(text):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# Use a regex pattern to extract JSON if it exists within ```json and ```
|
43 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
44 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|
|
|
39 |
return text # Return the original text if cleaning goes wrong
|
40 |
'''
|
41 |
def Data_Cleaner(text):
|
42 |
+
"""
|
43 |
+
Preprocess the JSON string to remove extra spaces, tabs, and newlines.
|
44 |
+
"""
|
45 |
+
# Remove leading and trailing whitespace
|
46 |
+
text = text.strip()
|
47 |
+
# Remove unnecessary newlines and tabs
|
48 |
+
text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
|
49 |
+
text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
|
50 |
+
text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
|
51 |
+
|
52 |
# Use a regex pattern to extract JSON if it exists within ```json and ```
|
53 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
54 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|