Spaces:
Sleeping
Sleeping
Update utils/mistral.py
Browse files- utils/mistral.py +7 -8
utils/mistral.py
CHANGED
@@ -41,14 +41,7 @@ def Data_Cleaner(text):
|
|
41 |
def Data_Cleaner(text):
|
42 |
"""
|
43 |
Preprocess the JSON string to remove extra spaces, tabs, and newlines.
|
44 |
-
"""
|
45 |
-
# Remove leading and trailing whitespace
|
46 |
-
text = text.strip()
|
47 |
-
# Remove unnecessary newlines and tabs
|
48 |
-
text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
|
49 |
-
text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
|
50 |
-
text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
|
51 |
-
|
52 |
# Use a regex pattern to extract JSON if it exists within ```json and ```
|
53 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
54 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|
@@ -59,6 +52,12 @@ def Data_Cleaner(text):
|
|
59 |
# If no match, check if text itself is a JSON object
|
60 |
try:
|
61 |
json_obj = json.loads(text.strip()) # Attempt to load the text as JSON
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
return text # Return the JSON text itself (validated by json.loads above), not the parsed object
|
63 |
except json.JSONDecodeError:
|
64 |
logging.error("No valid JSON found in the text")
|
|
|
41 |
def Data_Cleaner(text):
|
42 |
"""
|
43 |
Preprocess the JSON string to remove extra spaces, tabs, and newlines.
|
44 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
# Use a regex pattern to extract JSON if it exists within ```json and ```
|
46 |
pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
|
47 |
match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
|
|
|
52 |
# If no match, check if text itself is a JSON object
|
53 |
try:
|
54 |
json_obj = json.loads(text.strip()) # Attempt to load the text as JSON
|
55 |
+
# Remove leading and trailing whitespace
|
56 |
+
text = text.strip()
|
57 |
+
# Remove unnecessary newlines and tabs
|
58 |
+
text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
|
59 |
+
text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
|
60 |
+
text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
|
61 |
return text # Return the whitespace-normalized JSON text (a string), not the parsed object
|
62 |
except json.JSONDecodeError:
|
63 |
logging.error("No valid JSON found in the text")
|