WebashalarForML committed on
Commit
48c8a6f
·
verified ·
1 Parent(s): 4160924

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +7 -8
utils/mistral.py CHANGED
@@ -41,14 +41,7 @@ def Data_Cleaner(text):
41
  def Data_Cleaner(text):
42
  """
43
  Preprocess the JSON string to remove extra spaces, tabs, and newlines.
44
- """
45
- # Remove leading and trailing whitespace
46
- text = text.strip()
47
- # Remove unnecessary newlines and tabs
48
- text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
49
- text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
50
- text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
51
-
52
  # Use a regex pattern to extract JSON if it exists within ```json and ```
53
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
54
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
@@ -59,6 +52,12 @@ def Data_Cleaner(text):
59
  # If no match, check if text itself is a JSON object
60
  try:
61
  json_obj = json.loads(text.strip()) # Attempt to load the text as JSON
 
 
 
 
 
 
62
  return text # Return the parsed JSON as a dictionary
63
  except json.JSONDecodeError:
64
  logging.error("No valid JSON found in the text")
 
41
  def Data_Cleaner(text):
42
  """
43
  Preprocess the JSON string to remove extra spaces, tabs, and newlines.
44
+ """
 
 
 
 
 
 
 
45
  # Use a regex pattern to extract JSON if it exists within ```json and ```
46
  pattern = r"```json\s*(\{.*?\})\s*```" # Non-greedy matching inside braces
47
  match = re.search(pattern, text, re.DOTALL) # DOTALL to match newlines
 
52
  # If no match, check if text itself is a JSON object
53
  try:
54
  json_obj = json.loads(text.strip()) # Attempt to load the text as JSON
55
+ # Remove leading and trailing whitespace
56
+ text = text.strip()
57
+ # Remove unnecessary newlines and tabs
58
+ text = re.sub(r'\s*\n\s*', ' ', text) # Replace newlines with a space
59
+ text = re.sub(r'\s+', ' ', text) # Replace multiple spaces with a single space
60
+ text = re.sub(r'\s*([{}:,])\s*', r'\1', text) # Remove spaces around braces, colons, and commas
61
  return text # Return the parsed JSON as a dictionary
62
  except json.JSONDecodeError:
63
  logging.error("No valid JSON found in the text")