WebashalarForML committed on
Commit
087e276
·
verified ·
1 Parent(s): 5be90fe

Update utils/mistral.py

Browse files
Files changed (1) hide show
  1. utils/mistral.py +16 -5
utils/mistral.py CHANGED
@@ -4,7 +4,6 @@ import json
4
  import logging
5
  from huggingface_hub import InferenceClient
6
  from huggingface_hub.utils._errors import BadRequestError
7
- #from huggingface_hub import BadRequestError
8
  from dotenv import load_dotenv
9
  from utils.fileTotext import extract_text_based_on_format
10
  import re
@@ -24,11 +23,19 @@ def Data_Cleaner(text):
24
  pattern = r".*?format:"
25
  result = re.split(pattern, text, maxsplit=1)
26
  if len(result) > 1:
 
27
  text_after_format = result[1].strip().strip('`').strip('json')
28
  else:
29
  text_after_format = text.strip().strip('`').strip('json')
30
-
31
- return text_after_format
 
 
 
 
 
 
 
32
 
33
  # Function to call Mistral and process output
34
  def Model_ProfessionalDetails_Output(resume, client):
@@ -63,7 +70,7 @@ def Model_ProfessionalDetails_Output(resume, client):
63
 
64
 
65
  response = ""
66
- for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=3000, stream=True, temperature=0.35):
67
  response += message.choices[0].delta.content
68
 
69
  try:
@@ -301,10 +308,11 @@ def process_resume_data(file_path):
301
  try:
302
  # Extract personal details using Mistral
303
  per_data = Model_PersonalDetails_Output(resume_text, client)
 
304
 
305
  # Extract professional details using Mistral
306
  pro_data = Model_ProfessionalDetails_Output(resume_text, client)
307
-
308
  # Check if per_data and pro_data have been populated correctly
309
  if not per_data:
310
  logging.warning("Mistral personal data extraction failed.")
@@ -359,6 +367,7 @@ def process_resume_data(file_path):
359
  # If Mistral produces valid output, return it
360
  if per_data or pro_data:
361
  logging.info("Successfully extracted data using Mistral.")
 
362
  print("---------Mistral-------")
363
  return result
364
  else:
@@ -376,3 +385,5 @@ def process_resume_data(file_path):
376
  logging.warning("Mistral failed, switching to SpaCy.")
377
  print("---------SpaCy-------")
378
  return Parser_from_model(file_path)
 
 
 
4
  import logging
5
  from huggingface_hub import InferenceClient
6
  from huggingface_hub.utils._errors import BadRequestError
 
7
  from dotenv import load_dotenv
8
  from utils.fileTotext import extract_text_based_on_format
9
  import re
 
23
  pattern = r".*?format:"
24
  result = re.split(pattern, text, maxsplit=1)
25
  if len(result) > 1:
26
+ # Handle edge cases where JSON might not be properly formatted after 'format:'
27
  text_after_format = result[1].strip().strip('`').strip('json')
28
  else:
29
  text_after_format = text.strip().strip('`').strip('json')
30
+
31
+ # Try to ensure valid JSON is returned
32
+ try:
33
+ json.loads(text_after_format) # Check if it's valid JSON
34
+ return text_after_format
35
+ except json.JSONDecodeError:
36
+ logging.error("Data cleaning led to invalid JSON")
37
+ return text # Return the original text if cleaning goes wrong
38
+
39
 
40
  # Function to call Mistral and process output
41
  def Model_ProfessionalDetails_Output(resume, client):
 
70
 
71
 
72
  response = ""
73
+ for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
74
  response += message.choices[0].delta.content
75
 
76
  try:
 
308
  try:
309
  # Extract personal details using Mistral
310
  per_data = Model_PersonalDetails_Output(resume_text, client)
311
+ print(per_data)
312
 
313
  # Extract professional details using Mistral
314
  pro_data = Model_ProfessionalDetails_Output(resume_text, client)
315
+ print(pro_data)
316
  # Check if per_data and pro_data have been populated correctly
317
  if not per_data:
318
  logging.warning("Mistral personal data extraction failed.")
 
367
  # If Mistral produces valid output, return it
368
  if per_data or pro_data:
369
  logging.info("Successfully extracted data using Mistral.")
370
+ print(result)
371
  print("---------Mistral-------")
372
  return result
373
  else:
 
385
  logging.warning("Mistral failed, switching to SpaCy.")
386
  print("---------SpaCy-------")
387
  return Parser_from_model(file_path)
388
+
389
+