Update utils/mistral.py
utils/mistral.py  +16 -5
utils/mistral.py
CHANGED
@@ -4,7 +4,6 @@ import json
 import logging
 from huggingface_hub import InferenceClient
 from huggingface_hub.utils._errors import BadRequestError
-#from huggingface_hub import BadRequestError
 from dotenv import load_dotenv
 from utils.fileTotext import extract_text_based_on_format
 import re
@@ -24,11 +23,19 @@ def Data_Cleaner(text):
     pattern = r".*?format:"
     result = re.split(pattern, text, maxsplit=1)
     if len(result) > 1:
+        # Handle edge cases where JSON might not be properly formatted after 'format:'
         text_after_format = result[1].strip().strip('`').strip('json')
     else:
         text_after_format = text.strip().strip('`').strip('json')
-
-
+
+    # Try to ensure valid JSON is returned
+    try:
+        json.loads(text_after_format)  # Check if it's valid JSON
+        return text_after_format
+    except json.JSONDecodeError:
+        logging.error("Data cleaning led to invalid JSON")
+        return text  # Return the original text if cleaning goes wrong
+
 
 # Function to call Mistral and process output
 def Model_ProfessionalDetails_Output(resume, client):
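The new tail of Data_Cleaner validates its own output before returning it. A minimal standalone sketch of the same routine, runnable outside the Space (the sample strings below are hypothetical model replies, not taken from the repo):

import json
import logging
import re

def Data_Cleaner(text):
    # Drop everything up to and including the "format:" marker, if present
    pattern = r".*?format:"
    result = re.split(pattern, text, maxsplit=1)
    if len(result) > 1:
        text_after_format = result[1].strip().strip('`').strip('json')
    else:
        text_after_format = text.strip().strip('`').strip('json')

    # Validate before returning; fall back to the raw text on failure
    try:
        json.loads(text_after_format)
        return text_after_format
    except json.JSONDecodeError:
        logging.error("Data cleaning led to invalid JSON")
        return text

good = 'Here is the output in the requested format: ```json {"name": "Jane Doe"}```'
print(Data_Cleaner(good))   # cleaned string parses as JSON: {"name": "Jane Doe"}

bad = "no marker and nothing parseable"
print(Data_Cleaner(bad))    # logs an error and returns the original text

Note that str.strip('json') removes any of the characters j/s/o/n from both ends rather than the literal prefix "json", which is exactly the kind of over-stripping the new json.loads check catches.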
@@ -63,7 +70,7 @@ def Model_ProfessionalDetails_Output(resume, client):
 
 
     response = ""
-    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=
+    for message in client.chat_completion(messages=[system_role, user_prompt], max_tokens=4096, stream=True, temperature=0.35):
         response += message.choices[0].delta.content
 
     try:
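The streaming loop now pins explicit generation settings. The same pattern in isolation, assuming a served chat model (the model id below is a placeholder, not necessarily the one the Space uses) and an HF_TOKEN in the environment:

import os
from huggingface_hub import InferenceClient

# Placeholder model id; substitute whatever chat model the Space actually targets
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2", token=os.getenv("HF_TOKEN"))

system_role = {"role": "system", "content": "You extract structured data from resumes."}
user_prompt = {"role": "user", "content": "Return the candidate's name as JSON."}

response = ""
for message in client.chat_completion(messages=[system_role, user_prompt],
                                      max_tokens=4096, stream=True, temperature=0.35):
    # delta.content can arrive as None on some chunks; the `or ""` guard is a
    # defensive addition not present in the diff itself
    response += message.choices[0].delta.content or ""
print(response)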
@@ -301,10 +308,11 @@ def process_resume_data(file_path):
     try:
         # Extract personal details using Mistral
         per_data = Model_PersonalDetails_Output(resume_text, client)
+        print(per_data)
 
         # Extract professional details using Mistral
         pro_data = Model_ProfessionalDetails_Output(resume_text, client)
-
+        print(pro_data)
         # Check if per_data and pro_data have been populated correctly
         if not per_data:
             logging.warning("Mistral personal data extraction failed.")
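The added print calls surface the raw model outputs in the Space logs. Since the module already configures logging, an equivalent at debug level would look like the sketch below (per_data and pro_data are placeholders standing in for the real extraction results):

import logging

logging.basicConfig(level=logging.DEBUG)

per_data = {"name": "Jane Doe"}    # placeholder for Model_PersonalDetails_Output(...)
pro_data = {"skills": ["Python"]}  # placeholder for Model_ProfessionalDetails_Output(...)

# Debug-level logging can be silenced in production, unlike bare print()
logging.debug("Mistral personal details: %s", per_data)
logging.debug("Mistral professional details: %s", pro_data)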
@@ -359,6 +367,7 @@ def process_resume_data(file_path):
     # If Mistral produces valid output, return it
     if per_data or pro_data:
         logging.info("Successfully extracted data using Mistral.")
+        print(result)
         print("---------Mistral-------")
         return result
     else:
@@ -376,3 +385,5 @@ def process_resume_data(file_path):
     logging.warning("Mistral failed, switching to SpaCy.")
     print("---------SpaCy-------")
     return Parser_from_model(file_path)
+
+
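Taken together, the file's control flow after this commit is Mistral-first with a SpaCy fallback. A condensed, runnable sketch of that flow (every body below is a stub, not the real implementation):

def Model_PersonalDetails_Output(resume_text, client):
    return {}  # stub: real version queries Mistral for personal details

def Model_ProfessionalDetails_Output(resume_text, client):
    return {}  # stub: real version queries Mistral for professional details

def Parser_from_model(file_path):
    return {}  # stub: SpaCy-based fallback parser

def process_resume_data(file_path):
    resume_text, client = "", None  # stand-ins for text extraction and the InferenceClient
    per_data = Model_PersonalDetails_Output(resume_text, client)
    pro_data = Model_ProfessionalDetails_Output(resume_text, client)
    if per_data or pro_data:
        # Hypothetical result shape; the real merge logic lives between these hunks
        return {"personal": per_data, "professional": pro_data}
    # Nothing usable from Mistral: fall back to the SpaCy parser
    return Parser_from_model(file_path)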