Spaces: Sleeping
Tried using NLTK to improve input handling
Browse files
app.py
CHANGED
|
@@ -12,6 +12,7 @@ import chardet
|
|
| 12 |
import gradio as gr
|
| 13 |
import pandas as pd
|
| 14 |
import json
|
|
|
|
| 15 |
|
| 16 |
logging.basicConfig(level=logging.INFO)
|
| 17 |
logger = logging.getLogger(__name__)
|
|
@@ -61,7 +62,15 @@ def load_documents(file_paths):
|
|
| 61 |
logger.error(f"Error processing file {file_path}: {e}")
|
| 62 |
return docs
|
| 63 |
|
| 64 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
def is_valid_input(text):
|
| 67 |
"""Validate the user's input question."""
|
|
@@ -71,10 +80,12 @@ def is_valid_input(text):
|
|
| 71 |
if len(text.strip()) < 2:
|
| 72 |
return False, "Input is too short. Please provide more context or details."
|
| 73 |
|
| 74 |
-
# Check
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
|
| 79 |
return True, "Valid input."
|
| 80 |
|
|
|
|
| 12 |
import gradio as gr
|
| 13 |
import pandas as pd
|
| 14 |
import json
|
| 15 |
+
from nltk.corpus import words
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO)
|
| 18 |
logger = logging.getLogger(__name__)
|
|
|
|
| 62 |
logger.error(f"Error processing file {file_path}: {e}")
|
| 63 |
return docs
|
| 64 |
|
| 65 |
+
# Enhanced input validation
|
| 66 |
+
|
| 67 |
+
# Load NLTK word list
|
| 68 |
+
try:
|
| 69 |
+
english_words = set(words.words())
|
| 70 |
+
except LookupError:
|
| 71 |
+
import nltk
|
| 72 |
+
nltk.download('words')
|
| 73 |
+
english_words = set(words.words())
|
| 74 |
|
| 75 |
def is_valid_input(text):
|
| 76 |
"""Validate the user's input question."""
|
|
|
|
| 80 |
if len(text.strip()) < 2:
|
| 81 |
return False, "Input is too short. Please provide more context or details."
|
| 82 |
|
| 83 |
+
# Check for valid words
|
| 84 |
+
words_in_text = re.findall(r'\b\w+\b', text.lower())
|
| 85 |
+
recognized_words = [word for word in words_in_text if word in english_words]
|
| 86 |
+
|
| 87 |
+
if not recognized_words:
|
| 88 |
+
return False, "Input appears unclear. Please use valid words in your question."
|
| 89 |
|
| 90 |
return True, "Valid input."
|
| 91 |
|