Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -29,7 +29,7 @@ if not hf_token:
|
|
| 29 |
|
| 30 |
# Constants
|
| 31 |
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
|
| 32 |
-
KNOWLEDGE_BASE_DIR = "
|
| 33 |
|
| 34 |
class DocumentLoader:
|
| 35 |
"""Class to manage PDF document loading."""
|
|
@@ -37,7 +37,11 @@ class DocumentLoader:
|
|
| 37 |
@staticmethod
|
| 38 |
def load_pdfs(directory_path: str) -> List:
|
| 39 |
documents = []
|
| 40 |
-
pdf_files = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
for pdf_file in pdf_files:
|
| 43 |
pdf_path = os.path.join(directory_path, pdf_file)
|
|
@@ -48,7 +52,7 @@ class DocumentLoader:
|
|
| 48 |
for doc in pdf_documents:
|
| 49 |
doc.metadata.update({
|
| 50 |
'title': pdf_file,
|
| 51 |
-
'type': 'technical' if 'Valencia' in pdf_file else 'qa',
|
| 52 |
'language': 'en',
|
| 53 |
'page': doc.metadata.get('page', 0)
|
| 54 |
})
|
|
@@ -58,6 +62,9 @@ class DocumentLoader:
|
|
| 58 |
except Exception as e:
|
| 59 |
logger.error(f"Error loading {pdf_file}: {str(e)}")
|
| 60 |
|
|
|
|
|
|
|
|
|
|
| 61 |
return documents
|
| 62 |
|
| 63 |
class TextProcessor:
|
|
@@ -334,4 +341,4 @@ try:
|
|
| 334 |
|
| 335 |
except Exception as e:
|
| 336 |
logger.error(f"Error in Gradio interface creation: {str(e)}")
|
| 337 |
-
raise
|
|
|
|
| 29 |
|
| 30 |
# Constants
|
| 31 |
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
|
| 32 |
+
KNOWLEDGE_BASE_DIR = "." # Changed to root directory
|
| 33 |
|
| 34 |
class DocumentLoader:
|
| 35 |
"""Class to manage PDF document loading."""
|
|
|
|
| 37 |
@staticmethod
|
| 38 |
def load_pdfs(directory_path: str) -> List:
|
| 39 |
documents = []
|
| 40 |
+
pdf_files = [
|
| 41 |
+
f for f in os.listdir(directory_path)
|
| 42 |
+
if f.endswith('.pdf') and
|
| 43 |
+
(f.startswith('valencia') or 'fislac' in f.lower() or 'Valencia' in f)
|
| 44 |
+
]
|
| 45 |
|
| 46 |
for pdf_file in pdf_files:
|
| 47 |
pdf_path = os.path.join(directory_path, pdf_file)
|
|
|
|
| 52 |
for doc in pdf_documents:
|
| 53 |
doc.metadata.update({
|
| 54 |
'title': pdf_file,
|
| 55 |
+
'type': 'technical' if 'valencia' in pdf_file.lower() or 'Valencia' in pdf_file else 'qa',
|
| 56 |
'language': 'en',
|
| 57 |
'page': doc.metadata.get('page', 0)
|
| 58 |
})
|
|
|
|
| 62 |
except Exception as e:
|
| 63 |
logger.error(f"Error loading {pdf_file}: {str(e)}")
|
| 64 |
|
| 65 |
+
if not documents:
|
| 66 |
+
logger.warning("No PDF documents found in the specified directory")
|
| 67 |
+
|
| 68 |
return documents
|
| 69 |
|
| 70 |
class TextProcessor:
|
|
|
|
| 341 |
|
| 342 |
except Exception as e:
|
| 343 |
logger.error(f"Error in Gradio interface creation: {str(e)}")
|
| 344 |
+
raise
|