Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -29,7 +29,7 @@ if not hf_token:
|
|
29 |
|
30 |
# Constants
|
31 |
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
|
32 |
-
KNOWLEDGE_BASE_DIR = "
|
33 |
|
34 |
class DocumentLoader:
|
35 |
"""Class to manage PDF document loading."""
|
@@ -37,7 +37,11 @@ class DocumentLoader:
|
|
37 |
@staticmethod
|
38 |
def load_pdfs(directory_path: str) -> List:
|
39 |
documents = []
|
40 |
-
pdf_files = [
|
|
|
|
|
|
|
|
|
41 |
|
42 |
for pdf_file in pdf_files:
|
43 |
pdf_path = os.path.join(directory_path, pdf_file)
|
@@ -48,7 +52,7 @@ class DocumentLoader:
|
|
48 |
for doc in pdf_documents:
|
49 |
doc.metadata.update({
|
50 |
'title': pdf_file,
|
51 |
-
'type': 'technical' if 'Valencia' in pdf_file else 'qa',
|
52 |
'language': 'en',
|
53 |
'page': doc.metadata.get('page', 0)
|
54 |
})
|
@@ -58,6 +62,9 @@ class DocumentLoader:
|
|
58 |
except Exception as e:
|
59 |
logger.error(f"Error loading {pdf_file}: {str(e)}")
|
60 |
|
|
|
|
|
|
|
61 |
return documents
|
62 |
|
63 |
class TextProcessor:
|
@@ -334,4 +341,4 @@ try:
|
|
334 |
|
335 |
except Exception as e:
|
336 |
logger.error(f"Error in Gradio interface creation: {str(e)}")
|
337 |
-
raise
|
|
|
29 |
|
30 |
# Constants
|
31 |
MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
|
32 |
+
KNOWLEDGE_BASE_DIR = "." # Changed to root directory
|
33 |
|
34 |
class DocumentLoader:
|
35 |
"""Class to manage PDF document loading."""
|
|
|
37 |
@staticmethod
|
38 |
def load_pdfs(directory_path: str) -> List:
|
39 |
documents = []
|
40 |
+
pdf_files = [
|
41 |
+
f for f in os.listdir(directory_path)
|
42 |
+
if f.endswith('.pdf') and
|
43 |
+
(f.startswith('valencia') or 'fislac' in f.lower() or 'Valencia' in f)
|
44 |
+
]
|
45 |
|
46 |
for pdf_file in pdf_files:
|
47 |
pdf_path = os.path.join(directory_path, pdf_file)
|
|
|
52 |
for doc in pdf_documents:
|
53 |
doc.metadata.update({
|
54 |
'title': pdf_file,
|
55 |
+
'type': 'technical' if 'valencia' in pdf_file.lower() or 'Valencia' in pdf_file else 'qa',
|
56 |
'language': 'en',
|
57 |
'page': doc.metadata.get('page', 0)
|
58 |
})
|
|
|
62 |
except Exception as e:
|
63 |
logger.error(f"Error loading {pdf_file}: {str(e)}")
|
64 |
|
65 |
+
if not documents:
|
66 |
+
logger.warning("No PDF documents found in the specified directory")
|
67 |
+
|
68 |
return documents
|
69 |
|
70 |
class TextProcessor:
|
|
|
341 |
|
342 |
except Exception as e:
|
343 |
logger.error(f"Error in Gradio interface creation: {str(e)}")
|
344 |
+
raise
|