Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -317,7 +317,7 @@ def retrieve_metadata(document_indices: List[int], metadata_path: str = 'recipes
|
|
317 |
required_columns = {'id', 'original_file_name', 'url'}
|
318 |
if not required_columns.issubset(metadata_df.columns):
|
319 |
raise ValueError(f"Metadata file must contain columns: {required_columns}")
|
320 |
-
metadata_df['id'] = metadata_df['id'].astype(int)
|
321 |
filtered_metadata = metadata_df[metadata_df['id'].isin(document_indices)]
|
322 |
metadata_dict = {
|
323 |
int(row['id']): {
|
@@ -331,21 +331,6 @@ def retrieve_metadata(document_indices: List[int], metadata_path: str = 'recipes
|
|
331 |
print(f"Error retrieving metadata: {e}")
|
332 |
return {}
|
333 |
|
334 |
-
|
335 |
-
def retrieve_metadata(document_indices: List[str], metadata_path: str = 'recipes_metadata.xlsx') -> Dict[str, Dict[str, str]]:
|
336 |
-
try:
|
337 |
-
metadata_df = pd.read_excel(metadata_path)
|
338 |
-
required_columns = {'id', 'original_file_name', 'url'}
|
339 |
-
if not required_columns.issubset(metadata_df.columns):
|
340 |
-
raise ValueError(f"Metadata file must contain the following columns: {required_columns}")
|
341 |
-
metadata_mapping = metadata_df.set_index('id')[['original_file_name', 'url']].to_dict('index')
|
342 |
-
result = {doc_id: metadata_mapping.get(doc_id, {}) for doc_id in document_indices}
|
343 |
-
return result
|
344 |
-
except Exception as e:
|
345 |
-
print(f"Error retrieving metadata: {e}")
|
346 |
-
return {}
|
347 |
-
|
348 |
-
|
349 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
350 |
try:
|
351 |
pairs = [(query, doc) for doc in document_texts]
|
|
|
317 |
required_columns = {'id', 'original_file_name', 'url'}
|
318 |
if not required_columns.issubset(metadata_df.columns):
|
319 |
raise ValueError(f"Metadata file must contain columns: {required_columns}")
|
320 |
+
metadata_df['id'] = metadata_df['id'].astype(int)
|
321 |
filtered_metadata = metadata_df[metadata_df['id'].isin(document_indices)]
|
322 |
metadata_dict = {
|
323 |
int(row['id']): {
|
|
|
331 |
print(f"Error retrieving metadata: {e}")
|
332 |
return {}
|
333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
334 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
335 |
try:
|
336 |
pairs = [(query, doc) for doc in document_texts]
|