Rathapoom committed on
Commit
87cdd83
·
verified ·
1 Parent(s): 09ec353

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -3,8 +3,7 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from PyPDF2 import PdfReader
5
  import gradio as gr
6
- from datasets import Dataset, load_from_disk, save_to_disk
7
- import faiss
8
 
9
  # Extract text from PDF
10
  def extract_text_from_pdf(pdf_path):
@@ -21,15 +20,16 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
21
  model = AutoModelForCausalLM.from_pretrained(model_name)
22
 
23
  # Extract text from the provided PDF
24
- pdf_text = extract_text_from_pdf("TOPF 2564.pdf") # Updated path
 
25
  passages = [{"title": "", "text": line} for line in pdf_text.split('\n') if line.strip()]
26
 
27
  # Create a Dataset
28
  dataset = Dataset.from_list(passages)
29
 
30
  # Save the dataset and create an index in the current working directory
31
- dataset_path = "./rag_document_dataset"
32
- index_path = "./rag_document_index"
33
 
34
  # Ensure the directory exists
35
  os.makedirs(dataset_path, exist_ok=True)
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from PyPDF2 import PdfReader
5
  import gradio as gr
6
+ from datasets import Dataset, load_from_disk
 
7
 
8
  # Extract text from PDF
9
  def extract_text_from_pdf(pdf_path):
 
20
  model = AutoModelForCausalLM.from_pretrained(model_name)
21
 
22
  # Extract text from the provided PDF
23
+ pdf_path = "/home/user/app/TOPF 2564.pdf" # Ensure this path is correct
24
+ pdf_text = extract_text_from_pdf(pdf_path)
25
  passages = [{"title": "", "text": line} for line in pdf_text.split('\n') if line.strip()]
26
 
27
  # Create a Dataset
28
  dataset = Dataset.from_list(passages)
29
 
30
  # Save the dataset and create an index in the current working directory
31
+ dataset_path = "/home/user/app/rag_document_dataset"
32
+ index_path = "/home/user/app/rag_document_index"
33
 
34
  # Ensure the directory exists
35
  os.makedirs(dataset_path, exist_ok=True)