wifix199 committed on
Commit
df8ede4
·
verified ·
1 Parent(s): 899724f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -1,15 +1,18 @@
1
  import gradio as gr
2
- import openai
3
  from langchain.chains import RetrievalQA
4
- from langchain.embeddings import OpenAIEmbeddings
5
  from langchain.vectorstores import FAISS
 
6
  from langchain.document_loaders import TextLoader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain.llms import OpenAI
9
- import os
10
 
11
- OPENAI_API_KEYS = os.getenv("OPENAI_API_KEYS")
 
 
 
 
12
 
 
13
  # Knowledge base for Crustdata APIs
14
  docs = """
15
  # Crustdata Dataset API
@@ -154,13 +157,14 @@ The Crustdata Discovery and Enrichment API allows users to enrich their datasets
154
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
155
  doc_chunks = text_splitter.create_documents([docs])
156
 
157
- # Embed the documents using OpenAI embeddings
158
- embeddings = OpenAIEmbeddings()
 
159
  docsearch = FAISS.from_documents(doc_chunks, embeddings)
160
 
161
  # Create a QA chain
162
  qa_chain = RetrievalQA.from_chain_type(
163
- llm=OpenAI(model="gpt-3.5-turbo"),
164
  retriever=docsearch.as_retriever(),
165
  return_source_documents=True
166
  )
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
  from langchain.chains import RetrievalQA
 
4
  from langchain.vectorstores import FAISS
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain.document_loaders import TextLoader
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
 
8
 
9
+ # Load a Hugging Face model for Q&A
10
+ model_name = "EleutherAI/gpt-neox-20b" # You can choose a lighter model if needed
11
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
12
+ model = AutoModelForCausalLM.from_pretrained(model_name)
13
+ qa_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=512)
14
 
15
+ # Knowledge base for Crustdata APIs
16
  # Knowledge base for Crustdata APIs
17
  docs = """
18
  # Crustdata Dataset API
 
157
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
158
  doc_chunks = text_splitter.create_documents([docs])
159
 
160
+ # Embed the documents using sentence-transformers
161
+ embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
162
+ embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
163
  docsearch = FAISS.from_documents(doc_chunks, embeddings)
164
 
165
  # Create a QA chain
166
  qa_chain = RetrievalQA.from_chain_type(
167
+ llm=qa_pipeline,
168
  retriever=docsearch.as_retriever(),
169
  return_source_documents=True
170
  )