VanguardAI committed on
Commit
72a27e8
·
verified ·
1 Parent(s): c8b523c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -16
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  import torch
3
  import os
4
  import numpy as np
@@ -7,10 +8,11 @@ from transformers import AutoModel, AutoTokenizer
7
  from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
8
  from parler_tts import ParlerTTSForConditionalGeneration
9
  import soundfile as sf
10
- from llama_index import LLMPredictor, ServiceContext
11
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
12
- from llama_index.langchain_helpers.text_splitter import RecursiveCharacterTextSplitter
13
- from langchain import OpenAI
 
14
  from PIL import Image
15
  from decord import VideoReader, cpu
16
  import requests
@@ -61,21 +63,21 @@ def numpy_calculate(code: str) -> str:
61
  return str(result)
62
  except Exception as e:
63
  return f"An error occurred: {str(e)}"
64
-
65
  # Function to handle different input types
66
  def handle_input(user_prompt, image=None, video=None, audio=None, doc=None):
67
  messages = [{"role": "user", "content": user_prompt}]
68
-
69
  if audio:
70
  transcription = client.audio.transcriptions.create(
71
  file=(audio.name, audio.read()),
72
  model="whisper-large-v3"
73
  )
74
  user_prompt = transcription.text
75
-
76
  if doc:
77
- index = create_rag_index(doc.name, doc.read())
78
- response = index.query(user_prompt)
79
  elif image and not video:
80
  image = Image.open(image).convert('RGB')
81
  messages[0]['content'] = [image, user_prompt]
@@ -90,15 +92,24 @@ def handle_input(user_prompt, image=None, video=None, audio=None, doc=None):
90
  messages=messages,
91
  tools=initialize_tools()
92
  ).choices[0].message.content
93
-
94
  return response
95
 
96
- # Function to create RAG index using LlamaIndex or Langchain
97
- def create_rag_index(file_name, file_content):
98
- docs = SimpleDirectoryReader(file_name, file_content).load_data()
99
- service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=OpenAI(temperature=0)))
100
- index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
101
- return index
 
 
 
 
 
 
 
 
 
102
 
103
  # Function to encode video
104
  def encode_video(video_path):
 
1
  import gradio as gr
2
+ import spaces
3
  import torch
4
  import os
5
  import numpy as np
 
8
  from diffusers import StableDiffusionXLPipeline, UNet2DConditionModel, EulerDiscreteScheduler
9
  from parler_tts import ParlerTTSForConditionalGeneration
10
  import soundfile as sf
11
+ from langchain.embeddings.openai import OpenAIEmbeddings
12
+ from langchain.vectorstores import Chroma
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ from langchain.chains import RetrievalQA
15
+ from langchain.llms import OpenAI
16
  from PIL import Image
17
  from decord import VideoReader, cpu
18
  import requests
 
63
  return str(result)
64
  except Exception as e:
65
  return f"An error occurred: {str(e)}"
66
+
67
  # Function to handle different input types
68
  def handle_input(user_prompt, image=None, video=None, audio=None, doc=None):
69
  messages = [{"role": "user", "content": user_prompt}]
70
+
71
  if audio:
72
  transcription = client.audio.transcriptions.create(
73
  file=(audio.name, audio.read()),
74
  model="whisper-large-v3"
75
  )
76
  user_prompt = transcription.text
77
+
78
  if doc:
79
+ # RAG with Langchain
80
+ response = use_langchain_rag(doc.name, doc.read(), user_prompt)
81
  elif image and not video:
82
  image = Image.open(image).convert('RGB')
83
  messages[0]['content'] = [image, user_prompt]
 
92
  messages=messages,
93
  tools=initialize_tools()
94
  ).choices[0].message.content
95
+
96
  return response
97
 
98
+ # Function to use Langchain for RAG
99
+ def use_langchain_rag(file_name, file_content, query):
100
+ # Split the document into chunks
101
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
102
+ docs = text_splitter.create_documents([file_content])
103
+
104
+ # Create embeddings and store in the vector database
105
+ embeddings = OpenAIEmbeddings()
106
+ db = Chroma.from_documents(docs, embeddings, persist_directory=".chroma_db") # Use a persistent directory
107
+
108
+ # Create a question-answering chain
109
+ qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=db.as_retriever())
110
+
111
+ # Get the answer
112
+ return qa.run(query)
113
 
114
  # Function to encode video
115
  def encode_video(video_path):