Update app.py
Browse files
app.py
CHANGED
@@ -105,20 +105,17 @@ retriever = db.as_retriever(
|
|
105 |
|
106 |
# Load model directly
|
107 |
|
108 |
-
model_id="CohereForAI/c4ai-command-r7b-12-2024"
|
109 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
110 |
-
model = AutoModelForCausalLM.from_pretrained(model_id)
|
111 |
-
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
112 |
-
llm = HuggingFacePipeline(pipeline=pipe)
|
113 |
|
114 |
|
115 |
# Set up the LLM
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
122 |
|
123 |
# Create prompt template with chat history
|
124 |
template = """
|
@@ -158,7 +155,7 @@ def create_rag_chain(chat_history: str):
|
|
158 |
chat_history = ChatHistory()
|
159 |
|
160 |
# Gradio Function
|
161 |
-
|
162 |
def ask_question_gradio(question, history):
|
163 |
try:
|
164 |
# Add user question to chat history
|
|
|
105 |
|
106 |
# Load model directly
|
107 |
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
|
110 |
# Set up the LLM
|
111 |
+
# Set up the LLM: served via the Hugging Face Inference API, which exposes an
# OpenAI-compatible endpoint, so langchain's ChatOpenAI client can talk to it.
llm = ChatOpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    temperature=0,  # deterministic output — appropriate for RAG answering
    api_key=HF_TOKEN,
    model="Qwen/Qwen2.5-Coder-32B-Instruct",
    # FIX: the constructor flag is `streaming`, not `stream`. `stream=True`
    # is not a recognized ChatOpenAI field and would be shunted into
    # model_kwargs (or rejected) without actually enabling token streaming.
    streaming=True,
)
|
119 |
|
120 |
# Create prompt template with chat history
|
121 |
template = """
|
|
|
155 |
chat_history = ChatHistory()
|
156 |
|
157 |
# Gradio Function
|
158 |
+
|
159 |
def ask_question_gradio(question, history):
|
160 |
try:
|
161 |
# Add user question to chat history
|