Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,9 @@ import logging
|
|
12 |
from typing import List, Tuple
|
13 |
from dataclasses import dataclass
|
14 |
from datetime import datetime
|
|
|
|
|
|
|
15 |
|
16 |
# Configure logging
|
17 |
logging.basicConfig(level=logging.INFO)
|
@@ -98,13 +101,23 @@ retriever = db.as_retriever(
|
|
98 |
search_kwargs={"k": 5}
|
99 |
)
|
100 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
# Set up the LLM
|
102 |
-
llm = ChatOpenAI(
|
103 |
-
base_url="https://api-inference.huggingface.co/v1/",
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
)
|
108 |
|
109 |
# Create prompt template with chat history
|
110 |
template = """
|
@@ -144,6 +157,7 @@ def create_rag_chain(chat_history: str):
|
|
144 |
chat_history = ChatHistory()
|
145 |
|
146 |
# Gradio Function
|
|
|
147 |
def ask_question_gradio(question, history):
|
148 |
try:
|
149 |
# Add user question to chat history
|
|
|
from dataclasses import dataclass
from datetime import datetime
from typing import List, Tuple

import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
18 |
|
19 |
# Configure logging
|
20 |
logging.basicConfig(level=logging.INFO)
|
|
|
101 |
search_kwargs={"k": 5}
|
102 |
)
|
103 |
|
# ---------------------------------------------------------------------------
# Set up the LLM: load the model locally and wrap it for LangChain.
# This replaces the previous remote ChatOpenAI client that pointed at the
# Hugging Face inference endpoint (meta-llama/Llama-3.3-70B-Instruct).
# ---------------------------------------------------------------------------
model_id = "CohereForAI/c4ai-command-r7b-12-2024"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# Build a text-generation pipeline over the locally loaded weights;
# `pipeline` comes from the transformers import at the top of the file.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# NOTE(review): `HuggingFacePipeline` is not imported anywhere in the visible
# diff — confirm `from langchain_huggingface import HuggingFacePipeline`
# exists at the top of the file, otherwise this line raises NameError.
llm = HuggingFacePipeline(pipeline=pipe)
121 |
|
122 |
# Create prompt template with chat history
|
123 |
template = """
|
|
|
157 |
chat_history = ChatHistory()
|
158 |
|
159 |
# Gradio Function
|
160 |
+
@spaces.GPU
|
161 |
def ask_question_gradio(question, history):
|
162 |
try:
|
163 |
# Add user question to chat history
|