Spaces:

suayptalha
/

QwQ-32B-Preview-Vision

Running

App Files Files Community

suayptalha committed on Dec 18, 2024

Commit

cc9d2fe

verified ·

1 Parent(s): bc47067

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -40

app.py CHANGED Viewed

@@ -8,13 +8,13 @@ moondream_client = Client("vikhyatk/moondream2")
 # LLaMA için InferenceClient kullanıyoruz
 llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
-# Sohbet geçmişini tutmak için bir değişken
 history = []
 # Resim açıklama fonksiyonu
 def describe_image(image, user_message):
     global history
     # Resmi Moondream2 API'sine gönderiyoruz
     result = moondream_client.predict(
         img=handle_file(image),
@@ -22,52 +22,72 @@ def describe_image(image, user_message):
         api_name="/answer_question"
     )
-    # Moondream2'den alınan açıklamayı sisteme dahil ediyoruz
-    description = result  # Moondream2'nin cevabını alıyoruz
-    # LLaMA API'sine açıklamayı ve kullanıcının mesajını gönderiyoruz
-    history.append((user_message, description))
-    # Sohbet geçmişini birleştirip tek bir mesaj olarak LLaMA'ya gönderiyoruz
-    full_conversation = "\n".join([f"User: {msg[0]}\nAssistant: {msg[1]}" for msg in history])
     llama_result = llama_client.chat_completion(
-        messages=[{"role": "user", "content": full_conversation}],
-        max_tokens=512,  # Burada token sayısını belirleyebilirsiniz
-        temperature=0.7,  # Sıcaklık parametresi
-        top_p=0.95  # Nucleus sampling için top_p parametresi
     )
-    # Sonucu döndürüyoruz
-    return description + "\n\nAssistant: " + llama_result['choices'][0]['message']['content']
-# Sohbet fonksiyonu, resim yüklenip yüklenmediğine göre yönlendirecek
-def chat_or_image(image, user_message):
-    global history
-    # Resim yüklenmişse, önce açıklama alıp sonra LLaMA'ya gönderiyoruz
-    if image:
-        return describe_image(image, user_message)
-    else:
-        # Resim yoksa, direkt LLaMA'ya mesajı gönderiyoruz
-        history.append((user_message, ''))  # Boş cevap ekleniyor, çünkü sadece metin var
-        full_conversation = "\n".join([f"User: {msg[0]}\nAssistant: {msg[1]}" for msg in history])
-        llama_result = llama_client.chat_completion(
-            messages=[{"role": "user", "content": full_conversation}],
-            max_tokens=512,
-            temperature=0.7,
-            top_p=0.95
-        )
-        return llama_result['choices'][0]['message']['content']
 # Gradio arayüzü
-demo = gr.Interface(
-    fn=chat_or_image,  # Hem resim hem de metin için kullanılacak fonksiyon
-    inputs=[
-        gr.Image(type="filepath", label="Upload image (Optional)"),  # Resim yükleme
-        gr.Textbox(label="Ask anything", placeholder="Ask a question...", lines=2)  # Metin girişi
     ],
-    outputs="text",  # Çıktı metin olarak dönecek
 )
 if __name__ == "__main__":
-    demo.launch(show_error=True)  # Hata raporlamayı etkinleştiriyoruz

 # Chat-model client; despite the variable name, it targets Qwen/QwQ-32B-Preview
 llama_client = InferenceClient("Qwen/QwQ-32B-Preview")
+# Conversation history (module-level list mutated by the functions below)
 history = []
 # Image description function
 def describe_image(image, user_message):
     global history
     # Send the image to the Moondream2 API
     result = moondream_client.predict(
         img=handle_file(image),
         api_name="/answer_question"
     )
+    description = result  # Use Moondream2's answer as the description
+    history.append({"role": "user", "content": [{"type": "text", "text": user_message}]})
+    history.append({"role": "assistant", "content": [{"type": "text", "text": description}]})
+    return description
+# Text chat function that sends the accumulated history to the model
+def chat_with_text(user_message, max_new_tokens=250):
+    """Ask the chat model to reply to user_message given the shared history.
+
+    Args:
+        user_message: Text of the user's turn.
+        max_new_tokens: Forwarded to chat_completion as max_tokens.
+
+    Returns:
+        The assistant's reply text, which is also appended to history.
+
+    Raises:
+        Whatever llama_client.chat_completion (or reading its result) raises.
+        In that case the user turn added here is removed again, so a failed
+        request is not resent with every later one.
+    """
+    global history
+    # Add the user's message to the history
+    user_turn = {"role": "user", "content": [{"type": "text", "text": user_message}]}
+    history.append(user_turn)
+    # Flatten each turn's text parts into one plain string for the API
+    texts = [
+        {"role": msg["role"], "content": " ".join(part["text"] for part in msg["content"])}
+        for msg in history
+    ]
+    try:
+        llama_result = llama_client.chat_completion(
+            messages=texts,
+            max_tokens=max_new_tokens,
+            temperature=0.7,
+            top_p=0.95
+        )
+        assistant_reply = llama_result["choices"][0]["message"]["content"]
+    except Exception:
+        # Roll back the unanswered user turn; the identity check avoids
+        # dropping an unrelated entry if history changed in the meantime
+        if history and history[-1] is user_turn:
+            history.pop()
+        raise
+    # Store the assistant's reply in the history
+    history.append({"role": "assistant", "content": [{"type": "text", "text": assistant_reply}]})
+    return assistant_reply
+# Chat function for image and/or text messages
+def bot_streaming(message, history, max_new_tokens=250):
+    """Route one chat message to the image or the text pipeline.
+
+    Args:
+        message: Multimodal message dict. The text is read from "text" and an
+            uploaded image from the first entry of "files" (the key a
+            multimodal gr.ChatInterface uses); a legacy "image" key is still
+            honoured. A plain string is treated as a text-only message.
+        history: Chat history passed in by the caller. Unused here; the
+            helpers keep conversation state in the module-level history list.
+        max_new_tokens: Forwarded to chat_with_text for text-only messages.
+
+    Returns:
+        describe_image's result when an image is present, otherwise
+        chat_with_text's reply.
+    """
+    if isinstance(message, str):
+        message = {"text": message}
+    user_message = message.get("text") or ""
+    image = message.get("image")
+    if not image:
+        files = message.get("files") or []
+        if files:
+            first_file = files[0]
+            # Entries may be plain paths or dicts carrying a "path" key
+            image = first_file.get("path") if isinstance(first_file, dict) else first_file
+    if image:  # An image was uploaded
+        return describe_image(image, user_message)
+    # Text-only message
+    return chat_with_text(user_message, max_new_tokens)
 # Gradio interface
+demo = gr.ChatInterface(
+    fn=bot_streaming,
+    title="Multimodal Chat Assistant",
+    # Images are uploaded through the multimodal textbox, so the token limit
+    # is the only additional input; it is passed to bot_streaming as its
+    # third argument (max_new_tokens).
+    additional_inputs=[
+        gr.Slider(
+            minimum=10,
+            maximum=500,
+            value=250,
+            step=10,
+            label="Maximum number of new tokens to generate",
+        )
     ],
+    description=(
+        "This demo combines text and image understanding using Moondream2 for visual "
+        "tasks and QwQ-32B-Preview for conversational AI. Upload an image, ask questions, "
+        "or just chat!"
+    ),
+    stop_btn="Stop Generation",
+    fill_height=True,
+    multimodal=True,
 )
 if __name__ == "__main__":
+    demo.launch(debug=True)