Update app.py
app.py CHANGED
@@ -13,6 +13,7 @@ import networkx as nx
 from collections import Counter
 import json
 from datetime import datetime
+from transformers import pipeline
 
 @dataclass
 class ChatMessage:
@@ -32,6 +33,8 @@ class XylariaChat:
             model="Qwen/QwQ-32B-Preview",
             token=self.hf_token
         )
+
+        self.image_captioning_pipe = pipeline("image-text-to-text", model="llava-onevision-qwen2-0.5b-si-hf")
 
         self.image_api_url = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
         self.image_api_headers = {"Authorization": f"Bearer {self.hf_token}"}
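For reference, a minimal standalone version of this pipeline setup looks like the sketch below. One hedge: the commit passes a bare model id, but on the Hugging Face Hub this checkpoint is published under the llava-hf namespace, so the fully qualified id used here is an assumption about what was intended.

# Standalone sketch of the new captioning pipeline, assuming the qualified
# Hub id "llava-hf/llava-onevision-qwen2-0.5b-si-hf"; the bare id in the
# commit may not resolve without the namespace prefix.
from transformers import pipeline

image_captioning_pipe = pipeline(
    "image-text-to-text",
    model="llava-hf/llava-onevision-qwen2-0.5b-si-hf",
)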
@@ -401,6 +404,29 @@ class XylariaChat:
             print(f"Error resetting API client: {e}")
 
         return None
+
+    def caption_image_llava(self, image_path, prompt):
+        try:
+            with open(image_path, "rb") as img_file:
+                image_data = base64.b64encode(img_file.read()).decode("utf-8")
+
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image", "url": image_path},
+                        {"type": "text", "text": prompt},
+                    ],
+                },
+            ]
+
+            caption_result = self.image_captioning_pipe(text=messages, max_new_tokens=50)
+
+            caption = caption_result[0]['generated_text'] if caption_result else "No caption generated"
+
+            return caption
+        except Exception as e:
+            return f"Error captioning image with llava: {str(e)}"
 
     def caption_image(self, image):
         try:
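The helper drives the pipeline with a chat-style message list, which the image-text-to-text task accepts through its text argument. Two things worth flagging: image_data is computed but never used (the message references the image by its path via the "url" entry, so the base64 step is dead code, and it assumes base64 is imported elsewhere in app.py), and with chat-format input the pipeline's generated_text field may be the whole conversation rather than a bare string. A trimmed sketch of the same call, with a placeholder image path:

# Hedged sketch of the helper's core, minus the unused base64 step.
# "photo.jpg" is a placeholder; the pipeline loads the image itself from
# the "url" entry of the chat-style message.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "photo.jpg"},
            {"type": "text", "text": "Can you describe this image for me?"},
        ],
    },
]
result = image_captioning_pipe(text=messages, max_new_tokens=50)
# With chat input, generated_text may be the full message list; if so, the
# caption text itself would be result[0]["generated_text"][-1]["content"].
caption = result[0]["generated_text"] if result else "No caption generated"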
@@ -484,8 +510,9 @@ class XylariaChat:
             messages.append(msg)
 
         if image:
-
-
+            image_caption_prompt = "Can you describe this image for me?"
+            caption = self.caption_image_llava(image, image_caption_prompt)
+            user_input = f"Here is a description of an image: {caption}\n\n{user_input}"
 
         messages.append(ChatMessage(
             role="user",
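One caveat at this call site: on failure, caption_image_llava returns an "Error captioning image with llava: ..." string, which the new lines would splice into the prompt as if it were a real caption. A guarded variant (a sketch, assuming image holds a local file path, since the helper open()s it directly):

# Hypothetical guard around the new call-site logic; skips splicing when
# the helper surfaced an error string instead of a caption.
if image:
    caption = self.caption_image_llava(image, "Can you describe this image for me?")
    if not caption.startswith("Error captioning image"):
        user_input = f"Here is a description of an image: {caption}\n\n{user_input}"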