qwen season
Browse files
- app.py +25 -40
- requirements.txt +15 -11
app.py
CHANGED
@@ -1,20 +1,16 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
-from llama_index.llms import HuggingFaceLLM
-from llama_index import ServiceContext
-from llama_index.embeddings import HuggingFaceEmbedding
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-import
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from dotenv import load_dotenv
 from typing import Optional, Dict, Any
-import yt_dlp
-from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image
+import yt_dlp
+import torch
 import re
+from io import BytesIO
 
 # (Keep Constants as is)
 # --- Constants ---
@@ -28,51 +24,40 @@ class BasicAgent:
         print("BasicAgent initialized.")
         load_dotenv()
 
-        #
-        model_name = "
+        # Initialize model and tokenizer using Qwen
+        model_name = "Qwen/Qwen-7B-Chat"  # Changed to Qwen
 
-        # Configure quantization
+        # Configure quantization
         quantization_config = BitsAndBytesConfig(
-            load_in_8bit=True,
-            bnb_4bit_compute_dtype=torch.float16,
+            load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
         )
 
-        self.tokenizer = AutoTokenizer.from_pretrained(
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name, trust_remote_code=True  # Required for Qwen
+        )
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            low_cpu_mem_usage=True,
-            trust_remote_code=True,
-            quantization_config=quantization_config,
-        )
-
-        # Create HuggingFaceLLM instance
-        self.llm = HuggingFaceLLM(
-            context_window=4096,
-            max_new_tokens=512,
-            tokenizer=self.tokenizer,
-            model=self.model,
-            model_name=model_name,
             device_map="auto",
-
+            quantization_config=quantization_config,
+            trust_remote_code=True,
         )
 
-        # Initialize vision model
-
-
-        )
+        # Initialize vision model
+        vision_model_name = "microsoft/kosmos-2-patch14-224"
+        self.vision_processor = AutoProcessor.from_pretrained(vision_model_name)
         self.vision_model = AutoModelForVision2Seq.from_pretrained(
-            "
+            vision_model_name, device_map="auto", trust_remote_code=True
         )
 
-        # Initialize tools
-        self.tools = {
-            "analyze_image": self.analyze_image,
-            "analyze_video": self.analyze_video,
-            "analyze_data": self.analyze_data,
-        }
-
         print("Agent initialized with multimodal capabilities.")
 
+    def generate_text(self, prompt: str) -> str:
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(
+            **inputs, max_new_tokens=512, temperature=0.1, do_sample=True
+        )
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     def analyze_image(self, image_url: str) -> str:
         try:
             response = requests.get(image_url)
@@ -165,7 +150,7 @@ class BasicAgent:
             return self.analyze_image(image_match.group(0))
 
             # General question
-            return self.
+            return self.generate_text(question)
 
         except Exception as e:
             print(f"Error processing question: {e}")
requirements.txt
CHANGED
@@ -1,12 +1,16 @@
-gradio
-
-
-transformers
-python-dotenv
-torch
-sentence-transformers
-nltk
-accelerate
-bitsandbytes
+gradio~=4.44.0
+numpy<2.0.0
+requests==2.31.0
+transformers
+python-dotenv==1.0.0
+torch==2.2.0
+sentence-transformers==2.3.1
+nltk==3.8.1
+accelerate==0.27.2
+bitsandbytes==0.41.0
 yt-dlp
-Pillow
+Pillow==10.2.0
+pandas==2.1.4
+gradio[oauth]
+transformers_stream_generator
+einops
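The two unpinned additions at the bottom are there because Qwen's trust_remote_code path imports them at load time: einops for tensor rearranging in the attention code, and transformers_stream_generator for its streaming generation utilities. Qwen's tokenizer also imports tiktoken, which is not listed here; if the Space still fails to build, that is worth checking. A fail-fast startup check along these lines (a hypothetical helper, not part of this commit) would surface the problem before the multi-gigabyte model download:

# Hypothetical startup check: verify the modules Qwen's remote code imports
# are importable before spending minutes downloading model weights.
import importlib

for pkg in ("einops", "transformers_stream_generator", "tiktoken"):
    try:
        importlib.import_module(pkg)
    except ImportError as exc:
        raise SystemExit(f"Missing Qwen dependency: {pkg}") from exc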