leofltt committed
Commit 4489283 · 1 Parent(s): b795696

qwen season

Files changed (2)
  1. app.py +25 -40
  2. requirements.txt +15 -11
app.py CHANGED
@@ -1,20 +1,16 @@
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
-from llama_index import VectorStoreIndex, SimpleDirectoryReader, Document
-from llama_index.llms import HuggingFaceLLM
-from llama_index import ServiceContext
-from llama_index.embeddings import HuggingFaceEmbedding
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
-import torch
+from transformers import AutoProcessor, AutoModelForVision2Seq
 from dotenv import load_dotenv
 from typing import Optional, Dict, Any
-import yt_dlp
-from transformers import AutoProcessor, AutoModelForVision2Seq
 from PIL import Image
+import yt_dlp
+import torch
 import re
+from io import BytesIO

 # (Keep Constants as is)
 # --- Constants ---
@@ -28,51 +24,40 @@ class BasicAgent:
         print("BasicAgent initialized.")
         load_dotenv()

-        # Use official Mistral model
-        model_name = "mistralai/Mixtral-8x7B-Instruct-v0.2" # or "mistralai/Mistral-7B-Instruct-v0.2"
+        # Initialize model and tokenizer using Qwen
+        model_name = "Qwen/Qwen-7B-Chat" # Changed to Qwen

-        # Configure quantization for efficient CPU usage
+        # Configure quantization
         quantization_config = BitsAndBytesConfig(
-            load_in_8bit=True, # 8-bit is more CPU friendly than 4-bit
-            bnb_4bit_compute_dtype=torch.float16,
+            load_in_8bit=True, bnb_4bit_compute_dtype=torch.float16
         )

-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_name, trust_remote_code=True # Required for Qwen
+        )
         self.model = AutoModelForCausalLM.from_pretrained(
             model_name,
-            low_cpu_mem_usage=True,
-            trust_remote_code=True,
-            quantization_config=quantization_config,
-        )
-
-        # Create HuggingFaceLLM instance
-        self.llm = HuggingFaceLLM(
-            context_window=4096,
-            max_new_tokens=512,
-            tokenizer=self.tokenizer,
-            model=self.model,
-            model_name=model_name,
             device_map="auto",
-            temperature=0.1,
+            quantization_config=quantization_config,
+            trust_remote_code=True,
         )

-        # Initialize vision model for image/video understanding
-        self.vision_processor = AutoProcessor.from_pretrained(
-            "microsoft/kosmos-2-patch14-224"
-        )
+        # Initialize vision model
+        vision_model_name = "microsoft/kosmos-2-patch14-224"
+        self.vision_processor = AutoProcessor.from_pretrained(vision_model_name)
         self.vision_model = AutoModelForVision2Seq.from_pretrained(
-            "microsoft/kosmos-2-patch14-224"
+            vision_model_name, device_map="auto", trust_remote_code=True
         )

-        # Initialize tools
-        self.tools = {
-            "analyze_image": self.analyze_image,
-            "analyze_video": self.analyze_video,
-            "analyze_data": self.analyze_data,
-        }
-
         print("Agent initialized with multimodal capabilities.")

+    def generate_text(self, prompt: str) -> str:
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(
+            **inputs, max_new_tokens=512, temperature=0.1, do_sample=True
+        )
+        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     def analyze_image(self, image_url: str) -> str:
         try:
             response = requests.get(image_url)
@@ -165,7 +150,7 @@ class BasicAgent:
                 return self.analyze_image(image_match.group(0))

             # General question
-            return self.generate_response(question)
+            return self.generate_text(question)

         except Exception as e:
             print(f"Error processing question: {e}")
requirements.txt CHANGED
@@ -1,12 +1,16 @@
-gradio
-requests
-llama-index
-transformers>=4.36.0
-python-dotenv
-torch
-sentence-transformers
-nltk
-accelerate
-bitsandbytes>=0.41.0
+gradio~=4.44.0
+numpy<2.0.0
+requests==2.31.0
+transformers
+python-dotenv==1.0.0
+torch==2.2.0
+sentence-transformers==2.3.1
+nltk==3.8.1
+accelerate==0.27.2
+bitsandbytes==0.41.0
 yt-dlp
-Pillow
+Pillow==10.2.0
+pandas==2.1.4
+gradio[oauth]
+transformers_stream_generator
+einops
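
The two new entries at the bottom (transformers_stream_generator and einops) are runtime imports of Qwen's trust_remote_code modeling files, which is presumably why they arrive alongside the model switch. As a quick, illustrative check that the pinned environment resolves (not part of the repo; package names as listed above), something like:

# Illustrative environment check for the pinned requirements.
from importlib.metadata import version, PackageNotFoundError

pins = ["gradio", "numpy", "requests", "transformers", "torch", "bitsandbytes",
        "accelerate", "einops", "transformers_stream_generator", "yt-dlp", "Pillow"]
for name in pins:
    try:
        print(f"{name}: {version(name)}")
    except PackageNotFoundError:
        print(f"{name}: not installed")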