reab5555 committed on
Commit
f57fde9
·
verified ·
1 Parent(s): 3da6b4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -26
app.py CHANGED
@@ -19,42 +19,48 @@ load_dotenv()
19
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
20
 
21
 
22
- def encode_image_to_base64(image):
23
- # If image is a tuple (as sometimes provided by Gradio), take the first element
24
- if isinstance(image, tuple):
25
- image = image[0]
26
-
27
- # If image is a numpy array, convert to PIL Image
28
- if isinstance(image, np.ndarray):
29
- image = Image.fromarray(image)
30
-
31
- # Ensure image is in PIL Image format
32
  if not isinstance(image, Image.Image):
33
- raise ValueError("Input must be a PIL Image, numpy array, or tuple containing an image")
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  buffered = io.BytesIO()
36
- image.save(buffered, format="PNG")
 
 
37
  return base64.b64encode(buffered.getvalue()).decode('utf-8')
38
 
39
-
40
  def analyze_image(image):
41
  client = OpenAI(api_key=OPENAI_API_KEY)
42
- base64_image = encode_image_to_base64(image)
43
 
44
- # Build the list-of-dicts prompt:
 
 
 
45
  prompt_dict = [
46
  {
47
  "type": "text",
48
- "text": """Your task is to determine if the image is surprising or not surprising.
49
- If the image is surprising, determine which element, figure, or object in the image is making the image surprising and write it only in one sentence with no more than 6 words.
50
- Otherwise, write 'NA'.
51
- Also, rate how surprising the image is on a scale of 1-5, where 1 is not surprising at all and 5 is highly surprising.
52
- Provide the response as a JSON with the following structure:
53
  {
54
- "label": "[surprising OR not surprising]",
55
- "element": "[element]",
56
- "rating": [1-5]
57
- }"""
 
58
  },
59
  {
60
  "type": "image_url",
@@ -64,15 +70,16 @@ def analyze_image(image):
64
  }
65
  ]
66
 
67
- # JSON-encode the list so "content" is a string
68
  json_prompt = json.dumps(prompt_dict)
69
 
 
70
  response = client.chat.completions.create(
71
  model="gpt-4o-mini",
72
  messages=[
73
  {
74
  "role": "user",
75
- "content": json_prompt, # must be a string
76
  }
77
  ],
78
  max_tokens=100,
@@ -83,6 +90,7 @@ def analyze_image(image):
83
  return response.choices[0].message.content
84
 
85
 
 
86
  def show_mask(mask, ax, random_color=False):
87
  if random_color:
88
  color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
 
19
  OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
20
 
21
 
22
def resize_and_compress(image, max_width=800, max_height=800, quality=50):
    """Downscale an image to fit a bounding box and return it as Base64 JPEG.

    The image is shrunk (never enlarged) so that it fits inside
    ``max_width`` x ``max_height`` while keeping its aspect ratio, and is
    then encoded as JPEG at the requested quality. This reduces the size of
    the payload but does not enforce a hard upper limit on it.

    Args:
        image: A ``PIL.Image.Image`` instance.
        max_width: Maximum width, in pixels, of the encoded image.
        max_height: Maximum height, in pixels, of the encoded image.
        quality: JPEG quality setting forwarded to ``Image.save``.

    Returns:
        The JPEG-encoded image as a Base64 ``str``.

    Raises:
        ValueError: If ``image`` is not a PIL Image.
    """
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a PIL Image")

    # The JPEG encoder rejects modes such as RGBA, LA and P (typical for PNG
    # uploads) with an OSError. Convert only those modes, and do it before
    # resizing so the resampling filter works on full-colour data.
    # NOTE: converting drops any alpha channel without compositing it.
    jpeg_writable_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_writable_modes:
        image = image.convert("RGB")

    width, height = image.size
    if width > max_width or height > max_height:
        # Cross-multiply to find which bound is the tighter one. Comparing
        # width/max_width against height/max_height this way stays correct
        # when the bounding box is not square and avoids float rounding.
        if width * max_height >= height * max_width:
            new_width = max_width
            new_height = height * max_width // width
        else:
            new_height = max_height
            new_width = width * max_height // height
        # Clamp to at least one pixel: an extreme aspect ratio would
        # otherwise round a side down to 0 and make resize() fail.
        new_size = (max(1, int(new_width)), max(1, int(new_height)))
        image = image.resize(new_size, Image.Resampling.LANCZOS)

    buffered = io.BytesIO()
    # Save as JPEG with reduced quality. getvalue() returns the whole buffer
    # regardless of the stream position, so no seek is needed afterwards.
    image.save(buffered, format="JPEG", quality=quality)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')
43
 
 
44
  def analyze_image(image):
45
  client = OpenAI(api_key=OPENAI_API_KEY)
 
46
 
47
+ # Step 1: Resize + compress to keep the Base64 string under 1 MB
48
+ base64_image = resize_and_compress(image, max_width=800, max_height=800, quality=50)
49
+
50
+ # Build the list-of-dicts prompt
51
  prompt_dict = [
52
  {
53
  "type": "text",
54
+ "text": """Your task is to determine if the image is surprising or not.
55
+ If the image is surprising, which element is surprising (max 6 words).
56
+ Otherwise, 'NA'. Also rate how surprising (1-5).
57
+ Return JSON like:
 
58
  {
59
+ "label": "[surprising or not surprising]",
60
+ "element": "[element]",
61
+ "rating": [1-5]
62
+ }
63
+ """
64
  },
65
  {
66
  "type": "image_url",
 
70
  }
71
  ]
72
 
73
+ # JSON-encode to ensure content is a string
74
  json_prompt = json.dumps(prompt_dict)
75
 
76
+ # Send request
77
  response = client.chat.completions.create(
78
  model="gpt-4o-mini",
79
  messages=[
80
  {
81
  "role": "user",
82
+ "content": json_prompt
83
  }
84
  ],
85
  max_tokens=100,
 
90
  return response.choices[0].message.content
91
 
92
 
93
+
94
  def show_mask(mask, ax, random_color=False):
95
  if random_color:
96
  color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)