Malaji71 committed on
Commit
9abf097
·
verified ·
1 Parent(s): e53d7f7

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +137 -49
models.py CHANGED
@@ -7,6 +7,7 @@ import spaces
7
  import logging
8
  import tempfile
9
  import os
 
10
  from typing import Optional, Dict, Any, Tuple
11
  from PIL import Image
12
  from gradio_client import Client, handle_file
@@ -82,38 +83,121 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
82
  return False
83
 
84
  def _extract_camera_setup(self, description: str) -> Optional[str]:
85
- """Extract camera setup recommendation from BAGEL response"""
86
  try:
87
- # Look for CAMERA_SETUP section
88
  if "CAMERA_SETUP:" in description:
89
  parts = description.split("CAMERA_SETUP:")
90
  if len(parts) > 1:
91
- camera_part = parts[1].strip()
92
- # Clean up any additional formatting
93
- camera_part = camera_part.replace("\n", " ").strip()
94
- return camera_part
 
95
 
96
- # Alternative patterns for camera recommendations
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  camera_patterns = [
98
- "Shot on ",
99
- "Camera: ",
100
- "Equipment: ",
101
- "Recommended camera:",
102
- "Camera setup:"
 
103
  ]
104
 
 
105
  for pattern in camera_patterns:
106
- if pattern in description:
107
- # Extract text after the pattern
108
- idx = description.find(pattern)
109
- camera_text = description[idx:].split('.')[0] # Take first sentence
110
- if len(camera_text) > len(pattern) + 10: # Ensure meaningful content
111
- return camera_text.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  return None
114
 
115
  except Exception as e:
116
- logger.warning(f"Failed to extract camera setup: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  return None
118
 
119
  def _save_temp_image(self, image: Image.Image) -> str:
@@ -165,22 +249,24 @@ class BagelAPIAnalyzer(BaseImageAnalyzer):
165
  }
166
 
167
  try:
168
- # Default prompt for detailed image analysis
169
  if prompt is None:
170
- prompt = """You are analyzing a photograph for FLUX image generation. Provide a detailed analysis in two sections:
171
 
172
- 1. DESCRIPTION: Start directly with the subject (e.g., "A color photograph showing..." or "A black and white photograph depicting..."). First, determine if this is a photograph, illustration, or artwork. Then describe the visual elements, composition, lighting, colors (be specific about the color palette - warm tones, cool tones, monochrome, etc.), artistic style, mood, and atmosphere. Also mention the image format/aspect ratio (square, portrait, landscape, widescreen, etc.) and how the composition uses this format. Write as a flowing paragraph without numbered lists.
 
 
 
 
 
173
 
174
- 2. CAMERA_SETUP: Based on the photographic characteristics, scene type, and aspect ratio you observe, recommend the specific camera system and lens that would realistically capture this type of scene:
175
- - For street/documentary photography: suggest cameras like Canon EOS R6, Sony A7 IV, Leica Q2 with 35mm or 24-70mm lenses
176
- - For portraits: suggest cameras like Canon EOS R5, Sony A7R V with 85mm or 135mm lenses
177
- - For landscapes/widescreen: suggest cameras like Phase One XT, Fujifilm GFX with wide-angle lenses (16-35mm, 24-70mm)
178
- - For sports/action: suggest cameras like Canon EOS-1D X, Sony A9 III with telephoto lenses
179
- - For macro: suggest specialized macro lenses
180
- - For cinematic/widescreen formats: suggest cinema cameras or full-frame with appropriate aspect ratios
181
- Be specific about focal length, aperture, and shooting style based on what you actually see in the image dimensions and content.
182
 
183
- Analyze carefully and be accurate about colors, image type, and proportions."""
184
 
185
  # Save image to temporary file
186
  temp_path = self._save_temp_image(image)
@@ -195,7 +281,7 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
195
  prompt=prompt,
196
  show_thinking=False,
197
  do_sample=False,
198
- text_temperature=0.3,
199
  max_new_tokens=512,
200
  api_name=self.api_endpoint
201
  )
@@ -206,17 +292,19 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
206
  else:
207
  description = str(result)
208
 
209
- # Clean up the description and extract camera setup if present
210
  if isinstance(description, str) and description.strip():
211
  description = description.strip()
212
 
213
- # Store camera setup separately if found
214
  camera_setup = self._extract_camera_setup(description)
215
  if camera_setup:
216
  metadata["camera_setup"] = camera_setup
217
  metadata["has_camera_suggestion"] = True
 
218
  else:
219
  metadata["has_camera_suggestion"] = False
 
220
  else:
221
  description = "Detailed image analysis completed successfully"
222
  metadata["has_camera_suggestion"] = False
@@ -226,7 +314,7 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
226
  "response_length": len(description)
227
  })
228
 
229
- logger.info(f"BAGEL API analysis complete: {len(description)} characters")
230
  return description, metadata
231
 
232
  except Exception as e:
@@ -240,22 +328,22 @@ Analyze carefully and be accurate about colors, image type, and proportions."""
240
 
241
  def analyze_for_flux_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
242
  """Analyze image specifically for FLUX prompt generation"""
243
- flux_prompt = """You are analyzing a photograph for professional FLUX generation. Provide two sections:
244
 
245
- 1. DESCRIPTION: Determine first if this is a real photograph, digital artwork, or illustration. Then create a detailed, flowing description starting directly with the subject. Be precise about:
246
- - Image type (photograph, illustration, artwork)
247
- - Color palette (specify if color or black/white, warm/cool tones, specific colors)
248
- - Photographic style (street, portrait, landscape, documentary, artistic, etc.)
249
- - Composition, lighting, mood, and atmosphere
250
- Write as a single coherent paragraph.
 
 
 
 
 
 
251
 
252
- 2. CAMERA_SETUP: Recommend specific professional equipment that would realistically capture this exact scene:
253
- - Street/urban scenes: Canon EOS R6, Sony A7 IV, Leica Q2 with 24-70mm f/2.8 or 35mm f/1.4
254
- - Portraits: Canon EOS R5, Sony A7R V, Hasselblad X2D with 85mm f/1.4 or 135mm f/2
255
- - Landscapes: Phase One XT, Fujifilm GFX 100S with 16-35mm f/2.8 or 40mm f/4
256
- - Documentary: Canon EOS-1D X, Sony A9 III with 24-105mm f/4 or 70-200mm f/2.8
257
- - Action/Sports: Canon EOS R3, Sony A1 with 300mm f/2.8 or 400mm f/2.8
258
- Match the equipment to what you actually observe in the scene type and shooting conditions."""
259
 
260
  return self.analyze_image(image, flux_prompt)
261
 
 
7
  import logging
8
  import tempfile
9
  import os
10
+ import re
11
  from typing import Optional, Dict, Any, Tuple
12
  from PIL import Image
13
  from gradio_client import Client, handle_file
 
83
  return False
84
 
85
  def _extract_camera_setup(self, description: str) -> Optional[str]:
86
+ """Extract camera setup recommendation from BAGEL response with improved parsing"""
87
  try:
88
+ # Look for CAMERA_SETUP section first
89
  if "CAMERA_SETUP:" in description:
90
  parts = description.split("CAMERA_SETUP:")
91
  if len(parts) > 1:
92
+ camera_section = parts[1].strip()
93
+ # Take the first meaningful sentence from camera setup
94
+ camera_text = camera_section.split('\n')[0].strip()
95
+ if len(camera_text) > 20: # Ensure meaningful content
96
+ return self._parse_camera_recommendation(camera_text)
97
 
98
+ # Look for "2. CAMERA_SETUP" pattern
99
+ if "2. CAMERA_SETUP" in description:
100
+ parts = description.split("2. CAMERA_SETUP")
101
+ if len(parts) > 1:
102
+ camera_section = parts[1].strip()
103
+ camera_text = camera_section.split('\n')[0].strip()
104
+ if len(camera_text) > 20:
105
+ return self._parse_camera_recommendation(camera_text)
106
+
107
+ # Look for camera recommendations within the text
108
+ camera_recommendation = self._find_camera_recommendation(description)
109
+ if camera_recommendation:
110
+ return camera_recommendation
111
+
112
+ return None
113
+
114
+ except Exception as e:
115
+ logger.warning(f"Failed to extract camera setup: {e}")
116
+ return None
117
+
118
+ def _parse_camera_recommendation(self, camera_text: str) -> Optional[str]:
119
+ """Parse and extract specific camera and lens information"""
120
+ try:
121
+ # Remove common prefixes and clean text
122
+ camera_text = re.sub(r'^(Based on.*?recommend|I would recommend|For this.*?recommend)\s*', '', camera_text, flags=re.IGNORECASE)
123
+ camera_text = re.sub(r'^(using a|use a|cameras? like)\s*', '', camera_text, flags=re.IGNORECASE)
124
+
125
+ # Extract camera model with specific patterns
126
  camera_patterns = [
127
+ r'(Canon EOS [R\d]+[^\s,]*(?:\s+[IVX]+)?)',
128
+ r'(Sony A[^\s,]+(?:\s+[IVX]+)?)',
129
+ r'(Leica [^\s,]+)',
130
+ r'(Hasselblad [^\s,]+)',
131
+ r'(Phase One [^\s,]+)',
132
+ r'(Fujifilm [^\s,]+)'
133
  ]
134
 
135
+ camera_model = None
136
  for pattern in camera_patterns:
137
+ match = re.search(pattern, camera_text, re.IGNORECASE)
138
+ if match:
139
+ camera_model = match.group(1).strip()
140
+ break
141
+
142
+ # Extract lens information with improved patterns
143
+ lens_patterns = [
144
+ r'(\d+mm\s*f/[\d.]+(?:\s*lens)?)',
145
+ r'(\d+-\d+mm\s*f/[\d.]+(?:\s*lens)?)',
146
+ r'(with\s+(?:a\s+)?(\d+mm[^,.]*))',
147
+ r'(paired with.*?(\d+mm[^,.]*))'
148
+ ]
149
+
150
+ lens_info = None
151
+ for pattern in lens_patterns:
152
+ match = re.search(pattern, camera_text, re.IGNORECASE)
153
+ if match:
154
+ lens_info = match.group(1).strip()
155
+ lens_info = re.sub(r'^(with\s+(?:a\s+)?|paired with\s+)', '', lens_info, flags=re.IGNORECASE)
156
+ break
157
+
158
+ # Extract aperture if not in lens info
159
+ if not lens_info or 'f/' not in lens_info:
160
+ aperture_match = re.search(r'(f/[\d.]+)', camera_text)
161
+ aperture = aperture_match.group(1) if aperture_match else None
162
+ if aperture and lens_info:
163
+ lens_info = f"{lens_info} {aperture}"
164
+
165
+ # Build clean recommendation
166
+ parts = []
167
+ if camera_model:
168
+ parts.append(camera_model)
169
+ if lens_info:
170
+ parts.append(lens_info)
171
+
172
+ if parts:
173
+ result = ', '.join(parts)
174
+ logger.info(f"Parsed camera recommendation: {result}")
175
+ return result
176
 
177
  return None
178
 
179
  except Exception as e:
180
+ logger.warning(f"Failed to parse camera recommendation: {e}")
181
+ return None
182
+
183
+ def _find_camera_recommendation(self, text: str) -> Optional[str]:
184
+ """Find camera recommendations anywhere in the text"""
185
+ try:
186
+ # Look for sentences containing camera info
187
+ sentences = re.split(r'[.!?]', text)
188
+
189
+ for sentence in sentences:
190
+ # Check if sentence contains camera info
191
+ if any(brand in sentence.lower() for brand in ['canon', 'sony', 'leica', 'hasselblad', 'phase one', 'fujifilm']):
192
+ if any(term in sentence.lower() for term in ['recommend', 'suggest', 'would use', 'camera', 'lens']):
193
+ parsed = self._parse_camera_recommendation(sentence.strip())
194
+ if parsed:
195
+ return parsed
196
+
197
+ return None
198
+
199
+ except Exception as e:
200
+ logger.warning(f"Failed to find camera recommendation: {e}")
201
  return None
202
 
203
  def _save_temp_image(self, image: Image.Image) -> str:
 
249
  }
250
 
251
  try:
252
+ # Enhanced prompt for better structured output
253
  if prompt is None:
254
+ prompt = """Analyze this image for professional photography reproduction. Provide exactly two sections:
255
 
256
+ 1. DESCRIPTION: Write a single flowing paragraph describing what you see. Start directly with the subject (e.g., "A color photograph showing..." or "A black and white image depicting..."). Include:
257
+ - Image type (photograph, illustration, artwork)
258
+ - Subject and composition
259
+ - Color palette and lighting conditions
260
+ - Mood and atmosphere
261
+ - Photographic style and format
262
 
263
+ 2. CAMERA_SETUP: Based on the scene type you observe, recommend ONE specific professional camera and lens combination:
264
+ - For street/documentary scenes: Canon EOS R6 with 35mm f/1.4 lens
265
+ - For portrait photography: Canon EOS R5 with 85mm f/1.4 lens
266
+ - For landscape photography: Phase One XT with 24-70mm f/4 lens
267
+ - For action/sports: Sony A1 with 70-200mm f/2.8 lens
 
 
 
268
 
269
+ Give only the camera model and lens specification, nothing else."""
270
 
271
  # Save image to temporary file
272
  temp_path = self._save_temp_image(image)
 
281
  prompt=prompt,
282
  show_thinking=False,
283
  do_sample=False,
284
+ text_temperature=0.2,
285
  max_new_tokens=512,
286
  api_name=self.api_endpoint
287
  )
 
292
  else:
293
  description = str(result)
294
 
295
+ # Process the description and extract camera setup
296
  if isinstance(description, str) and description.strip():
297
  description = description.strip()
298
 
299
+ # Extract camera setup with improved parsing
300
  camera_setup = self._extract_camera_setup(description)
301
  if camera_setup:
302
  metadata["camera_setup"] = camera_setup
303
  metadata["has_camera_suggestion"] = True
304
+ logger.info(f"Extracted camera setup: {camera_setup}")
305
  else:
306
  metadata["has_camera_suggestion"] = False
307
+ logger.warning("No valid camera setup found in BAGEL response")
308
  else:
309
  description = "Detailed image analysis completed successfully"
310
  metadata["has_camera_suggestion"] = False
 
314
  "response_length": len(description)
315
  })
316
 
317
+ logger.info(f"BAGEL API analysis complete: {len(description)} characters, Camera: {metadata.get('has_camera_suggestion', False)}")
318
  return description, metadata
319
 
320
  except Exception as e:
 
328
 
329
  def analyze_for_flux_prompt(self, image: Image.Image) -> Tuple[str, Dict[str, Any]]:
330
  """Analyze image specifically for FLUX prompt generation"""
331
+ flux_prompt = """Analyze this image for professional FLUX generation. Provide exactly two sections:
332
 
333
+ 1. DESCRIPTION: Create a single flowing paragraph starting directly with the subject. Be precise about:
334
+ - Image type (photograph, illustration, artwork)
335
+ - Subject matter and composition
336
+ - Color palette (specific colors, warm/cool tones, monochrome)
337
+ - Lighting conditions and photographic style
338
+ - Mood, atmosphere, and artistic elements
339
+
340
+ 2. CAMERA_SETUP: Recommend ONE specific professional camera and lens for this scene type:
341
+ - Street/urban/documentary: Canon EOS R6 with 35mm f/1.4 lens
342
+ - Portrait photography: Canon EOS R5 with 85mm f/1.4 lens
343
+ - Landscape photography: Phase One XT with 24-70mm f/4 lens
344
+ - Action/sports: Sony A1 with 70-200mm f/2.8 lens
345
 
346
+ Give only the camera model and exact lens specification."""
 
 
 
 
 
 
347
 
348
  return self.analyze_image(image, flux_prompt)
349