RADARPICKv3

Sleeping

App Files Files Community

BenK0y committed on Sep 19, 2024

Commit

18862e8

verified ·

1 Parent(s): 5da7355

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -30

app.py CHANGED Viewed

@@ -14,10 +14,10 @@ load_dotenv()
 # Get the API key from the environment
 API_KEY = os.getenv("GOOGLE_API_KEY")
-# Set up the model with the API key
 genai.configure(api_key=API_KEY)
-# Set up the model
 generation_config = {
     "temperature": 0.7,
     "top_p": 0.9,
@@ -48,6 +48,11 @@ model = genai.GenerativeModel(model_name="gemini-1.5-flash-latest",
                               generation_config=generation_config,
                               safety_settings=safety_settings)
 def input_image_setup(file_loc):
     if not (img := Path(file_loc)).exists():
         raise FileNotFoundError(f"Could not find image: {img}")
@@ -60,24 +65,17 @@ def input_image_setup(file_loc):
         ]
     return image_parts
-def generate_gemini_response(input_prompt, image_loc):
-    image_prompt = input_image_setup(image_loc)
-    prompt_parts = [input_prompt, image_prompt[0]]
     response = model.generate_content(prompt_parts)
     return response.text
-input_prompt = """ give me the info of the car:
-- plate:
-- model:
-- color: """
-def upload_file(files):
-    if not files:
-        return None, "Image not uploaded"
-    file_paths = [file.name for file in files]
-    response = generate_gemini_response(input_prompt, file_paths[0])
-    return file_paths[0], response
 # Object detection part
 def detect_objects(image):
     processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
@@ -89,13 +87,57 @@ def detect_objects(image):
     target_sizes = torch.tensor([image.size[::-1]])
     results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
     draw = ImageDraw.Draw(image)
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
-        box = [round(i, 2) for i in box.tolist()]
-        draw.rectangle(box, outline="red", width=3)
-        draw.text((box[0], box[1]), f"{model.config.id2label[label.item()]}: {round(score.item(), 2)}", fill="red")
-    return image
 with gr.Blocks() as demo:
     header = gr.Label("RADARPICK: Vous avez été radarisé!")
@@ -105,15 +147,7 @@ with gr.Blocks() as demo:
     file_output = gr.Textbox(label="Generated Caption/Post Content")
-    def process_generate(files):
-        if not files:
-            return None, "Image not uploaded"
-        file_path = files[0].name
-        image = Image.open(file_path)
-        detected_image = detect_objects(image)
-        return detected_image, upload_file(files)[1]
     upload_button.upload(fn=lambda files: files[0].name if files else None, inputs=[upload_button], outputs=image_output)
     generate_button.click(fn=process_generate, inputs=[upload_button], outputs=[image_output, file_output])
-demo.launch()

 # Get the API key from the environment
 API_KEY = os.getenv("GOOGLE_API_KEY")
+# Set up the generative AI model with the API key
 genai.configure(api_key=API_KEY)
+# Set up the generative model
 generation_config = {
     "temperature": 0.7,
     "top_p": 0.9,
                               generation_config=generation_config,
                               safety_settings=safety_settings)
+input_prompt_template = """give me the info of the car:
+- plate:
+- model:
+- color: """
 def input_image_setup(file_loc):
     if not (img := Path(file_loc)).exists():
         raise FileNotFoundError(f"Could not find image: {img}")
         ]
     return image_parts
def generate_gemini_response(input_prompt, image):
    """Send a prompt plus one image to the Gemini model and return its text.

    Args:
        input_prompt: Text instruction placed ahead of the image in the request.
        image: Image payload stored under the request part's "data" key; the
            part is always labelled "image/jpeg".

    Returns:
        The ``text`` attribute of the object returned by
        ``model.generate_content``.
    """
    # The request is a two-element list: the prompt first, then the image part.
    jpeg_part = {"mime_type": "image/jpeg", "data": image}
    reply = model.generate_content([input_prompt, jpeg_part])
    return reply.text
 # Object detection part
 def detect_objects(image):
     processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
     target_sizes = torch.tensor([image.size[::-1]])
     results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
+    detected_cars = []
     draw = ImageDraw.Draw(image)
+    # Loop through detections and filter only "car" class (ID 3 for COCO dataset)
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+        if model.config.id2label[label.item()] == 'car' and score.item() > 0.9:
+            box = [round(i, 2) for i in box.tolist()]
+            # Crop the detected car
+            cropped_car = image.crop(box)
+            # Convert the cropped image to bytes
+            cropped_car_bytes = image_to_bytes(cropped_car)
+            detected_cars.append((cropped_car_bytes, box))
+            # Draw bounding box around the car
+            draw.rectangle(box, outline="red", width=3)
+            draw.text((box[0], box[1]), f"Car: {round(score.item(), 2)}", fill="red")
+    return image, detected_cars
def image_to_bytes(img):
    """Encode a PIL image as JPEG and return the encoded bytes.

    Args:
        img: A PIL image (anything exposing ``mode``, ``convert`` and
            ``save(fp, format=...)``).

    Returns:
        The JPEG-encoded image as a ``bytes`` object.
    """
    from io import BytesIO

    # Pillow's JPEG writer only accepts these modes; images with an alpha
    # channel or a palette (common for PNG uploads) would otherwise fail with
    # "cannot write mode ... as JPEG", so convert them to RGB first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if img.mode not in jpeg_modes:
        img = img.convert("RGB")

    buffer = BytesIO()
    img.save(buffer, format="JPEG")
    return buffer.getvalue()
def upload_file(files):
    """Return the path of the first uploaded file.

    Args:
        files: Sequence of uploaded file objects, each exposing a ``name``
            attribute holding its path; may be empty or ``None``.

    Returns:
        The ``name`` of the first file, or ``None`` when nothing was uploaded.
    """
    # Return a single value in both branches: the empty case used to return a
    # (None, message) tuple while the normal case returned a bare path string.
    if not files:
        return None
    return files[0].name
def process_generate(files):
    """Detect cars in the first uploaded image and describe each one.

    Args:
        files: Sequence of uploaded file objects, each exposing a ``name``
            attribute holding its path; only the first file is processed.

    Returns:
        A ``(image, text)`` pair. ``image`` is the first element returned by
        ``detect_objects`` and ``text`` holds one "Car at <box>" section per
        detected car. Returns ``(None, "Image not uploaded")`` when ``files``
        is empty.
    """
    if not files:
        return None, "Image not uploaded"

    image = Image.open(files[0].name)

    # detect_objects returns the image plus a list of (jpeg_bytes, box) pairs.
    detected_image, detected_cars = detect_objects(image)

    # Say so explicitly instead of returning a blank text box.
    if not detected_cars:
        return detected_image, "No car detected in the image"

    car_info_list = []
    for car_bytes, box in detected_cars:
        try:
            car_info = generate_gemini_response(input_prompt_template, car_bytes)
        except ValueError as err:
            # The Gemini response's ``text`` accessor raises ValueError when
            # the reply has no usable candidate (e.g. it was blocked); keep the
            # results for the other cars rather than failing the whole request.
            car_info = f"Could not read car info: {err}"
        car_info_list.append(f"Car at {box}:\n{car_info}\n")

    return detected_image, "\n".join(car_info_list)
 with gr.Blocks() as demo:
     header = gr.Label("RADARPICK: Vous avez été radarisé!")
     file_output = gr.Textbox(label="Generated Caption/Post Content")
     upload_button.upload(fn=lambda files: files[0].name if files else None, inputs=[upload_button], outputs=image_output)
     generate_button.click(fn=process_generate, inputs=[upload_button], outputs=[image_output, file_output])
+demo.launch()