File size: 3,213 Bytes
22e1b62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import json
import os
from dotenv import load_dotenv
import requests

# Run python-dotenv's loader first, then read the Bing Search API key from the
# process environment (None when the variable is not set).
load_dotenv()
BING_API_KEY = os.environ.get("BING_API_KEY")

def print_json(obj):
    """Pretty-print ``obj`` to stdout as JSON.

    Keys are sorted and nested levels are indented by four spaces.

    Args:
        obj: Any JSON-serializable object.
    """
    rendered = json.dumps(
        obj,
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
    print(rendered)


def get_image_urls(search_results):
    """
    Extracts image URLs from Bing Visual Search response.
    Ref: https://learn.microsoft.com/en-us/bing/search-apis/bing-visual-search/how-to/search-response

    Walks every entry of ``search_results["tags"]`` and, within each, every
    entry of its ``"actions"`` list. For actions named "PagesIncluding" or
    "VisualSearch" the ``contentUrl`` of each item under ``data.value`` is
    collected. Tags without actions, actions without a name or data, and
    items without a ``contentUrl`` are skipped instead of raising.

    Args:
        search_results: A dict containing the Bing VisualSearch response data.

    Returns:
        A tuple containing two lists:
            - List of image URLs from "PagesIncluding" section.
            - List of image URLs from "VisualSearch" section (backup).
        Both lists are empty when the response has no usable tags/actions.
    """
    pages_including_urls = []
    visual_search_urls = []

    # Route each action name of interest to the list that collects its URLs.
    url_lists_by_action = {
        "PagesIncluding": pages_including_urls,  # main results
        "VisualSearch": visual_search_urls,      # back up results
    }

    # `or []` also covers keys that are present but hold None.
    for tag in search_results.get("tags") or []:
        for action in tag.get("actions") or []:
            url_list = url_lists_by_action.get(action.get("name"))
            # Compare against None: an (initially) empty list is falsy.
            if url_list is None:
                continue
            items = (action.get("data") or {}).get("value") or []
            url_list.extend(
                item["contentUrl"] for item in items if "contentUrl" in item
            )

    return pages_including_urls, visual_search_urls

def reverse_image_search(image_path, subscription_key=BING_API_KEY):
    """Performs a reverse image search using the Bing Visual Search API.

    The image file is uploaded as the multipart form field ``image`` in a
    POST request to the Visual Search endpoint.

    Args:
        image_path: The path to the image file to search for.
        subscription_key: Value sent in the ``Ocp-Apim-Subscription-Key``
            header. Defaults to the module-level ``BING_API_KEY``.

    Returns:
        The decoded JSON body of the API response (the raw search results,
        not a list of URLs). Pass it to ``get_image_urls`` to extract the
        image URLs.

    Raises:
        requests.exceptions.RequestException: If the API request fails or
            the server answers with an HTTP error status.
        OSError: If the image file cannot be opened.
    """
    base_uri = "https://api.bing.microsoft.com/v7.0/images/visualsearch"
    headers = {"Ocp-Apim-Subscription-Key": subscription_key}

    try:
        # The context manager closes the file handle even if the upload fails.
        with open(image_path, "rb") as image_file:
            files = {"image": ("image", image_file)}
            response = requests.post(base_uri, headers=headers, files=files)
        response.raise_for_status()
        return response.json()

    # Keep this handler first: requests' RequestException derives from
    # IOError/OSError, so the OSError handler below would otherwise catch it.
    except requests.exceptions.RequestException as e:
        # Carry over the request/response so callers can still inspect them.
        raise requests.exceptions.RequestException(
            f"API request failed: {e}",
            request=e.request,
            response=e.response,
        ) from e
    except OSError as e:
        raise OSError(f"Error opening image file: {e}") from e

if __name__ == "__main__":
    # Demo run: reverse-search a sample image and show both URL lists.
    sample_image = "data/test_data/human_news.jpg"
    try:
        primary_urls, fallback_urls = get_image_urls(
            reverse_image_search(sample_image)
        )

        # Main results first, then the backup list.
        print("Image URLs from PagesIncluding:")
        print(primary_urls)
        print("\nImage URLs from VisualSearch (backup):")
        print(fallback_urls)
    except Exception as err:
        # Report any failure from the search, parsing, or printing steps.
        print(f"An error occurred: {err}")