Spaces:
Sleeping
Sleeping
| import io | |
| import boto3 | |
| import requests | |
| import numpy as np | |
| import polars as pl | |
| from PIL import Image | |
| from botocore.config import Config | |
| import logging | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# S3 for sample images
# The client is created once at import time and reused by get_sample() to
# generate presigned URLs for the sample-image bucket.
my_config = Config(
    region_name='us-east-1'
)
s3_client = boto3.client('s3', config=my_config)

# Set basepath for EOL pages for info
# An EOL page ID is appended to this prefix to form the full page URL.
EOL_URL = "https://eol.org/pages/"

# Taxonomic ranks in the order used throughout this module: a `rank` argument
# is an index into this list, and each name is also a column name in the
# sample DataFrame.
RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
def get_sample(df, pred_taxon, rank):
    '''
    Function to retrieve a sample image of the predicted taxon and EOL page link for more info.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
    pred_taxon : str
        Predicted taxon of the uploaded image.
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    img : PIL.Image or None
        Sample image of predicted taxon for display. None when no sample exists
        for the taxon or when retrieval failed.
    eol_page : str
        HTML snippet linking to the EOL page for the taxon (may be a lower rank,
        e.g., species sample). When img is None this is instead a plain-text
        message explaining why no sample is shown.
    '''
    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")

    # Step 1: pick a sample row for the taxon. Any failure here is reported to
    # the user rather than raised, so the caller always gets a (img, text) pair.
    try:
        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
    except Exception as e:
        logger.exception(f"Error retrieving sample data: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."

    if filepath is None:
        logger.warning(f"No sample image found for taxon: {pred_taxon}")
        return None, f"Sorry, our EOL images do not include {pred_taxon}."

    # Step 2: download the sample image of the selected individual via a
    # presigned S3 URL.
    try:
        img_src = s3_client.generate_presigned_url(
            'get_object',
            Params={'Bucket': 'treeoflife-10m-sample-images', 'Key': filepath},
        )
        # Without a timeout a stalled connection would block this call forever.
        img_resp = requests.get(img_src, timeout=30)
        # Fail on 4xx/5xx here; otherwise the S3 error document would be passed
        # to PIL and surface as a confusing "cannot identify image file" error.
        img_resp.raise_for_status()
        img = Image.open(io.BytesIO(img_resp.content))
        full_eol_url = EOL_URL + eol_page_id
    except Exception as e:
        logger.exception(f"Error retrieving sample image: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."

    # Step 3: build the EOL link text. A non-exact match means the sample is a
    # lower-rank member of the predicted taxon, so its full name is shown too.
    if is_exact:
        eol_page = f"<p>Check out the EOL entry for {pred_taxon} to learn more: <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
    else:
        eol_page = f"<p>Check out an example EOL entry within {pred_taxon} to learn more: {full_name} <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"

    logger.info(f"Successfully retrieved sample image and EOL page for {pred_taxon}")
    return img, eol_page
def get_sample_data(df, pred_taxon, rank):
    '''
    Function to randomly select a sample individual of the given taxon and provide its
    image filepath, EOL page ID, and full taxonomic name.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
        Also read: one column per entry of RANKS and an "eol_page_id" column.
    pred_taxon : str
        Predicted taxon of the uploaded image: space-separated names, one per rank,
        from RANKS[0] down to RANKS[rank].
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    filepath : str
        Filepath of selected sample image for predicted taxon.
        None when no row matches the taxon.
    eol_page_id : str
        EOL page ID associated with predicted taxon for more information.
        np.nan when no row matches the taxon.
    full_name : str
        Full taxonomic name of the selected sample ("" when no row matches).
    is_exact : bool
        Flag indicating if the match is exact (i.e., with empty lower ranks).

    Raises:
    -------
    IndexError
        If pred_taxon has fewer space-separated names than rank + 1.
    '''
    # Split once; each token is matched against the column of the same rank.
    taxon_names = pred_taxon.split(" ")

    # Narrow the frame rank by rank, bailing out as soon as nothing matches.
    for idx in range(rank + 1):
        df = df.filter(pl.col(RANKS[idx]) == taxon_names[idx])
        if df.is_empty():
            return None, np.nan, "", False

    # First, try to find entries with empty lower ranks: those describe the
    # predicted taxon itself rather than one of its descendants.
    exact_df = df
    for lower_rank in RANKS[rank + 1:]:
        exact_df = exact_df.filter((pl.col(lower_rank).is_null()) | (pl.col(lower_rank) == ""))

    # If no exact matches, fall back to any entry within the specified taxon.
    is_exact = not exact_df.is_empty()
    chosen = (exact_df if is_exact else df).sample()

    # Drop null/empty rank values before joining: str.join raises TypeError on
    # None, and a descendant row may have some (but not all) lower ranks unset.
    # For an exact match every lower rank is empty, so this yields exactly the
    # names down to the requested rank.
    full_name = " ".join(name for name in chosen.select(RANKS).row(0) if name)

    return chosen["file_path"][0], chosen["eol_page_id"].cast(pl.String)[0], full_name, is_exact