import os
import base64
import math
import pytz
import torch
import yaml
import pycountry
import subprocess
import sys
import numpy as np
import sounddevice as sd

from tools.final_answer import FinalAnswerTool
from tools.visit_webpage import VisitWebpageTool
from tools.translation import TranslationTool
from tools.best_model_for_task import HFModelDownloadsTool
from tools.rag_transformers import retriever_tool

from transformers import pipeline
from Gradio_UI import GradioUI
from Gradio_UI_with_image import GradioUIImage
from dotenv import load_dotenv
from datetime import datetime
from skimage import io
from PIL import Image
from typing import Optional, Tuple

from opentelemetry.sdk.trace import TracerProvider
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace.export import SimpleSpanProcessor

from langchain_community.agent_toolkits.load_tools import load_tools
from langchain.chains import LLMChain
from langchain_community.utilities.dalle_image_generator import DallEAPIWrapper
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAI
from transformers import AutoTokenizer

from io import BytesIO
from time import sleep

from smolagents.utils import BASE_BUILTIN_MODULES
from smolagents.agents import ActionStep
from smolagents.cli import load_model
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    GoogleSearchTool,
    HfApiModel,
    TransformersModel,
    OpenAIServerModel,
    load_tool,
    Tool,
    tool,
    ToolCollection,
    E2BExecutor
)

# load .env vars
load_dotenv()

BASE_BUILTIN_MODULES.remove("re")

# fast prototyping tools
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone formatted as '%m/%d/%y %H:%M:%S'
    Args:
        timezone (str): A string representing a valid timezone (e.g., 'America/New_York').
    """
    try:
        tz = pytz.timezone(timezone)
        local_time = datetime.now(tz).strftime('%m/%d/%y %H:%M:%S')
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        return f"Error fetching time for timezone '{timezone}': {str(e)}"


@tool
def language_detection(text:str)-> str:
    """Detects the language of the input text using basic xlm-roberta-base-language-detection.
     Args:
        text: the input message or wording to detect language from.
    """
    model_ckpt = "papluca/xlm-roberta-base-language-detection"
    pipe = pipeline("text-classification", model=model_ckpt)
    preds = pipe(text, return_all_scores=True, truncation=True, max_length=128)
    if preds:
        pred = preds[0]
        language_probabilities_dict = {p["label"]: float(p["score"]) for p in pred}
        predicted_language_code = max(language_probabilities_dict, key=language_probabilities_dict.get)
        tool_prediction_confidence = language_probabilities_dict[predicted_language_code]
        confidence_str = f"Tool Confidence: {tool_prediction_confidence}"
        predicted_language_code_str = f"Predicted language code (ISO 639): {predicted_language_code}/n{confidence_str}"
        try:
            predicted_language = pycountry.languages.get(alpha_2=predicted_language_code)
            if predicted_language:
                predicted_language_str = f"Predicted language: {predicted_language.name}/n{confidence_str}"
                return predicted_language_str 
            return predicted_language_code_str
            
        except Exception as e:
            return f"Error mapping country code to name (pycountry): {str(e)}/n{predicted_language_code_str}"
    else:
        return "None"


@tool
def advanced_image_generation(description:str)->Image.Image:
    """Generates an image using a textual description.
         Args:
            description: the textual description provided by the user to prompt a text-to-image model
        """
    llm = OpenAI(temperature=0.9)
    prompt = PromptTemplate(
        input_variables=["image_desc"],
        template="Generate a detailed but short prompt (must be less than 900 characters) to generate an image based on the following description: {image_desc}",
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    image_url = DallEAPIWrapper().run(chain.run(description))
    image_array = io.imread(image_url)
    pil_image = Image.fromarray(image_array)
    return pil_image


@tool
def calculate_cargo_travel_time(
    origin_coords: Tuple[float, float],
    destination_coords: Tuple[float, float],
    cruising_speed_kmh: Optional[float] = 750.0,  # Average speed for cargo planes
) -> float:
    """
    Calculate the travel time for a cargo plane between two points on Earth using great-circle distance.

    Args:
        origin_coords: Tuple of (latitude, longitude) for the starting point
        destination_coords: Tuple of (latitude, longitude) for the destination
        cruising_speed_kmh: Optional cruising speed in km/h (defaults to 750 km/h for typical cargo planes)

    Returns:
        float: The estimated travel time in hours

    Example:
        >>> # Chicago (41.8781° N, 87.6298° W) to Sydney (33.8688° S, 151.2093° E)
        >>> result = calculate_cargo_travel_time((41.8781, -87.6298), (-33.8688, 151.2093))
    """

    def to_radians(degrees: float) -> float:
        return degrees * (math.pi / 180)

    # Extract coordinates
    lat1, lon1 = map(to_radians, origin_coords)
    lat2, lon2 = map(to_radians, destination_coords)

    # Earth's radius in kilometers
    EARTH_RADIUS_KM = 6371.0

    # Calculate great-circle distance using the haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )
    c = 2 * math.asin(math.sqrt(a))
    distance = EARTH_RADIUS_KM * c

    # Add 10% to account for non-direct routes and air traffic controls
    actual_distance = distance * 1.1

    # Calculate flight time
    # Add 1 hour for takeoff and landing procedures
    flight_time = (actual_distance / cruising_speed_kmh) + 1.0

    # Format the results
    return round(flight_time, 2)


@tool
def browser_automation(original_user_query:str)->str:
    """
    Browser automation is like “simulating a real user” and works for interactive,
    dynamic sites and when visual navigation is required to show the process to the user.
    Navigates the web using helium to answer a user query by appending helium_instructions to the original query
    by searching for text matches through the navigation.
    Args:
        original_user_query: The original
    """
    # Use sys.executable to ensure the same Python interpreter is used.
    result = subprocess.run(
        [sys.executable, "vision_web_browser.py", original_user_query],
        capture_output=True,  # Captures both stdout and stderr
        text=True  # Returns output as a string instead of bytes
    )
    print("vision_web_browser.py: ", result.stderr)
    return result.stdout


text_to_speech_pipe = pipeline(
    task="text-to-speech",
    model="suno/bark-small",
    device = 0 if torch.cuda.is_available() else "cpu",
    torch_dtype=torch.float16,
    )
text_to_speech_pipe.model.enable_cpu_offload()
text_to_speech_pipe.model.use_flash_attention_2=True
text_to_speech_pipe.model.pad_token_id=0  # 50257


tokenizer = AutoTokenizer.from_pretrained("suno/bark-small")
#print("suno/bark-small tokenizer pad_token_id: ", tokenizer.pad_token_id)  # 0
#print("suno/bark-small tokenizer eos_token_id: ", tokenizer.eos_token_id)  # none
text_to_speech_pipe.model.pad_token_id = tokenizer.pad_token_id
text_to_speech_pipe.model.eos_token_id = tokenizer.eos_token_id


def speech_to_text(final_answer_text, agent_memory):
    text = f"[clears throat] Here is the final answer: {final_answer_text}"
    # attention_mask = [1] * len(text.split())  # Create an attention mask for your text

    # Run the pipeline with the attention mask
    output = text_to_speech_pipe(text)

    # display(Audio(output["audio"], rate=output["sampling_rate"]))  # notebook
    audio = np.array(output["audio"], dtype=np.float32)
    print("Original audio shape:", audio.shape)

    # Adjust audio shape if necessary:
    if audio.ndim == 1:
        # Mono audio, should be fine. You can check if your device expects stereo.
        print("Mono audio... should be fine. You can check if your device expects stereo.")
    elif audio.ndim == 2:
        # Check if the number of channels is acceptable (e.g., 1 or 2)
        channels = audio.shape[1]
        if channels not in [1, 2]:
            # Try to squeeze extra dimensions
            audio = np.squeeze(audio)
            print("Squeezed audio shape:", audio.shape)
    else:
        # If audio has more dimensions than expected, flatten or reshape as needed
        audio = np.squeeze(audio)
        print("Squeezed audio shape:", audio.shape)

    # Play the audio using sounddevice
    try:
        sd.play(audio, output["sampling_rate"])
        sd.wait()  # Wait until audio playback is complete
    except Exception as e:
        print(f"Error playing audio: {e}")

    return True


def initialize_langfuse_opentelemetry_instrumentation():
    LANGFUSE_PUBLIC_KEY=os.environ.get("LANGFUSE_PUBLIC_KEY")
    LANGFUSE_SECRET_KEY=os.environ.get("LANGFUSE_SECRET_KEY")
    LANGFUSE_AUTH=base64.b64encode(f"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}".encode()).decode()
    
    os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = "https://cloud.langfuse.com/api/public/otel" # EU data region
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"Authorization=Basic {LANGFUSE_AUTH}"
    
    trace_provider = TracerProvider()
    trace_provider.add_span_processor(SimpleSpanProcessor(OTLPSpanExporter()))
    
    SmolagentsInstrumentor().instrument(tracer_provider=trace_provider)


# telemetry
initialize_langfuse_opentelemetry_instrumentation()

# load tools from /tools/
final_answer = FinalAnswerTool()
visit_webpage = VisitWebpageTool()
translation = TranslationTool()
best_model_for_task = HFModelDownloadsTool()
transformers_retriever = retriever_tool

# load tools from smoloagents library
google_web_search = GoogleSearchTool()  # provider="serper" (SERPER_API_KEY) or "serpapi" (default)
google_web_search.name = "google_web_search"
duckduckgo_web_search = DuckDuckGoSearchTool()
duckduckgo_web_search.name = "duckduckgo_web_search"

# load tools from hub and langchain
# image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True)  # Tool.from_space("black-forest-labs/FLUX.1-schnell", name="image_generator", description="Generate an image from a prompt")
advanced_search_tool = Tool.from_langchain(load_tools(["searchapi"], allow_dangerous_tools=True)[0])  # serpapi is not real time scrapping
advanced_search_tool.name = "advanced_search_tool"

image_generation_tool_fast = Tool.from_space(
    "black-forest-labs/FLUX.1-schnell",
    name="image_generator",
    description="Generate an image from a prompt"
)


ceo_model = load_model("LiteLLMModel", "gpt-4o")   # or anthropic/claude-3-sonnet

"""
ceo_model = HfApiModel(
max_tokens=2096,  # 8096 for manager
temperature=0.5,
model_id=  'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # "meta-llama/Llama-3.3-70B-Instruct",  # 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',  # same as Qwen/Qwen2.5-Coder-32B-Instruct
custom_role_conversions=None,
)
"""
with open("prompts.yaml", 'r') as stream:
    prompt_templates = yaml.safe_load(stream)

tools = [
        final_answer,
        best_model_for_task,
        advanced_search_tool,
        google_web_search,
        duckduckgo_web_search,
        visit_webpage,
        browser_automation,
        get_current_time_in_timezone,
        advanced_image_generation,
        image_generation_tool,
        transformers_retriever,
        language_detection,
        translation,
        calculate_cargo_travel_time
    ]

agent = CodeAgent(
    model=ceo_model,
    tools=tools,
    max_steps=20,  # 15 is good for a light manager, too much when there is no need of a manager
    verbosity_level=2,
    grammar=None,
    # planning_interval=5,  # (add more steps for heavier reasoning, leave default if not manager)  # test for crashing issues.
    name="Alfredo",
    description="CEO",
    prompt_templates=prompt_templates,
    # executor_type="e2b",  # security, could also be "docker" (set keys)
    # sandbox=E2BSandbox()  (or E2BExecutor?),
    # step_callbacks=[save_screenshot],  # todo: configure the web_navigation agent as a separate agent and manage it with alfred
    final_answer_checks=[speech_to_text],
    additional_authorized_imports=[
        "geopandas",
        "plotly",
        "shapely",
        "json",
        "pandas",
        "numpy",
        "requests",
        "helium",
        "bs4"
    ],
    # I could also add the authorized_imports from a LIST_SAFE_MODULES
)

agent.python_executor("from helium import *")   # agent.state

# agent.push_to_hub('laverdes/Alfredo')
agent.visualize()

# prompt = ("navigate to a random wikipedia page and give me a summary of the content, then make a single image representing all the content")
# agent.run(prompt)

GradioUI(agent).launch()