willwade committed
Commit b929813 · 1 Parent(s): deb6f27

fixing demo

Files changed (1)
  1. demo.py +706 -28
demo.py CHANGED
@@ -1,40 +1,718 @@
  from transformers import pipeline
  import json

- # Load model

- # Use a simpler approach with a pre-built pipeline
- rag_pipeline = pipeline("text-generation", model="distilgpt2")

- # Load KG
- with open("social_graph.json", "r") as f:
-     kg = json.load(f)

- # Build context
- person = kg["people"]["billy"]  # Using Billy instead of Bob
- context = person["context"]

- # User input
- query = "What should I say to Billy?"

- # RAG-style prompt
- prompt = """I am Will, a 38-year-old father with MND (Motor Neuron Disease). I have a 7-year-old son named Billy who loves Manchester United football.

- Billy just asked me: "Dad, did you see the United match last night?"

- My response to Billy:"""

- # Generate
- response = rag_pipeline(
      prompt,
-     max_length=100,  # Longer output
-     temperature=0.9,  # More creative
-     do_sample=True,
-     num_return_sequences=1,
-     top_p=0.92,  # More focused sampling
-     top_k=50,  # Limit vocabulary
- )
- print("Generated response:")
- # For text-generation models, we need to extract just the generated part (not the prompt)
- generated_text = response[0]["generated_text"][len(prompt) :]
- print(generated_text)
  from transformers import pipeline
  import json
+ import argparse
+ import os
+ import sys
+ import subprocess
+ import requests
+ from typing import List, Dict, Any, Optional, Union
+ import time


+ # Check for Hugging Face token
+ def check_hf_token():
+     """Check if a Hugging Face token is properly set up."""
+     token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get("HF_TOKEN")
+
+     if not token:
+         print("\nWarning: No Hugging Face token found in environment variables.")
+         print(
+             "To use gated models like Gemma, you need to set up a token with the right permissions."
+         )
+         print("1. Create a token at https://huggingface.co/settings/tokens")
+         print("2. Make sure to enable 'Access to public gated repositories'")
+         print("3. Set it as an environment variable:")
+         print("   export HUGGING_FACE_HUB_TOKEN=your_token_here")
+         return False
+
+     return True


+ def load_social_graph(file_path="social_graph.json"):
+     """Load the social graph from a JSON file."""
+     with open(file_path, "r") as f:
+         return json.load(f)


+ def get_person_info(social_graph, person_id):
+     """Get information about a person from the social graph."""
+     if person_id in social_graph["people"]:
+         return social_graph["people"][person_id]
+     else:
+         available_people = ", ".join(social_graph["people"].keys())
+         raise ValueError(
+             f"Person '{person_id}' not found in social graph. Available people: {available_people}"
+         )

+
+ def build_enhanced_prompt(social_graph, person_id, topic=None, user_message=None):
+     """Build an enhanced prompt using social graph information."""
+     # Get AAC user information
+     aac_user = social_graph["aac_user"]
+
+     # Get conversation partner information
+     person = get_person_info(social_graph, person_id)
+
+     # Start building the prompt with AAC user information
+     prompt = f"""I am {aac_user['name']}, a {aac_user['age']}-year-old with MND (Motor Neuron Disease) from {aac_user['location']}.
+ {aac_user['background']}
+
+ My communication needs: {aac_user['communication_needs']}
+
+ I am talking to {person['name']}, who is my {person['role']}.
+ About {person['name']}: {person['context']}
+ We typically talk about: {', '.join(person['topics'])}
+ We communicate {person['frequency']}.
+ """
+
+     # Add places information if available
+     if "places" in social_graph:
+         relevant_places = social_graph["places"][
+             :3
+         ]  # Just use a few places for context
+         prompt += f"\nPlaces important to me: {', '.join(relevant_places)}\n"
+
+     # Add communication style based on relationship
+     if person["role"] in ["wife", "son", "daughter", "mother", "father"]:
+         prompt += "I communicate with my family in a warm, loving way, sometimes using inside jokes.\n"
+     elif person["role"] in ["doctor", "therapist", "nurse"]:
+         prompt += (
+             "I communicate with healthcare providers in a direct, informative way.\n"
+         )
+     elif person["role"] in ["best mate", "friend"]:
+         prompt += "I communicate with friends casually, often with humor and sometimes swearing.\n"
+     elif person["role"] in ["work colleague", "boss"]:
+         prompt += "I communicate with colleagues professionally but still friendly.\n"
+
+     # Add common utterances by category if available
+     if "common_utterances" in social_graph:
+         # Try to find relevant utterance category based on topic
+         utterance_category = None
+         if topic == "football" or topic == "sports":
+             utterance_category = "sports_talk"
+         elif topic == "programming" or topic == "tech news":
+             utterance_category = "tech_talk"
+         elif topic in ["family plans", "children's activities"]:
+             utterance_category = "family_talk"
+
+         # Add relevant utterances if category exists
+         if (
+             utterance_category
+             and utterance_category in social_graph["common_utterances"]
+         ):
+             utterances = social_graph["common_utterances"][utterance_category][:2]
+             prompt += f"\nI might say things like: {' or '.join(utterances)}\n"
+
+     # Add topic information if provided
+     if topic and topic in person["topics"]:
+         prompt += f"\nWe are currently discussing {topic}.\n"
+
+         # Add specific context about this topic with this person
+         if topic == "football" and "Manchester United" in person["context"]:
+             prompt += (
+                 "We both support Manchester United and often discuss recent matches.\n"
+             )
+         elif topic == "programming" and "software developer" in person["context"]:
+             prompt += (
+                 "We both work in software development and share technical interests.\n"
+             )
+         elif topic == "family plans" and person["role"] in ["wife", "husband"]:
+             prompt += "We make family decisions together, considering my condition.\n"
+         elif topic == "old scout adventures" and person["role"] == "best mate":
+             prompt += "We often reminisce about our Scout camping trips in South East London.\n"
+         elif topic == "cycling" and "cycling" in person["context"]:
+             prompt += "I miss being able to cycle but enjoy talking about past cycling adventures.\n"
+
+         # Add shared experiences based on relationship and topic
+         if person["role"] == "best mate" and topic in ["football", "pub quizzes"]:
+             prompt += (
+                 "We've watched many matches together and done countless pub quizzes.\n"
+             )
+         elif person["role"] == "wife" and topic in ["family plans", "weekend outings"]:
+             prompt += "Emma has been amazing at keeping family life as normal as possible despite my condition.\n"
+         elif person["role"] == "son" and topic == "football":
+             prompt += "I try to stay engaged with Billy's football enthusiasm even as my condition progresses.\n"
+
+     # Add the user's message if provided
+     if user_message:
+         prompt += f"\n{person['name']} just said to me: \"{user_message}\"\n"
+     else:
+         # Use a common phrase from the person if no message is provided
+         if person["common_phrases"]:
+             default_message = person["common_phrases"][0]
+             prompt += f"\n{person['name']} just said to me: \"{default_message}\"\n"
+
+     # Add the response prompt with specific guidance
+     prompt += f"""
+ I want to respond to {person['name']} in a way that is natural, brief (1-2 sentences), and directly relevant to what they just said. I'll use casual language with some humor since we're close friends.
+
+ My response to {person['name']}:"""
+
+     return prompt
+
+
+ class LLMInterface:
+     """Base interface for language model generation."""
+
+     def __init__(self, model_name, max_length=150, temperature=0.9):
+         """Initialize the LLM interface.
+
+         Args:
+             model_name: Name or path of the model
+             max_length: Maximum length of generated text
+             temperature: Controls randomness (higher = more random)
+         """
+         self.model_name = model_name
+         self.max_length = max_length
+         self.temperature = temperature
+
+     def generate(self, prompt, num_responses=3):
+         """Generate responses for the given prompt.
+
+         Args:
+             prompt: The prompt to generate responses for
+             num_responses: Number of responses to generate
+
+         Returns:
+             A list of generated responses
+         """
+         raise NotImplementedError("Subclasses must implement this method")
+
+     def cleanup_response(self, text):
+         """Clean up a generated response.
+
+         Args:
+             text: The raw generated text
+
+         Returns:
+             Cleaned up text
+         """
+         # Make sure it's a complete sentence or phrase
+         # If it ends abruptly, add an ellipsis
+         if text and not any(text.endswith(end) for end in [".", "!", "?", '..."']):
+             if text.endswith('"'):
+                 text = text[:-1] + '..."'
+             else:
+                 text += "..."
+
+         return text
+
+
+ class HuggingFaceInterface(LLMInterface):
+     """Interface for Hugging Face Transformers models."""
+
+     def __init__(self, model_name="distilgpt2", max_length=150, temperature=0.9):
+         """Initialize the Hugging Face interface."""
+         super().__init__(model_name, max_length, temperature)
+         try:
+             # Check if we're dealing with a gated model
+             is_gated_model = any(
+                 name in model_name for name in ["gemma", "llama", "mistral"]
+             )
+
+             # Get token from environment
+             import os
+
+             token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get(
+                 "HF_TOKEN"
+             )
+
+             if is_gated_model and token:
+                 print(f"Using token for gated model: {model_name}")
+                 from huggingface_hub import login
+
+                 login(token=token, add_to_git_credential=False)
+
+                 # Explicitly pass the token when loading the tokenizer and model
+                 from transformers import AutoTokenizer, AutoModelForCausalLM
+
+                 tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
+                 model = AutoModelForCausalLM.from_pretrained(model_name, token=token)
+                 self.pipeline = pipeline(
+                     "text-generation", model=model, tokenizer=tokenizer
+                 )
+             else:
+                 self.pipeline = pipeline("text-generation", model=model_name)
+
+             print(f"Successfully loaded model: {model_name}")
+         except Exception as e:
+             print(f"Error loading model {model_name}: {e}")
+             if "gated" in str(e).lower() or "403" in str(e):
+                 print(
+                     "\nThis appears to be a gated model that requires authentication."
+                 )
+                 print("Please make sure you:")
+                 print("1. Have accepted the model license on the Hugging Face Hub")
+                 print(
+                     "2. Have created a token with 'Access to public gated repositories' permission"
+                 )
+                 print(
+                     "3. Have set the token as HUGGING_FACE_HUB_TOKEN environment variable"
+                 )
+                 print("\nAlternatively, try an Ollama model via the llm backend:")
+                 print(
+                     "python demo.py --backend llm --model ollama/gemma3:4b-it-qat [other args]"
+                 )
+             raise
+
+     def generate(self, prompt, num_responses=3):
+         """Generate responses using the Hugging Face pipeline."""
+         # Calculate prompt length in tokens (approximate)
+         prompt_length = len(prompt.split())
+
+         # Generate the responses
+         responses = self.pipeline(
+             prompt,
+             max_length=prompt_length + self.max_length,
+             temperature=self.temperature,
+             do_sample=True,
+             num_return_sequences=num_responses,
+             top_p=0.92,
+             top_k=50,
+             truncation=True,
+         )
+
+         # Extract just the generated parts (not the prompt)
+         generated_texts = []
+         for resp in responses:
+             # Get the text after the prompt
+             generated = resp["generated_text"][len(prompt) :].strip()
+
+             # Clean up the response
+             generated = self.cleanup_response(generated)
+
+             # Add to our list if it's not empty
+             if generated:
+                 generated_texts.append(generated)
+
+         return generated_texts
+
+
+ class OllamaInterface(LLMInterface):
+     """Interface for Ollama models."""
+
+     def __init__(self, model_name="gemma:7b", max_length=150, temperature=0.9):
+         """Initialize the Ollama interface."""
+         super().__init__(model_name, max_length, temperature)
+         # Check if Ollama is installed and the model is available
+         try:
+             import requests
+
+             response = requests.get("http://localhost:11434/api/tags")
+             if response.status_code == 200:
+                 models = [model["name"] for model in response.json()["models"]]
+                 if model_name not in models:
+                     print(
+                         f"Warning: Model {model_name} not found in Ollama. Available models: {', '.join(models)}"
+                     )
+                     print(f"You may need to run: ollama pull {model_name}")
+                 print(f"Ollama is available and will use model: {model_name}")
+         except Exception as e:
+             print(f"Warning: Ollama may not be installed or running: {e}")
+             print("You can install Ollama from https://ollama.ai/")
+
+     def generate(self, prompt, num_responses=3):
+         """Generate responses using the Ollama API."""
+         import requests
+
+         generated_texts = []
+         for _ in range(num_responses):
+             try:
+                 # Ask for a single non-streamed completion; generation settings
+                 # (temperature, num_predict) go in the "options" field of the payload
+                 response = requests.post(
+                     "http://localhost:11434/api/generate",
+                     json={
+                         "model": self.model_name,
+                         "prompt": prompt,
+                         "stream": False,
+                         "options": {
+                             "temperature": self.temperature,
+                             "num_predict": self.max_length,
+                         },
+                     },
+                 )
+
+                 if response.status_code == 200:
+                     # Extract the generated text
+                     generated = response.json().get("response", "").strip()
+
+                     # Clean up the response
+                     generated = self.cleanup_response(generated)
+
+                     # Add to our list if it's not empty
+                     if generated:
+                         generated_texts.append(generated)
+                 else:
+                     print(f"Error from Ollama API: {response.text}")
+             except Exception as e:
+                 print(f"Error generating with Ollama: {e}")
+
+         return generated_texts
+
+
+ class LLMToolInterface(LLMInterface):
+     """Interface for Simon Willison's LLM tool."""
+
+     def __init__(
+         self, model_name="gemini-1.5-pro-latest", max_length=150, temperature=0.9
+     ):
+         """Initialize the LLM tool interface."""
+         super().__init__(model_name, max_length, temperature)
+         # Check if the LLM tool is installed
+         try:
+             import subprocess
+
+             result = subprocess.run(["llm", "models"], capture_output=True, text=True)
+             if result.returncode == 0:
+                 models = [
+                     line.strip() for line in result.stdout.split("\n") if line.strip()
+                 ]
+                 print(f"LLM tool is available. Found {len(models)} models.")
+
+                 # Check for specific model types
+                 gemini_models = [
+                     m for m in models if "gemini" in m.lower() or "gemma" in m.lower()
+                 ]
+                 if gemini_models:
+                     print(f"Gemini models available: {', '.join(gemini_models[:3])}...")
+
+                 # Check for Ollama models
+                 ollama_models = [m for m in models if "ollama" in m.lower()]
+                 if ollama_models:
+                     print(f"Ollama models available: {', '.join(ollama_models[:3])}...")
+
+                 # Check for MLX models
+                 mlx_models = [m for m in models if "mlx" in m.lower()]
+                 if mlx_models:
+                     print(f"MLX models available: {', '.join(mlx_models[:3])}...")
+
+                 # Check if the specified model is available
+                 if not any(self.model_name in m for m in models):
+                     print(
+                         f"Warning: Model '{self.model_name}' not found in available models."
+                     )
+                     print("You may need to install the appropriate plugin:")
+                     if (
+                         "gemini" in self.model_name.lower()
+                         or "gemma" in self.model_name.lower()
+                     ):
+                         print("llm install llm-gemini")
+                     elif "mlx" in self.model_name.lower():
+                         print("llm install llm-mlx")
+                     elif "ollama" in self.model_name.lower():
+                         print("llm install llm-ollama")
+                         print("ollama pull " + self.model_name.replace("ollama/", ""))
+             else:
+                 print("Warning: LLM tool may be installed but returned an error.")
+         except Exception as e:
+             print(f"Warning: Simon Willison's LLM tool may not be installed: {e}")
+             print("You can install it with: pip install llm")
+
+     def generate(self, prompt, num_responses=3):
+         """Generate responses using the LLM tool."""
+         import subprocess
+         import os
+
+         # Check for required environment variables
+         if "gemini" in self.model_name.lower() or "gemma" in self.model_name.lower():
+             if not os.environ.get("GEMINI_API_KEY"):
+                 print("Warning: GEMINI_API_KEY environment variable not found.")
+                 print("Gemini API may not work without it.")
+         elif "ollama" in self.model_name.lower():
+             # Check if Ollama is running
+             try:
+                 import requests
+
+                 response = requests.get("http://localhost:11434/api/tags", timeout=2)
+                 if response.status_code != 200:
+                     print("Warning: Ollama server doesn't seem to be running.")
+                     print("Start Ollama with: ollama serve")
+             except Exception:
+                 print("Warning: Ollama server doesn't seem to be running.")
+                 print("Start Ollama with: ollama serve")
+
+         # Determine the appropriate parameter name for max tokens
+         if "gemini" in self.model_name.lower() or "gemma" in self.model_name.lower():
+             max_tokens_param = "max_output_tokens"
+         elif "ollama" in self.model_name.lower():
+             max_tokens_param = "num_predict"
+         else:
+             max_tokens_param = "max_tokens"
+
+         generated_texts = []
+         for _ in range(num_responses):
+             try:
+                 # Call the LLM tool, passing generation options as "-o NAME VALUE"
+                 result = subprocess.run(
+                     [
+                         "llm",
+                         "-m",
+                         self.model_name,
+                         "-o",
+                         "temperature",
+                         str(self.temperature),
+                         "-o",
+                         max_tokens_param,
+                         str(self.max_length),
+                         prompt,
+                     ],
+                     capture_output=True,
+                     text=True,
+                 )
+
+                 if result.returncode == 0:
+                     # Get the generated text
+                     generated = result.stdout.strip()
+
+                     # Clean up the response
+                     generated = self.cleanup_response(generated)
+
+                     # Add to our list if it's not empty
+                     if generated:
+                         generated_texts.append(generated)
+                 else:
+                     print(f"Error from LLM tool: {result.stderr}")
+             except Exception as e:
+                 print(f"Error generating with LLM tool: {e}")
+
+         return generated_texts
+
+
+ class MLXInterface(LLMInterface):
+     """Interface for MLX-powered models on Mac."""
+
+     def __init__(
+         self, model_name="mlx-community/gemma-7b-it", max_length=150, temperature=0.9
+     ):
+         """Initialize the MLX interface."""
+         super().__init__(model_name, max_length, temperature)
+         # Check if MLX is installed
+         try:
+             import importlib.util
+
+             if importlib.util.find_spec("mlx") is not None:
+                 print("MLX is available for optimized inference on Mac")
+             else:
+                 print("Warning: MLX is not installed. Install with: pip install mlx")
+         except Exception as e:
+             print(f"Warning: Error checking for MLX: {e}")
+
+     def generate(self, prompt, num_responses=3):
+         """Generate responses using MLX."""
+         try:
+             # Dynamically import MLX to avoid errors on non-Mac platforms
+             import mlx.core as mx
+             from transformers import AutoTokenizer, AutoModelForCausalLM
+
+             # Load the model and tokenizer
+             tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+             model = AutoModelForCausalLM.from_pretrained(
+                 self.model_name, trust_remote_code=True, mx_dtype=mx.float16
+             )
+
+             generated_texts = []
+             for _ in range(num_responses):
+                 # Tokenize the prompt
+                 inputs = tokenizer(prompt, return_tensors="np")
+
+                 # Generate
+                 outputs = model.generate(
+                     inputs["input_ids"],
+                     max_length=len(inputs["input_ids"][0]) + self.max_length,
+                     temperature=self.temperature,
+                     do_sample=True,
+                     top_p=0.92,
+                     top_k=50,
+                 )
+
+                 # Decode the generated tokens
+                 generated = tokenizer.decode(
+                     outputs[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True
+                 )
+
+                 # Clean up the response
+                 generated = self.cleanup_response(generated)
+
+                 # Add to our list if it's not empty
+                 if generated:
+                     generated_texts.append(generated)
+
+             return generated_texts
+         except Exception as e:
+             print(f"Error generating with MLX: {e}")
+             return []
+
+
+ def create_llm_interface(backend, model_name, max_length=150, temperature=0.9):
+     """Create an appropriate LLM interface based on the backend.
+
+     Args:
+         backend: The backend to use ('hf', 'llm')
+         model_name: The name of the model to use
+         max_length: Maximum length of generated text
+         temperature: Controls randomness (higher = more random)
+
+     Returns:
+         An LLM interface instance
+     """
+     if backend == "hf":
+         return HuggingFaceInterface(model_name, max_length, temperature)
+     elif backend == "llm":
+         return LLMToolInterface(model_name, max_length, temperature)
+     else:
+         raise ValueError(f"Unknown backend: {backend}")
+
+
+ def generate_response(
      prompt,
+     model_name="distilgpt2",
+     max_length=150,
+     temperature=0.9,
+     num_responses=3,
+     backend="hf",
+ ):
+     """Generate multiple responses using the specified model and backend.
+
+     Args:
+         prompt: The prompt to generate responses for
+         model_name: The name of the model to use
+         max_length: Maximum number of new tokens to generate
+         temperature: Controls randomness (higher = more random)
+         num_responses: Number of different responses to generate
+         backend: The backend to use ('hf' or 'llm')
+
+     Returns:
+         A list of generated responses
+     """
+     # Create the appropriate interface
+     interface = create_llm_interface(backend, model_name, max_length, temperature)
+
+     # Generate responses
+     return interface.generate(prompt, num_responses)
+
+
+ def main():
+     # Set up argument parser
+     parser = argparse.ArgumentParser(
+         description="Generate AAC responses using social graph context"
+     )
+     parser.add_argument(
+         "--person", default="billy", help="Person ID from the social graph"
+     )
+     parser.add_argument("--topic", help="Topic of conversation")
+     parser.add_argument("--message", help="Message from the conversation partner")
+     parser.add_argument(
+         "--backend",
+         default="llm",
+         choices=["hf", "llm"],
+         help="Backend to use for generation (hf=HuggingFace, "
+         "llm=Simon Willison's LLM tool with support for Gemini/MLX/Ollama)",
+     )
+     parser.add_argument(
+         "--model",
+         default="gemini-1.5-pro-latest",
+         help="Model to use for generation. Recommended models by backend:\n"
+         "- hf: 'distilgpt2', 'gpt2-medium', 'google/gemma-2b-it'\n"
+         "- llm: 'gemini-1.5-pro-latest', 'gemma-3-27b-it' (requires llm-gemini plugin)\n"
+         "       'mlx-community/gemma-7b-it' (requires llm-mlx plugin)\n"
+         "       'ollama/gemma3:4b-it-qat', 'ollama/llama3:8b' (requires llm-ollama plugin)",
+     )
+     parser.add_argument(
+         "--num_responses", type=int, default=3, help="Number of responses to generate"
+     )
+     parser.add_argument(
+         "--max_length",
+         type=int,
+         default=150,
+         help="Maximum length of generated responses",
+     )
+     parser.add_argument(
+         "--temperature",
+         type=float,
+         default=0.9,
+         help="Temperature for generation (higher = more creative)",
+     )
+     args = parser.parse_args()
+
+     # Check for token if using HF backend with gated models
+     if args.backend == "hf" and any(
+         name in args.model for name in ["gemma", "llama", "mistral"]
+     ):
+         if not check_hf_token():
+             print("\nSuggestion: Try using the LLM tool with Gemini API instead:")
+             print(
+                 f"python demo.py --backend llm --model gemini-1.5-pro-latest --person {args.person}"
+                 + (f' --topic "{args.topic}"' if args.topic else "")
+                 + (f' --message "{args.message}"' if args.message else "")
+             )
+             print("\nOr use a non-gated model:")
+             print(
+                 f"python demo.py --backend hf --model gpt2-medium --person {args.person}"
+                 + (f' --topic "{args.topic}"' if args.topic else "")
+                 + (f' --message "{args.message}"' if args.message else "")
+             )
+             print("\nContinuing anyway, but expect authentication errors...\n")
+
+     # Load the social graph
+     social_graph = load_social_graph()
+
+     # Build the prompt
+     prompt = build_enhanced_prompt(social_graph, args.person, args.topic, args.message)
+
+     print("\n=== PROMPT ===")
+     print(prompt)
+     print(
+         f"\n=== GENERATING RESPONSE USING {args.backend.upper()} BACKEND WITH MODEL {args.model} ==="
+     )
+
+     # Generate the responses
+     try:
+         responses = generate_response(
+             prompt,
+             args.model,
+             max_length=args.max_length,
+             num_responses=args.num_responses,
+             temperature=args.temperature,
+             backend=args.backend,
+         )
+
+         print("\n=== RESPONSES ===")
+         for i, response in enumerate(responses, 1):
+             print(f"{i}. {response}")
+             print()
+     except Exception as e:
+         print(f"\nError generating responses: {e}")
+
+         if args.backend == "hf" and any(
+             name in args.model for name in ["gemma", "llama", "mistral"]
+         ):
+             print("\nThis appears to be an authentication issue with a gated model.")
+             print("Try using the LLM tool with Gemini API instead:")
+             print(
+                 f"python demo.py --backend llm --model gemini-1.5-pro-latest --person {args.person}"
+                 + (f' --topic "{args.topic}"' if args.topic else "")
+                 + (f' --message "{args.message}"' if args.message else "")
+             )
+         # Ollama is now handled through the llm backend
+         elif args.backend == "llm":
+             if "gemini" in args.model.lower() or "gemma" in args.model.lower():
+                 print(
+                     "\nMake sure you have the GEMINI_API_KEY environment variable set:"
+                 )
+                 print("export GEMINI_API_KEY=your_api_key")
+                 print("\nAnd make sure llm-gemini is installed:")
+                 print("llm install llm-gemini")
+             elif "mlx" in args.model.lower():
+                 print("\nMake sure llm-mlx is installed:")
+                 print("llm install llm-mlx")
+             elif "ollama" in args.model.lower():
+                 print("\nMake sure Ollama is installed and running:")
+                 print("1. Install from https://ollama.ai/")
+                 print("2. Start Ollama with: ollama serve")
+                 print("3. Install the llm-ollama plugin: llm install llm-ollama")
+                 print(
+                     f"4. Pull the model: ollama pull {args.model.replace('ollama/', '')}"
+                 )
+             else:
+                 print("\nMake sure Simon Willison's LLM tool is installed:")
+                 print("pip install llm")
+
+
+ # If running as a script
+ if __name__ == "__main__":
+     main()
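
Note: build_enhanced_prompt above reads a social_graph.json with aac_user, people, places, and common_utterances keys, but that file is not part of this commit. The snippet below is only a minimal sketch of a structure that would satisfy those code paths; every name and value in it is a placeholder for illustration, not the project's real data.

# Minimal sketch (assumed structure, placeholder values) of a social_graph.json
# that covers the fields read by load_social_graph/build_enhanced_prompt above.
import json

social_graph = {
    "aac_user": {
        "name": "Will",
        "age": 38,
        "location": "the UK",
        "background": "Father of Billy, diagnosed with MND.",
        "communication_needs": "Short, easy-to-select responses.",
    },
    "people": {
        "billy": {
            "name": "Billy",
            "role": "son",
            "context": "7-year-old son who loves Manchester United football.",
            "topics": ["football", "school"],
            "frequency": "daily",
            "common_phrases": ["Dad, did you see the United match last night?"],
        }
    },
    "places": ["home", "school", "the park"],
    "common_utterances": {
        "sports_talk": ["Did you watch the game?", "What a match that was!"]
    },
}

# Write the placeholder graph so demo.py can load it
with open("social_graph.json", "w") as f:
    json.dump(social_graph, f, indent=2)

With a file like that in place, the demo can be run along the lines of its own help text, for example: python demo.py --backend llm --model gemini-1.5-pro-latest --person billy --topic football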