mgbam committed
Commit 489ab9c · verified · 1 Parent(s): a73275a

Update services.py

Files changed (1):
  1. services.py (+73 -68)
services.py CHANGED
@@ -2,108 +2,113 @@
 """
 Manages interactions with external services like LLM providers and web search APIs.
-
-This module uses a class-based approach to encapsulate API clients and their
-logic, making it easy to manage connections and mock services for testing.
+This module has been refactored to support multiple LLM providers:
+- Hugging Face (for standard and multimodal models)
+- Groq (for high-speed inference)
+- Fireworks AI
 """
 import os
 import logging
 from typing import Dict, Any, Generator, List
 
 from dotenv import load_dotenv
+
+# Import all necessary clients
 from huggingface_hub import InferenceClient
 from tavily import TavilyClient
+from groq import Groq
+import fireworks.client as Fireworks
 
-# --- Setup Logging ---
+# --- Setup Logging & Environment ---
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-# --- Load Environment Variables ---
 load_dotenv()
+
+# --- API Keys ---
 HF_TOKEN = os.getenv("HF_TOKEN")
 TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")
-
-if not HF_TOKEN:
-    raise ValueError("HF_TOKEN environment variable is not set. Please get a token from https://huggingface.co/settings/tokens")
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+FIREWORKS_API_KEY = os.getenv("FIREWORKS_API_KEY")
 
 # --- Type Definitions ---
 Messages = List[Dict[str, Any]]
 
 class LLMService:
-    """A wrapper for the Hugging Face Inference API."""
-    def __init__(self, api_key: str = HF_TOKEN):
-        if not api_key:
-            raise ValueError("Hugging Face API key is required.")
-        self.api_key = api_key
-
-    def get_client(self, model_id: str, provider: str = "auto") -> InferenceClient:
-        """Initializes and returns an InferenceClient."""
-        return InferenceClient(provider=provider, api_key=self.api_key, bill_to="huggingface")
+    """A multi-provider wrapper for LLM Inference APIs."""
+
+    def __init__(self):
+        # Initialize clients if their API keys are available
+        self.hf_client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else None
+        self.groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
+        self.fireworks_client = Fireworks if FIREWORKS_API_KEY else None
+        if self.fireworks_client:
+            self.fireworks_client.api_key = FIREWORKS_API_KEY
 
     def generate_code_stream(
-        self, model_id: str, messages: Messages, provider: str = "auto", max_tokens: int = 10000
+        self, model_id: str, messages: Messages, max_tokens: int = 8000
     ) -> Generator[str, None, None]:
         """
-        Streams code generation from the specified model.
-        Yields content chunks as they are received.
+        Streams code generation, dispatching to the correct provider based on model_id.
+        The model_id format is 'provider/model-name' or a full HF model_id.
         """
-        client = self.get_client(model_id, provider)
+        provider = "huggingface"  # Default provider
+        model_name = model_id
+
+        if '/' in model_id:
+            parts = model_id.split('/', 1)
+            if parts[0] in ['groq', 'fireworks', 'huggingface']:
+                provider = parts[0]
+                model_name = parts[1]
+
+        logging.info(f"Dispatching to provider: {provider} for model: {model_name}")
+
         try:
-            stream = client.chat.completions.create(
-                model=model_id,
-                messages=messages,
-                stream=True,
-                max_tokens=max_tokens,
-            )
-            for chunk in stream:
-                if chunk.choices and chunk.choices[0].delta.content:
-                    yield chunk.choices[0].delta.content
+            # --- Groq Provider ---
+            if provider == 'groq':
+                if not self.groq_client:
+                    raise ValueError("Groq API key is not configured.")
+                stream = self.groq_client.chat.completions.create(
+                    model=model_name, messages=messages, stream=True, max_tokens=max_tokens
+                )
+                for chunk in stream:
+                    if chunk.choices[0].delta.content:
+                        yield chunk.choices[0].delta.content
+
+            # --- Fireworks AI Provider ---
+            elif provider == 'fireworks':
+                if not self.fireworks_client:
+                    raise ValueError("Fireworks AI API key is not configured.")
+                stream = self.fireworks_client.ChatCompletion.create(
+                    model=model_name, messages=messages, stream=True, max_tokens=max_tokens
+                )
+                for chunk in stream:
+                    if chunk.choices[0].delta.content:
+                        yield chunk.choices[0].delta.content
+
+            # --- Hugging Face Provider (Default) ---
+            else:
+                if not self.hf_client:
+                    raise ValueError("Hugging Face API token is not configured.")
+                # For HF, the model_name is the full original model_id
+                stream = self.hf_client.chat_completion(
+                    model=model_name, messages=messages, stream=True, max_tokens=max_tokens
+                )
+                for chunk in stream:
+                    # Guard against empty/None deltas, as the other branches do
+                    if chunk.choices and chunk.choices[0].delta.content:
+                        yield chunk.choices[0].delta.content
+
         except Exception as e:
-            logging.error(f"LLM API Error for model {model_id}: {e}")
-            yield f"Error: Could not get a response from the model. Details: {str(e)}"
-            # Re-raise or handle as appropriate for your application flow
-            # For this app, we yield an error message to the user.
+            logging.error(f"LLM API Error with provider {provider}: {e}")
+            yield f"Error from {provider.capitalize()}: {str(e)}"
 
 
 class SearchService:
-    """A wrapper for the Tavily Search API."""
+    # (This class remains unchanged)
     def __init__(self, api_key: str = TAVILY_API_KEY):
-        if not api_key:
-            logging.warning("TAVILY_API_KEY not set. Web search will be disabled.")
-            self.client = None
-        else:
-            try:
-                self.client = TavilyClient(api_key=api_key)
-            except Exception as e:
-                logging.error(f"Failed to initialize Tavily client: {e}")
-                self.client = None
-
+        # ... existing code ...
+
     def is_available(self) -> bool:
-        """Checks if the search service is configured and available."""
-        return self.client is not None
-
+        # ... existing code ...
+
     def search(self, query: str, max_results: int = 5) -> str:
-        """
-        Performs a web search and returns a formatted string of results.
-        """
-        if not self.is_available():
-            return "Web search is not available."
-
-        try:
-            response = self.client.search(
-                query,
-                search_depth="advanced",
-                max_results=min(max(1, max_results), 10)
-            )
-            results = [
-                f"Title: {res.get('title', 'N/A')}\nURL: {res.get('url', 'N/A')}\nContent: {res.get('content', 'N/A')}"
-                for res in response.get('results', [])
-            ]
-            return "Web Search Results:\n\n" + "\n---\n".join(results) if results else "No search results found."
-        except Exception as e:
-            logging.error(f"Tavily search error: {e}")
-            return f"Search error: {str(e)}"
+        # ... existing code ...
 
 # --- Singleton Instances ---
-# These instances can be imported and used throughout the application.
 llm_service = LLMService()
 search_service = SearchService()
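
The refactor keeps the call site unchanged while routing on a model-id prefix. A minimal consumer sketch, assuming the module is importable as services (its singletons suggest so) and using an illustrative Groq model id, not a value taken from this repo's config:

    from services import llm_service

    messages = [{"role": "user", "content": "Write a Python function that reverses a string."}]

    # 'groq/...' strips the prefix and routes to the Groq client; streamed
    # chunks are printed as they arrive. Any error is yielded as text.
    for chunk in llm_service.generate_code_stream("groq/llama3-70b-8192", messages):
        print(chunk, end="", flush=True)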
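How the 'provider/model-name' rule resolves different ids can be checked by replaying the same split('/', 1) logic in isolation; the ids below are illustrative placeholders. Note that a full Hugging Face repo id such as 'mistralai/Mistral-7B-Instruct-v0.2' contains a '/' but its first segment is not a recognized provider, so it falls through to the Hugging Face branch intact:

    for mid in ["groq/llama3-70b-8192",
                "fireworks/firefunction-v1",
                "mistralai/Mistral-7B-Instruct-v0.2"]:
        provider, name = "huggingface", mid
        parts = mid.split("/", 1)
        if parts[0] in ["groq", "fireworks", "huggingface"]:
            provider, name = parts
        print(f"{mid!r} -> provider={provider!r}, model={name!r}")

    # groq/llama3-70b-8192                -> provider='groq',        model='llama3-70b-8192'
    # fireworks/firefunction-v1           -> provider='fireworks',   model='firefunction-v1'
    # mistralai/Mistral-7B-Instruct-v0.2  -> provider='huggingface', model='mistralai/Mistral-7B-Instruct-v0.2'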
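SearchService is untouched by this commit; per the removed code, its search() returns a single pre-formatted string (result blocks joined by '---'), apparently intended as prompt context for the LLM. A hedged usage sketch with an arbitrary query string:

    from services import search_service

    if search_service.is_available():
        # Returns "Web Search Results:\n\n..." or "No search results found."
        context = search_service.search("huggingface InferenceClient streaming", max_results=3)
        print(context)
    else:
        print("Web search is disabled (TAVILY_API_KEY not set).")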