mgbam committed on
Commit
1687ea3
·
verified ·
1 Parent(s): f60d626

Rename file_processing.py to services.py

Browse files
Files changed (2) hide show
  1. file_processing.py +0 -90
  2. services.py +109 -0
file_processing.py DELETED
@@ -1,90 +0,0 @@
1
- import os
2
- import mimetypes
3
- import PyPDF2
4
- import docx
5
- import cv2
6
- import numpy as np
7
- from PIL import Image
8
- import pytesseract
9
-
10
def process_image_for_model(image):
    """Encode an image as a base64 PNG data URL for model input.

    Accepts a PIL Image or a numpy array (as produced by Gradio).
    Returns None when no image is given.
    """
    if image is None:
        return None

    import base64
    import io

    # Gradio hands images over as numpy arrays; normalize to PIL first.
    if isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image)
    else:
        pil_image = image

    png_buffer = io.BytesIO()
    pil_image.save(png_buffer, format='PNG')
    encoded = base64.b64encode(png_buffer.getvalue()).decode()
    return f"data:image/png;base64,{encoded}"
27
-
28
def extract_text_from_image(image_path):
    """Extract text from an image file using Tesseract OCR.

    Returns the extracted text, or a human-readable error string when
    Tesseract is missing, the file cannot be read, or OCR fails.
    This function never raises; all failures are reported as strings.
    """
    try:
        # Fail early with an actionable message if the Tesseract binary is absent.
        try:
            pytesseract.get_tesseract_version()
        except Exception:
            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images. See install_tesseract.md for instructions."

        image = cv2.imread(image_path)
        if image is None:
            return "Error: Could not read image file"

        # cv2 loads BGR; convert straight to grayscale. The previous
        # BGR->RGB->GRAY round trip produced the same pixels in two passes.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Otsu thresholding binarizes the page, which improves OCR accuracy.
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        # --psm 6: assume a single uniform block of text.
        text = pytesseract.image_to_string(binary, config='--psm 6')
        return text.strip() if text.strip() else "No text found in image"

    except Exception as e:
        return f"Error extracting text from image: {e}"
49
-
50
def extract_text_from_file(file_path):
    """Dispatch text extraction based on the file's extension.

    Supports PDF, plain text (.txt/.md/.csv), Word (.docx), and common
    image formats (routed through OCR). Returns "" for a missing path or
    an unsupported extension, and an error string when extraction fails.
    """
    if not file_path:
        return ""
    extension = os.path.splitext(file_path)[1].lower()
    try:
        if extension == ".pdf":
            with open(file_path, "rb") as handle:
                pdf = PyPDF2.PdfReader(handle)
                # Pages with no extractable text yield None; substitute "".
                return "\n".join(page.extract_text() or "" for page in pdf.pages)
        if extension in (".txt", ".md", ".csv"):
            with open(file_path, "r", encoding="utf-8") as handle:
                return handle.read()
        if extension == ".docx":
            document = docx.Document(file_path)
            return "\n".join(paragraph.text for paragraph in document.paragraphs)
        if extension in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"):
            # Image formats go through the OCR pipeline.
            return extract_text_from_image(file_path)
        return ""
    except Exception as e:
        return f"Error extracting text: {e}"
71
-
72
def create_multimodal_message(text, image=None):
    """Build an OpenAI-style chat message, optionally with an attached image.

    With no image, the content is the plain text string; with an image, the
    content becomes a list of a text part plus a base64 image_url part.
    """
    if image is None:
        return {"role": "user", "content": text}

    text_part = {"type": "text", "text": text}
    image_part = {
        "type": "image_url",
        "image_url": {"url": process_image_for_model(image)},
    }
    return {"role": "user", "content": [text_part, image_part]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
services.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# /services.py

"""
Manages interactions with external services like LLM providers and web search APIs.

This module uses a class-based approach to encapsulate API clients and their
logic, making it easy to manage connections and mock services for testing.
"""
import os
import logging
from typing import Dict, Any, Generator, List

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from tavily import TavilyClient

# --- Setup Logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Load Environment Variables ---
# Reads a local .env file (if present) into the process environment.
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Fail fast at import time: the LLM service is unusable without a token.
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set. Please get a token from https://huggingface.co/settings/tokens")

# --- Type Definitions ---
# A chat transcript: a list of {"role": ..., "content": ...} message dicts.
Messages = List[Dict[str, Any]]
30
+
31
class LLMService:
    """A wrapper for the Hugging Face Inference API."""

    def __init__(self, api_key: str = HF_TOKEN):
        """Store the API key, raising early if none is available.

        Raises:
            ValueError: if `api_key` is empty/None.
        """
        if not api_key:
            raise ValueError("Hugging Face API key is required.")
        self.api_key = api_key

    def get_client(self, model_id: str, provider: str = "auto") -> InferenceClient:
        """Initializes and returns an InferenceClient.

        NOTE: `model_id` is accepted for interface symmetry but is not used
        here — the model is selected per request in `generate_code_stream`.
        """
        return InferenceClient(provider=provider, api_key=self.api_key, bill_to="huggingface")

    def generate_code_stream(
        self, model_id: str, messages: Messages, provider: str = "auto", max_tokens: int = 10000
    ) -> Generator[str, None, None]:
        """
        Streams code generation from the specified model.

        Yields content chunks as they are received. On API failure, yields a
        single user-facing error message instead of raising, so the UI
        stream terminates gracefully.
        """
        client = self.get_client(model_id, provider)
        try:
            stream = client.chat.completions.create(
                model=model_id,
                messages=messages,
                stream=True,
                max_tokens=max_tokens,
            )
            for chunk in stream:
                # Skip keep-alive/empty chunks that carry no delta content.
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            # Lazy %-style args avoid formatting work when the level is disabled.
            logging.error("LLM API Error for model %s: %s", model_id, e)
            # Surface the failure to the caller as a streamed message rather
            # than re-raising; this app reports errors to the user inline.
            yield f"Error: Could not get a response from the model. Details: {str(e)}"
65
+
66
+
67
class SearchService:
    """A wrapper for the Tavily Search API."""

    def __init__(self, api_key: str = TAVILY_API_KEY):
        """Initialize the Tavily client; degrade gracefully when unconfigured.

        Search is optional: if no key is set or the client cannot be built,
        `self.client` is None and `search` reports unavailability.
        """
        if not api_key:
            logging.warning("TAVILY_API_KEY not set. Web search will be disabled.")
            self.client = None
        else:
            try:
                self.client = TavilyClient(api_key=api_key)
            except Exception as e:
                # Lazy %-style args avoid formatting work when the level is disabled.
                logging.error("Failed to initialize Tavily client: %s", e)
                self.client = None

    def is_available(self) -> bool:
        """Checks if the search service is configured and available."""
        return self.client is not None

    def search(self, query: str, max_results: int = 5) -> str:
        """
        Performs a web search and returns a formatted string of results.

        Never raises: returns a human-readable message when search is
        unavailable, finds nothing, or the API call fails.
        """
        if not self.is_available():
            return "Web search is not available."

        try:
            response = self.client.search(
                query,
                search_depth="advanced",
                # Clamp the requested count to the 1..10 range.
                max_results=min(max(1, max_results), 10)
            )
            results = [
                f"Title: {res.get('title', 'N/A')}\nURL: {res.get('url', 'N/A')}\nContent: {res.get('content', 'N/A')}"
                for res in response.get('results', [])
            ]
            return "Web Search Results:\n\n" + "\n---\n".join(results) if results else "No search results found."
        except Exception as e:
            logging.error("Tavily search error: %s", e)
            return f"Search error: {str(e)}"
105
+
106
# --- Singleton Instances ---
# These instances can be imported and used throughout the application.
# Both are constructed at import time: LLMService() requires HF_TOKEN
# (validated earlier in this module), while SearchService() falls back to
# a disabled client when TAVILY_API_KEY is missing.
llm_service = LLMService()
search_service = SearchService()