ameliakris committed
Commit 0e4080b · 0 Parent(s)

Initial commit

.env ADDED
@@ -0,0 +1,4 @@
+ GOOGLE_AI_STUDIO_KEY=AIzaSyDdAWvaw8wW-PiljMKwGNNccPRMvG70MNA
+ SUPABASE_URL=https://afuvukjavlffublsdvsa.supabase.co
+ SUPABASE_SERVICE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImFmdXZ1a2phdmxmZnVibHNkdnNhIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc0NDA2NDY0MiwiZXhwIjoyMDU5NjQwNjQyfQ.3718kzdI96X23Ewrk_zh39mgKRD7PDB11P9Wv22Fk-Q
+ HF_MODEL_ID=mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF
Dockerfile ADDED
@@ -0,0 +1,27 @@
+ FROM python:3.9-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     curl \
+     software-properties-common \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements first to leverage the Docker layer cache
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application
+ COPY . .
+
+ # Set environment variables
+ ENV HOST=0.0.0.0
+ ENV PORT=7860
+
+ # Expose the port HF Spaces expects
+ EXPOSE 7860
+
+ # Start the FastAPI app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,93 @@
+ # Gemma AI Assistant Space
+
+ This Space hosts the backend API for the Gemma AI Assistant, a conversational AI that combines local LLM inference via Hugging Face Transformers with real-time chat powered by Google's Gemini API.
+
+ ## Features
+
+ - FastAPI backend with async support
+ - Local LLM using `mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF`
+ - Gemini API integration for real-time chat
+ - Supabase integration for data persistence
+ - Containerized deployment
+
+ ## API Endpoints
+
+ ### POST /api/chat
+ Processes chat messages using either the local LLM or the Gemini API; see the client example below.
+
+ **Request Body:**
+ ```json
+ {
+   "messages": [
+     {
+       "role": "user",
+       "content": "Hello, how are you?"
+     }
+   ],
+   "use_gemini": true,
+   "temperature": 0.7
+ }
+ ```
+
+ **Response:**
+ ```json
+ {
+   "response": "I'm doing well, thank you! How can I help you today?"
+ }
+ ```
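+
+ For example, with the server running locally on port 7860, you can call the endpoint from Python using `httpx` (already pinned in `requirements.txt`):
+
+ ```python
+ import httpx
+
+ payload = {
+     "messages": [{"role": "user", "content": "Hello, how are you?"}],
+     "use_gemini": True,
+     "temperature": 0.7,
+ }
+
+ resp = httpx.post("http://localhost:7860/api/chat", json=payload, timeout=60)
+ resp.raise_for_status()
+ print(resp.json()["response"])
+ ```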
+
+ ## Environment Variables Required
+
+ - `GOOGLE_AI_STUDIO_KEY`: Your Google AI Studio API key
+ - `SUPABASE_URL`: Your Supabase project URL
+ - `SUPABASE_SERVICE_KEY`: Your Supabase service role key
+ - `HF_MODEL_ID`: Hugging Face model ID (default: `mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF`)
+
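+ Locally these are read from `.env` via `python-dotenv`; on Hugging Face Spaces, set them as Space secrets rather than committing them. A minimal sanity check before starting the server:
+
+ ```python
+ import os
+ from dotenv import load_dotenv
+
+ # Load .env if present, then confirm every required variable is set
+ load_dotenv()
+ for var in ("GOOGLE_AI_STUDIO_KEY", "SUPABASE_URL", "SUPABASE_SERVICE_KEY"):
+     assert os.getenv(var), f"{var} is not set"
+ ```
+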
+ ## Local Development
+
+ 1. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ 2. Run the server:
+ ```bash
+ uvicorn app:app --reload --port 7860
+ ```
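+
+ 3. Open the interactive docs that FastAPI serves automatically at `http://localhost:7860/docs` to try the endpoint from the browser.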
+
+ ## Testing
+
+ Run the tests using pytest:
+ ```bash
+ pytest test_app.py -v
+ ```
__pycache__/app.cpython-312.pyc ADDED
Binary file (3.38 kB)

__pycache__/test_app.cpython-312-pytest-8.4.1.pyc ADDED
Binary file (5.23 kB)
app.py ADDED
@@ -0,0 +1,78 @@
+ from fastapi import FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import List
+ from supabase import create_client, Client
+ import os
+ from dotenv import load_dotenv
+
+ from llm import LLMPipeline
+ from chat import GeminiChat
+
+ load_dotenv()
+
+ app = FastAPI()
+
+ # CORS middleware. Note: browsers reject a wildcard origin when
+ # allow_credentials=True, so set your real frontend URL in production.
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Initialize Supabase client
+ supabase_url = os.getenv("SUPABASE_URL")
+ supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
+
+ if not supabase_url or not supabase_key:
+     raise ValueError("Supabase environment variables not set")
+
+ supabase: Client = create_client(supabase_url, supabase_key)
+
+ # Initialize AI models
+ llm = LLMPipeline()
+ gemini = GeminiChat()
+
+ class Message(BaseModel):
+     role: str
+     content: str
+
+ class ChatRequest(BaseModel):
+     messages: List[Message]
+     use_gemini: bool = True
+     temperature: float = 0.7
+
+ @app.post("/api/chat")
+ async def chat(request: ChatRequest):
+     if not request.messages:
+         raise HTTPException(status_code=422, detail="messages must not be empty")
+     try:
+         if request.use_gemini:
+             # Use Gemini for interactive chat
+             response = await gemini.chat(
+                 [{"role": m.role, "content": m.content} for m in request.messages],
+                 temperature=request.temperature
+             )
+         else:
+             # Use the local LLM on the last message only
+             last_message = request.messages[-1].content
+             response = await llm.generate(last_message)
+
+         # Store the exchange in Supabase
+         supabase.table("chat_history").insert({
+             "messages": [m.dict() for m in request.messages],
+             "response": response,
+             "model": "gemini" if request.use_gemini else "local"
+         }).execute()
+
+         return {"response": response}
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
chat.py ADDED
@@ -0,0 +1,34 @@
+ import google.generativeai as genai
+ from typing import List, Dict
+ import os
+
+ def _to_gemini_history(messages: List[Dict[str, str]]) -> List[Dict]:
+     # Gemini expects {"role": "user" | "model", "parts": [...]}, so map the
+     # OpenAI-style "assistant" role and wrap each content string in a parts list.
+     return [
+         {"role": "model" if m["role"] == "assistant" else "user",
+          "parts": [m["content"]]}
+         for m in messages
+     ]
+
+ class GeminiChat:
+     def __init__(self):
+         api_key = os.getenv("GOOGLE_AI_STUDIO_KEY")
+         if not api_key:
+             raise ValueError("GOOGLE_AI_STUDIO_KEY environment variable not set")
+
+         genai.configure(api_key=api_key)
+         self.model = genai.GenerativeModel('gemini-pro')
+
+     async def chat(self,
+                    messages: List[Dict[str, str]],
+                    temperature: float = 0.7) -> str:
+         """Generate a chat response using the Gemini API."""
+         # Seed the session with everything before the last message, then send
+         # the last message itself; errors propagate to the API layer's handler.
+         chat = self.model.start_chat(history=_to_gemini_history(messages[:-1]))
+         response = await chat.send_message_async(
+             messages[-1]["content"],
+             generation_config={"temperature": temperature}
+         )
+         return response.text
llm.py ADDED
@@ -0,0 +1,33 @@
+ from transformers import pipeline
+ import torch
+ import os
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ class LLMPipeline:
+     def __init__(self):
+         # Note: the default below is a GGUF quantization repo. GGUF files target
+         # llama.cpp-style runtimes; transformers can load GGUF only via an
+         # explicit gguf_file argument, and only for supported architectures, so
+         # a standard safetensors checkpoint is a safer default for this pipeline.
+         model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
+         self.pipeline = pipeline(
+             "text-generation",
+             model=model_id,
+             torch_dtype=torch.float16,
+             device_map="auto"
+         )
+
+     async def generate(self, prompt: str, max_new_tokens: int = 100) -> str:
+         """Generate text using the local Gemma model."""
+         result = self.pipeline(
+             prompt,
+             max_new_tokens=max_new_tokens,  # cap new tokens, not prompt + output
+             num_return_sequences=1,
+             do_sample=True,  # sampling must be on for temperature/top_p to apply
+             temperature=0.7,
+             top_p=0.9,
+             return_full_text=False  # return only the completion, not the prompt
+         )
+         return result[0]["generated_text"]
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ fastapi>=0.68.0
+ uvicorn>=0.15.0
+ python-dotenv>=0.19.0
+ transformers>=4.30.0
+ torch>=2.0.0
+ google-generativeai>=0.3.0
+ supabase>=2.0.0
+ python-multipart>=0.0.6
+ numpy>=1.21.0
+ scipy>=1.7.0
+ pytest>=7.0.0
+ httpx>=0.24.0  # required for FastAPI's TestClient
test_app.py ADDED
@@ -0,0 +1,44 @@
+ from fastapi.testclient import TestClient
+ from app import app
+
+ client = TestClient(app)
+
+ # Note: these are integration tests. They exercise the real local model and
+ # the live Gemini API, so the environment variables (and the model download)
+ # must be in place before running them.
+
+ def test_chat_endpoint():
+     test_messages = [
+         {"role": "user", "content": "What is 2+2?"}
+     ]
+
+     response = client.post(
+         "/api/chat",
+         json={
+             "messages": test_messages,
+             "use_gemini": False,  # exercise the local LLM path
+             "temperature": 0.7
+         }
+     )
+
+     assert response.status_code == 200
+     assert "response" in response.json()
+     assert isinstance(response.json()["response"], str)
+
+ def test_gemini_chat():
+     test_messages = [
+         {"role": "user", "content": "Tell me a short joke."}
+     ]
+
+     response = client.post(
+         "/api/chat",
+         json={
+             "messages": test_messages,
+             "use_gemini": True,  # exercise the Gemini API path
+             "temperature": 0.7
+         }
+     )
+
+     assert response.status_code == 200
+     assert "response" in response.json()
+     assert isinstance(response.json()["response"], str)