Spaces · Build error
Commit 0e4080b: Initial commit (no parent commits)

Files changed:
- .env +4 -0
- Dockerfile +27 -0
- README.md +63 -0
- __pycache__/app.cpython-312.pyc +0 -0
- __pycache__/test_app.cpython-312-pytest-8.4.1.pyc +0 -0
- app.py +75 -0
- chat.py +27 -0
- llm.py +31 -0
- requirements.txt +12 -0
- test_app.py +41 -0
.env
ADDED
@@ -0,0 +1,4 @@
GOOGLE_AI_STUDIO_KEY=AIzaSyDdAWvaw8wW-PiljMKwGNNccPRMvG70MNA
SUPABASE_URL=https://afuvukjavlffublsdvsa.supabase.co
SUPABASE_SERVICE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImFmdXZ1a2phdmxmZnVibHNkdnNhIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTc0NDA2NDY0MiwiZXhwIjoyMDU5NjQwNjQyfQ.3718kzdI96X23Ewrk_zh39mgKRD7PDB11P9Wv22Fk-Q
HF_MODEL_ID=mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF
Dockerfile
ADDED
@@ -0,0 +1,27 @@
FROM python:3.9-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache
COPY requirements.txt .
RUN pip install -r requirements.txt

# Copy the rest of the application
COPY . .

# Set environment variables
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose the port HF Spaces expects
EXPOSE 7860

# Start the FastAPI app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
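Note that `COPY . .` copies the whole build context into the image, including the committed `__pycache__` bytecode and the `.env` file above. A minimal `.dockerignore` along these lines (a suggestion, not part of this commit) would keep them out:

```
__pycache__/
*.pyc
.env
.git/
```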
README.md
ADDED
@@ -0,0 +1,63 @@
# Gemma AI Assistant Space

This Space hosts the backend API for the Gemma AI Assistant, a conversational AI that combines local LLM processing (via HuggingFace Transformers) with real-time chat through Google's Gemini API.

## Features

- FastAPI backend with async support
- Local LLM using `mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF`
- Gemini API integration for real-time chat
- Supabase integration for data persistence
- Containerized deployment

## API Endpoints

### POST /api/chat
Process chat messages using either the local LLM or the Gemini API.

**Request Body:**
```json
{
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you?"
    }
  ],
  "use_gemini": true,
  "temperature": 0.7
}
```

**Response:**
```json
{
  "response": "I'm doing well, thank you! How can I help you today?"
}
```

## Environment Variables Required

- `GOOGLE_AI_STUDIO_KEY`: Your Google AI Studio API key
- `SUPABASE_URL`: Your Supabase project URL
- `SUPABASE_SERVICE_KEY`: Your Supabase service role key
- `HF_MODEL_ID`: HuggingFace model ID (default: `mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF`)

## Local Development

1. Install dependencies:
```bash
pip install -r requirements.txt
```

2. Run the server:
```bash
uvicorn app:app --reload --port 7860
```

## Testing

Run the tests using pytest:
```bash
pytest test_app.py -v
```
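As a quick smoke test of the endpoint documented above, a request like the following should work against a locally running server (host and port match the Local Development section; the payload mirrors the example request body):

```bash
curl -X POST http://localhost:7860/api/chat \
  -H "Content-Type: application/json" \
  -d '{"messages": [{"role": "user", "content": "Hello, how are you?"}], "use_gemini": true, "temperature": 0.7}'
```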
__pycache__/app.cpython-312.pyc
ADDED
Binary file (3.38 kB)
__pycache__/test_app.cpython-312-pytest-8.4.1.pyc
ADDED
Binary file (5.23 kB)
app.py
ADDED
@@ -0,0 +1,75 @@
from fastapi import FastAPI, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Optional
from supabase import create_client, Client
import os
from dotenv import load_dotenv

from llm import LLMPipeline
from chat import GeminiChat

load_dotenv()

app = FastAPI()

# CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your frontend URL
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize Supabase client
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")

if not supabase_url or not supabase_key:
    raise ValueError("Supabase environment variables not set")

supabase: Client = create_client(supabase_url, supabase_key)

# Initialize AI models
llm = LLMPipeline()
gemini = GeminiChat()

class Message(BaseModel):
    role: str
    content: str

class ChatRequest(BaseModel):
    messages: List[Message]
    use_gemini: bool = True
    temperature: float = 0.7

@app.post("/api/chat")
async def chat(request: ChatRequest):
    try:
        if request.use_gemini:
            # Use Gemini for interactive chat
            response = await gemini.chat(
                [{"role": m.role, "content": m.content} for m in request.messages],
                temperature=request.temperature
            )
        else:
            # Use local LLM for specific tasks
            last_message = request.messages[-1].content
            response = await llm.generate(last_message)

        # Store chat history in Supabase
        supabase.table("chat_history").insert({
            "messages": [m.dict() for m in request.messages],
            "response": response,
            "model": "gemini" if request.use_gemini else "local"
        }).execute()

        return {"response": response}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
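One thing to watch in the handler above: `supabase.table(...).insert(...).execute()` is a synchronous call made inside an async route, so it blocks the event loop for the duration of the network round trip. A minimal sketch of one way to offload it, assuming the same `supabase` client and `ChatRequest` model defined in app.py:

```python
import asyncio

async def store_history(request: ChatRequest, response: str) -> None:
    """Run the blocking Supabase insert in a worker thread (Python 3.9+)."""
    def _insert():
        supabase.table("chat_history").insert({
            "messages": [m.dict() for m in request.messages],
            "response": response,
            "model": "gemini" if request.use_gemini else "local",
        }).execute()

    # asyncio.to_thread keeps the event loop free while the insert runs.
    await asyncio.to_thread(_insert)
```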
chat.py
ADDED
@@ -0,0 +1,27 @@
import google.generativeai as genai
from typing import List, Dict
import os

class GeminiChat:
    def __init__(self):
        api_key = os.getenv("GOOGLE_AI_STUDIO_KEY")
        if not api_key:
            raise ValueError("GOOGLE_AI_STUDIO_KEY environment variable not set")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

    async def chat(self,
                   messages: List[Dict[str, str]],
                   temperature: float = 0.7) -> str:
        """Generate a chat response using Gemini API."""
        try:
            chat = self.model.start_chat(history=messages)
            response = chat.send_message(
                messages[-1]["content"],
                generation_config={"temperature": temperature}
            )
            return response.text
        except Exception as e:
            print(f"Error in Gemini chat: {e}")
            return ""
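A caveat on `start_chat(history=messages)`: the google-generativeai SDK expects history entries shaped like `{"role": "user" | "model", "parts": [...]}`, not the OpenAI-style `{"role", "content"}` dicts this API passes around, and the last message here ends up both in the history and in `send_message`. A sketch of a conversion helper under that assumption (the helper name is hypothetical, not in the commit):

```python
from typing import Dict, List

def to_gemini_history(messages: List[Dict[str, str]]) -> List[Dict]:
    """Map OpenAI-style chat messages to google-generativeai history entries,
    dropping the final message so it can be passed to send_message() instead."""
    role_map = {"user": "user", "assistant": "model"}
    return [
        {"role": role_map.get(m["role"], "user"), "parts": [m["content"]]}
        for m in messages[:-1]
    ]

# Inside GeminiChat.chat this would become:
#   chat = self.model.start_chat(history=to_gemini_history(messages))
#   response = chat.send_message(messages[-1]["content"], ...)
```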
llm.py
ADDED
@@ -0,0 +1,31 @@
from transformers import pipeline
import torch
import os
from dotenv import load_dotenv

load_dotenv()

class LLMPipeline:
    def __init__(self):
        model_id = os.getenv("HF_MODEL_ID", "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF")
        self.pipeline = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.float16,
            device_map="auto"
        )

    async def generate(self, prompt: str, max_length: int = 100) -> str:
        """Generate text using the local Gemma model."""
        try:
            result = self.pipeline(
                prompt,
                max_length=max_length,
                num_return_sequences=1,
                temperature=0.7,
                top_p=0.9
            )
            return result[0]['generated_text']
        except Exception as e:
            print(f"Error in LLM generation: {e}")
            return ""
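A likely source of the Space's failure: `HF_MODEL_ID` points at a GGUF quantization repo, and `pipeline("text-generation", model=...)` cannot load a GGUF repo the way it loads an ordinary safetensors checkpoint. Recent transformers releases can dequantize GGUF files through the `gguf_file` argument to `from_pretrained`, though support depends on the architecture and library version. A sketch under those assumptions; the filename below is hypothetical and should be replaced with an actual file from the repo:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "mradermacher/Huihui-gemma-3n-E4B-it-abliterated-GGUF"
gguf_file = "Huihui-gemma-3n-E4B-it-abliterated.Q4_K_M.gguf"  # hypothetical filename

# Requires a transformers build with GGUF support plus the `gguf` package.
tokenizer = AutoTokenizer.from_pretrained(repo_id, gguf_file=gguf_file)
model = AutoModelForCausalLM.from_pretrained(repo_id, gguf_file=gguf_file)
```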
requirements.txt
ADDED
@@ -0,0 +1,12 @@
fastapi>=0.68.0
uvicorn>=0.15.0
python-dotenv>=0.19.0
transformers>=4.30.0
torch>=2.0.0
google-generativeai>=0.3.0
supabase>=2.0.0
python-multipart>=0.0.6
numpy>=1.21.0
scipy>=1.7.0
pytest>=7.0.0
httpx>=0.24.0  # Required for TestClient
test_app.py
ADDED
@@ -0,0 +1,41 @@
import pytest
from fastapi.testclient import TestClient
from app import app

client = TestClient(app)

def test_chat_endpoint():
    test_messages = [
        {"role": "user", "content": "What is 2+2?"}
    ]

    response = client.post(
        "/api/chat",
        json={
            "messages": test_messages,
            "use_gemini": False,  # Test local LLM
            "temperature": 0.7
        }
    )

    assert response.status_code == 200
    assert "response" in response.json()
    assert isinstance(response.json()["response"], str)

def test_gemini_chat():
    test_messages = [
        {"role": "user", "content": "Tell me a short joke."}
    ]

    response = client.post(
        "/api/chat",
        json={
            "messages": test_messages,
            "use_gemini": True,  # Test Gemini API
            "temperature": 0.7
        }
    )

    assert response.status_code == 200
    assert "response" in response.json()
    assert isinstance(response.json()["response"], str)
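Both tests above exercise the real local model, the Gemini API, and Supabase, so they need working credentials and a loaded model to pass. For CI it may be preferable to stub those dependencies out; a sketch using pytest's `monkeypatch` fixture and the `client` defined above (the stub names are hypothetical, not part of the commit):

```python
import app as app_module

def test_chat_endpoint_stubbed(monkeypatch):
    async def fake_generate(prompt: str, max_length: int = 100) -> str:
        return "2+2 is 4."

    class FakeTable:
        def insert(self, _row):
            return self
        def execute(self):
            return None

    # Replace the local LLM call and the Supabase insert with in-memory stubs.
    monkeypatch.setattr(app_module.llm, "generate", fake_generate)
    monkeypatch.setattr(app_module.supabase, "table", lambda _name: FakeTable())

    response = client.post(
        "/api/chat",
        json={"messages": [{"role": "user", "content": "What is 2+2?"}], "use_gemini": False}
    )

    assert response.status_code == 200
    assert response.json()["response"] == "2+2 is 4."
```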