Spaces: Running

vinit5112 committed
Commit aff287e · 1 Parent(s): d994686

load model locally
Browse files
- Dockerfile +7 -1
- backend/CONVERSATION_HISTORY_SYSTEM.md +0 -249
- backend/SETUP_OFFLINE.md +0 -68
- backend/STREAMING_ANALYSIS.md +0 -178
- backend/download_model.py +0 -66
- backend/vector_store.py +59 -43
- model/all-MiniLM-L6-v2/1_Pooling/config.json +10 -0
- model/all-MiniLM-L6-v2/README.md +173 -0
- model/all-MiniLM-L6-v2/config.json +25 -0
- model/all-MiniLM-L6-v2/config_sentence_transformers.json +14 -0
- model/all-MiniLM-L6-v2/model.safetensors +3 -0
- model/all-MiniLM-L6-v2/modules.json +20 -0
- model/all-MiniLM-L6-v2/sentence_bert_config.json +4 -0
- model/all-MiniLM-L6-v2/special_tokens_map.json +37 -0
- model/all-MiniLM-L6-v2/tokenizer.json +0 -0
- model/all-MiniLM-L6-v2/tokenizer_config.json +65 -0
- model/all-MiniLM-L6-v2/vocab.txt +0 -0
- temp.py +6 -0
Dockerfile
CHANGED

```diff
@@ -25,7 +25,13 @@ RUN pip install --no-cache-dir -r requirements.txt
 # Copy backend code
 COPY backend/ /app/backend
 
-
+
+COPY model/ /app/model/
+
+ENV TRANSFORMERS_CACHE=/app/model
+ENV HF_HUB_OFFLINE=1
+ENV TRANSFORMERS_OFFLINE=1
+
 COPY --from=frontend-build /app/frontend/build /app/frontend_build
 
 # Install nginx
```
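The new `COPY model/ /app/model/` bakes the embedding model into the image, and the three `ENV` lines point the transformers stack at that directory while forbidding any Hugging Face Hub access at runtime. As a sanity check, here is a minimal sketch (assuming the image layout above, `/app/model/all-MiniLM-L6-v2`; this script is not part of the commit) showing the model should load with networking disabled:

```python
# Hypothetical smoke test to run inside the built image: confirms the
# baked-in model loads while the Hugging Face Hub is unreachable.
import os
from sentence_transformers import SentenceTransformer

os.environ["HF_HUB_OFFLINE"] = "1"          # mirrors the Dockerfile ENVs
os.environ["TRANSFORMERS_OFFLINE"] = "1"

model = SentenceTransformer("/app/model/all-MiniLM-L6-v2")
print(model.encode(["offline smoke test"]).shape)  # expect (1, 384)
```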
backend/CONVERSATION_HISTORY_SYSTEM.md
DELETED

# Conversation History Management System

## Overview
The conversation history system has been upgraded from a basic memory-only implementation to a comprehensive, persistent storage solution using localStorage with advanced features.

## 🔄 **Previous Implementation (Memory Only)**
```javascript
// ❌ OLD - Lost on page refresh
const [conversations, setConversations] = useState([]);
```

## ✅ **New Implementation (Persistent Storage)**

### 1. **Core Storage Utility** (`utils/conversationStorage.js`)
A comprehensive utility class that handles all conversation persistence:

```javascript
import ConversationStorage from './utils/conversationStorage';

// Load conversations from localStorage
const conversations = ConversationStorage.loadConversations();

// Save conversations to localStorage
ConversationStorage.saveConversations(conversations);
```

### 2. **Enhanced Conversation Structure**
```javascript
{
  id: "timestamp_based_id",
  title: "Conversation Title",
  messages: [
    {
      id: "message_id",
      role: "user" | "assistant",
      content: "message content",
      timestamp: Date
    }
  ],
  createdAt: Date,
  updatedAt: Date // ✅ NEW - Track when conversation was last modified
}
```

### 3. **Automatic Persistence**
- **Load on App Start**: Conversations are automatically loaded from localStorage
- **Save on Changes**: All conversation updates are automatically saved
- **No Manual Intervention**: Everything happens transparently

## 🚀 **Key Features**

### ✅ **Persistent Storage**
- Conversations survive page refreshes
- Conversations persist across browser sessions
- Automatic loading on app startup

### ✅ **Conversation Management**
- **Create**: New conversations are automatically saved
- **Update**: Message additions and title changes are saved
- **Delete**: Conversations can be permanently removed
- **Search**: Full-text search across all conversations

### ✅ **Storage Optimization**
- **Quota Management**: Handles localStorage size limits
- **Conversation Limits**: Maximum 50 conversations (configurable)
- **Automatic Cleanup**: Reduces storage when quota exceeded

### ✅ **Import/Export**
- **Export**: Download all conversations as JSON
- **Import**: Upload and merge conversation files
- **Backup**: Easy backup and restore functionality

### ✅ **Statistics & Monitoring**
- **Storage Usage**: Track localStorage consumption
- **Conversation Count**: Monitor total conversations
- **Message Count**: Track total messages across all conversations

## 🛠 **Implementation Details**

### App.js Integration
```javascript
// Load conversations on app start
useEffect(() => {
  const savedConversations = ConversationStorage.loadConversations();
  if (savedConversations.length > 0) {
    setConversations(savedConversations);
    setChatStarted(true);
    setActiveConversationId(savedConversations[0].id);
  }
}, []);

// Enhanced conversation management
const updateConversations = (updatedConversations) => {
  setConversations(updatedConversations);
  ConversationStorage.saveConversations(updatedConversations);
};
```

### ChatInterface.js Integration
```javascript
// Conversations are automatically saved when updated
setConversations(prev => prev.map(conv =>
  conv.id === conversationId
    ? { ...conv, messages: [...conv.messages, newMessage] }
    : conv
));
```

### Sidebar.js Integration
```javascript
// Delete conversations with confirmation
const handleDelete = (conversationId) => {
  if (window.confirm('Are you sure you want to delete this conversation?')) {
    onDeleteConversation(conversationId);
  }
};
```

## 📊 **Storage Management**

### Local Storage Structure
```
Key: "ca_study_conversations"
Value: JSON array of conversation objects
```

### Storage Limits
- **Maximum Conversations**: 50 (prevents localStorage overflow)
- **Auto-Reduction**: Reduces to 25 conversations if quota exceeded
- **Size Monitoring**: Tracks storage usage in KB

### Error Handling
- **JSON Parse Errors**: Gracefully handles corrupted data
- **Storage Quota**: Automatic handling of localStorage limits
- **Network Issues**: Offline-first design

## 🔧 **Advanced Features**

### 1. **Search Functionality**
```javascript
// Search conversations by title or content
const results = ConversationStorage.searchConversations("accounting");
```

### 2. **Export Conversations**
```javascript
// Download all conversations as a JSON file
ConversationStorage.exportConversations();
```

### 3. **Import Conversations**
```javascript
// Import conversations from a file
const result = await ConversationStorage.importConversations(file);
console.log(`Imported ${result.count} conversations`);
```

### 4. **Storage Statistics**
```javascript
// Get detailed storage information
const stats = ConversationStorage.getStatistics();
// Returns: { totalConversations, totalMessages, storageSize, ... }
```

## 🔐 **Data Security & Privacy**

### Client-Side Storage
- **No Server Storage**: All data stays in the user's browser
- **Privacy First**: No conversation data sent to servers
- **User Control**: Users can export/delete their own data

### Data Format
- **JSON Structure**: Human-readable format
- **Portable**: Easy to migrate between devices
- **Versionable**: Future-proof with version tracking

## 🎯 **User Experience Improvements**

### Before (Memory Only)
❌ Lost conversations on page refresh
❌ No conversation history
❌ No persistent sessions
❌ No conversation management

### After (Persistent Storage)
✅ Conversations survive page refreshes
✅ Full conversation history
✅ Persistent user sessions
✅ Advanced conversation management
✅ Search and filter capabilities
✅ Export/import functionality
✅ Storage monitoring and optimization

## 🚀 **Future Enhancements**

### Planned Features
1. **Cloud Sync**: Optional cloud storage integration
2. **User Authentication**: Multi-device synchronization
3. **Advanced Search**: Semantic search within conversations
4. **Tags/Categories**: Organize conversations by topics
5. **Shared Conversations**: Share conversations with others
6. **Analytics**: Conversation usage analytics

### Backend Integration (Optional)
```javascript
// Future: Optional backend storage
const backendStorage = new BackendConversationStorage();
await backendStorage.syncConversations(localConversations);
```

## 📋 **Migration Guide**

### For Existing Users
1. **Automatic Migration**: Existing conversations will be migrated to the new format
2. **No Data Loss**: All existing conversations are preserved
3. **Enhanced Features**: Immediate access to new capabilities

### For New Users
1. **Automatic Setup**: No configuration required
2. **Immediate Persistence**: Conversations saved from first use
3. **Full Feature Access**: All features available immediately

## 🔧 **Troubleshooting**

### Common Issues
1. **Storage Quota Exceeded**: Automatically handled with conversation reduction
2. **Corrupted Data**: Graceful fallback to an empty conversation list
3. **Import Errors**: Validation and error reporting for file imports

### Debug Information
```javascript
// Check storage status
const stats = ConversationStorage.getStatistics();
console.log('Storage Stats:', stats);

// Clear all conversations (emergency)
ConversationStorage.clearAllConversations();
```

## ✅ **Conclusion**

The conversation history system has been completely upgraded to provide:
- **Persistent Storage**: No more lost conversations
- **Advanced Management**: Full CRUD operations
- **User Control**: Export/import capabilities
- **Performance**: Optimized for large conversation histories
- **Reliability**: Robust error handling and data protection

This system provides a professional-grade conversation management experience while maintaining simplicity and user privacy.
backend/SETUP_OFFLINE.md
DELETED

# Offline Mode Setup Guide

## Problem
The application fails to start with network connectivity errors when trying to download the sentence transformer model from Hugging Face.

## Error Message
```
Failed to resolve 'huggingface.co' ([Errno 11001] getaddrinfo failed)
```

## Solutions

### Option 1: Download the Model When You Have Internet Access
1. When you have internet access, run the download script:
```bash
cd backend
python download_model.py
```
2. This will download and cache the model locally for offline use.

### Option 2: Manual Download
If you have internet access on another machine:

1. On a machine with internet access, run:
```python
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')
```
2. Copy the cached model from:
   - Windows: `C:\Users\{username}\.cache\huggingface\transformers\`
   - Linux/Mac: `~/.cache/huggingface/transformers/`
3. Place it in the same location on your offline machine.

### Option 3: Force Offline Mode
If you believe the model is already cached, you can force offline mode by setting environment variables:

```bash
set TRANSFORMERS_OFFLINE=1
set HF_HUB_OFFLINE=1
python backend_api.py
```

### Option 4: Network Troubleshooting
If you should have internet access:

1. Check your internet connection
2. If behind a corporate firewall, ensure `huggingface.co` is accessible
3. Try accessing `https://huggingface.co` in your browser
4. Contact your IT department if needed

## Verification
After setting up offline mode, you can verify the model is working by running:
```bash
python download_model.py
```

This will check if the model is cached and available for offline use.

## Technical Details
The sentence transformer model "all-MiniLM-L6-v2" is approximately 80MB and is used for generating embeddings from text for the vector search functionality.

The application has been modified to:
1. Try loading the model normally first
2. Fall back to offline mode if the network fails
3. Provide clear error messages with solutions
backend/STREAMING_ANALYSIS.md
DELETED

# Streaming Implementation Analysis

## Overview
This document analyzes the streaming implementation across the backend and frontend components of the CA Study Assistant application.

## ✅ Backend Implementation Analysis

### 1. RAG Streaming Function (`rag.py`)
- **Status**: ✅ **GOOD** - Recently updated with latest API
- **Implementation**:
```python
for chunk in self.client.models.generate_content_stream(
    model='gemini-2.5-flash',
    contents=prompt
):
    yield chunk.text
```
- **✅ Improvements Made**:
  - Updated to use `generate_content_stream` instead of the deprecated method
  - Uses `gemini-2.5-flash` model (latest)
  - Proper error handling with try-catch

### 2. FastAPI Streaming Endpoint (`backend_api.py`)
- **Status**: ✅ **IMPROVED** - Enhanced with better error handling
- **Implementation**:
```python
@app.post("/api/ask_stream")
async def ask_question_stream(request: QuestionRequest):
    async def event_generator():
        for chunk in rag_system.ask_question_stream(request.question):
            if chunk:  # Only yield non-empty chunks
                yield chunk
    return StreamingResponse(event_generator(), media_type="text/plain")
```
- **✅ Improvements Made**:
  - Added null/empty chunk filtering
  - Enhanced error handling in the generator
  - Proper async generator implementation

## ✅ Frontend Implementation Analysis

### 1. API Service (`services/api.js`)
- **Status**: ✅ **IMPROVED** - Enhanced with better error handling
- **Implementation**:
```javascript
export const sendMessageStream = async (message, onChunk) => {
  const response = await fetch(`${API_BASE_URL}/ask_stream`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ question: message }),
  });

  const reader = response.body.getReader();
  const decoder = new TextDecoder();

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    const chunk = decoder.decode(value, { stream: true });
    if (chunk) onChunk(chunk);
  }
};
```
- **✅ Improvements Made**:
  - Added HTTP status code checking
  - Added reader.releaseLock() for proper cleanup
  - Enhanced error handling
  - Added null chunk filtering

### 2. Chat Interface (`components/ChatInterface.js`)
- **Status**: ✅ **GOOD** - Proper real-time UI updates
- **Implementation**:
```javascript
await sendMessageStream(message.trim(), (chunk) => {
  fullResponse += chunk;
  setConversations(prev => prev.map(conv =>
    conv.id === conversationId ? {
      ...conv,
      messages: conv.messages.map(msg =>
        msg.id === assistantMessageId
          ? { ...msg, content: fullResponse }
          : msg
      ),
    } : conv
  ));
});
```
- **✅ Features**:
  - Real-time message updates
  - Proper loading states
  - Error handling with toast notifications
  - Typing indicators during streaming

## 🔧 Additional Improvements Made

### 1. Error Handling Enhancement
- **Backend**: Added comprehensive error handling in the streaming generator
- **Frontend**: Added HTTP status checking and proper resource cleanup
- **Both**: Added null/empty chunk filtering

### 2. Testing Infrastructure
- **Created**: `test_streaming.py` - Comprehensive test suite for streaming
- **Features**:
  - API connection testing
  - Streaming functionality testing
  - Error handling verification
  - Performance metrics

### 3. Documentation
- **Created**: `STREAMING_ANALYSIS.md` - This comprehensive analysis
- **Updated**: Inline code comments for better maintainability

## 🚀 How to Test the Implementation

### 1. Test API Connection
```bash
cd backend
python test_streaming.py
```

### 2. Test Full Application
```bash
# Terminal 1 - Backend
cd backend
python backend_api.py

# Terminal 2 - Frontend
cd frontend
npm start
```

### 3. Test Streaming Manually
1. Open the application in a browser
2. Ask a question
3. Observe the real-time streaming response
4. Check browser dev tools for any errors

## 📊 Performance Characteristics

### Backend
- **Latency**: Low - streams immediately as chunks arrive from Gemini
- **Memory**: Efficient - no buffering, direct streaming
- **Error Recovery**: Graceful - continues streaming even if some chunks fail

### Frontend
- **UI Responsiveness**: Excellent - real-time updates without blocking
- **Memory Usage**: Low - processes chunks as they arrive
- **Error Handling**: Comprehensive - proper cleanup and user feedback

## 🎯 API Compatibility

### Google Generative AI API
- **✅ Model**: `gemini-2.5-flash` (latest)
- **✅ Method**: `generate_content_stream` (current)
- **✅ Parameters**: `model` and `contents` (correct format)

### FastAPI Streaming
- **✅ Response Type**: `StreamingResponse` (correct)
- **✅ Media Type**: `text/plain` (compatible with frontend)
- **✅ Async Generator**: Proper async/await implementation

### Frontend Fetch API
- **✅ ReadableStream**: Proper stream handling
- **✅ TextDecoder**: Correct UTF-8 decoding
- **✅ Resource Management**: Proper cleanup

## ✅ Conclusion

The streaming implementation is **WORKING CORRECTLY** and has been enhanced with:

1. **Latest API compatibility** - Uses gemini-2.5-flash with the correct method
2. **Robust error handling** - Comprehensive error management
3. **Performance optimizations** - Efficient streaming without buffering
4. **Proper resource management** - No memory leaks or resource issues
5. **Real-time UI updates** - Smooth user experience
6. **Comprehensive testing** - Test suite for validation

The implementation follows best practices and should provide a smooth, responsive chat experience with real-time streaming responses.
backend/download_model.py
DELETED

```python
#!/usr/bin/env python3
"""
Download the sentence transformer model for offline use.
Run this script when you have internet access to cache the model locally.
"""

import os
import sys
from sentence_transformers import SentenceTransformer

def download_model():
    """Download and cache the sentence transformer model."""
    try:
        print("Downloading sentence transformer model 'all-MiniLM-L6-v2'...")
        print("This may take a few minutes on first run...")

        # This will download and cache the model
        model = SentenceTransformer("all-MiniLM-L6-v2")

        # Test that it works
        test_text = "This is a test sentence."
        embedding = model.encode([test_text])

        print(f"✓ Model downloaded successfully!")
        print(f"✓ Model tested successfully!")
        print(f"✓ Embedding dimension: {len(embedding[0])}")
        print(f"✓ Model cache location: {model.cache_folder}")

        return True

    except Exception as e:
        print(f"✗ Failed to download model: {e}")
        return False

def check_model_exists():
    """Check if the model is already cached."""
    try:
        # Try to load from cache
        import os
        os.environ['TRANSFORMERS_OFFLINE'] = '1'
        os.environ['HF_HUB_OFFLINE'] = '1'

        model = SentenceTransformer("all-MiniLM-L6-v2")
        print("✓ Model is already cached and available for offline use!")
        return True

    except Exception:
        print("✗ Model is not cached or not available for offline use")
        return False

if __name__ == "__main__":
    print("Sentence Transformer Model Downloader")
    print("=" * 40)

    # Check if model already exists
    if check_model_exists():
        print("\nModel is already available. No download needed.")
        sys.exit(0)

    # Download the model
    print("\nDownloading model...")
    if download_model():
        print("\n✓ Setup complete! You can now run the application offline.")
    else:
        print("\n✗ Download failed. Please check your internet connection.")
        sys.exit(1)
```
backend/vector_store.py
CHANGED
@@ -38,62 +38,78 @@ class VectorStore:

The online loader with cache fallbacks is replaced by a loader that reads the model from the repository's local `model/` directory; the previous implementation is retained in the file as a comment block:

```python
        self._create_collection_if_not_exists()

    def _initialize_embedding_model(self):
        """Initialize the embedding model from a local directory"""
        try:
            print("Loading sentence transformer model from local path...")
            # Resolve local path to model directory
            current_dir = os.path.dirname(os.path.abspath(__file__))
            local_model_path = os.path.join(current_dir, "..", "model", "all-MiniLM-L6-v2")
            model = SentenceTransformer(local_model_path)
            print("Successfully loaded local sentence transformer model")
            return model
        except Exception as e:
            print(f"Failed to load local model: {e}")
            raise RuntimeError("Failed to initialize embedding model from local path")

    # def _initialize_embedding_model(self):
    #     """Initialize the embedding model with offline support"""
    #     try:
    #         # Try to load the model normally first
    #         print("Attempting to load sentence transformer model...")
    #         model = SentenceTransformer("all-MiniLM-L6-v2")
    #         print("Successfully loaded sentence transformer model")
    #         return model
    #
    #     except Exception as e:
    #         print(f"Failed to load model online: {e}")
    #         print("Attempting to load model in offline mode...")
    #
    #         try:
    #             # Try to load from cache with offline mode
    #             import os
    #             os.environ['TRANSFORMERS_OFFLINE'] = '1'
    #             os.environ['HF_HUB_OFFLINE'] = '1'
    #
    #             model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=None)
    #             print("Successfully loaded model in offline mode")
    #             return model
    #
    #         except Exception as offline_error:
    #             print(f"Failed to load model in offline mode: {offline_error}")
    #
    #             # Try to find a local cache directory
    #             try:
    #                 import transformers
    #                 cache_dir = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "transformers")
    #                 if os.path.exists(cache_dir):
    #                     print(f"Looking for cached model in: {cache_dir}")
    #
    #                     # Try to load from specific cache directory
    #                     model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder=cache_dir)
    #                     print("Successfully loaded model from cache")
    #                     return model
    #
    #             except Exception as cache_error:
    #                 print(f"Failed to load from cache: {cache_error}")
    #
    #             # If all else fails, provide instructions
    #             error_msg = """
    #             Failed to initialize sentence transformer model. This is likely due to network connectivity issues.
    #
    #             Solutions:
    #             1. Check your internet connection
    #             2. If behind a corporate firewall, ensure huggingface.co is accessible
    #             3. Pre-download the model when you have internet access by running:
    #                python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
    #             4. Or manually download the model and place it in your cache directory
    #
    #             For now, the application will not work without the embedding model.
    #             """
    #
    #             print(error_msg)
    #             raise RuntimeError(f"Cannot initialize embedding model: {str(e)}")

    def _create_collection_if_not_exists(self) -> bool:
        """
```
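Inside the Docker image, `backend/vector_store.py` lives in `/app/backend`, so the relative `../model/all-MiniLM-L6-v2` resolves to the `/app/model/all-MiniLM-L6-v2` directory the Dockerfile now copies in. A minimal sketch of that resolution, assuming the image layout above:

```python
# Sketch of the path resolution performed by _initialize_embedding_model
# inside the container (assumed layout: code in /app/backend, model in /app/model).
import os

backend_file = "/app/backend/vector_store.py"   # stand-in for __file__
current_dir = os.path.dirname(os.path.abspath(backend_file))
local_model_path = os.path.join(current_dir, "..", "model", "all-MiniLM-L6-v2")
print(os.path.normpath(local_model_path))       # -> /app/model/all-MiniLM-L6-v2
```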
model/all-MiniLM-L6-v2/1_Pooling/config.json
ADDED

```json
{
  "word_embedding_dimension": 384,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
```
model/all-MiniLM-L6-v2/README.md
ADDED

---
language: en
license: apache-2.0
library_name: sentence-transformers
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
datasets:
- s2orc
- flax-sentence-embeddings/stackexchange_xml
- ms_marco
- gooaq
- yahoo_answers_topics
- code_search_net
- search_qa
- eli5
- snli
- multi_nli
- wikihow
- natural_questions
- trivia_qa
- embedding-data/sentence-compression
- embedding-data/flickr30k-captions
- embedding-data/altlex
- embedding-data/simple-wiki
- embedding-data/QQP
- embedding-data/SPECTER
- embedding-data/PAQ_pairs
- embedding-data/WikiAnswers
pipeline_tag: sentence-similarity
---

# all-MiniLM-L6-v2
This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for tasks like clustering or semantic search.

## Usage (Sentence-Transformers)
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:
```python
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = model.encode(sentences)
print(embeddings)
```

## Usage (HuggingFace Transformers)
Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: first, you pass your input through the transformer model, then you have to apply the right pooling operation on top of the contextualized word embeddings.

```python
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F

# Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

# Normalize embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

print("Sentence embeddings:")
print(sentence_embeddings)
```

------

## Background

The project aims to train sentence embedding models on very large sentence-level datasets using a self-supervised contrastive learning objective. We used the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model and fine-tuned it on a 1B sentence pairs dataset. We use a contrastive learning objective: given a sentence from the pair, the model should predict which out of a set of randomly sampled other sentences was actually paired with it in our dataset.

We developed this model during the [Community week using JAX/Flax for NLP & CV](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/7104), organized by Hugging Face. We developed this model as part of the project: [Train the Best Sentence Embedding Model Ever with 1B Training Pairs](https://discuss.huggingface.co/t/train-the-best-sentence-embedding-model-ever-with-1b-training-pairs/7354). We benefited from efficient hardware infrastructure to run the project: 7 TPUs v3-8, as well as intervention from Google's Flax, JAX, and Cloud team members about efficient deep learning frameworks.

## Intended uses

Our model is intended to be used as a sentence and short paragraph encoder. Given an input text, it outputs a vector which captures the semantic information. The sentence vector may be used for information retrieval, clustering or sentence similarity tasks.

By default, input text longer than 256 word pieces is truncated.

## Training procedure

### Pre-training

We use the pretrained [`nreimers/MiniLM-L6-H384-uncased`](https://huggingface.co/nreimers/MiniLM-L6-H384-uncased) model. Please refer to the model card for more detailed information about the pre-training procedure.

### Fine-tuning

We fine-tune the model using a contrastive objective. Formally, we compute the cosine similarity of each possible sentence pair from the batch. We then apply the cross entropy loss by comparing with the true pairs.

#### Hyperparameters

We trained our model on a TPU v3-8. We trained the model for 100k steps using a batch size of 1024 (128 per TPU core). We used a learning rate warm-up of 500 steps. The sequence length was limited to 128 tokens. We used the AdamW optimizer with a 2e-5 learning rate. The full training script is accessible in this current repository: `train_script.py`.

#### Training data

We use the concatenation of multiple datasets to fine-tune our model. The total number of sentence pairs is above 1 billion. We sampled each dataset given a weighted probability; the configuration is detailed in the `data_config.json` file.

| Dataset | Paper | Number of training tuples |
|--------------------------------------------------------|:----------------------------------------:|:--------------------------:|
| [Reddit comments (2015-2018)](https://github.com/PolyAI-LDN/conversational-datasets/tree/master/reddit) | [paper](https://arxiv.org/abs/1904.06472) | 726,484,430 |
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Abstracts) | [paper](https://aclanthology.org/2020.acl-main.447/) | 116,288,806 |
| [WikiAnswers](https://github.com/afader/oqa#wikianswers-corpus) Duplicate question pairs | [paper](https://doi.org/10.1145/2623330.2623677) | 77,427,422 |
| [PAQ](https://github.com/facebookresearch/PAQ) (Question, Answer) pairs | [paper](https://arxiv.org/abs/2102.07033) | 64,371,441 |
| [S2ORC](https://github.com/allenai/s2orc) Citation pairs (Titles) | [paper](https://aclanthology.org/2020.acl-main.447/) | 52,603,982 |
| [S2ORC](https://github.com/allenai/s2orc) (Title, Abstract) | [paper](https://aclanthology.org/2020.acl-main.447/) | 41,769,185 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Body) pairs | - | 25,316,456 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title+Body, Answer) pairs | - | 21,396,559 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) (Title, Answer) pairs | - | 21,396,559 |
| [MS MARCO](https://microsoft.github.io/msmarco/) triplets | [paper](https://doi.org/10.1145/3404835.3462804) | 9,144,553 |
| [GOOAQ: Open Question Answering with Diverse Answer Types](https://github.com/allenai/gooaq) | [paper](https://arxiv.org/pdf/2104.08727.pdf) | 3,012,496 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 1,198,260 |
| [Code Search](https://huggingface.co/datasets/code_search_net) | - | 1,151,414 |
| [COCO](https://cocodataset.org/#home) Image captions | [paper](https://link.springer.com/chapter/10.1007%2F978-3-319-10602-1_48) | 828,395 |
| [SPECTER](https://github.com/allenai/specter) citation triplets | [paper](https://doi.org/10.18653/v1/2020.acl-main.207) | 684,100 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Question, Answer) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 681,164 |
| [Yahoo Answers](https://www.kaggle.com/soumikrakshit/yahoo-answers-dataset) (Title, Question) | [paper](https://proceedings.neurips.cc/paper/2015/hash/250cf8b51c773f3f8dc8b4be867a9a02-Abstract.html) | 659,896 |
| [SearchQA](https://huggingface.co/datasets/search_qa) | [paper](https://arxiv.org/abs/1704.05179) | 582,261 |
| [Eli5](https://huggingface.co/datasets/eli5) | [paper](https://doi.org/10.18653/v1/p19-1346) | 325,475 |
| [Flickr 30k](https://shannon.cs.illinois.edu/DenotationGraph/) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/229/33) | 317,695 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles) | | 304,525 |
| AllNLI ([SNLI](https://nlp.stanford.edu/projects/snli/) and [MultiNLI](https://cims.nyu.edu/~sbowman/multinli/)) | [paper SNLI](https://doi.org/10.18653/v1/d15-1075), [paper MultiNLI](https://doi.org/10.18653/v1/n18-1101) | 277,230 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (bodies) | | 250,519 |
| [Stack Exchange](https://huggingface.co/datasets/flax-sentence-embeddings/stackexchange_xml) Duplicate questions (titles+bodies) | | 250,460 |
| [Sentence Compression](https://github.com/google-research-datasets/sentence-compression) | [paper](https://www.aclweb.org/anthology/D13-1155/) | 180,000 |
| [Wikihow](https://github.com/pvl/wikihow_pairs_dataset) | [paper](https://arxiv.org/abs/1810.09305) | 128,542 |
| [Altlex](https://github.com/chridey/altlex/) | [paper](https://aclanthology.org/P16-1135.pdf) | 112,696 |
| [Quora Question Triplets](https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs) | - | 103,663 |
| [Simple Wikipedia](https://cs.pomona.edu/~dkauchak/simplification/) | [paper](https://www.aclweb.org/anthology/P11-2117/) | 102,225 |
| [Natural Questions (NQ)](https://ai.google.com/research/NaturalQuestions) | [paper](https://transacl.org/ojs/index.php/tacl/article/view/1455) | 100,231 |
| [SQuAD2.0](https://rajpurkar.github.io/SQuAD-explorer/) | [paper](https://aclanthology.org/P18-2124.pdf) | 87,599 |
| [TriviaQA](https://huggingface.co/datasets/trivia_qa) | - | 73,346 |
| **Total** | | **1,170,060,424** |
model/all-MiniLM-L6-v2/config.json
ADDED

```json
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.53.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
```
model/all-MiniLM-L6-v2/config_sentence_transformers.json
ADDED

```json
{
  "__version__": {
    "sentence_transformers": "5.0.0",
    "transformers": "4.53.0",
    "pytorch": "2.7.1+cpu"
  },
  "model_type": "SentenceTransformer",
  "prompts": {
    "query": "",
    "document": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
```
model/all-MiniLM-L6-v2/model.safetensors
ADDED

```
version https://git-lfs.github.com/spec/v1
oid sha256:1377e9af0ca0b016a9f2aa584d6fc71ab3ea6804fae21ef9fb1416e2944057ac
size 90864192
```
model/all-MiniLM-L6-v2/modules.json
ADDED

```json
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
```
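`modules.json` defines the three-stage encode pipeline the loader in `vector_store.py` reconstructs: a BERT Transformer, mean Pooling (per `1_Pooling/config.json`), then L2 Normalize. A minimal sketch of inspecting that pipeline, assuming the `model/` directory from this commit is on disk:

```python
# Sketch: walk the module pipeline of the locally saved model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("model/all-MiniLM-L6-v2")
for module in model:                     # Transformer -> Pooling -> Normalize
    print(type(module).__name__)

vec = model.encode(["hello world"])[0]   # already L2-normalized by the last stage
print(vec @ vec)                         # dot product with itself, approximately 1.0
```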
model/all-MiniLM-L6-v2/sentence_bert_config.json
ADDED

```json
{
  "max_seq_length": 256,
  "do_lower_case": false
}
```
model/all-MiniLM-L6-v2/special_tokens_map.json
ADDED

```json
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
```
model/all-MiniLM-L6-v2/tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
model/all-MiniLM-L6-v2/tokenizer_config.json
ADDED

```json
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 256,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
```
model/all-MiniLM-L6-v2/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
temp.py
ADDED

```python
from sentence_transformers import SentenceTransformer

# This downloads the model to your local cache
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
print("Model downloading")
model.save('model/all-MiniLM-L6-v2')
```
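`temp.py` appears to be the one-off helper used to vendor the model: run once with network access, it downloads the checkpoint and saves a self-contained copy under `model/all-MiniLM-L6-v2`, which the Dockerfile then copies into the image. A quick follow-up check that the saved copy loads standalone, a sketch assuming it is run from the repository root and not part of the commit:

```python
# Verify the vendored copy loads without touching the Hugging Face Hub.
from sentence_transformers import SentenceTransformer

local = SentenceTransformer("model/all-MiniLM-L6-v2")  # no download triggered
print(local.get_sentence_embedding_dimension())        # -> 384
```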