Ais committed
Commit bf0f8a4 · verified · 1 Parent(s): 18f4dad

Update app/main.py

Files changed (1)
  1. app/main.py +186 -145
app/main.py CHANGED
@@ -1,149 +1,190 @@
- import os
- import torch
- from fastapi import FastAPI, Request
- from fastapi.responses import JSONResponse
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from peft import PeftModel
- from starlette.middleware.cors import CORSMiddleware
-
- # === Setup FastAPI ===
- app = FastAPI()
-
- # === CORS (optional for frontend access) ===
- app.add_middleware(
-     CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
- )
-
- # === Load API Key from Hugging Face Secrets ===
- API_KEY = os.getenv("API_KEY", "undefined")
-
- # === Model Settings ===
- BASE_MODEL = "Qwen/Qwen2-0.5B-Instruct"
- ADAPTER_PATH = "adapter"
-
- print("🔧 Loading tokenizer...")
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
-
- print("🧠 Loading base model on CPU...")
- base_model = AutoModelForCausalLM.from_pretrained(
-     BASE_MODEL,
-     trust_remote_code=True,
-     torch_dtype=torch.float32
- ).cpu()
-
- print("🔗 Applying LoRA adapter...")
- model = PeftModel.from_pretrained(base_model, ADAPTER_PATH).cpu()
- model.eval()
-
- print("✅ Model and adapter loaded successfully.")
-
- # === Root Route ===
- @app.get("/")
- def root():
-     return {"message": "🧠 Qwen2.5-0.5B-Instruct API is running on CPU!"}
-
- # === Chat Completion API ===
- @app.post("/v1/chat/completions")
- async def chat(request: Request):
-     # ✅ API Key Authorization
-     auth_header = request.headers.get("Authorization", "")
-     if not auth_header.startswith("Bearer "):
-         return JSONResponse(status_code=401, content={"error": "Missing Bearer token in Authorization header."})
-
-     token = auth_header.replace("Bearer ", "").strip()
-     if token != API_KEY:
-         return JSONResponse(status_code=401, content={"error": "Invalid API key."})
-
-     # ✅ Parse Request
-     try:
-         body = await request.json()
-         messages = body.get("messages", [])
-         if not messages or not isinstance(messages, list):
-             raise ValueError("Invalid or missing 'messages' field.")
-
-         temperature = body.get("temperature", 0.7)
-         max_tokens = body.get("max_tokens", 512)
-
-     except Exception as e:
-         return JSONResponse(status_code=400, content={"error": f"Bad request: {str(e)}"})
-
-     # FIXED: Only use last 4 messages to prevent stacking
-     recent_messages = messages[-4:] if len(messages) > 4 else messages
-
-     # Build clean conversation prompt
-     formatted_prompt = ""
-     for message in recent_messages:
-         role = message.get("role", "")
-         content = message.get("content", "")
-         if role == "system":
-             formatted_prompt += f"<|im_start|>system\n{content}<|im_end|>\n"
-         elif role == "user":
-             formatted_prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
-         elif role == "assistant":
-             formatted_prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
-
-     # Add the assistant start token for generation
-     formatted_prompt += "<|im_start|>assistant\n"
-
-     print(f"🤖 Processing {len(recent_messages)} recent messages")
-
-     inputs = tokenizer(formatted_prompt, return_tensors="pt").to("cpu")
-
-     # ✅ Generate Response
-     with torch.no_grad():
-         outputs = model.generate(
-             **inputs,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=0.9,
-             do_sample=True,
-             pad_token_id=tokenizer.eos_token_id,
-             eos_token_id=tokenizer.eos_token_id
-         )
-
-     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-     # ✅ FIXED: Extract ONLY the new assistant response
-     final_answer = decoded.split("<|im_start|>assistant\n")[-1].strip()
-
-     # Remove any end tokens or artifacts
-     if "<|im_end|>" in final_answer:
-         final_answer = final_answer.split("<|im_end|>")[0].strip()
-
-     # Remove any repeated system prompts or guidelines that leaked through
-     if "Guidelines:" in final_answer:
-         final_answer = final_answer.split("Guidelines:")[0].strip()
-
-     if "Response format:" in final_answer:
-         final_answer = final_answer.split("Response format:")[0].strip()
-
-     # Remove VS Code context if it leaked through
-     if "[VS Code Context:" in final_answer:
-         lines = final_answer.split('\n')
-         cleaned_lines = [line for line in lines if not line.strip().startswith('[VS Code Context:')]
-         final_answer = '\n'.join(cleaned_lines).strip()
-
-     print(f"Clean response: {final_answer[:100]}...")
-
-     # ✅ OpenAI-style Response
-     return {
-         "id": "chatcmpl-local-001",
-         "object": "chat.completion",
-         "model": "Qwen2.5-0.5B-Instruct-LoRA",
-         "choices": [
-             {
-                 "index": 0,
-                 "message": {
-                     "role": "assistant",
-                     "content": final_answer
-                 },
-                 "finish_reason": "stop"
-             }
-         ]
-     }
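For reference, the server removed above implemented an OpenAI-compatible chat endpoint. A minimal TypeScript sketch of a client call against it follows; the URL and API key are the values hard-coded in the new chat.ts below, and the helper name is illustrative, so treat all of it as deployment-specific.

// Minimal client sketch for the removed /v1/chat/completions endpoint.
// URL and key come from chat.ts below; demoRequest is a hypothetical name.
async function demoRequest(): Promise<void> {
    const res = await fetch('https://ais0909-aigen.hf.space/v1/chat/completions', {
        method: 'POST',
        headers: {
            'Authorization': 'Bearer aigenapikey1234567890',
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            messages: [{ role: 'user', content: 'Hello!' }],
            temperature: 0.7,
            max_tokens: 512
        })
    });
    const json: any = await res.json();
    // The server replies in OpenAI style: choices[0].message.content holds the answer.
    console.log(json.choices?.[0]?.message?.content);
}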
 
+ // chat.ts - Fixed Apollo AI Chat Module
+
+ import * as vscode from 'vscode';
+
+ // Use global fetch if available (VS Code >= 1.74); otherwise fall back to requiring node-fetch
+ async function getFetch(): Promise<any> {
+     if (typeof (globalThis as any).fetch !== 'undefined') {
+         return (globalThis as any).fetch;
+     } else {
+         try {
+             const fetch = require('node-fetch');
+             return fetch.default || fetch;
+         } catch (error) {
+             throw new Error('Unable to load fetch. Please ensure node-fetch is installed.');
+         }
+     }
+ }
+
+ // Configuration
+ const API_URL = 'https://ais0909-aigen.hf.space/v1/chat/completions';
+ const API_KEY = 'aigenapikey1234567890';
+ const MAX_RETRIES = 3;
+ const TIMEOUT_MS = 300000; // 5 minutes
+
+ interface APIResponse {
+     choices?: Array<{
+         message?: { content?: string };
+         text?: string;
+     }>;
+     generated_text?: string;
+     error?: string;
+     id?: string;
+     object?: string;
+     model?: string;
+ }
+
+ interface ChatContext {
+     currentFile?: string;
+     language?: string;
+     workspaceFolder?: string;
+     selectedText?: string;
+ }
+
+ export class ApolloAI {
+     private static conversationHistory: Array<{role: string, content: string}> = [];
+     private static context: ChatContext = {};
+
+     static setContext(context: ChatContext) {
+         this.context = context;
+     }
+
+     static addToHistory(role: 'user' | 'assistant', content: string) {
+         this.conversationHistory.push({ role, content });
+
+         // Keep only last 2 messages to prevent conversation stacking
+         if (this.conversationHistory.length > 2) {
+             this.conversationHistory = this.conversationHistory.slice(-2);
+         }
+     }
+
+     static clearHistory() {
+         this.conversationHistory = [];
+     }
+
+     static getHistory() {
+         return [...this.conversationHistory];
+     }
+ }
+
+ export async function askAI(prompt: string, options: {
+     temperature?: number;
+     maxTokens?: number;
+     includeContext?: boolean;
+     retries?: number;
+     forceMode?: boolean;
+ } = {}): Promise<string> {
+     const {
+         temperature = 0.7,
+         maxTokens = 1500,
+         includeContext = true,
+         retries = MAX_RETRIES,
+         forceMode = false
+     } = options;
+
+     console.log('🤖 Apollo AI: Starting request for prompt:', prompt.substring(0, 100) + '...');
+     console.log('🔧 Force mode:', forceMode);
+
+     // Build messages array for proper conversation
+     const messages = [];
+
+     // ✅ FIXED: Much simpler system messages
+     if (forceMode) {
+         messages.push({
+             role: 'system',
+             content: 'Give direct, brief answers only. No explanations.'
+         });
+     } else {
+         messages.push({
+             role: 'system',
+             content: 'You are a helpful assistant.'
+         });
+     }
+
+     // Add conversation history (only if includeContext is true and we have history)
+     if (includeContext && ApolloAI.getHistory().length > 0) {
+         const history = ApolloAI.getHistory().slice(-2); // Last 2 messages only
+         for (const msg of history) {
+             messages.push({
+                 role: msg.role,
+                 content: msg.content
+             });
+         }
+     }
+
+     // Add current user message
+     messages.push({
+         role: 'user',
+         content: prompt
+     });
+
+     // Add VS Code context if available (but not in conversation history)
+     const editor = vscode.window.activeTextEditor;
+     if (includeContext && editor && !forceMode) {
+         const fileName = editor.document.fileName.split(/[/\\]/).pop();
+         const language = editor.document.languageId;
+         messages[messages.length - 1].content += `\n\n[VS Code Context: Editing ${fileName} (${language})]`;
+     }
+
+     const headers = {
+         'Authorization': `Bearer ${API_KEY}`,
+         'Content-Type': 'application/json',
+         'User-Agent': 'Apollo-AI-VSCode-Extension/1.2.0'
+     };
+
+     const body = {
+         messages: messages,
+         temperature: forceMode ? 0.3 : temperature, // Lower temperature for force mode
+         max_tokens: forceMode ? 200 : maxTokens,    // Much shorter responses for force mode
+         stream: false
+     };
+
+     for (let attempt = 1; attempt <= retries; attempt++) {
+         try {
+             const fetchImpl = await getFetch();
+
+             console.log(`🚀 Apollo AI: Attempt ${attempt}/${retries}, sending request to API...`);
+             console.log('📤 Request body:', JSON.stringify(body, null, 2));
+
+             const controller = new AbortController();
+             const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS);
+
+             const res = await fetchImpl(API_URL, {
+                 method: 'POST',
+                 headers,
+                 body: JSON.stringify(body),
+                 signal: controller.signal
+             });
+
+             clearTimeout(timeoutId);
+
+             console.log('📨 Apollo AI: Received response, status:', res.status);
+
+             if (!res.ok) {
+                 const errorText = await res.text().catch(() => 'Unable to read error response');
+                 console.error(`❌ Apollo AI: API Error ${res.status}: ${errorText}`);
+
+                 if (res.status === 429) {
+                     throw new Error('⏱️ Rate limit exceeded. Please wait a moment and try again.');
+                 } else if (res.status === 401) {
+                     throw new Error('🔑 Authentication failed. Please check your API key.');
+                 } else if (res.status >= 500) {
+                     throw new Error('🔧 Server error. The AI service is temporarily unavailable.');
+                 }
+
+                 throw new Error(`API Error (${res.status}): ${res.statusText}`);
+             }
+
+             const json: APIResponse = await res.json();
+             console.log('📦 Apollo AI: Raw JSON response:', JSON.stringify(json, null, 2));
+
+             // FIXED: Extract response from proper JSON structure
+             let responseText = '';
+
+             // Handle the actual API response format
+             if (json.choices && json.choices[0] && json.choices[0].message) {
+                 responseText = json.choices[0].message.content || '';
+                 console.log('✅ Extracted content from JSON response:', responseText.substring(0, 100) + '...');
+             } else if (json.generated_text) {
+                 responseText = json.generated_text;
+                 console.log('✅ Extracted generated_text from response:', responseText.substring(0, 100) + '...');
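With chat.ts in place, a consumer inside the extension might be wired up roughly as follows. This is an illustrative sketch: the command id and activation wiring are hypothetical, and only askAI and ApolloAI come from the module above.

// Hypothetical wiring from an extension's activate(); the command id is illustrative.
import * as vscode from 'vscode';
import { askAI, ApolloAI } from './chat';

export function activate(context: vscode.ExtensionContext) {
    context.subscriptions.push(
        vscode.commands.registerCommand('apollo.ask', async () => {
            const prompt = await vscode.window.showInputBox({ prompt: 'Ask Apollo AI' });
            if (!prompt) { return; }
            try {
                const answer = await askAI(prompt);
                // Record the exchange after the call so the prompt is not
                // duplicated into the request via the history slice.
                ApolloAI.addToHistory('user', prompt);
                ApolloAI.addToHistory('assistant', answer);
                vscode.window.showInformationMessage(answer);
            } catch (err: any) {
                vscode.window.showErrorMessage(err.message ?? String(err));
            }
        })
    );
}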