Spaces:

sugiv
/

LeetMonkey-8bit-GGUF-Inference

Sleeping

App Files Community

sugiv committed on Sep 9, 2024

Commit

4f2457b

1 Parent(s): 27cdec6

Gosh this Leetmonkey

Browse files

Files changed (1) hide show

app.py +75 -125

app.py CHANGED Viewed

@@ -2,12 +2,9 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import re
-from datasets import load_dataset
 import random
 import logging
 import os
-import autopep8
-import textwrap
 import jwt
 from typing import Dict, Any
@@ -25,33 +22,9 @@ JWT_ALGORITHM = "HS256"
 MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
 REPO_ID = "sugiv/leetmonkey-peft-gguf"
-# Load the dataset
-dataset = load_dataset("sugiv/leetmonkey_python_dataset")
-train_dataset = dataset["train"]
-def download_model(model_name):
-    logger.info(f"Downloading model: {model_name}")
-    model_path = hf_hub_download(
-        repo_id=REPO_ID,
-        filename=model_name,
-        cache_dir="./models",
-        force_download=True,
-        resume_download=True
-    )
-    logger.info(f"Model downloaded: {model_path}")
-    return model_path
-# Download and load the 8-bit model at startup
-model_path = download_model(MODEL_NAME)
-llm = Llama(
-    model_path=model_path,
-    n_ctx=1024,
-    n_threads=8,
-    n_gpu_layers=-1,  # Use all available GPU layers
-    verbose=False,
-    n_batch=512,
-    mlock=True
-)
 logger.info("8-bit model loaded successfully")
 # Generation parameters
@@ -65,7 +38,17 @@ generation_kwargs = {
     "repeat_penalty": 1.1
 }
-def generate_solution(instruction):
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""### Instruction:
 {system_prompt}
@@ -80,117 +63,84 @@ Here's the complete Python function implementation:
 ```python
 """
-    for chunk in llm(full_prompt, stream=True, **generation_kwargs):
-        yield chunk["choices"][0]["text"]
-def extract_and_format_code(text):
-    # Extract code between triple backticks
-    code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
-        code = text
-    # Dedent the code to remove any common leading whitespace
-    code = textwrap.dedent(code)
-    # Split the code into lines
-    lines = code.split('\n')
-    # Ensure proper indentation
-    indented_lines = []
-    for line in lines:
-        if line.strip().startswith('class') or line.strip().startswith('def'):
-            indented_lines.append(line)  # Keep class and function definitions as is
-        elif line.strip():  # If the line is not empty
-            indented_lines.append('    ' + line)  # Add 4 spaces of indentation
-        else:
-            indented_lines.append(line)  # Keep empty lines as is
-    formatted_code = '\n'.join(indented_lines)
-    try:
-        return autopep8.fix_code(formatted_code)
-    except:
-        return formatted_code
-def select_random_problem():
-    return random.choice(train_dataset)['instruction']
-def verify_token(token: str) -> bool:
-    try:
-        jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
-        return True
-    except jwt.PyJWTError:
-        return False
-last_generated_solution = ""
-def api_generate_solution(instruction: str, token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
-    global last_generated_solution
-    generated_text = "".join(list(generate_solution(instruction)))
-    last_generated_solution = extract_and_format_code(generated_text)
-    return {"solution": last_generated_solution}
-def api_stream_solution(instruction: str, token: str) -> Dict[str, Any]:
-    if not verify_token(token):
-        return {"error": "Invalid token"}
     def generate():
-        global last_generated_solution
-        generated_text = ""
-        for token in generate_solution(instruction):
-            generated_text += token
-            yield {"token": token}
-        last_generated_solution = extract_and_format_code(generated_text)
-        yield {"solution": last_generated_solution}
     return generate()
-def api_explain_solution(token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
-    if not last_generated_solution:
-        return {"error": "No solution has been generated yet"}
-    explanation_prompt = f"Explain the following Python code:\n\n{last_generated_solution}\n\nExplanation:"
-    explanation = llm(explanation_prompt, max_tokens=256)["choices"][0]["text"]
-    return {"explanation": explanation}
-def api_random_problem(token: str) -> Dict[str, Any]:
-    if not verify_token(token):
-        return {"error": "Invalid token"}
-    return {"problem": select_random_problem()}
-def gradio_api(api_name: str, *args):
-    if api_name == "generate_solution":
-        return api_generate_solution(*args)
-    elif api_name == "stream_solution":
-        return api_stream_solution(*args)
-    elif api_name == "explain_solution":
-        return api_explain_solution(*args)
-    elif api_name == "random_problem":
-        return api_random_problem(*args)
-    else:
-        return {"error": "Invalid API name"}
-iface = gr.Interface(
-    fn=gradio_api,
-    inputs=[
-        gr.Textbox(label="API Name"),
-        gr.Textbox(label="Problem Instruction"),
-        gr.Textbox(label="JWT Token")
-    ],
-    outputs=gr.JSON(label="API Response"),
-    title="LeetCode Problem Solver API",
-    description="Provide the API name, problem instruction (if required), and JWT token to use the desired functionality."
 )
 if __name__ == "__main__":
-    iface.launch(share=True)

 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 import re
 import random
 import logging
 import os
 import jwt
 from typing import Dict, Any
 MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
 REPO_ID = "sugiv/leetmonkey-peft-gguf"
+# Load the model
+model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_NAME, cache_dir="./models")
+llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=-1)
 logger.info("8-bit model loaded successfully")
 # Generation parameters
     "repeat_penalty": 1.1
 }
+def verify_token(token: str) -> bool:
+    try:
+        jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
+        return True
+    except jwt.PyJWTError:
+        return False
+def generate_solution(instruction: str, token: str) -> Dict[str, Any]:
+    if not verify_token(token):
+        return {"error": "Invalid token"}
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""### Instruction:
 {system_prompt}
 ```python
 """
+    response = llm(full_prompt, **generation_kwargs)
+    generated_text = response["choices"][0]["text"]
+    # Extract and format code
+    code_match = re.search(r'```python\s*(.*?)\s*```', generated_text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
+        code = generated_text
+    return {"solution": code}
+def stream_solution(instruction: str, token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
+    system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
+    full_prompt = f"""### Instruction:
+{system_prompt}
+Implement the following function for the LeetCode problem:
+{instruction}
+### Response:
+Here's the complete Python function implementation:
+```python
+"""
     def generate():
+        for chunk in llm(full_prompt, stream=True, **generation_kwargs):
+            yield chunk["choices"][0]["text"]
     return generate()
+def random_problem(token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
+    # This is a placeholder. You should replace it with actual logic to fetch a random problem from your dataset.
+    problems = [
+        "Implement a function to reverse a linked list",
+        "Write a function to find the maximum subarray sum",
+        "Implement a function to check if a binary tree is balanced"
+    ]
+    return {"problem": random.choice(problems)}
+# Create Gradio interfaces for each endpoint
+generate_interface = gr.Interface(
+    fn=generate_solution,
+    inputs=[gr.Textbox(label="Problem Instruction"), gr.Textbox(label="JWT Token")],
+    outputs=gr.JSON(),
+    title="Generate Solution API",
+    description="Provide a LeetCode problem instruction and a valid JWT token to generate a solution."
+)
+stream_interface = gr.Interface(
+    fn=stream_solution,
+    inputs=[gr.Textbox(label="Problem Instruction"), gr.Textbox(label="JWT Token")],
+    outputs=gr.JSON(),
+    title="Stream Solution API",
+    description="Provide a LeetCode problem instruction and a valid JWT token to stream a solution."
+)
+random_problem_interface = gr.Interface(
+    fn=random_problem,
+    inputs=gr.Textbox(label="JWT Token"),
+    outputs=gr.JSON(),
+    title="Random Problem API",
+    description="Provide a valid JWT token to get a random LeetCode problem."
+)
+# Combine interfaces
+demo = gr.TabbedInterface(
+    [generate_interface, stream_interface, random_problem_interface],
+    ["Generate Solution", "Stream Solution", "Random Problem"]
 )
 if __name__ == "__main__":
+    demo.launch()