Spaces:

sugiv
/

LeetMonkey-8bit-GGUF-Inference

Sleeping

App Files Files Community

sugiv committed on Sep 8, 2024

Commit

394f072

1 Parent(s): 55f0c97

First version with APIs

Browse files

Files changed (2) hide show

app.py +177 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+import re
+from datasets import load_dataset
+import random
+import logging
+import os
+import autopep8
+import textwrap
+import jwt
+from datetime import datetime, timedelta
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# JWT settings
+# Never fall back to a well-known literal secret: anyone who can read this
+# file could then forge valid tokens. When JWT_SECRET is unset, sign with a
+# random per-process secret instead, so only tokens minted by this running
+# instance verify (they stop verifying after a restart).
+JWT_SECRET = os.environ.get("JWT_SECRET")
+if not JWT_SECRET:
+    logger.warning("JWT_SECRET is not set; using a random per-process secret")
+    JWT_SECRET = os.urandom(32).hex()
+JWT_ALGORITHM = "HS256"
+# Model settings
+MODEL_NAME = "leetmonkey_peft__q8_0.gguf"
+REPO_ID = "sugiv/leetmonkey-peft-gguf"
+def download_model(model_name, force_download=False):
+    """Fetch ``model_name`` from the ``REPO_ID`` Hub repository.
+
+    Args:
+        model_name: File name of the model inside the repository.
+        force_download: Re-download even when a copy already exists under
+            ``./models``. Off by default so restarts reuse the cached file
+            instead of fetching the whole model again.
+
+    Returns:
+        The path returned by ``hf_hub_download`` for the local file.
+    """
+    logger.info("Downloading model: %s", model_name)
+    # resume_download is intentionally not passed: it is deprecated in
+    # huggingface_hub, which resumes partial downloads on its own.
+    model_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=model_name,
+        cache_dir="./models",
+        force_download=force_download,
+    )
+    logger.info("Model downloaded: %s", model_path)
+    return model_path
+# Download and load the 8-bit model at startup
+model_path = download_model(MODEL_NAME)
+llm = Llama(
+    model_path=model_path,
+    n_ctx=1024,
+    n_threads=8,
+    n_gpu_layers=-1,  # Use all available GPU layers
+    verbose=False,
+    n_batch=512,
+    # The constructor keyword is `use_mlock`; a bare `mlock` is absorbed by
+    # Llama's catch-all **kwargs and has no effect.
+    use_mlock=True,
+)
+logger.info("8-bit model loaded successfully")
+# Load the dataset
+dataset = load_dataset("sugiv/leetmonkey_python_dataset")
+train_dataset = dataset["train"]
+# Sampling options forwarded to the model on each streamed completion call
+generation_kwargs = dict(
+    max_tokens=512,
+    stop=["```", "### Instruction:", "### Response:"],
+    echo=False,
+    temperature=0.05,
+    top_k=10,
+    top_p=0.9,
+    repeat_penalty=1.1,
+)
+def generate_solution(instruction):
+    """Stream the model's completion for ``instruction``, one text chunk at a time."""
+    system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
+    # The prompt deliberately ends right after an opening ```python fence,
+    # so the completion starts directly with code.
+    prompt_parts = [
+        "### Instruction:\n",
+        f"{system_prompt}\n",
+        "Implement the following function for the LeetCode problem:\n",
+        f"{instruction}\n",
+        "### Response:\n",
+        "Here's the complete Python function implementation:\n",
+        "```python\n",
+    ]
+    full_prompt = "".join(prompt_parts)
+    completion_stream = llm(full_prompt, stream=True, **generation_kwargs)
+    yield from (piece["choices"][0]["text"] for piece in completion_stream)
+def extract_and_format_code(text):
+    """Pull Python code out of model output and normalise its layout.
+
+    The first ```python fenced block is used when one is present, otherwise
+    the whole text. After dedenting, ``def``/``class`` header lines and blank
+    lines are kept as they are and every other line is pushed in by four
+    spaces. The result is passed through autopep8; if that fails, the
+    re-indented text is returned as is.
+
+    Args:
+        text: Raw model output.
+
+    Returns:
+        The formatted source code as a string.
+    """
+    # Extract code between triple backticks
+    code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
+    code = code_match.group(1) if code_match else text
+    # Dedent the code to remove any common leading whitespace
+    code = textwrap.dedent(code)
+    indented_lines = []
+    for line in code.split('\n'):
+        stripped = line.strip()
+        # Match the keyword as a whole word so identifiers such as
+        # ``default`` or ``classes`` are not mistaken for definitions.
+        is_definition = re.match(r'(?:def|class)\b', stripped) is not None
+        if is_definition or not stripped:
+            indented_lines.append(line)  # Headers and empty lines stay as is
+        else:
+            indented_lines.append('    ' + line)  # Add 4 spaces of indentation
+    formatted_code = '\n'.join(indented_lines)
+    try:
+        return autopep8.fix_code(formatted_code)
+    except Exception:
+        # Formatting is best-effort: report the failure and fall back to the
+        # re-indented text instead of failing the request.
+        logging.getLogger(__name__).warning(
+            "autopep8 failed; returning unformatted code", exc_info=True
+        )
+        return formatted_code
+def select_random_problem():
+    """Return the ``instruction`` field of one randomly chosen training example."""
+    chosen_example = random.choice(train_dataset)
+    return chosen_example['instruction']
+def stream_solution(problem):
+    """Yield the growing raw completion for ``problem``, then the formatted code last."""
+    logger.info("Generating solution")
+    pieces = []
+    for piece in generate_solution(problem):
+        pieces.append(piece)
+        # Each yield carries everything generated so far, not just the new piece
+        yield "".join(pieces)
+    final_code = extract_and_format_code("".join(pieces))
+    logger.info("Solution generated successfully")
+    yield final_code
+def verify_token(token):
+    """Return True when ``token`` decodes as a JWT signed with ``JWT_SECRET``.
+
+    Any failure yields False; the function never raises for a bad token.
+    """
+    try:
+        jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
+    except jwt.PyJWTError:
+        # Expected rejection: malformed, expired or wrongly signed token.
+        return False
+    except Exception:
+        # Fail closed on anything unexpected, but leave a trace; unlike a
+        # bare ``except`` this lets KeyboardInterrupt/SystemExit propagate.
+        logger.exception("Unexpected error while verifying token")
+        return False
+    return True
+def generate_token():
+    """Create a JWT signed with ``JWT_SECRET`` whose ``exp`` claim is one hour from now."""
+    # Local import: the module-level datetime import does not include timezone.
+    from datetime import timezone
+    # datetime.utcnow() is deprecated (Python 3.12) and returns a naive value;
+    # use an explicit timezone-aware UTC timestamp instead.
+    expiration = datetime.now(timezone.utc) + timedelta(hours=1)
+    return jwt.encode({"exp": expiration}, JWT_SECRET, algorithm=JWT_ALGORITHM)
+def api_random_problem(token):
+    """Return a random problem for a valid token, otherwise an error payload."""
+    if verify_token(token):
+        return {"problem": select_random_problem()}
+    return {"error": "Invalid token"}
+def api_generate_solution(problem, token):
+    """Generate a solution for ``problem`` on behalf of a token holder.
+
+    Returns:
+        ``{"solution": <formatted code>}`` for a valid token, otherwise
+        ``{"error": "Invalid token"}``.
+    """
+    if not verify_token(token):
+        return {"error": "Invalid token"}
+    # stream_solution yields cumulative snapshots of the text and, last, the
+    # formatted code. Joining every yield would repeat the text many times
+    # over, so keep only the final value.
+    solution = ""
+    for solution in stream_solution(problem):
+        pass
+    return {"solution": solution}
+def api_explain_solution(solution, token):
+    """Ask the model to explain ``solution``; requires a valid token."""
+    if verify_token(token):
+        prompt = f"Explain the following Python code:\n\n{solution}\n\nExplanation:"
+        completion = llm(prompt, max_tokens=256)
+        return {"explanation": completion["choices"][0]["text"]}
+    return {"error": "Invalid token"}
+# gr.Interface wraps a single function, so the four endpoints (each with its
+# own signature) are wired up one by one on a Blocks app instead.
+with gr.Blocks(title="LeetCode Problem Solver API") as iface:
+    gr.Markdown(
+        "# LeetCode Problem Solver API\n"
+        "API endpoints for generating and explaining LeetCode solutions."
+    )
+    token_box = gr.Textbox(label="JWT Token")
+    problem_box = gr.Textbox(label="Problem")
+    solution_box = gr.Textbox(label="Solution")
+    random_problem_json = gr.JSON(label="Random Problem")
+    generated_solution_json = gr.JSON(label="Generated Solution")
+    explanation_json = gr.JSON(label="Explanation")
+    new_token_box = gr.Textbox(label="New JWT Token")
+    # Input order follows each function's parameter order.
+    gr.Button("Random Problem").click(
+        api_random_problem,
+        inputs=[token_box],
+        outputs=[random_problem_json],
+        api_name="random_problem",
+    )
+    gr.Button("Generate Solution").click(
+        api_generate_solution,
+        inputs=[problem_box, token_box],
+        outputs=[generated_solution_json],
+        api_name="generate_solution",
+    )
+    gr.Button("Explain Solution").click(
+        api_explain_solution,
+        inputs=[solution_box, token_box],
+        outputs=[explanation_json],
+        api_name="explain_solution",
+    )
+    gr.Button("Generate Token").click(
+        generate_token,
+        inputs=None,
+        outputs=[new_token_box],
+        api_name="generate_token",
+    )
+# Start the Gradio app only when this file is executed as a script
+if __name__ == "__main__":
+    logger.info("Starting Gradio API")
+    # share=True asks Gradio for a public share link as well
+    iface.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio
+llama-cpp-python
+datasets
+transformers
+autopep8
+huggingface_hub
+# app.py imports `jwt`, which is provided by the PyJWT distribution
+PyJWT