Spaces:

sugiv
/

LeetMonkey-8bit-GGUF-Inference

Sleeping

App Files Files Community

sugiv committed on Sep 10, 2024

Commit

0ea97e9

1 Parent(s): 283cfad

Adding logic to rate limit, get JWT token with user identity

Browse files

Files changed (1) hide show

app.py +32 -97

app.py CHANGED Viewed

@@ -16,14 +16,12 @@ import threading
 import hashlib
 # Rate limiting data structures
-ip_usage = defaultdict(int)
-session_usage = defaultdict(int)
 last_reset_time = time.time()
 rate_limit_lock = threading.Lock()
 # Constants
-MAX_IP_USAGE = 10
-MAX_SESSION_USAGE = 2
 RESET_INTERVAL = 24 * 60 * 60  # 24 hours in seconds
 # Set up logging
@@ -50,7 +48,6 @@ llm = Llama(model_path=model_path, n_ctx=1024, n_threads=8, n_gpu_layers=-1, ver
 logger.info("8-bit model loaded successfully")
 # User data storage
-user_data = {}
 token_to_problem_solution = {}
 # Generation parameters
@@ -64,18 +61,6 @@ generation_kwargs = {
     "repeat_penalty": 1.1
 }
-def generate_user_identifier(request: gr.Request) -> str:
-    ip = request.client.ip
-    user_agent = request.headers.get('User-Agent', '')
-    return hashlib.sha256(f"{ip}{user_agent}".encode()).hexdigest()
-def generate_token(user_identifier: str) -> str:
-    payload = {
-        'exp': int(time.time()) + 3600,  # 1 hour expiration
-        'user_id': user_identifier
-    }
-    return jwt.encode(payload, JWT_SECRET, algorithm=JWT_ALGORITHM)
 def verify_token(token: str) -> bool:
     try:
         jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
@@ -83,63 +68,52 @@ def verify_token(token: str) -> bool:
     except jwt.PyJWTError:
         return False
-def check_rate_limit(ip, session):
     global last_reset_time
     with rate_limit_lock:
         current_time = time.time()
         if current_time - last_reset_time >= RESET_INTERVAL:
-            ip_usage.clear()
-            session_usage.clear()
             last_reset_time = current_time
-        if ip_usage[ip] >= MAX_IP_USAGE:
-            return False, "IP rate limit exceeded. Please try again in 24 hours."
-        if session_usage[session] >= MAX_SESSION_USAGE:
-            return False, "Session rate limit exceeded. Please try again in 24 hours."
-        ip_usage[ip] += 1
-        session_usage[session] += 1
         return True, ""
 def extract_and_format_code(text):
-    # Extract code between triple backticks
     code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
         code = text
-    # Dedent the code to remove any common leading whitespace
     code = textwrap.dedent(code)
-    # Split the code into lines
     lines = code.split('\n')
-    # Ensure proper indentation
     indented_lines = []
     for line in lines:
         if line.strip().startswith('class') or line.strip().startswith('def'):
-            indented_lines.append(line)  # Keep class and function definitions as is
-        elif line.strip():  # If the line is not empty
-            indented_lines.append('    ' + line)  # Add 4 spaces of indentation
         else:
-            indented_lines.append(line)  # Keep empty lines as is
     formatted_code = '\n'.join(indented_lines)
     try:
         return autopep8.fix_code(formatted_code)
     except:
         return formatted_code
 def generate_explanation(problem: str, solution: str, token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
     problem_solution_hash = hashlib.sha256(f"{problem}{solution}".encode()).hexdigest()
     if token not in token_to_problem_solution or token_to_problem_solution[token] != problem_solution_hash:
         return {"error": "No matching problem-solution pair found for this token"}
     system_prompt = "You are a Python coding assistant specialized in explaining LeetCode problem solutions. Provide a clear and concise explanation of the given solution."
     full_prompt = f"""### Instruction:
 {system_prompt}
@@ -156,26 +130,20 @@ Explain this solution step by step.
 Here's the explanation of the solution:
 """
     generated_text = ""
     for chunk in llm(full_prompt, stream=True, **generation_kwargs):
-        generated_text += chunk["choices"][0]["text"]
     return {"explanation": generated_text}
-def generate_solution(instruction: str, token: str, request: gr.Request) -> Dict[str, Any]:
-    ip = request.client.ip
-    session = request.client.session
-    user_identifier = generate_user_identifier(request)
-    is_allowed, message = check_rate_limit(ip, session)
-    if not is_allowed:
-        return {"error": message}
     if not verify_token(token):
         return {"error": "Invalid token"}
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""### Instruction:
 {system_prompt}
@@ -189,7 +157,6 @@ Here's the complete Python function implementation:
 ```python
 """
     generated_text = ""
     for chunk in llm(full_prompt, stream=True, **generation_kwargs):
         generated_text += chunk["choices"][0]["text"]
@@ -199,52 +166,24 @@ Here's the complete Python function implementation:
     token_to_problem_solution[token] = problem_solution_hash
     return {"solution": formatted_code}
-def random_problem(token: str, request: gr.Request) -> Dict[str, Any]:
-    ip = request.client.ip
-    session = request.client.session
-    user_identifier = generate_user_identifier(request)
-    is_allowed, message = check_rate_limit(ip, session)
-    if not is_allowed:
-        return {"error": message}
     if not verify_token(token):
         return {"error": "Invalid token"}
-    # Select a random problem from the dataset
-    random_item = random.choice(train_dataset)
-    # Extract the instruction (problem statement) from the randomly selected item
     problem = random_item['instruction']
-    user_data[token] = {"problem": problem, "solution": None}
     return {"problem": problem}
-def explain_solution(token: str, problem: str, solution: str, request: gr.Request) -> Dict[str, Any]:
-    ip = request.client.ip
-    session = request.client.session
-    user_identifier = generate_user_identifier(request)
-    is_allowed, message = check_rate_limit(ip, session)
-    if not is_allowed:
-        return {"error": message}
-    if not verify_token(token):
-        return {"error": "Invalid token"}
-    problem_solution_hash = hashlib.sha256(f"{problem}{solution}".encode()).hexdigest()
-    if token not in token_to_problem_solution or token_to_problem_solution[token] != problem_solution_hash:
-        return {"error": "No matching problem-solution pair found for this token"}
-    return generate_explanation(problem, solution, token)
 # Create Gradio interfaces
 generate_interface = gr.Interface(
     fn=generate_solution,
     inputs=[
         gr.Textbox(label="Problem Instruction"),
-        gr.Textbox(label="JWT Token"),
-        gr.Request()
     ],
     outputs=gr.JSON(),
     title="Generate Solution API",
@@ -253,26 +192,22 @@ generate_interface = gr.Interface(
 random_problem_interface = gr.Interface(
     fn=random_problem,
-    inputs=[
-        gr.Textbox(label="JWT Token"),
-        gr.Request()
-    ],
     outputs=gr.JSON(),
     title="Random Problem API",
     description="Provide a valid JWT token to get a random LeetCode problem."
 )
 explain_interface = gr.Interface(
-    fn=explain_solution,
     inputs=[
-        gr.Textbox(label="JWT Token"),
         gr.Textbox(label="Problem"),
         gr.Textbox(label="Solution"),
-        gr.Request()
     ],
     outputs=gr.JSON(),
     title="Explain Solution API",
-    description="Provide a valid JWT token, problem, and solution to get an explanation of the solution."
 )
 demo = gr.TabbedInterface(

 import hashlib
 # Rate limiting data structures
+token_usage = defaultdict(int)
 last_reset_time = time.time()
 rate_limit_lock = threading.Lock()
 # Constants
+MAX_TOKEN_USAGE = 10
 RESET_INTERVAL = 24 * 60 * 60  # 24 hours in seconds
 # Set up logging
 logger.info("8-bit model loaded successfully")
 # User data storage
 token_to_problem_solution = {}
 # Generation parameters
     "repeat_penalty": 1.1
 }
 def verify_token(token: str) -> bool:
     try:
         jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
     except jwt.PyJWTError:
         return False
+def check_rate_limit(token: str):
     global last_reset_time
     with rate_limit_lock:
         current_time = time.time()
         if current_time - last_reset_time >= RESET_INTERVAL:
+            token_usage.clear()
             last_reset_time = current_time
+        if token_usage[token] >= MAX_TOKEN_USAGE:
+            return False, "Rate limit exceeded. Please try again later."
+        token_usage[token] += 1
         return True, ""
 def extract_and_format_code(text):
     code_match = re.search(r'```python\s*(.*?)\s*```', text, re.DOTALL)
     if code_match:
         code = code_match.group(1)
     else:
         code = text
     code = textwrap.dedent(code)
     lines = code.split('\n')
     indented_lines = []
     for line in lines:
         if line.strip().startswith('class') or line.strip().startswith('def'):
+            indented_lines.append(line)
+        elif line.strip():
+            indented_lines.append('    ' + line)
         else:
+            indented_lines.append(line)
     formatted_code = '\n'.join(indented_lines)
     try:
         return autopep8.fix_code(formatted_code)
     except:
         return formatted_code
 def generate_explanation(problem: str, solution: str, token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
+    is_allowed, message = check_rate_limit(token)
+    if not is_allowed:
+        return {"error": message}
     problem_solution_hash = hashlib.sha256(f"{problem}{solution}".encode()).hexdigest()
     if token not in token_to_problem_solution or token_to_problem_solution[token] != problem_solution_hash:
         return {"error": "No matching problem-solution pair found for this token"}
     system_prompt = "You are a Python coding assistant specialized in explaining LeetCode problem solutions. Provide a clear and concise explanation of the given solution."
     full_prompt = f"""### Instruction:
 {system_prompt}
 Here's the explanation of the solution:
 """
     generated_text = ""
     for chunk in llm(full_prompt, stream=True, **generation_kwargs):
+        generated_text += chunk["choices"]["text"]
     return {"explanation": generated_text}
+def generate_solution(instruction: str, token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
+    is_allowed, message = check_rate_limit(token)
+    if not is_allowed:
+        return {"error": message}
     system_prompt = "You are a Python coding assistant specialized in solving LeetCode problems. Provide only the complete implementation of the given function. Ensure proper indentation and formatting. Do not include any explanations or multiple solutions."
     full_prompt = f"""### Instruction:
 {system_prompt}
 ```python
 """
     generated_text = ""
     for chunk in llm(full_prompt, stream=True, **generation_kwargs):
         generated_text += chunk["choices"][0]["text"]
     token_to_problem_solution[token] = problem_solution_hash
     return {"solution": formatted_code}
+def random_problem(token: str) -> Dict[str, Any]:
     if not verify_token(token):
         return {"error": "Invalid token"}
+    is_allowed, message = check_rate_limit(token)
+    if not is_allowed:
+        return {"error": message}
+    random_item = random.choice(train_dataset)
     problem = random_item['instruction']
     return {"problem": problem}
 # Create Gradio interfaces
 generate_interface = gr.Interface(
     fn=generate_solution,
     inputs=[
         gr.Textbox(label="Problem Instruction"),
+        gr.Textbox(label="JWT Token")
     ],
     outputs=gr.JSON(),
     title="Generate Solution API",
 random_problem_interface = gr.Interface(
     fn=random_problem,
+    inputs=[gr.Textbox(label="JWT Token")],
     outputs=gr.JSON(),
     title="Random Problem API",
     description="Provide a valid JWT token to get a random LeetCode problem."
 )
 explain_interface = gr.Interface(
+    fn=generate_explanation,
     inputs=[
         gr.Textbox(label="Problem"),
         gr.Textbox(label="Solution"),
+        gr.Textbox(label="JWT Token")
     ],
     outputs=gr.JSON(),
     title="Explain Solution API",
+    description="Provide a problem, solution, and valid JWT token to get an explanation of the solution."
 )
 demo = gr.TabbedInterface(