Testing Top P and System Prompt
app.py CHANGED
@@ -11,7 +11,7 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
 DESCRIPTION = '''
 <div>
-<h1 style="text-align: center;">
+<h1 style="text-align: center;">Oscar's Model</h1>
 </div>
 '''
 
@@ -23,7 +23,7 @@ LICENSE = """
 
 PLACEHOLDER = """
 <div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
-   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">
+   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">Oscar's Uncensored Model</h1>
    <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Ask me anything...</p>
 </div>
 """
@@ -55,7 +55,9 @@ terminators = [
 def chat_llama3_8b(message: str,
                    history: list,
                    temperature: float,
-                   max_new_tokens: int
+                   max_new_tokens: int,
+                   top_p: float,
+                   system_prompt: str
                    ) -> str:
     """
     Generate a streaming response using the llama3-8b model.
@@ -64,10 +66,12 @@ def chat_llama3_8b(message: str,
         history (list): The conversation history used by ChatInterface.
         temperature (float): The temperature for generating the response.
         max_new_tokens (int): The maximum number of new tokens to generate.
+        top_p (float): The top_p value for nucleus sampling.
+        system_prompt (str): The system prompt to guide the conversation.
     Returns:
         str: The generated response.
     """
-    conversation = []
+    conversation = [{"role": "system", "content": system_prompt}]
     for user, assistant in history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
@@ -82,6 +86,7 @@ def chat_llama3_8b(message: str,
         max_new_tokens=max_new_tokens,
         do_sample=True,
         temperature=temperature,
+        top_p=top_p,
         eos_token_id=terminators,
     )
     # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
@@ -122,6 +127,16 @@ with gr.Blocks(fill_height=True, css=css) as demo:
                       value=4096,
                       label="Max new tokens",
                       render=False ),
+            gr.Slider(minimum=0,
+                      maximum=1,
+                      step=0.1,
+                      value=0.9,
+                      label="Top_p",
+                      render=False),
+            gr.Textbox(lines=2,
+                       placeholder="Enter system prompt here...",
+                       label="System Prompt",
+                       render=False),
             ],
         examples=[
            ['Who Are you?']
@@ -132,4 +147,3 @@ with gr.Blocks(fill_height=True, css=css) as demo:
 
 if __name__ == "__main__":
     demo.launch()
-
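Note on the generation changes: system_prompt is prepended as the first chat message, and top_p is forwarded to model.generate so nucleus sampling works alongside temperature. Below is a minimal sketch of how the two new arguments are typically consumed, assuming the surrounding code follows the standard transformers chat-template + streaming pattern; the tokenizer, model, streamer and terminators setup is not part of this diff, so that part is assumed here rather than copied from the Space.

# Sketch only — model/tokenizer setup is assumed, not taken from this diff.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

def chat_llama3_8b(message, history, temperature, max_new_tokens, top_p, system_prompt):
    # The system prompt becomes the first chat message, as in the diff above.
    conversation = [{"role": "system", "content": system_prompt}]
    for user, assistant in history:
        conversation.extend([{"role": "user", "content": user},
                             {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,  # nucleus sampling: sample only from the smallest token set whose cumulative probability >= top_p
        eos_token_id=terminators,
    )
    if temperature == 0:
        generate_kwargs["do_sample"] = False  # greedy fallback, per the comment in the diff

    # Generate in a background thread and stream partial text back to the UI.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    partial = ""
    for text in streamer:
        partial += text
        yield partial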
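Note on the UI wiring: gr.ChatInterface passes each additional_inputs value to fn positionally after (message, history), so the new Top_p slider and System Prompt textbox must be listed in the same order as the new top_p and system_prompt parameters. A minimal wiring sketch follows; the overall gr.ChatInterface call and the Temperature slider's arguments are reconstructed for illustration, not copied from the Space.

import gradio as gr

# Widget order must match the parameter order of
# chat_llama3_8b(message, history, temperature, max_new_tokens, top_p, system_prompt).
demo = gr.ChatInterface(
    fn=chat_llama3_8b,
    additional_inputs=[
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.95, label="Temperature"),       # assumed values; not in this diff
        gr.Slider(minimum=128, maximum=4096, step=1, value=4096, label="Max new tokens"), # value/label per the diff, range assumed
        gr.Slider(minimum=0, maximum=1, step=0.1, value=0.9, label="Top_p"),              # new in this commit
        gr.Textbox(lines=2, placeholder="Enter system prompt here...", label="System Prompt"),  # new in this commit
    ],
    examples=[['Who Are you?']],
)

if __name__ == "__main__":
    demo.launch()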