Spaces:

KantaHayashiAI
/

EvaByte-SFT

Sleeping

KantaHayashiAI committed on Apr 30

Commit

67f3195

verified ·

1 Parent(s): 8af9f91

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ MAX_INPUT_TOKEN_LENGTH = 32000
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-tokenizer = AutoTokenizer.from_pretrained("evabyte/EvaByte-SFT", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("evabyte/EvaByte-SFT", torch_dtype=torch.bfloat16, trust_remote_code=True).eval().to("cuda")
 @spaces.GPU(duration=120)
@@ -34,8 +34,6 @@ def generate(
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
-    top_k: int = 50,
-    repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
@@ -52,10 +50,7 @@ def generate(
         max_new_tokens=max_new_tokens,
         do_sample=True,
         top_p=top_p,
-        top_k=top_k,
         temperature=temperature,
-        num_beams=1,
-        repetition_penalty=repetition_penalty,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -90,20 +85,6 @@ demo = gr.ChatInterface(
             step=0.05,
             value=0.9,
         ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.2,
-        ),
     ],
     stop_btn=None,
     examples=[

 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+tokenizer = AutoTokenizer.from_pretrained("EvaByte/EvaByte", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("evabyte/EvaByte-SFT", torch_dtype=torch.bfloat16, trust_remote_code=True).eval().to("cuda")
 @spaces.GPU(duration=120)
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
 ) -> Iterator[str]:
     conversation = [*chat_history, {"role": "user", "content": message}]
         max_new_tokens=max_new_tokens,
         do_sample=True,
         top_p=top_p,
         temperature=temperature,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
             step=0.05,
             value=0.9,
         ),
     ],
     stop_btn=None,
     examples=[