Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| from typing import Iterator | |
| import sambanova | |
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream a reply to ``message``, yielding the text accumulated so far.

    Args:
        message: The newest user message; sent as the final "user" turn.
        chat_history: Earlier ``(user, assistant)`` exchanges, oldest first.
        max_new_tokens: Forwarded to the streamer as ``new_tokens``.
        temperature: Forwarded to the streamer unchanged.
        top_p: Forwarded to the streamer unchanged.
        top_k: Forwarded to the streamer unchanged.
        repetition_penalty: Accepted as part of the signature but NOT
            forwarded to the streamer, so it has no effect here.

    Yields:
        The concatenation of every chunk received so far, once per chunk.
    """
    # Flatten each (user, assistant) pair into two role-tagged messages.
    turns = [
        {"role": role, "content": content}
        for user_text, assistant_text in chat_history
        for role, content in (("user", user_text), ("assistant", assistant_text))
    ]
    turns.append({"role": "user", "content": message})

    stream = sambanova.Streamer(
        turns,
        new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
    )

    # Chunks are presumably plain strings (the original joined them with "").
    reply_so_far = ""
    for chunk in stream:
        reply_so_far += chunk
        yield reply_so_far
# Upper bound and starting value used for the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024

# Taken from the MAX_INPUT_TOKEN_LENGTH environment variable, defaulting to
# 4096 when it is unset.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))
# Chat UI wired to `generate`, with one slider per tunable keyword.
chat_interface = gr.ChatInterface(
    fn=generate,
    # Slider specs are (label, minimum, maximum, step, initial value), listed
    # in the same order as the trailing parameters of `generate`.
    additional_inputs=[
        gr.Slider(label=label, minimum=low, maximum=high, step=step, value=start)
        for label, low, high, step, start in (
            ("Max new tokens", 1, MAX_MAX_NEW_TOKENS, 1, DEFAULT_MAX_NEW_TOKENS),
            ("Temperature", 0.1, 4.0, 0.1, 0.6),
            ("Top-p (nucleus sampling)", 0.05, 1.0, 0.05, 0.9),
            ("Top-k", 1, 1000, 1, 50),
            ("Repetition penalty", 1.0, 2.0, 0.05, 1.2),
        )
    ],
    stop_btn=None,
    fill_height=True,
    # Each example is a single-element list holding only the prompt text.
    examples=[
        [prompt]
        for prompt in (
            "Which one is bigger? 4.9 or 4.11",
            "Can you explain briefly to me what is the Python programming language?",
            "Explain the plot of Cinderella in a sentence.",
            "How many hours does it take a man to eat a Helicopter?",
            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
        )
    ],
    cache_examples=False,
)
# Top-level page: a heading followed by the chat interface defined above.
with gr.Blocks() as demo:
    gr.Markdown(value="# Sambanova model inference LLAMA 405B")
    chat_interface.render()
if __name__ == "__main__":
    # Configure the request queue with max_size=20, then launch the app.
    queued_app = demo.queue(max_size=20)
    queued_app.launch()