""" # Inference import gradio as gr app = gr.load( "google/gemma-2-2b-it", src = "models", inputs = [gr.Textbox(label = "Input")], outputs = [gr.Textbox(label = "Output")], title = "Google Gemma", description = "Inference", examples = [ ["Hello, World."] ] ).launch() """ """ # Pipeline import gradio as gr from transformers import pipeline pipe = pipeline(model = "google/gemma-2-2b-it") def fn(input): output = pipe( input, max_new_tokens = 2048 ) return output[0]["generated_text"]#[len(input):] app = gr.Interface( fn = fn, inputs = [gr.Textbox(label = "Input")], outputs = [gr.Textbox(label = "Output")], title = "Google Gemma", description = "Pipeline", examples = [ ["Hello, World."] ] ).launch() """ import gradio as gr from huggingface_hub import InferenceClient import os token = os.getenv("HF_TOKEN") client = InferenceClient(api_key=token) messages = [ { "role": "user", "content": "Tell me a story" } ] stream = client.chat.completions.create( model="google/gemma-2-2b-it", messages=messages, temperature=0.5, max_tokens=2048, top_p=0.7, stream=True ) for chunk in stream: print(chunk.choices[0].delta.content)