"""Minimal Gradio front-end answering questions with a local GGML Nous-Hermes-13B model."""
import gradio as gr
#import transformers
#from transformers import pipeline
from llama_cpp import Llama

# llama_cpp's Llama(model_path=...) requires a LOCAL file path -- it cannot
# stream weights from a URL.  Download the file first, e.g. from:
# https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q4_K_S.bin
# (The original code overwrote `model` with that URL, which Llama cannot open.)
model = "nous-hermes-13b.ggmlv3.q4_K_S.bin"

# Lazily-created singleton: loading the multi-GB model is expensive, so do it
# once on first request instead of inside every call to question_answer().
_llm = None


def _get_llm():
    """Return the shared Llama instance, creating it on first use."""
    global _llm
    if _llm is None:
        _llm = Llama(model_path=model)
    return _llm


def question_answer(context, question):
    """Answer *question* about *context* using the local llama.cpp model.

    Parameters
    ----------
    context : str
        Background text pasted into the first textbox.
    question : str
        The question typed into the second textbox.

    Returns
    -------
    str
        The completion text produced by the model (echo of the prompt
        included, because ``echo=True`` is passed below).
    """
    text = context + "\n\nQuestion: \"\"\"\n" + question + "\nPlease use markdown formatting for answer. \nAnswer:\n"
    llm = _get_llm()
    output = llm(text, max_tokens=33, stop=["### Response", "\n"], echo=True)
    print(output)
    # llama_cpp returns a plain dict (see the sample payload below), NOT an
    # object with attributes -- `output.choices[0].text` raises AttributeError.
    return output["choices"][0]["text"]


'''
Output is of the form:
{
  "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
  "object": "text_completion",
  "created": 1679561337,
  "model": "./models/7B/ggml-model.bin",
  "choices": [
    {
      "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
      "index": 0,
      "logprobs": None,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 28,
    "total_tokens": 42
  }
}
'''
#generator = pipeline(model=model, device_map="auto")
#return generator(text)

# question_answer returns a single string, so declare exactly one output
# component (the original listed two, which Gradio rejects when the function
# yields only one value).
app = gr.Interface(fn=question_answer, inputs=["text", "text"], outputs="textbox")
app.launch()