Spaces:

RamAnanth1
/

Dolly-v2

Runtime error

File size: 3,856 Bytes

3fb614b
8eefb9c
d2758fe
8eefb9c
42435e1
3fb614b
 
 
 
 
 
 
 
 
c9152b5
2dd04ea
8eefb9c
c9152b5
d2758fe
c9152b5
8eefb9c
3fb614b
8eefb9c
d75f5ef
 
 
 
3ab0fd6
 
6e8442c
3fb614b
 
 
d75f5ef
 
 
3fb614b
 
 
 
 
 
d75f5ef
7bd3786
3fb614b
37e448d
3fb614b
 
8ca96c0
1424ecd
3fb614b
 
 
 
 
43cbeb6
 
3fb614b

import gradio as gr
from instruct_pipeline import InstructionTextGenerationPipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

import torch

# Custom Monochrome-based theme: indigo/blue/slate hues, small corner radius,
# and an "Open Sans" Google font with generic sans-serif fallbacks.
# NOTE(review): this object is not passed to gr.Blocks further down (that call
# uses theme='gradio/seafoam'), so it currently has no visible effect — confirm
# which theme is intended.
theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)

# Load the tokenizer and causal-LM weights for the "databricks/dolly-v2-12b"
# checkpoint at import time. The tokenizer is configured with left-side padding.
# device_map="auto" and load_in_8bit=True are forwarded to transformers;
# presumably they are there so the 12B model fits on the available hardware —
# TODO confirm against the deployment target.
tokenizer = AutoTokenizer.from_pretrained("databricks/dolly-v2-12b", padding_side="left")
model = AutoModelForCausalLM.from_pretrained("databricks/dolly-v2-12b", device_map="auto", load_in_8bit=True)

# Module-level pipeline object that generate() calls with the user's instruction.
generate_text = InstructionTextGenerationPipeline(model=model, tokenizer=tokenizer)

# Alternative (disabled) way of building the same pipeline via transformers.pipeline:
#generate_text = pipeline(model="databricks/dolly-v2-12b", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")

def generate(instruction):
    """Run the instruction pipeline on ``instruction`` and return its reply.

    The reply is also echoed to stdout, rebuilt so that every space-separated
    word (including the last one) is followed by a single space.

    Args:
        instruction: Prompt text supplied by the UI.

    Returns:
        The value returned by ``generate_text``, unmodified.
    """
    response = generate_text(instruction)
    # NOTE(review): ``.split`` is called on the reply, so this assumes the
    # pipeline returns a plain str — confirm against instruct_pipeline.
    echoed = "".join(f"{token} " for token in response.split(" "))
    print(echoed)
    return response
        
# Sample prompts handed to gr.Examples in the UI below; each entry is a single
# string that fills the question textbox.
examples = [
    "Instead of making a peanut butter and jelly sandwich, what else could I combine peanut butter with in a sandwich? Give five ideas",
    "How do I make a campfire?",
    "Write me a tweet about the release of Dolly 2.0, a new LLM",
    "Explain to me the difference between nuclear fission and fusion.",
    "I'm selling my Nikon D-750, write a short blurb for my ad."
]

def process_example(args):
    """Return the complete answer for one example prompt.

    ``generate`` currently returns the whole reply in one piece rather than
    yielding partial results. Looping over that reply walked it character by
    character, so only the final character was returned, and an empty reply
    raised ``UnboundLocalError``. The reply is now returned as-is.

    Args:
        args: The example prompt text, forwarded unchanged to ``generate``.

    Returns:
        The full reply when ``generate`` returns a string. If ``generate`` is
        turned back into a generator of progressively longer partial replies,
        the last item it yields (``None`` if it yields nothing).
    """
    outcome = generate(args)
    if isinstance(outcome, str):
        # Non-streaming path: the reply is already complete.
        return outcome

    # Streaming path: drain the iterator and keep only the final state.
    final = None
    for final in outcome:
        pass
    return final
    
# Extra stylesheet passed to gr.Blocks: hides any element carrying the
# "generating" class (presumably Gradio's in-progress indicator — confirm).
css = ".generating {visibility: hidden}"

# Build the page: an intro section, then a question box, answer area, button
# and clickable example prompts. Analytics are disabled and the custom CSS
# defined above is applied.
with gr.Blocks(theme='gradio/seafoam', analytics_enabled=False, css=css) as demo:
    with gr.Column():
        # Intro text describing the model, with a link to the model card.
        gr.Markdown(
            """ ## Dolly 2.0
            Dolly 2.0 is a 12B parameter language model based on the EleutherAI pythia model family and fine-tuned exclusively on a new, high-quality human generated instruction following dataset, crowdsourced among Databricks employees. For more details, please refer to the [model card](https://huggingface.co/databricks/dolly-v2-12b)
            
            Type in the box below and click the button to generate answers to your most pressing questions!
            
      """
        )
        # "Duplicate Space" badge; the badge logo is embedded as a base64 PNG in the URL.
        gr.HTML("<p>You can duplicate this Space to run it privately without a queue for shorter queue times  : <a style='display:inline-block' href='https://huggingface.co/spaces/RamAnanth1/Dolly-v2?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a> </p>")

        with gr.Row():
            with gr.Column(scale=3):
                # Input: the user's question.
                instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")

                # Output: the model's answer, rendered as Markdown inside a labelled box.
                with gr.Box():
                    gr.Markdown("**Answer**")
                    output = gr.Markdown(elem_id="q-output")
                submit = gr.Button("Generate", variant="primary")
                # Example prompts wired to the same input/output components;
                # results are not pre-computed (cache_examples=False).
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_example,
                    outputs=[output],
                )


    # Both the button's click event and the textbox's submit event call
    # generate() with the question and write its result to the answer area.
    submit.click(generate, inputs=[instruction], outputs=[output])
    instruction.submit(generate, inputs=[instruction], outputs=[output])

# Enable Gradio's request queue (configured with concurrency_count=16) and
# start the app with debug=True.
demo.queue(concurrency_count=16).launch(debug=True)