MadsGalsgaard committed on
Commit
9ae8177
·
verified ·
1 Parent(s): 4dd6ef8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +179 -39
app.py CHANGED
@@ -441,57 +441,197 @@
441
 
442
  ###########new clientkey
443
 
444
- import gradio as gr
445
- from huggingface_hub import InferenceClient
446
 
447
- # Hugging Face Inference Client setup
448
- client = InferenceClient(
449
- model="meta-llama/Meta-Llama-3.1-8B-Instruct" # Replace with your actual token
450
- )
451
 
452
- # Function to interact with the Hugging Face model
453
- def chat_with_model(message, history):
454
- # Prepare conversation history for the model
455
- conversation = [{"role": "system", "content": "You are a helpful assistant."}]
456
 
457
- for past_message, past_response in history:
458
- conversation.append({"role": "user", "content": past_message})
459
- conversation.append({"role": "assistant", "content": past_response})
460
 
461
- # Add new user message to the conversation
462
- conversation.append({"role": "user", "content": message})
463
 
464
- # Generate response using the Inference API
465
- responses = client.chat_completion(
466
- messages=conversation,
467
- max_tokens=500,
468
- stream=True
469
- )
470
 
471
- # Capture streamed response
472
- response_text = ""
473
- for response in responses:
474
- delta_content = response.choices[0].delta.content
475
- response_text += delta_content
476
 
477
- history.append((message, response_text))
478
 
479
- return history, history # Update both chatbot history and visible chat
480
 
481
- # Create Gradio interface
482
- with gr.Blocks() as demo:
483
- chatbot = gr.Chatbot(height=600)
484
- msg_input = gr.Textbox(show_label=False, placeholder="Type your message...")
485
 
486
- with gr.Row():
487
- clear_btn = gr.Button("Clear Chat")
488
 
489
- # Setting up interaction between user input and the chatbot
490
- msg_input.submit(chat_with_model, [msg_input, chatbot], [chatbot, chatbot])
491
- clear_btn.click(lambda: None, None, chatbot, queue=False)
492
 
493
- gr.Markdown("## Llama 3.1 Chatbot")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494
 
495
- # Launch Gradio demo
496
  if __name__ == "__main__":
497
  demo.launch()
 
441
 
442
  ###########new clientkey
443
 
444
+ # import gradio as gr
445
+ # from huggingface_hub import InferenceClient
446
 
447
+ # # Hugging Face Inference Client setup
448
+ # client = InferenceClient(
449
+ # model="meta-llama/Meta-Llama-3.1-8B-Instruct" # Replace with your actual token
450
+ # )
451
 
452
+ # # Function to interact with the Hugging Face model
453
+ # def chat_with_model(message, history):
454
+ # # Prepare conversation history for the model
455
+ # conversation = [{"role": "system", "content": "You are a helpful assistant."}]
456
 
457
+ # for past_message, past_response in history:
458
+ # conversation.append({"role": "user", "content": past_message})
459
+ # conversation.append({"role": "assistant", "content": past_response})
460
 
461
+ # # Add new user message to the conversation
462
+ # conversation.append({"role": "user", "content": message})
463
 
464
+ # # Generate response using the Inference API
465
+ # responses = client.chat_completion(
466
+ # messages=conversation,
467
+ # max_tokens=500,
468
+ # stream=True
469
+ # )
470
 
471
+ # # Capture streamed response
472
+ # response_text = ""
473
+ # for response in responses:
474
+ # delta_content = response.choices[0].delta.content
475
+ # response_text += delta_content
476
 
477
+ # history.append((message, response_text))
478
 
479
+ # return history, history # Update both chatbot history and visible chat
480
 
481
+ # # Create Gradio interface
482
+ # with gr.Blocks() as demo:
483
+ # chatbot = gr.Chatbot(height=600)
484
+ # msg_input = gr.Textbox(show_label=False, placeholder="Type your message...")
485
 
486
+ # with gr.Row():
487
+ # clear_btn = gr.Button("Clear Chat")
488
 
489
+ # # Setting up interaction between user input and the chatbot
490
+ # msg_input.submit(chat_with_model, [msg_input, chatbot], [chatbot, chatbot])
491
+ # clear_btn.click(lambda: None, None, chatbot, queue=False)
492
 
493
+ # gr.Markdown("## Llama 3.1 Chatbot")
494
+
495
+ # # Launch Gradio demo
496
+ # if __name__ == "__main__":
497
+ # demo.launch()
498
+
499
+
500
+ import os
501
+ import time
502
+ import spaces
503
+ import torch
504
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
505
+ import gradio as gr
506
+ from threading import Thread
507
+
508
# Hugging Face Hub repository ID of the checkpoint loaded below.
MODEL = "THUDM/LongWriter-llama3.1-8b"

# HTML heading rendered at the top of the page.
TITLE = "<h1><center>AreaX LLC-llama3.1-8b</center></h1>"

# HTML handed to the chatbot component as its placeholder.
PLACEHOLDER = """
<center>
<p>Hi! I'm AreaX AI Agent, capable of generating 10,000+ words. How can I assist you today?</p>
</center>
"""

# Custom stylesheet passed to gr.Blocks: styles the element carrying the
# "duplicate-button" class and centers every <h3>.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Device that tokenized inputs are moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True lets code shipped in the checkpoint
# repository run at load time -- keep MODEL pinned to a repository you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)

# Load the weights in bfloat16 with automatic device placement, then switch the
# module to evaluation mode (eval() returns the module itself).
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
).eval()
535
+
536
def _build_prompt(message: str, history: list, system_prompt: str) -> str:
    """Assemble the text prompt from the system prompt, past turns and the new message."""
    parts = [f"<<SYS>>\n{system_prompt}\n<</SYS>>\n\n"]
    parts.extend(f"[INST]{prompt}[/INST]{answer}" for prompt, answer in history)
    parts.append(f"[INST]{message}[/INST]")
    return "".join(parts)


@spaces.GPU()
def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.5,
    max_new_tokens: int = 32768,
    top_p: float = 1.0,
    top_k: int = 50,
):
    """Generate a reply to ``message`` and stream it back incrementally.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_prompt, assistant_answer)`` pairs.
        system_prompt: Text placed in the ``<<SYS>>`` section of the prompt.
        temperature: Sampling temperature. A value of 0 (or below) switches
            to greedy decoding instead of sampling.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k sampling cutoff (used only when sampling).

    Yields:
        The reply accumulated so far, once per chunk emitted by the streamer.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    full_prompt = _build_prompt(message, history, system_prompt)
    inputs = tokenizer(full_prompt, truncation=False, return_tensors="pt").to(device)

    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)

    # The UI slider allows temperature == 0, which is not a valid sampling
    # temperature; fall back to greedy decoding in that case.
    do_sample = temperature > 0

    generate_kwargs = dict(
        inputs=inputs.input_ids,
        # Pass the mask produced by the tokenizer (None if it returned none)
        # so generate() does not have to guess it.
        attention_mask=inputs.get("attention_mask"),
        # UI sliders may deliver numeric values as floats; generate() needs an int.
        max_new_tokens=int(max_new_tokens),
        do_sample=do_sample,
        num_beams=1,
        streamer=streamer,
    )
    if do_sample:
        # Sampling-only knobs are supplied only when sampling is active.
        generate_kwargs.update(
            top_p=top_p,
            top_k=int(top_k),
            temperature=float(temperature),
        )

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer

    # The streamer is exhausted once generation has ended; reap the worker.
    thread.join()
577
+
578
# Chat panel, created up front and handed to gr.ChatInterface below.
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    # Every widget below is created with render=False and only passed on to
    # gr.ChatInterface.
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)

    system_prompt_box = gr.Textbox(
        value="You are a helpful assistant capable of generating long-form content.",
        label="System Prompt",
        render=False,
    )
    temperature_slider = gr.Slider(
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.5,
        label="Temperature",
        render=False,
    )
    max_new_tokens_slider = gr.Slider(
        minimum=1024,
        maximum=32768,
        step=1024,
        value=32768,
        label="Max new tokens",
        render=False,
    )
    top_p_slider = gr.Slider(
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        value=1.0,
        label="Top p",
        render=False,
    )
    top_k_slider = gr.Slider(
        minimum=1,
        maximum=100,
        step=1,
        value=50,
        label="Top k",
        render=False,
    )

    # One example prompt per row; each row holds only the message text.
    example_prompts = [
        "Write a 5000-word comprehensive guide on machine learning for beginners.",
        "Create a detailed 3000-word business plan for a sustainable energy startup.",
        "Compose a 2000-word short story set in a futuristic underwater city.",
        "Develop a 4000-word research proposal on the potential effects of climate change on global food security.",
    ]

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        # Listed in the order of stream_chat's parameters after (message, history):
        # system_prompt, temperature, max_new_tokens, top_p, top_k.
        additional_inputs=[
            system_prompt_box,
            temperature_slider,
            max_new_tokens_slider,
            top_p_slider,
            top_k_slider,
        ],
        examples=[[prompt_text] for prompt_text in example_prompts],
        cache_examples=False,
    )


# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()