Tijmen2 committed on
Commit
05eca7f
·
verified ·
1 Parent(s): 8729011

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -87
app.py DELETED
@@ -1,87 +0,0 @@
1
- import os
2
- import gradio as gr
3
- import copy
4
- from llama_cpp import Llama
5
- from huggingface_hub import hf_hub_download
6
-
7
-
8
# Fetch the GGUF weights from the Hugging Face Hub. The REPO_ID and MODEL_FILE
# environment variables override the default repository and file name.
_model_path = hf_hub_download(
    repo_id=os.environ.get("REPO_ID", "microsoft/Phi-3-mini-4k-instruct-gguf"),
    filename=os.environ.get("MODEL_FILE", "Phi-3-mini-4k-instruct-q4.gguf"),
)

# Shared model instance used by the chat handler.
llm = Llama(
    model_path=_model_path,
    n_ctx=2048,
    n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
)
16
-
17
-
18
def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a completion for ``message`` from the module-level ``llm``.

    A single prompt string is assembled from the system message, every prior
    turn in ``history`` and the new ``message``; the model is then called in
    streaming mode and the reply accumulated so far is yielded after each
    chunk.

    Args:
        message: The new user message; converted with ``str``.
        history: Earlier turns; element 0 of each entry is used as the user
            text and element 1 as the reply text.
        system_message: Text placed inside the ``<<SYS>>`` section.
        max_tokens: Passed to ``llm`` as ``max_tokens``.
        temperature: Passed to ``llm`` as ``temperature``.
        top_p: Passed to ``llm`` as ``top_p``.

    Yields:
        str: The concatenation of all chunk texts received so far.
    """
    # NOTE(review): this is an ``[INST]`` / ``<<SYS>>`` style template and the
    # stop list holds ``<|prompter|>`` / ``USER:`` style markers, while the
    # default REPO_ID in this file is a Phi-3 model -- confirm the template
    # and stop strings match the model actually being served.
    prompt_parts = [f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "]
    for turn in history:
        prompt_parts.append(f"{turn[0]!s} [/INST] {turn[1]!s} </s><s> [INST] ")
    prompt_parts.append(f"{message!s} [/INST] ")
    input_prompt = "".join(prompt_parts)

    output = llm(
        input_prompt,
        temperature=temperature,
        top_p=top_p,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=max_tokens,
        stop=[
            "<|prompter|>",
            "<|endoftext|>",
            "<|endoftext|> \n",
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )

    # Each chunk is only read, so no defensive copy is needed before
    # extracting its text.
    reply = ""
    for chunk in output:
        reply += chunk["choices"][0]["text"]
        yield reply
54
-
55
-
56
# Example prompts offered in the chat UI (one single-element list per example).
_example_prompts = [
    ["How to setup a human base on Mars? Give short answer."],
    ["Explain theory of relativity to me like I’m 8 years old."],
    ["What is 9,000 * 9,000?"],
    ["Write a pun-filled happy birthday message to my friend Alex."],
    ["Justify why a penguin might make a good king of the jungle."],
]

# Extra controls, listed in the order generate_text declares its parameters
# after (message, history): system_message, max_tokens, temperature, top_p.
_extra_controls = [
    gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
    ),
]

# Chat UI wired to the streaming generate_text handler.
demo = gr.ChatInterface(
    generate_text,
    title="llama-cpp-python on GPU",
    description="Running LLM with https://github.com/abetlen/llama-cpp-python",
    examples=_example_prompts,
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=_extra_controls,
)


# Launch the UI only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()