Update app.py
app.py CHANGED
@@ -14,11 +14,11 @@ print(time_load_model_start)
 # Load the model and tokenizer outside of the functions
 llm = pipeline("text-generation",
                model=my_config['model_name'],
-
-
-
-
-
+               tokenizer=AutoTokenizer.from_pretrained(my_config['model_name']),
+               do_sample=my_config['do_sample'],
+               temperature=my_config['temperature'],
+               repetition_penalty=my_config['repetition_penalty'],
+               max_new_tokens=my_config['max_new_tokens']
                )
 time_load_model_end = time.time()
 elapsed_time = time_load_model_end - time_load_model_start
@@ -32,5 +32,4 @@ def get_answer(llm):
 
 #gr.ChatInterface(get_llama_response).launch()
 demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
-
 demo.launch(share=True)
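For reference, a minimal sketch of what the full app.py might look like after this commit. The values in my_config are placeholders (the actual config is not shown in the diff), and the body of get_answer is assumed: the diff only shows its def line. Note that gr.Interface passes the textbox contents as the function's first argument, so the parameter named llm in the Space's code actually receives the user's prompt; the sketch renames it prompt for clarity.

import time

import gradio as gr
from transformers import AutoTokenizer, pipeline

# Placeholder config: the real model name and sampling settings in the
# Space's my_config are not visible in this diff.
my_config = {
    'model_name': 'gpt2',
    'do_sample': True,
    'temperature': 0.7,
    'repetition_penalty': 1.1,
    'max_new_tokens': 128,
}

time_load_model_start = time.time()
print(time_load_model_start)

# Load the model and tokenizer once, outside the request handler, so every
# Gradio call reuses the same pipeline instead of reloading the weights.
llm = pipeline("text-generation",
               model=my_config['model_name'],
               tokenizer=AutoTokenizer.from_pretrained(my_config['model_name']),
               do_sample=my_config['do_sample'],
               temperature=my_config['temperature'],
               repetition_penalty=my_config['repetition_penalty'],
               max_new_tokens=my_config['max_new_tokens']
               )
time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Model loaded in {elapsed_time:.1f}s")

def get_answer(prompt):
    # Assumed body: the text-generation pipeline returns a list of dicts
    # with a 'generated_text' key.
    return llm(prompt)[0]['generated_text']

#gr.ChatInterface(get_llama_response).launch()
demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
demo.launch(share=True)

Passing the generation kwargs at pipeline construction, as this commit does, bakes them into every request, which matches the intent of loading the model once at startup rather than configuring it per call.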