polpoDevs committed on
Commit f48f572 · verified · 1 Parent(s): 17018e7

Create app.py

Files changed (1)
  app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
+ import time
+
+ import gradio as gr
+ from transformers import pipeline
+
+ # Alternative base model: "meta-llama/Llama-2-13b-chat-hf"
+ my_config = {
+     'model_name': "BramVanroy/Llama-2-13b-chat-dutch",
+     'do_sample': True,
+     'temperature': 0.1,
+     'repetition_penalty': 1.1,
+     'max_new_tokens': 500,
+ }
+
+ print(f"Loading the model: {my_config['model_name']}....")
+ time_load_model_start = time.time()
+
+ # Load the model once, outside of the request handler, so it is not reloaded on every call.
+ llm = pipeline(
+     "text-generation",
+     model=my_config['model_name'],
+ )
+
+ time_load_model_end = time.time()
+ elapsed_time = time_load_model_end - time_load_model_start
+ print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
+
+
+ def get_answer(prompt):
+     # Gradio passes the text-box input as the argument; placeholder output for now.
+     return "tekst output"
+
+
+ # gr.ChatInterface(get_answer).launch() would give a chat-style UI instead.
+ demo = gr.Interface(fn=get_answer, inputs="text", outputs="text")
+ demo.launch(share=True)
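
The get_answer stub above ignores the model and returns a fixed string. A minimal sketch of wiring the loaded pipeline into the handler, applying the sampling settings from my_config at generation time, might look like the following; the [INST] ... [/INST] prompt wrapping is an assumption based on the Llama-2 chat format, and return_full_text=False strips the prompt from the output:

def get_answer(prompt):
    # Assumed Llama-2 chat prompt wrapping; adjust to the model's actual template.
    formatted = f"[INST] {prompt} [/INST]"
    outputs = llm(
        formatted,
        do_sample=my_config['do_sample'],
        temperature=my_config['temperature'],
        repetition_penalty=my_config['repetition_penalty'],
        max_new_tokens=my_config['max_new_tokens'],
        return_full_text=False,  # return only the newly generated text
    )
    return outputs[0]['generated_text']

Note that the commented-out gr.ChatInterface alternative passes (message, history) to its handler, so get_answer would need a second parameter before it could be used there.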