fede97 commited on
Commit
aac6e8d
·
verified ·
1 Parent(s): 44460a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gradio app for the LLM model --> use the retr environment
2
+ # Run the script and open the link in the browser.
3
+
4
+ import os
5
+ import json
6
+ import pandas as pd
7
+ import datasets
8
+ import gradio as gr
9
+ import torch
10
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
11
+
12
# Hub id of the checkpoint served by this demo.
# An earlier run used the local checkpoint trained from scratch with the
# latbert tokenizer:
#   'scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi/'
CHECKPOINT_PATH = 'itserr/latin_llm_alpha'

# Read-only Hub token; a missing variable raises KeyError at start-up.
_hf_read_token = os.environ['HF_TOKEN_READ']

print(f"Loading model from: {CHECKPOINT_PATH}")
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)

# User-preference dataset: fetched fresh at start-up (cached copies are
# bypassed) and kept in memory as a pandas DataFrame in `dataset_hf`.
preference_dataset_name = "itserr/latin_gpt_preferences"
dataset_hf = datasets.load_dataset(
    preference_dataset_name,
    token=_hf_read_token,
    download_mode='force_redownload',
)['train'].to_pandas()
print(dataset_hf.shape)
25
+
26
# --- Static content displayed by the demo page ---

# Page heading (rendered as HTML, hence the <sup> tag).
title = "(L<sup>2</sup>) - Latin Language Model"

# Footer text placeholder.
article = "hello world ..."

# Latin prompts offered as clickable examples.
examples = [
    'Accidere ex una scintilla',
    'Audacter calumniare,',
    'Consolatium misero comites',
]

# Path of the logo image shown at the top of the page.
logo_image = '/work/pnrr_itserr/latin_model/demo_gpt/ITSERR_row_logo.png'

# Markdown introduction and usage instructions.
description = """
This is a Latin Language Model (LLM) based on GPT-2 and it was trained on a large corpus of Latin texts and can generate text in Latin. \n
Demo instructions:
- Enter a prompt in Latin in the Input Text box.
- Select the temperature value to control the randomness of the generated text (higher value produce a more creative and unstable answer).
- Click the 'Generate Text' button to trigger model generation.
- (Optional) insert a Feedback text in the box.
- Click the 'Like' or 'Dislike' button to judge the generation correctness.
"""
39
+
40
# Text-generation pipeline shared by all requests; built on first use.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first call."""
    global _text_generator
    if _text_generator is None:
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
            print("No GPU available")
        _text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return _text_generator


def generate_text(prompt, slider):
    """Generate a sampled continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text typed by the user in the input box.
        slider: Sampling temperature selected in the UI.

    Returns:
        The ``'generated_text'`` field of the first pipeline result.
    """
    print("***** Generate *****")
    # Reuse one pipeline instead of rebuilding it (and re-placing the model
    # on the device) for every click.
    text_generator = _get_text_generator()
    # NOTE(review): the slider may deliver whole-number values as int, while
    # the sampler presumably expects a float temperature, so coerce explicitly.
    generated_text = text_generator(
        prompt,
        max_length=50,
        do_sample=True,
        temperature=float(slider),
        repetition_penalty=2.0,
        truncation=True,
    )
    return generated_text[0]['generated_text']
51
+
52
# Function to handle user preferences
def handle_preference(preference, input, output, feedback, temp_value):
    """Store a like/dislike vote for a generation and push it to the Hub.

    A row with the columns ``input_text, generated_text, feedback,
    temperature, like, dislike, count_like, count_dislike`` is appended to the
    in-memory ``dataset_hf`` DataFrame, and the whole table is then uploaded
    as the ``train`` split of ``preference_dataset_name``.

    Args:
        preference: Either ``"like"`` or ``"dislike"``.
        input: Prompt text from the input box.
        output: Text from the output box.
        feedback: Optional free-text feedback from the user.
        temp_value: Temperature used for the generation; stored as ``float``.

    Returns:
        A confirmation message to show to the user.

    Raises:
        ValueError: If ``preference`` is neither ``"like"`` nor ``"dislike"``.
    """
    global dataset_hf

    if preference not in ("like", "dislike"):
        raise ValueError(f"Unknown preference: {preference!r}")

    # Separate the prompt from the text that follows it in the output box.
    if input == output:
        # Nothing was generated beyond the prompt: store an empty pair.
        inp_text, out_text = "", ""
    elif not input:
        # An empty separator cannot be searched for; keep the output whole.
        inp_text, out_text = input, output
    else:
        # Keep everything after the FIRST occurrence of the prompt, so a
        # generation that repeats the prompt is not truncated.
        _, found, continuation = output.partition(input)
        inp_text = input
        out_text = continuation if found else output

    # Running totals are carried in the last stored row; start from zero when
    # the dataset has no rows yet.
    if len(dataset_hf) > 0:
        last_row = dataset_hf.iloc[-1]
        count_like = last_row['count_like']
        count_dislike = last_row['count_dislike']
    else:
        count_like = 0
        count_dislike = 0

    like = 1 if preference == "like" else 0
    dislike = 1 - like

    # Votes on an empty generation are stored but do not move the counters.
    if out_text != "":
        if like:
            count_like = count_like + 1
        else:
            count_dislike = count_dislike + 1

    new_data = pd.DataFrame({'input_text': inp_text, 'generated_text': out_text, 'feedback': feedback,
                             'temperature': float(temp_value), 'like': like, 'dislike': dislike,
                             'count_like': count_like, 'count_dislike': count_dislike}, index=[0])
    dataset_hf = pd.concat([dataset_hf, new_data], ignore_index=True)
    hf_dataset = datasets.Dataset.from_pandas(dataset_hf)
    dataset_dict = datasets.DatasetDict({"train": hf_dataset})
    dataset_dict.push_to_hub(preference_dataset_name, token=os.environ['HF_TOKEN_WRITE'])

    # print dataset statistics
    print(f"Admin log: like: {count_like} and dislike: {count_dislike}")
    return f"You select '{preference}' as answer of the model generation. Thank you for your time!"
97
+
98
# Stylesheet for the element with id "logo": centred, fixed 280x140 box.
custom_css = """
#logo {
display: block;
margin-left: auto;
margin-right: auto;
width: 280px;
height: 140px;
}
"""

# Build the page. Components are laid out in the order they are created.
with gr.Blocks(css=custom_css) as demo:
    # Header: logo, centred title, usage instructions.
    gr.Image(logo_image, elem_id="logo")
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    # Prompt on the left, model output on the right.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                placeholder="Enter latin text here...",
                label="Input Text",
            )
        with gr.Column():
            output_text = gr.Textbox(
                lines=5,
                placeholder="Output text will appear here...",
                label="Output Text",
            )

    gr.Examples(examples=examples, inputs=input_text)
    temperature_slider = gr.Slider(
        minimum=0.1,
        maximum=5.0,
        step=0.1,
        value=1.0,
        label="Temperature",
    )

    # Generation trigger.
    clean_button = gr.Button("Generate Text")
    clean_button.click(
        fn=generate_text,
        inputs=[input_text, temperature_slider],
        outputs=output_text,
    )
    feedback_output = gr.Textbox(
        lines=1,
        placeholder="If you want to provide a feedback, please fill this box ...",
        label="Feedback",
    )

    # Voting buttons, side by side.
    with gr.Row():
        like_button = gr.Button("Like")
        dislike_button = gr.Button("Dislike")

    button_output = gr.Textbox(
        lines=1,
        placeholder="Please submit your choice",
        label="Latin Language Model Demo",
    )

    # Both votes send the same four values to handle_preference.
    preference_inputs = [input_text, output_text, feedback_output, temperature_slider]
    like_button.click(
        fn=lambda x, y, z, v: handle_preference("like", x, y, z, v),
        inputs=preference_inputs,
        outputs=button_output,
    )
    dislike_button.click(
        fn=lambda x, y, z, v: handle_preference("dislike", x, y, z, v),
        inputs=preference_inputs,
        outputs=button_output,
    )
    # gr.Markdown(article)

# Start the server with a public share link and debug mode enabled.
demo.launch(share=True, debug=True)