Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Gradio app for the LLM model --> use the `retr` environment.
# Run the script and open the link in the browser.
|
3 |
+
|
4 |
+
import os
|
5 |
+
import json
|
6 |
+
import pandas as pd
|
7 |
+
import datasets
|
8 |
+
import gradio as gr
|
9 |
+
import torch
|
10 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
11 |
+
|
12 |
+
# Checkpoint served by the demo.
# A previous value, kept for reference only, was the from-scratch training run
# with the latbert tokenizer:
#   'scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi/'
CHECKPOINT_PATH = 'itserr/latin_llm_alpha'

# Read the access token once; a missing variable raises KeyError at startup.
_hf_read_token = os.environ['HF_TOKEN_READ']

print(f"Loading model from: {CHECKPOINT_PATH}")
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)

# Preference dataset: its 'train' split is kept in memory as a pandas
# DataFrame and is updated by handle_preference().
preference_dataset_name = "itserr/latin_gpt_preferences"
# NOTE(review): 'force_redownload' presumably bypasses the local cache so the
# app starts from the latest pushed votes -- confirm against the datasets docs.
dataset_hf = datasets.load_dataset(
    preference_dataset_name,
    token=_hf_read_token,
    download_mode='force_redownload',
)['train'].to_pandas()
print(dataset_hf.shape)
|
25 |
+
|
26 |
+
# Markdown shown under the page title (rendered with gr.Markdown).
description = """
This is a Latin Language Model (LLM) based on GPT-2 and it was trained on a large corpus of Latin texts and can generate text in Latin. \n
Demo instructions:
- Enter a prompt in Latin in the Input Text box.
- Select the temperature value to control the randomness of the generated text (higher value produce a more creative and unstable answer).
- Click the 'Generate Text' button to trigger model generation.
- (Optional) insert a Feedback text in the box.
- Click the 'Like' or 'Dislike' button to judge the generation correctness.
"""

# Page heading; contains inline HTML (<sup>) and is embedded in an <h1> tag.
title = "(L<sup>2</sup>) - Latin Language Model"

# Placeholder footer text; the gr.Markdown call that would show it is commented out.
article = "hello world ..."

# Prompts offered as clickable examples for the input box.
examples = [
    'Accidere ex una scintilla',
    'Audacter calumniare,',
    'Consolatium misero comites',
]

# NOTE(review): absolute filesystem path -- confirm the file exists in the
# environment where the app is deployed.
logo_image = '/work/pnrr_itserr/latin_model/demo_gpt/ITSERR_row_logo.png'
|
39 |
+
|
40 |
+
# Shared text-generation pipeline; created on the first request, then reused.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, building it on first use."""
    global _text_generator
    if _text_generator is None:
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
            print("No GPU available")
        _text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return _text_generator


def generate_text(prompt, slider):
    """Generate text from ``prompt`` with the module-level model.

    The pipeline is built once and reused, instead of being re-created
    (together with the device selection) on every request.

    Args:
        prompt: Text passed to the text-generation pipeline.
        slider: Value forwarded to the pipeline as ``temperature``.

    Returns:
        The ``'generated_text'`` field of the first pipeline result
        (presumably the prompt followed by the sampled continuation --
        confirm against the pipeline's ``return_full_text`` default).
    """
    print("***** Generate *****")
    # NOTE(review): max_length presumably counts the prompt tokens as well,
    # so long prompts leave little room for new text -- confirm.
    results = _get_text_generator()(
        prompt,
        max_length=50,
        do_sample=True,
        temperature=slider,
        repetition_penalty=2.0,
        truncation=True,
    )
    return results[0]['generated_text']
|
51 |
+
|
52 |
+
# Helpers and entry point for recording user preferences.
def _split_generation(prompt, generated):
    """Split the output box content into ``(prompt, continuation)``.

    Returns ``("", "")`` when both texts are identical (nothing was
    generated). When ``generated`` starts with ``prompt`` the prefix is
    sliced off; this also covers an empty prompt, for which ``str.split``
    would raise ``ValueError``, and prompts whose text occurs again inside
    the continuation. Otherwise the original fallback is kept: the text
    after the last split on ``prompt``, or the whole output when the prompt
    does not occur in it.
    """
    if prompt == generated:
        return "", ""
    if generated.startswith(prompt):
        return prompt, generated[len(prompt):]
    # prompt is non-empty here, because "" is a prefix of every string.
    return prompt, generated.split(prompt)[-1]


def handle_preference(preference, input, output, feedback, temp_value):
    """Record a like/dislike vote and push the updated dataset to the Hub.

    Columns of the appended row:
        input_text, generated_text, feedback, temperature, like, dislike,
        count_like, count_dislike

    ``count_like`` / ``count_dislike`` are running totals taken from the
    last stored row (0 when the dataset is empty). The total matching the
    vote is incremented only when there is a non-empty continuation; the
    row itself is appended in every case.

    Args:
        preference: ``"like"`` or ``"dislike"``.
        input: Content of the input text box (the prompt).
        output: Content of the output text box.
        feedback: Free-text feedback from the user.
        temp_value: Temperature slider value; stored as ``float``.

    Returns:
        A confirmation message for the user.

    Raises:
        ValueError: If ``preference`` is neither ``"like"`` nor ``"dislike"``.
    """
    # First run started from a csv file (edited by hand); afterwards the data
    # lives in a parquet file.
    # NOTE(review): this read-modify-write of the module-level DataFrame is not
    # synchronized -- confirm whether the handler can run concurrently.
    global dataset_hf

    if preference not in ("like", "dislike"):
        raise ValueError(f"Unknown preference: {preference!r}")

    inp_text, out_text = _split_generation(input, output)

    like = 1 if preference == "like" else 0
    dislike = 1 - like

    # Running totals continue from the most recent row, if there is one.
    if dataset_hf.empty:
        count_like, count_dislike = 0, 0
    else:
        last_row = dataset_hf.iloc[-1]
        count_like = last_row['count_like']
        count_dislike = last_row['count_dislike']

    # Votes without any generated text do not change the totals.
    if out_text != "":
        if preference == "like":
            count_like = count_like + 1
        else:
            count_dislike = count_dislike + 1

    new_data = pd.DataFrame(
        {
            'input_text': inp_text,
            'generated_text': out_text,
            'feedback': feedback,
            'temperature': float(temp_value),
            'like': like,
            'dislike': dislike,
            'count_like': count_like,
            'count_dislike': count_dislike,
        },
        index=[0],
    )
    dataset_hf = pd.concat([dataset_hf, new_data], ignore_index=True)

    # Publish the whole table as the 'train' split of the preference dataset.
    hf_dataset = datasets.Dataset.from_pandas(dataset_hf)
    dataset_dict = datasets.DatasetDict({"train": hf_dataset})
    dataset_dict.push_to_hub(preference_dataset_name, token=os.environ['HF_TOKEN_WRITE'])

    # print dataset statistics
    print(f"Admin log: like: {count_like} and dislike: {count_dislike}")
    return f"You select '{preference}' as answer of the model generation. Thank you for your time!"
|
97 |
+
|
98 |
+
# Stylesheet passed to gr.Blocks; targets the element with id "logo".
custom_css = """
#logo {
display: block;
margin-left: auto;
margin-right: auto;
width: 280px;
height: 140px;
}
"""

with gr.Blocks(css=custom_css) as demo:
    # --- Header: logo, title and usage instructions ---
    gr.Image(logo_image, elem_id="logo")
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    # --- Prompt and generation side by side ---
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                placeholder="Enter latin text here...",
                label="Input Text",
            )
        with gr.Column():
            output_text = gr.Textbox(
                lines=5,
                placeholder="Output text will appear here...",
                label="Output Text",
            )

    gr.Examples(examples=examples, inputs=input_text)
    temperature_slider = gr.Slider(
        minimum=0.1,
        maximum=5.0,
        step=0.1,
        value=1.0,
        label="Temperature",
    )

    # --- Controls (components are created in the original order) ---
    clean_button = gr.Button("Generate Text")
    feedback_output = gr.Textbox(
        lines=1,
        placeholder="If you want to provide a feedback, please fill this box ...",
        label="Feedback",
    )

    with gr.Row():
        like_button = gr.Button("Like")
        dislike_button = gr.Button("Dislike")

    button_output = gr.Textbox(
        lines=1,
        placeholder="Please submit your choice",
        label="Latin Language Model Demo",
    )

    # --- Event wiring (registered in the original order) ---
    clean_button.click(
        fn=generate_text,
        inputs=[input_text, temperature_slider],
        outputs=output_text,
    )

    # Both vote buttons send the same four values to handle_preference.
    vote_inputs = [input_text, output_text, feedback_output, temperature_slider]
    like_button.click(
        fn=lambda prompt, generated, note, temp: handle_preference("like", prompt, generated, note, temp),
        inputs=vote_inputs,
        outputs=button_output,
    )
    dislike_button.click(
        fn=lambda prompt, generated, note, temp: handle_preference("dislike", prompt, generated, note, temp),
        inputs=vote_inputs,
        outputs=button_output,
    )
    # The footer text is currently not rendered:
    #gr.Markdown(article)

demo.launch(share=True, debug=True)
|