Spaces:

Francesco26061993
/

RacoGPT

Sleeping

App Files Files Community

Francesco26061993 committed on Nov 6, 2024

Commit

daa2868

1 Parent(s): 47793bc

Stop bot response generation

Browse files

Files changed (1) hide show

app.py +28 -9

app.py CHANGED Viewed

@@ -15,8 +15,8 @@ if not st.session_state["is_logged_in"]:
     st.stop()
 # Recupera le secrets da Hugging Face
-model_repo = os.getenv("MODEL_REPO")  # Repository del modello di base
-hf_token = os.getenv("HF_TOKEN")  # Token Hugging Face
 # Carica il modello di base con caching
 @st.cache_resource
@@ -26,7 +26,7 @@ def load_model():
     model.config.use_cache = True
     return tokenizer, model
-# Funzione per generare una risposta in tempo reale
 def generate_llama_response_stream(user_input, tokenizer, model, max_length=512):
     eos_token = tokenizer.eos_token if tokenizer.eos_token else ""
     input_ids = tokenizer.encode(user_input + eos_token, return_tensors="pt")
@@ -36,6 +36,9 @@ def generate_llama_response_stream(user_input, tokenizer, model, max_length=512)
     # Genera un token alla volta e aggiorna il placeholder
     for i in range(max_length):
         output = model.generate(input_ids, max_new_tokens=1, pad_token_id=tokenizer.eos_token_id, use_cache=True)
         new_token_id = output[:, -1].item()
         new_token = tokenizer.decode([new_token_id], skip_special_tokens=True)
@@ -50,6 +53,8 @@ def generate_llama_response_stream(user_input, tokenizer, model, max_length=512)
         if new_token_id == tokenizer.eos_token_id:
             break
     return response_text
 # Inizializza lo stato della sessione
@@ -62,6 +67,9 @@ if 'msg' not in ss:
 if 'chat_history' not in ss:
     ss.chat_history = None
 # Carica il modello e tokenizer
 tokenizer, model = load_model()
@@ -74,21 +82,32 @@ for message in ss.msg:
         with st.chat_message("RacoGPT"):
             st.markdown(f"**RacoGPT:** {message['content']}")
-# Gestione dell'input e disabilitazione
-if (prompt := st.chat_input("Scrivi il tuo messaggio...", disabled=ss.is_chat_input_disabled) or "").strip():
-    # Salva il messaggio dell'utente e disabilita l'input
-    if not ss.is_chat_input_disabled:
         ss.msg.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             ss.is_chat_input_disabled = True
             st.markdown(f"**Tu:** {prompt}")
     # Genera la risposta del bot con digitazione in tempo reale
     with st.spinner("RacoGPT sta generando una risposta..."):
-        response = generate_llama_response_stream(prompt, tokenizer, model)
-    # Mostra il messaggio finale del bot dopo che la risposta è completata
     ss.msg.append({"role": "RacoGPT", "content": response})
     with st.chat_message("RacoGPT"):
         st.markdown(f"**RacoGPT:** {response}")

     st.stop()
 # Recupera le secrets da Hugging Face
+model_repo = st.secrets["MODEL_REPO"]  # Repository del modello di base
+hf_token = st.secrets["HF_TOKEN"]  # Token Hugging Face
 # Carica il modello di base con caching
 @st.cache_resource
     model.config.use_cache = True
     return tokenizer, model
+# Funzione per generare una risposta in tempo reale con supporto per l'interruzione
 def generate_llama_response_stream(user_input, tokenizer, model, max_length=512):
     eos_token = tokenizer.eos_token if tokenizer.eos_token else ""
     input_ids = tokenizer.encode(user_input + eos_token, return_tensors="pt")
     # Genera un token alla volta e aggiorna il placeholder
     for i in range(max_length):
+        if ss.get("stop_generation", False):
+            break  # Interrompe il ciclo se l'utente ha premuto "stop"
         output = model.generate(input_ids, max_new_tokens=1, pad_token_id=tokenizer.eos_token_id, use_cache=True)
         new_token_id = output[:, -1].item()
         new_token = tokenizer.decode([new_token_id], skip_special_tokens=True)
         if new_token_id == tokenizer.eos_token_id:
             break
+    # Reimposta lo stato di "stop"
+    ss.stop_generation = False
     return response_text
 # Inizializza lo stato della sessione
 if 'chat_history' not in ss:
     ss.chat_history = None
+if 'stop_generation' not in ss:
+    ss.stop_generation = False
 # Carica il modello e tokenizer
 tokenizer, model = load_model()
         with st.chat_message("RacoGPT"):
             st.markdown(f"**RacoGPT:** {message['content']}")
+# Contenitore per gestire la mutua esclusione tra input e pulsante di stop
+input_container = st.empty()
+if not ss.is_chat_input_disabled:
+    # Mostra la barra di input per inviare il messaggio
+    with input_container:
+        prompt = st.chat_input("Scrivi il tuo messaggio...")
+    if prompt:
+        # Salva il messaggio dell'utente e disabilita l'input
         ss.msg.append({"role": "user", "content": prompt})
         with st.chat_message("user"):
             ss.is_chat_input_disabled = True
             st.markdown(f"**Tu:** {prompt}")
+            st.rerun()
+else:
+    # Mostra il pulsante di "Stop Generazione" al posto della barra di input
+    with input_container:
+        if st.button("🛑 Stop Generazione", key="stop_button"):
+            ss.stop_generation = True  # Interrompe la generazione impostando il flag
     # Genera la risposta del bot con digitazione in tempo reale
     with st.spinner("RacoGPT sta generando una risposta..."):
+        response = generate_llama_response_stream(ss.msg[-1]['content'], tokenizer, model)
+    # Mostra il messaggio finale del bot dopo che la risposta è completata o interrotta
     ss.msg.append({"role": "RacoGPT", "content": response})
     with st.chat_message("RacoGPT"):
         st.markdown(f"**RacoGPT:** {response}")