VicGerardoPR committed on
Commit
61d9097
verified
1 Parent(s): b351fea

Update app.py

Files changed (1)
  1. app.py +84 -33
app.py CHANGED
@@ -1,41 +1,92 @@
  import streamlit as st
- import requests
- import os

- # Get the token from the secrets
- API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-Guard-2-8B"
- headers = {"Authorization": f"Bearer {os.getenv('YOUR_HUGGING_FACE_TOKEN')}"}
-
- def query(payload):
-     response = requests.post(API_URL, headers=headers, json=payload)
      try:
-         response.raise_for_status()
-         return response.json()
-     except requests.exceptions.HTTPError as err:
-         st.error(f"HTTP error occurred: {err}")
-     except Exception as err:
-         st.error(f"Other error occurred: {err}")
-     return None

- st.title("LLaMA Chatbot")
- st.subheader("Ask anything to the LLaMA model!")

- user_input = st.text_input("You: ")
- if user_input:
-     output = query({"inputs": user_input})

-     # Print the full response for debugging
-     st.write("API response:", output)

-     if output:
-         # Check the possible keys in the response
-         if "generated_text" in output:
-             response = output["generated_text"]
-         elif isinstance(output, list) and "generated_text" in output[0]:
-             response = output[0]["generated_text"]
-         else:
-             response = "Sorry, I couldn't generate a response."
-     else:
-         response = "Sorry, I couldn't generate a response."

-     st.write(f"Chatbot: {response}")
 
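Side note on the removed code: it read the token with os.getenv('YOUR_HUGGING_FACE_TOKEN'), so an unset secret silently produced an "Authorization: Bearer None" header. A minimal hardened sketch of that request pattern (the missing-token guard and the timeout are illustrative additions, not part of this commit):

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-Guard-2-8B"

def query(payload):
    # Fail fast instead of sending "Bearer None" when the secret is missing
    token = os.getenv("YOUR_HUGGING_FACE_TOKEN")
    if not token:
        raise RuntimeError("YOUR_HUGGING_FACE_TOKEN is not set")
    response = requests.post(
        API_URL,
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
        timeout=30,  # illustrative; the removed code could hang indefinitely
    )
    response.raise_for_status()
    return response.json()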
  import streamlit as st
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     pipeline,
+     BitsAndBytesConfig
+ )
+ import torch

+ # 1. Model configuration
+ @st.cache_resource
+ def load_model():
      try:
+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.float16
+         )
+
+         model = AutoModelForCausalLM.from_pretrained(
+             "microsoft/Phi-3-mini-4k-instruct",
+             device_map="auto",
+             quantization_config=quantization_config,
+             trust_remote_code=True
+         )
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             "microsoft/Phi-3-mini-4k-instruct"
+         )
+
+         return model, tokenizer
+
+     except Exception as e:
+         st.error(f"Error loading the model: {str(e)}")
+         return None, None
+
+ # 2. Streamlit interface
+ st.title("🤖 M1-Optimized Chatbot")
+ st.markdown("Using Microsoft Phi-3-mini - [Hugging Face](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct)")
+
+ # 3. Session initialization
+ if "messages" not in st.session_state:
+     st.session_state.messages = [
+         {"role": "assistant", "content": "Hello! I'm your AI assistant. How can I help you?"}
+     ]

+ # 4. Model loading
+ model, tokenizer = load_model()

+ # 5. Generation function
+ def generate_response(prompt):
+     try:
+         messages = [
+             {"role": "user", "content": prompt}
+         ]
+
+         inputs = tokenizer.apply_chat_template(
+             messages,
+             return_tensors="pt"
+         ).to(model.device)
+
+         outputs = model.generate(
+             inputs,
+             max_new_tokens=512,
+             temperature=0.7,
+             top_p=0.9,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id
+         )
+
+         return tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

+     except Exception as e:
+         return f"Error generating response: {str(e)}"
+
+ # 6. User interaction
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
+
+ if prompt := st.chat_input("Type your message..."):
+     # Show the user's input
+     st.session_state.messages.append({"role": "user", "content": prompt})
+     with st.chat_message("user"):
+         st.markdown(prompt)

+     # Generate the response
+     with st.chat_message("assistant"):
+         with st.spinner("Thinking..."):
+             response = generate_response(prompt)
+             st.markdown(response)

+     st.session_state.messages.append({"role": "assistant", "content": response})
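For deployment, the new app.py implies more than streamlit: device_map="auto" relies on accelerate, and BitsAndBytesConfig(load_in_4bit=True) relies on bitsandbytes. A plausible requirements.txt sketch (package set inferred from the imports; no versions are specified by the commit):

streamlit
torch
transformers
accelerate      # required for device_map="auto"
bitsandbytes    # required for load_in_4bit=True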
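One caveat on the "M1-Optimized" title: bitsandbytes 4-bit quantization generally requires a CUDA GPU, so on Apple Silicon load_model() would likely hit its except branch. A hedged fallback sketch, loading in half precision on the MPS backend instead of 4-bit quantization (illustrative, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Prefer Apple's Metal backend when available, otherwise CPU
device = "mps" if torch.backends.mps.is_available() else "cpu"

model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-3-mini-4k-instruct",
    torch_dtype=torch.float16,  # half precision instead of 4-bit quantization
    trust_remote_code=True,
).to(device)
tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")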