File size: 3,051 Bytes
3ceffca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
from transformers import AutoTokenizer
from llm_engine import HuggingFaceEndpoint, ChatHuggingFace
from huggingface_hub import login
from PyPDF2 import PdfReader
from docx import Document
import csv
import json
import os

# Authenticate against the Hugging Face Hub only when a token is present
# in the environment; without one the login step is skipped.
huggingface_token = os.environ.get('HUGGINGFACE_TOKEN')
if huggingface_token:
    login(token=huggingface_token)

# Model configuration
@st.cache_resource
def load_llm():
    """Create the chat engine and the tokenizer for the Mistral instruct model.

    Decorated with ``st.cache_resource``.

    Returns:
        A ``(chat_engine, tokenizer)`` tuple: a ``ChatHuggingFace`` wrapper
        around a ``HuggingFaceEndpoint`` configured for text generation, and
        the ``AutoTokenizer`` loaded for the same repository id.
    """
    model_id = "mistralai/Mistral-7B-Instruct-v0.3"
    endpoint = HuggingFaceEndpoint(repo_id=model_id, task="text-generation")
    chat_engine = ChatHuggingFace(llm=endpoint)
    return chat_engine, AutoTokenizer.from_pretrained(model_id)

# Obtain the chat engine and tokenizer used by the rest of the script.
llm_engine_hf, tokenizer = load_llm()

# Page header.
st.title("LexAIcon")
st.write("Puedes conversar con este chatbot basado en Mistral7B-Instruct y subir archivos para que el chatbot los procese.")

# Make sure both conversation lists exist in the session state.
for state_key in ("generated", "past"):
    if state_key not in st.session_state:
        st.session_state[state_key] = []

def generate_response(prompt):
    """Send ``prompt`` to the module-level chat engine and return its reply.

    Args:
        prompt: The text forwarded unchanged to ``llm_engine_hf.invoke``.

    Returns:
        Whatever ``llm_engine_hf.invoke`` returns for the prompt.
    """
    return llm_engine_hf.invoke(prompt)

def handle_uploaded_file(uploaded_file):
    """Extract the textual content of an uploaded file.

    The parser is chosen from the extension of ``uploaded_file.name``,
    matched case-insensitively: ``.txt``, ``.pdf``, ``.docx``, ``.csv`` or
    ``.json``.

    Args:
        uploaded_file: Binary file-like object exposing a ``name`` attribute
            and a ``read()`` method returning bytes.

    Returns:
        The extracted text. For an unknown extension the message
        ``"Tipo de archivo no soportado."`` is returned, and if extraction
        raises, the exception's message is returned instead of propagating
        (best-effort behaviour kept for existing callers).
    """
    try:
        # Lower-case once so "REPORT.PDF" is treated like "report.pdf".
        filename = uploaded_file.name.lower()

        if filename.endswith(".txt"):
            # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM.
            return uploaded_file.read().decode("utf-8-sig")

        if filename.endswith(".pdf"):
            reader = PdfReader(uploaded_file)
            # `or ""` keeps the join safe if a page yields no text object.
            return "".join(page.extract_text() or "" for page in reader.pages)

        if filename.endswith(".docx"):
            doc = Document(uploaded_file)
            return "\n".join(para.text for para in doc.paragraphs)

        if filename.endswith(".csv"):
            # Keep line endings so newlines inside quoted fields survive
            # the csv parser instead of being silently dropped.
            lines = uploaded_file.read().decode("utf-8-sig").splitlines(keepends=True)
            return " ".join(" ".join(row) for row in csv.reader(lines))

        if filename.endswith(".json"):
            data = json.load(uploaded_file)
            # The text is shown to the user, so keep non-ASCII characters
            # readable rather than emitting \uXXXX escapes.
            return json.dumps(data, indent=4, ensure_ascii=False)

        return "Tipo de archivo no soportado."
    except Exception as e:
        return str(e)

# User input
user_input = st.text_input("Tú: ", "")

# Uploaded-file handling
uploaded_files = st.file_uploader(
    "Sube un archivo",
    type=["txt", "pdf", "docx", "csv", "json"],
    accept_multiple_files=True,
)

# Query the model only when the button is pressed with a non-empty message.
send_clicked = st.button("Enviar")
if send_clicked and user_input:
    response = generate_response(user_input)
    st.session_state["generated"].append({"user": user_input, "bot": response})

# Replay the conversation stored so far (nothing is written when it is empty).
for chat in st.session_state["generated"]:
    st.write(f"Tú: {chat['user']}")
    st.write(f"Chatbot: {chat['bot']}")

# Show the name and the extracted text of every uploaded file.
for uploaded_file in uploaded_files or []:
    st.write(f"Archivo subido: {uploaded_file.name}")
    file_content = handle_uploaded_file(uploaded_file)
    st.write(file_content)