StreamLight committed
Commit 2ca61f3 · verified · 1 parent: a4705d3

Upload 4 files

Files changed (3)
  1. app3.py +110 -0
  2. chapes-fluides.xlsx +0 -0
  3. requirements.txt +128 -0
app3.py ADDED
@@ -0,0 +1,110 @@
+ import os
+ import re
+
+ import faiss
+ import numpy as np
+ import pandas as pd
+ import streamlit as st
+ from huggingface_hub import InferenceClient
+ from sentence_transformers import SentenceTransformer
+
+
+ # The Hugging Face token is assumed to come from the environment (e.g. a Space secret)
+ API_TOKEN = os.environ.get("API_TOKEN")
+ headers = {"Authorization": f"Bearer {API_TOKEN}"}
+ API_URL = "https://api-inference.huggingface.co/models/"
+ df = pd.read_excel('chapes-fluides.xlsx')
+ inference_client = InferenceClient(token=API_TOKEN)
+
+ # Build a FAISS index over the embeddings of a text column
+ def create_index(data, text_column, model):
+     # Encode the text column to generate embeddings
+     embeddings = model.encode(data[text_column].tolist())
+
+     # Dimension of the embeddings
+     dimension = embeddings.shape[1]
+
+     # Prepare the embeddings and their IDs for FAISS
+     db_vectors = embeddings.astype(np.float32)
+     db_ids = np.arange(len(data)).astype(np.int64)
+
+     # Normalize the embeddings so that inner product equals cosine similarity
+     faiss.normalize_L2(db_vectors)
+
+     # Create and configure the FAISS index
+     index = faiss.IndexFlatIP(dimension)
+     index = faiss.IndexIDMap(index)
+     index.add_with_ids(db_vectors, db_ids)
+
+     return index, embeddings
+
+ # Vectorize a text with model.encode, normalized for the FAISS search
+ def vectorize_text(model, text):
+     # Encode the question to generate its embedding
+     question_embedding = model.encode([text])
+
+     # Convert to float32 for compatibility with FAISS
+     question_embedding = question_embedding.astype(np.float32)
+
+     # Normalize the embedding
+     faiss.normalize_L2(question_embedding)
+
+     return question_embedding
+
+ def extract_context(indices, df, i):
+     # indices has shape (n_queries, k); take the i-th hit of the first query
+     index_i = indices[0][i]
+     context = df.iloc[index_i]['text_segment']
+     return context
+
+ def generate_answer_from_context(context, client, model, prompt):
+     try:
+         # Call the serverless Inference API through InferenceClient
+         answer = client.text_generation(prompt=prompt, model=model, max_new_tokens=250)
+
+         # Drop any echo of the prompt up to the "Reponse:" marker
+         answer_cleaned = re.sub(r'^.*Reponse:', '', answer, flags=re.DOTALL).strip()
+         return answer_cleaned
+     except Exception as e:
+         print(f"Error encountered: {e}")
+         return None
+
+
+ # Load the embedding model and pick the generation model
+ model_sentence_transformers = SentenceTransformer('intfloat/multilingual-e5-base')
+ model_reponse_mixtral_instruct = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+
+ # Load the prebuilt index
+ index_reloaded = faiss.read_index("./index/chapes_fluides_e5.index")
+
+ # Number of nearest segments to retrieve
+ K = 2
+
+ # Streamlit app interface
+ st.title("CSTB App")
+
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+
+ if user_question := st.chat_input("Votre question : "):
+     # Vectorize the user question and search the FAISS index
+     st.session_state.messages.append({"role": "user", "content": user_question})
+     question_embedding = vectorize_text(model_sentence_transformers, user_question)
+     D, I = index_reloaded.search(question_embedding, K)  # question_embedding is already 2D
+
+     # Concatenate the context of the top K = 2 results
+     context = extract_context(I, df, 0) + ' ' + extract_context(I, df, 1)
+     prompts = [
+         f"Répondre à cette question : {user_question} en utilisant le contexte suivant {context}. Etre le plus précis possible et ne pas faire de phrase qui ne se finit pas \nReponse:"
+         # Alternative prompt:
+         # f"Contexte: {context}\nQuestion: {user_question}\nReponse:",
+     ]
+
+     # Generate an answer for each prompt
+     answers = [generate_answer_from_context(context, inference_client, model_reponse_mixtral_instruct, prompt) for prompt in prompts]
+
+     # Append the answers to the chat history
+     for answer in answers:
+         if answer:
+             st.session_state.messages.append({"role": "assistant", "content": answer})
+         else:
+             st.session_state.messages.append({"role": "assistant", "content": "Failed to generate an answer."})
+
+ # Render the chat history
+ for message in st.session_state.messages:
+     with st.chat_message(message["role"]):
+         st.markdown(message["content"])
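
Note: app3.py reads a prebuilt index from ./index/chapes_fluides_e5.index, which is not part of this commit. The following one-off script is a minimal sketch of how that file could be produced; the name build_index.py is hypothetical, and it simply mirrors the create_index() recipe from app3.py and the text_segment column that extract_context() expects:

# build_index.py -- hypothetical one-off script; mirrors create_index() in app3.py
import os

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

df = pd.read_excel("chapes-fluides.xlsx")
model = SentenceTransformer("intfloat/multilingual-e5-base")

# Normalized embeddings in an inner-product index, so scores are cosine similarities
embeddings = model.encode(df["text_segment"].tolist()).astype(np.float32)
faiss.normalize_L2(embeddings)
index = faiss.IndexIDMap(faiss.IndexFlatIP(embeddings.shape[1]))
index.add_with_ids(embeddings, np.arange(len(df)).astype(np.int64))

# Write the index where app3.py expects to find it
os.makedirs("index", exist_ok=True)
faiss.write_index(index, "./index/chapes_fluides_e5.index")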
chapes-fluides.xlsx ADDED
Binary file (61.7 kB).
 
requirements.txt ADDED
@@ -0,0 +1,128 @@
+ absl-py==2.0.0
+ altair==5.2.0
+ asttokens==2.4.1
+ astunparse==1.6.3
+ attrs==23.2.0
+ blinker==1.7.0
+ cachetools==5.3.2
+ certifi==2023.11.17
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ comm==0.2.0
+ debugpy==1.8.0
+ decorator==5.1.1
+ distlib==0.3.8
+ et-xmlfile==1.1.0
+ exceptiongroup==1.2.0
+ executing==2.0.1
+ faiss-cpu==1.7.4
+ filelock==3.13.1
+ flatbuffers==23.5.26
+ fsspec==2023.12.2
+ gast==0.4.0
+ gitdb==4.0.11
+ GitPython==3.1.40
+ google-auth==2.25.2
+ google-auth-oauthlib==1.2.0
+ google-pasta==0.2.0
+ grpcio==1.60.0
+ h5py==3.10.0
+ huggingface-hub==0.19.4
+ idna==3.6
+ importlib-metadata==6.11.0
+ ipykernel==6.27.1
+ ipython==8.18.1
+ jedi==0.19.1
+ Jinja2==3.1.2
+ joblib==1.3.2
+ jsonschema==4.20.0
+ jsonschema-specifications==2023.12.1
+ jupyter_client==8.6.0
+ jupyter_core==5.5.0
+ keras==2.15.0
+ Keras-Preprocessing==1.1.2
+ libclang==16.0.6
+ Markdown==3.5.1
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.3
+ matplotlib-inline==0.1.6
+ mdurl==0.1.2
+ ml-dtypes==0.2.0
+ mpmath==1.3.0
+ nest-asyncio==1.5.8
+ networkx==3.2.1
+ nltk==3.8.1
+ numpy==1.26.2
+ oauthlib==3.2.2
+ openpyxl==3.1.2
+ opt-einsum==3.3.0
+ packaging==23.2
+ pandas==2.1.4
+ parso==0.8.3
+ pillow==10.2.0
+ platformdirs==4.1.0
+ prompt-toolkit==3.0.43
+ protobuf==4.23.4
+ psutil==5.9.7
+ pure-eval==0.2.2
+ pyarrow==14.0.2
+ pyasn1==0.5.1
+ pyasn1-modules==0.3.0
+ pydeck==0.8.1b0
+ Pygments==2.17.2
+ PyMuPDF==1.23.7
+ PyMuPDFb==1.23.7
+ python-dateutil==2.8.2
+ pytz==2023.3.post1
+ pywin32==306
+ PyYAML==6.0.1
+ pyzmq==25.1.2
+ referencing==0.32.0
+ regex==2023.10.3
+ requests==2.31.0
+ requests-oauthlib==1.3.1
+ rich==13.7.0
+ rpds-py==0.16.2
+ rsa==4.9
+ safetensors==0.4.1
+ scikit-learn==1.3.2
+ scipy==1.11.4
+ sentence-transformers==2.2.2
+ sentencepiece==0.1.99
+ six==1.16.0
+ smmap==5.0.1
+ stack-data==0.6.3
+ streamlit==1.29.0
+ sympy==1.12
+ tenacity==8.2.3
+ tensorboard==2.15.1
+ tensorboard-data-server==0.7.2
+ tensorboard-plugin-wit==1.8.1
+ tensorflow==2.10.1
+ tensorflow-estimator==2.15.0
+ tensorflow-hub==0.15.0
+ tensorflow-io-gcs-filesystem==0.31.0
+ tensorflow-text==2.10.0
+ termcolor==2.4.0
+ threadpoolctl==3.2.0
+ tokenizers==0.15.0
+ toml==0.10.2
+ toolz==0.12.0
+ torch==2.1.2
+ torchvision==0.16.2
+ tornado==6.4
+ tqdm==4.66.1
+ traitlets==5.14.0
+ transformers==4.36.2
+ typing_extensions==4.9.0
+ tzdata==2023.3
+ tzlocal==5.2
+ urllib3==2.1.0
+ validators==0.22.0
+ virtualenv==20.25.0
+ watchdog==3.0.0
+ wcwidth==0.2.12
+ Werkzeug==3.0.1
+ wrapt==1.14.1
+ zipp==3.17.0
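
With these requirements installed (pip install -r requirements.txt) and the index file in place, the app would be started the usual Streamlit way, streamlit run app3.py, assuming the Hugging Face token is exposed to the process as the API_TOKEN environment variable that app3.py reads.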