leandroaraujodev committed on
Commit 44e4658 · verified · 1 Parent(s): bbe9f97

Update app.py

Files changed (1)
  1. app.py +44 -44
app.py CHANGED
@@ -24,9 +24,9 @@ from llama_index.retrievers.bm25 import BM25Retriever
 from llama_index.core.retrievers import QueryFusionRetriever
 from llama_index.vector_stores.chroma import ChromaVectorStore
 from llama_index.core import VectorStoreIndex
-from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+# from llama_index.llms.huggingface import HuggingFaceLLM
+# from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 import chromadb
 
 #Configuração da imagem da aba
@@ -70,47 +70,47 @@ if sidebar_option == "gpt-3.5-turbo":
     from llama_index.embeddings.openai import OpenAIEmbedding
     Settings.llm = OpenAI(model="gpt-3.5-turbo")
     Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-ada-002")
-elif sidebar_option == 'NuExtract-1.5':
-    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
-    logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
-    #Embedding do huggingface
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name="BAAI/bge-small-en-v1.5"
-    )
-    #Carregamento do modelo local, descomentar o modelo desejado
-
-    llm = HuggingFaceLLM(
-        context_window=2048,
-        max_new_tokens=2048,
-        generate_kwargs={"do_sample": False},
-        #query_wrapper_prompt=query_wrapper_prompt,
-        #model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
-        #model_name="Qwen/Qwen2.5-14B-Instruct",
-        # model_name="meta-llama/Llama-3.2-3B",
-        #model_name="HuggingFaceH4/zephyr-7b-beta",
-        # model_name="meta-llama/Meta-Llama-3-8B",
-        model_name="numind/NuExtract-1.5",
-        #model_name="meta-llama/Llama-3.2-3B",
-        tokenizer_name="numind/NuExtract-1.5",
-        device_map="auto",
-        tokenizer_kwargs={"max_length": 512},
-        # uncomment this if using CUDA to reduce memory usage
-        model_kwargs={"torch_dtype": torch.bfloat16},
-    )
-    chat = [
-        {"role": "user", "content": "Hello, how are you?"},
-        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-        {"role": "user", "content": "I'd like to show off how chat templating works!"},
-    ]
-
-    from transformers import AutoTokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
-    tokenizer.apply_chat_template(chat, tokenize=False)
-
-    Settings.chunk_size = 512
-    Settings.llm = llm
+# elif sidebar_option == 'NuExtract-1.5':
+#     logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+#     logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+#     #Embedding do huggingface
+#     Settings.embed_model = HuggingFaceEmbedding(
+#         model_name="BAAI/bge-small-en-v1.5"
+#     )
+#     #Carregamento do modelo local, descomentar o modelo desejado
+
+#     llm = HuggingFaceLLM(
+#         context_window=2048,
+#         max_new_tokens=2048,
+#         generate_kwargs={"do_sample": False},
+#         #query_wrapper_prompt=query_wrapper_prompt,
+#         #model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
+#         #model_name="Qwen/Qwen2.5-14B-Instruct",
+#         # model_name="meta-llama/Llama-3.2-3B",
+#         #model_name="HuggingFaceH4/zephyr-7b-beta",
+#         # model_name="meta-llama/Meta-Llama-3-8B",
+#         model_name="numind/NuExtract-1.5",
+#         #model_name="meta-llama/Llama-3.2-3B",
+#         tokenizer_name="numind/NuExtract-1.5",
+#         device_map="auto",
+#         tokenizer_kwargs={"max_length": 512},
+#         # uncomment this if using CUDA to reduce memory usage
+#         model_kwargs={"torch_dtype": torch.bfloat16},
+#     )
+#     chat = [
+#         {"role": "user", "content": "Hello, how are you?"},
+#         {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
+#         {"role": "user", "content": "I'd like to show off how chat templating works!"},
+#     ]
+
+#     from transformers import AutoTokenizer
+
+#     tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
+#     tokenizer.apply_chat_template(chat, tokenize=False)
+
+#     Settings.chunk_size = 512
+#     Settings.llm = llm
 
 else:
     raise Exception("Opção de LLM inválida!")