leandroaraujodev committed on
Commit 44e4658 · verified · 1 Parent(s): bbe9f97

Update app.py

Files changed (1)
  1. app.py +44 -44
app.py CHANGED
@@ -24,9 +24,9 @@ from llama_index.retrievers.bm25 import BM25Retriever
 from llama_index.core.retrievers import QueryFusionRetriever
 from llama_index.vector_stores.chroma import ChromaVectorStore
 from llama_index.core import VectorStoreIndex
-from llama_index.llms.huggingface import HuggingFaceLLM
-from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
-from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+# from llama_index.llms.huggingface import HuggingFaceLLM
+# from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 import chromadb
 
 #Configuração da imagem da aba
@@ -70,47 +70,47 @@ if sidebar_option == "gpt-3.5-turbo":
     from llama_index.embeddings.openai import OpenAIEmbedding
     Settings.llm = OpenAI(model="gpt-3.5-turbo")
     Settings.embed_model = OpenAIEmbedding(model_name="text-embedding-ada-002")
-elif sidebar_option == 'NuExtract-1.5':
-    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
-    logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
-    #Embedding do huggingface
-    Settings.embed_model = HuggingFaceEmbedding(
-        model_name="BAAI/bge-small-en-v1.5"
-    )
-    #Carregamento do modelo local, descomentar o modelo desejado
-
-    llm = HuggingFaceLLM(
-        context_window=2048,
-        max_new_tokens=2048,
-        generate_kwargs={"do_sample": False},
-        #query_wrapper_prompt=query_wrapper_prompt,
-        #model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
-        #model_name="Qwen/Qwen2.5-14B-Instruct",
-        # model_name="meta-llama/Llama-3.2-3B",
-        #model_name="HuggingFaceH4/zephyr-7b-beta",
-        # model_name="meta-llama/Meta-Llama-3-8B",
-        model_name="numind/NuExtract-1.5",
-        #model_name="meta-llama/Llama-3.2-3B",
-        tokenizer_name="numind/NuExtract-1.5",
-        device_map="auto",
-        tokenizer_kwargs={"max_length": 512},
-        # uncomment this if using CUDA to reduce memory usage
-        model_kwargs={"torch_dtype": torch.bfloat16},
-    )
-    chat = [
-        {"role": "user", "content": "Hello, how are you?"},
-        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-        {"role": "user", "content": "I'd like to show off how chat templating works!"},
-    ]
-
-    from transformers import AutoTokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
-    tokenizer.apply_chat_template(chat, tokenize=False)
-
-    Settings.chunk_size = 512
-    Settings.llm = llm
+# elif sidebar_option == 'NuExtract-1.5':
+#     logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+#     logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+#     #Embedding do huggingface
+#     Settings.embed_model = HuggingFaceEmbedding(
+#         model_name="BAAI/bge-small-en-v1.5"
+#     )
+#     #Carregamento do modelo local, descomentar o modelo desejado
+
+#     llm = HuggingFaceLLM(
+#         context_window=2048,
+#         max_new_tokens=2048,
+#         generate_kwargs={"do_sample": False},
+#         #query_wrapper_prompt=query_wrapper_prompt,
+#         #model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
+#         #model_name="Qwen/Qwen2.5-14B-Instruct",
+#         # model_name="meta-llama/Llama-3.2-3B",
+#         #model_name="HuggingFaceH4/zephyr-7b-beta",
+#         # model_name="meta-llama/Meta-Llama-3-8B",
+#         model_name="numind/NuExtract-1.5",
+#         #model_name="meta-llama/Llama-3.2-3B",
+#         tokenizer_name="numind/NuExtract-1.5",
+#         device_map="auto",
+#         tokenizer_kwargs={"max_length": 512},
+#         # uncomment this if using CUDA to reduce memory usage
+#         model_kwargs={"torch_dtype": torch.bfloat16},
+#     )
+#     chat = [
+#         {"role": "user", "content": "Hello, how are you?"},
+#         {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
+#         {"role": "user", "content": "I'd like to show off how chat templating works!"},
+#     ]
+
+#     from transformers import AutoTokenizer
+
+#     tokenizer = AutoTokenizer.from_pretrained("numind/NuExtract-1.5")
+#     tokenizer.apply_chat_template(chat, tokenize=False)
+
+#     Settings.chunk_size = 512
+#     Settings.llm = llm
 
 else:
     raise Exception("Opção de LLM inválida!")