Upload folder using huggingface_hub
Test_RAG.py +39 -38
Test_RAG.py
CHANGED
@@ -282,50 +282,51 @@ print(f"Loading model from {model_dir}")
 
 ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}
 
-
-# llm = HuggingFacePipeline.from_model_id(
-#     model_id="meta-llama/Meta-Llama-3-8B",
-#     #meta-llama/Meta-Llama-3-8B------------/meta-llama/Llama-3.2-3B-Instruct
-#     task="text-generation",
-#     backend="openvino",
-#     model_kwargs={
-#         "device": llm_device,
-#         "ov_config": ov_config,
-#         "trust_remote_code": True,
-#     },
-#     pipeline_kwargs={"max_new_tokens": 2},
-# )
-from optimum.intel.openvino import OVModelForCausalLM
-from transformers import pipeline
 print("starting setting llm model")
+llm = HuggingFacePipeline.from_model_id(
+    model_id="meta-llama/Meta-Llama-3-8B",
+    task="text-generation",
+    backend="openvino",
+    model_kwargs={
+        "device": llm_device.value,
+        "ov_config": ov_config,
+        "trust_remote_code": True,
+    },
+    pipeline_kwargs={"max_new_tokens": 2},
+)
 
-
-
+print(llm.invoke("2 + 2 ="))
+# from optimum.intel.openvino import OVModelForCausalLM
+# from transformers import pipeline
 
-# Export the model with OpenVINO
-model = OVModelForCausalLM.from_pretrained(
-    model_id,
-    export=True,  # convert the model to OpenVINO format
-    use_cache=False,
-    ov_config=ov_config,
-    trust_remote_code=True  # allow trusting remote code
-)
 
-#
-
+# model_id = "meta-llama/Meta-Llama-3-8B"
+# ov_config = {"PERFORMANCE_HINT": "LATENCY"}  # this is only an example; check your actual ov_config
 
-#
-
-
-
-
-
-
-)
+# # Export the model with OpenVINO
+# model = OVModelForCausalLM.from_pretrained(
+#     model_id,
+#     export=True,  # convert the model to OpenVINO format
+#     use_cache=False,
+#     ov_config=ov_config,
+#     trust_remote_code=True  # allow trusting remote code
+# )
+
+# # Save the OpenVINO model
+# model.save_pretrained("./openvino_llama_model")
+
+# # Step 2: load the saved OpenVINO model and set up the inference task
+# llm_device = "CPU"  # make sure the device matches your environment
+# llm = pipeline(
+#     task="text-generation",
+#     model=OVModelForCausalLM.from_pretrained("./openvino_llama_model"),
+#     device=llm_device,
+#     max_new_tokens=2  # maximum number of new tokens to generate
+# )
 
-# Step 3: run inference
-output = llm("2 + 2 =")
-print(output)
+# # Step 3: run inference
+# output = llm("2 + 2 =")
+# print(output)
 
 # print("test:2+2:")
 # print(llm.invoke("2 + 2 ="))
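Note on trying the change in isolation: the commit swaps a hand-rolled OVModelForCausalLM export/reload path for LangChain's HuggingFacePipeline on the OpenVINO backend (via optimum-intel). The sketch below is a minimal, self-contained version of the enabled path, not the full Test_RAG.py: it assumes langchain-huggingface and optimum[openvino] are installed, hard-codes "CPU" where the script uses llm_device.value, and keeps the gated meta-llama/Meta-Llama-3-8B id from the diff (any downloadable causal LM works for a smoke test).

# Minimal sketch of the path this commit enables (assumptions noted above, not the full script).
from langchain_huggingface import HuggingFacePipeline

ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": ""}

llm = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Meta-Llama-3-8B",  # gated; substitute any accessible causal LM
    task="text-generation",
    backend="openvino",                     # exports/loads the model through optimum-intel
    model_kwargs={
        "device": "CPU",                    # stands in for llm_device.value in Test_RAG.py
        "ov_config": ov_config,
        "trust_remote_code": True,
    },
    pipeline_kwargs={"max_new_tokens": 2},
)

print(llm.invoke("2 + 2 ="))                # same sanity check the commit adds

max_new_tokens=2 is only enough for this arithmetic check; a real RAG answer later in the script would presumably need a much larger token budget.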