Spaces:

parkerjj
/

BuckLakeAI

Sleeping

App Files Files Community

parkerjj committed on Nov 19, 2024

Commit

4e6d2ce

1 Parent(s): 8bf0955

从 Hugging Face Hub 下载 Word2Vec 模型，移除本地路径搜索逻辑

Browse files

Files changed (1) hide show

preprocess.py +27 -33

preprocess.py CHANGED Viewed

@@ -71,7 +71,7 @@ class LazyWord2Vec:
     @property
     def model(self):
         if self._model is None:
-            print("Loading Word2Vec model...")
             self._model = KeyedVectors.load(self.model_path, mmap='r')
         return self._model
@@ -88,43 +88,37 @@ class LazyWord2Vec:
         return key in self.model
 # 加载预训练的 Google News Word2Vec 模型
-# 定义路径列表
-search_paths = ["/BuckLake/Model/",
-                "/Users/parker/Development/Server/BuckLake/Model/",
-                "/Users/liuyue/Work/BuckLake/Model/"]
-# 获取当前文件所在目录的路径
-current_directory = os.getcwd()
-print(f"Current directory: {current_directory}")
-current_directory = os.path.dirname(os.path.abspath(__file__))
-# 添加相对于当前项目的路径
-# search_paths.insert(0, os.path.join(current_directory, 'model'))
-search_paths.insert(1, os.path.join(current_directory, '..', 'Model'))
-# 定义相对路径
-filename = 'word2vec-google-news-300.model'
-# 初始化word2vec_path为None
-word2vec_path = None
-# 遍历路径列表
-for path in search_paths:
-    potential_path = os.path.join(path, filename)
-    if os.path.exists(potential_path):
-        word2vec_path = potential_path
-        break
-    else:
-        print(f"{potential_path} not found.")
-# 如果找到路径，加载模型
-if word2vec_path:
-    print(f"Loading Word2Vec model from {word2vec_path}...")
-    word2vec_model = LazyWord2Vec(word2vec_path)
-else:
-    raise FileNotFoundError(f"{filename} not found in any of the search paths: {search_paths}")
 def pos_tagging(text):

     @property
     def model(self):
         if self._model is None:
+            print(f"Loading Word2Vec model from path: {self.model_path}...")
             self._model = KeyedVectors.load(self.model_path, mmap='r')
         return self._model
         return key in self.model
 # 加载预训练的 Google News Word2Vec 模型
+# 定义模型名称
+from huggingface_hub import hf_hub_download
+import os
+# 定义 Hugging Face 的 repository 信息
+repo_id = "fse/word2vec-google-news-300"  # 替换为实际的仓库ID
+filename = "word2vec-google-news-300.model"  # 文件名
+# 确保本地保存目录存在
+#os.makedirs(local_model_path, exist_ok=True)
+# 尝试从 Hugging Face 下载模型文件
+try:
+    print(f"Downloading {filename} from Hugging Face Hub...")
+    downloaded_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=filename
+    )
+    downloaded_path_npy = hf_hub_download(
+        repo_id=repo_id,
+        filename="word2vec-google-news-300.model.vectors.npy"
+    )
+    print(f"Model downloaded to {downloaded_path}")
+except Exception as e:
+    raise RuntimeError(f"Failed to download {filename} from Hugging Face Hub: {e}")
+# 加载模型
+print(f"Loading Word2Vec model from {downloaded_path}...")
+word2vec_model = LazyWord2Vec(downloaded_path)
 def pos_tagging(text):