convert-to-onnx_timestamped

Running

App Files Files Community

urroxyz committed on Apr 25

Commit

b2ef551

verified ·

1 Parent(s): ea01d50

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -133

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from huggingface_hub import HfApi, whoami
 from torch.jit import TracerWarning
 from transformers import AutoConfig, GenerationConfig
-# Suppress TorchScript tracer warnings in this process
 warnings.filterwarnings("ignore", category=TracerWarning)
 logging.basicConfig(level=logging.INFO)
@@ -22,8 +22,6 @@ logger = logging.getLogger(__name__)
 @dataclass
 class Config:
-    """Application configuration."""
     hf_token: str
     hf_username: str
     transformers_version: str = "3.5.0"
@@ -35,7 +33,6 @@ class Config:
     @classmethod
     def from_env(cls) -> "Config":
-        """Create config from environment variables and secrets."""
         system_token = st.secrets.get("HF_TOKEN")
         user_token = st.session_state.get("user_hf_token")
         if user_token:
@@ -45,22 +42,17 @@ class Config:
                 os.getenv("SPACE_AUTHOR_NAME") or whoami(token=system_token)["name"]
             )
         hf_token = user_token or system_token
         if not hf_token:
             raise ValueError("HF_TOKEN must be set")
         return cls(hf_token=hf_token, hf_username=hf_username)
 class ModelConverter:
-    """Handles model conversion and upload operations."""
     def __init__(self, config: Config):
         self.config = config
         self.api = HfApi(token=config.hf_token)
     def _get_ref_type(self) -> str:
-        """Determine the reference type for the transformers repository."""
         url = f"{self.config.transformers_base_url}/tags/{self.config.transformers_version}.tar.gz"
         try:
             return "tags" if urlopen(url).getcode() == 200 else "heads"
@@ -69,14 +61,11 @@ class ModelConverter:
             return "heads"
     def setup_repository(self) -> None:
-        """Download and setup transformers repository if needed."""
         if self.config.repo_path.exists():
             return
         ref_type = self._get_ref_type()
         archive_url = f"{self.config.transformers_base_url}/{ref_type}/{self.config.transformers_version}.tar.gz"
         archive_path = Path(f"./transformers_{self.config.transformers_version}.tar.gz")
         try:
             urlretrieve(archive_url, archive_path)
             self._extract_archive(archive_path)
@@ -87,96 +76,66 @@ class ModelConverter:
             archive_path.unlink(missing_ok=True)
     def _extract_archive(self, archive_path: Path) -> None:
-        """Extract the downloaded archive."""
-        import tarfile
-        import tempfile
         with tempfile.TemporaryDirectory() as tmp_dir:
             with tarfile.open(archive_path, "r:gz") as tar:
                 tar.extractall(tmp_dir)
-            extracted_folder = next(Path(tmp_dir).iterdir())
-            extracted_folder.rename(self.config.repo_path)
     def convert_model(self, input_model_id: str) -> Tuple[bool, Optional[str]]:
-        """
-        Convert the model to ONNX format, always exporting attention maps.
-        Relocate generation parameters, suppress tracer warnings, and
-        strip out both relocation and tracer warnings from stderr.
-        """
         try:
-            # Prepare local directory for config edits
             model_dir = self.config.repo_path / "models" / input_model_id
             model_dir.mkdir(parents=True, exist_ok=True)
-            # Load and relocate generation parameters
-            base_config = AutoConfig.from_pretrained(input_model_id)
-            gen_config = GenerationConfig.from_model_config(base_config)
-            # Remove generation params from base config
-            for key in gen_config.to_dict():
-                if hasattr(base_config, key):
-                    setattr(base_config, key, None)
-            base_config.save_pretrained(model_dir)
-            gen_config.save_pretrained(model_dir)
-            # Build conversion command with global warning ignore
             cmd = [
                 sys.executable,
-                "-W", "ignore",
                 "-m", "scripts.convert",
                 "--quantize",
                 "--trust_remote_code",
                 "--model_id", input_model_id,
                 "--output_attentions",
             ]
             result = subprocess.run(
                 cmd,
                 cwd=self.config.repo_path,
                 capture_output=True,
                 text=True,
-                env=os.environ.copy(),
             )
-            # Filter out relocation and tracer warnings
-            lines = []
-            for ln in result.stderr.splitlines():
-                if ln.startswith("Moving the following attributes"):
-                    continue
-                if "TracerWarning" in ln:
-                    continue
-                lines.append(ln)
-            stderr = "\n".join(lines)
             if result.returncode != 0:
                 return False, stderr
             return True, stderr
         except Exception as e:
             return False, str(e)
     def upload_model(self, input_model_id: str, output_model_id: str) -> Optional[str]:
-        """Upload the converted model to Hugging Face."""
-        model_folder_path = self.config.repo_path / "models" / input_model_id
         try:
             self.api.create_repo(output_model_id, exist_ok=True, private=False)
-            readme_path = f"{model_folder_path}/README.md"
-            if not os.path.exists(readme_path):
-                with open(readme_path, "w") as file:
-                    file.write(self.generate_readme(input_model_id))
-            self.api.upload_folder(
-                folder_path=str(model_folder_path),
-                repo_id=output_model_id
-            )
             return None
         except Exception as e:
             return str(e)
         finally:
-            import shutil
-            shutil.rmtree(model_folder_path, ignore_errors=True)
     def generate_readme(self, imi: str) -> str:
         return (
@@ -187,76 +146,31 @@ class ModelConverter:
             "---\n\n"
             f"# {imi.split('/')[-1]} (ONNX)\n\n"
             f"This is an ONNX version of [{imi}](https://huggingface.co/{imi}). "
-            "It was automatically converted and uploaded using "
-            "[this space](https://huggingface.co/spaces/onnx-community/convert-to-onnx).\n"
         )
 def main():
-    """Main application entry point."""
-    st.write("## Convert a Hugging Face model to ONNX (with attentions)")
     try:
         config = Config.from_env()
-        converter = ModelConverter(config)
-        converter.setup_repository()
-        input_model_id = st.text_input(
-            "Enter the Hugging Face model ID to convert. Example: `EleutherAI/pythia-14m`"
-        )
-        if not input_model_id:
-            return
-        st.text_input(
-            "Optional: Your Hugging Face write token. Fill it if you want to upload under your account.",
-            type="password",
-            key="user_hf_token",
-        )
-        if config.hf_username == input_model_id.split("/")[0]:
-            same_repo = st.checkbox("Upload ONNX weights to the same repository?")
-        else:
-            same_repo = False
-        model_name = input_model_id.split("/")[-1]
-        output_model_id = f"{config.hf_username}/{model_name}"
-        if not same_repo:
-            output_model_id += "-ONNX"
-        output_model_url = f"{config.hf_base_url}/{output_model_id}"
-        if not same_repo and converter.api.repo_exists(output_model_id):
-            st.write("This model has already been converted! 🎉")
-            st.link_button(f"Go to {output_model_id}", output_model_url, type="primary")
-            return
-        st.write("Destination repository:")
-        st.code(output_model_url, language="plaintext")
-        if not st.button(label="Proceed", type="primary"):
-            return
-        with st.spinner("Converting model (including attention maps)…"):
-            success, stderr = converter.convert_model(input_model_id)
-            if not success:
-                st.error(f"Conversion failed: {stderr}")
-                return
-            st.success("Conversion successful!")
-            st.code(stderr)
-        with st.spinner("Uploading model…"):
-            error = converter.upload_model(input_model_id, output_model_id)
-            if error:
-                st.error(f"Upload failed: {error}")
-                return
-            st.success("Upload successful!")
-            st.write("You can now view the model on Hugging Face:")
-            st.link_button(f"Go to {output_model_id}", output_model_url, type="primary")
     except Exception as e:
-        logger.exception("Application error")
-        st.error(f"An error occurred: {str(e)}")
-if __name__ == "__main__":
-    main()

 from torch.jit import TracerWarning
 from transformers import AutoConfig, GenerationConfig
+# Suppress local TorchScript TracerWarnings
 warnings.filterwarnings("ignore", category=TracerWarning)
 logging.basicConfig(level=logging.INFO)
 @dataclass
 class Config:
     hf_token: str
     hf_username: str
     transformers_version: str = "3.5.0"
     @classmethod
     def from_env(cls) -> "Config":
         system_token = st.secrets.get("HF_TOKEN")
         user_token = st.session_state.get("user_hf_token")
         if user_token:
                 os.getenv("SPACE_AUTHOR_NAME") or whoami(token=system_token)["name"]
             )
         hf_token = user_token or system_token
         if not hf_token:
             raise ValueError("HF_TOKEN must be set")
         return cls(hf_token=hf_token, hf_username=hf_username)
 class ModelConverter:
     def __init__(self, config: Config):
         self.config = config
         self.api = HfApi(token=config.hf_token)
     def _get_ref_type(self) -> str:
         url = f"{self.config.transformers_base_url}/tags/{self.config.transformers_version}.tar.gz"
         try:
             return "tags" if urlopen(url).getcode() == 200 else "heads"
             return "heads"
     def setup_repository(self) -> None:
         if self.config.repo_path.exists():
             return
         ref_type = self._get_ref_type()
         archive_url = f"{self.config.transformers_base_url}/{ref_type}/{self.config.transformers_version}.tar.gz"
         archive_path = Path(f"./transformers_{self.config.transformers_version}.tar.gz")
         try:
             urlretrieve(archive_url, archive_path)
             self._extract_archive(archive_path)
             archive_path.unlink(missing_ok=True)
     def _extract_archive(self, archive_path: Path) -> None:
+        import tarfile, tempfile
         with tempfile.TemporaryDirectory() as tmp_dir:
             with tarfile.open(archive_path, "r:gz") as tar:
                 tar.extractall(tmp_dir)
+            next(Path(tmp_dir).iterdir()).rename(self.config.repo_path)
     def convert_model(self, input_model_id: str) -> Tuple[bool, Optional[str]]:
         try:
+            # Prepare model dir
             model_dir = self.config.repo_path / "models" / input_model_id
             model_dir.mkdir(parents=True, exist_ok=True)
+            # Relocate generation params
+            base_cfg = AutoConfig.from_pretrained(input_model_id)
+            gen_cfg = GenerationConfig.from_model_config(base_cfg)
+            for k in gen_cfg.to_dict():
+                if hasattr(base_cfg, k): setattr(base_cfg, k, None)
+            base_cfg.save_pretrained(model_dir)
+            gen_cfg.save_pretrained(model_dir)
+            # Set verbose logging
+            env = os.environ.copy()
+            env["TRANSFORMERS_VERBOSITY"] = "debug"
+            # Build command with debug
             cmd = [
                 sys.executable,
                 "-m", "scripts.convert",
                 "--quantize",
                 "--trust_remote_code",
                 "--model_id", input_model_id,
                 "--output_attentions",
+                "--debug"
             ]
             result = subprocess.run(
                 cmd,
                 cwd=self.config.repo_path,
                 capture_output=True,
                 text=True,
+                env=env,
             )
+            # Filter warnings
+            filtered = [ln for ln in result.stderr.splitlines() if not ln.startswith("Moving the following attributes") and "TracerWarning" not in ln]
+            stderr = "\n".join(filtered)
             if result.returncode != 0:
                 return False, stderr
             return True, stderr
         except Exception as e:
             return False, str(e)
     def upload_model(self, input_model_id: str, output_model_id: str) -> Optional[str]:
+        model_folder = self.config.repo_path / "models" / input_model_id
         try:
             self.api.create_repo(output_model_id, exist_ok=True, private=False)
+            readme = model_folder / "README.md"
+            if not readme.exists():
+                readme.write_text(self.generate_readme(input_model_id))
+            self.api.upload_folder(folder_path=str(model_folder), repo_id=output_model_id)
             return None
         except Exception as e:
             return str(e)
         finally:
+            import shutil; shutil.rmtree(model_folder, ignore_errors=True)
     def generate_readme(self, imi: str) -> str:
         return (
             "---\n\n"
             f"# {imi.split('/')[-1]} (ONNX)\n\n"
             f"This is an ONNX version of [{imi}](https://huggingface.co/{imi}). "
+            "Converted with debug logs and attention maps.\n"
         )
 def main():
+    st.write("## Convert a Hugging Face model to ONNX (with debug)")
     try:
         config = Config.from_env()
+        conv = ModelConverter(config)
+        conv.setup_repository()
+        input_id = st.text_input("Model ID e.g. EleutherAI/pythia-14m")
+        if not input_id: return
+        st.text_input("HF write token (optional)", type="password", key="user_hf_token")
+        same = st.checkbox("Upload to same repo?", value=False) if config.hf_username == input_id.split("/")[0] else False
+        name = input_id.split("/")[-1]; out = f"{config.hf_username}/{name}" + ("" if same else "-ONNX")
+        url = f"{config.hf_base_url}/{out}"; st.code(url)
+        if not st.button("Proceed"): return
+        with st.spinner("Converting (debug)..."):
+            ok, err = conv.convert_model(input_id)
+            if not ok: st.error(f"Conversion failed: {err}"); return
+            st.success("Conversion successful!"); st.code(err)
+        with st.spinner("Uploading..."):
+            err2 = conv.upload_model(input_id, out)
+            if err2: st.error(f"Upload failed: {err2}"); return
+            st.success("Upload successful!"); st.link_button(f"Go to {out}", url)
     except Exception as e:
+        logger.exception(e); st.error(f"Error: {e}")
+if __name__ == "__main__": main()