Spaces:

Allahbux
/

uncensored_Ai

Sleeping

App Files Files Community

Allahbux committed on Jan 28

Commit

27e4736

verified ·

1 Parent(s): f005dd2

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -18

app.py CHANGED Viewed

@@ -1,56 +1,59 @@
 import streamlit as st
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import json
-import requests
 import os
 # Streamlit page configuration: page title "AI Chatbot", centered layout.
 # NOTE(review): Streamlit has historically required set_page_config to be the
 # first st.* call in the script - keep it above any other Streamlit usage.
 st.set_page_config(page_title="AI Chatbot", layout="centered")
-# Fix the model's configuration before loading
 def fix_model_config(model_name):
     """Fetch a model's ``config.json`` and rewrite its ``rope_scaling`` entry.

     The file is downloaded from the Hugging Face hub only when ``config.json``
     is not already present in the current working directory; otherwise the
     local copy is reused.  A dict-valued ``rope_scaling`` entry is replaced by
     ``{"type": "linear", "factor": <original factor, or 1.0 if absent>}`` and
     the configuration is written back to the same file.

     Args:
         model_name: Hub repository id, e.g. ``"org/model"``.

     Returns:
         The path of the local, patched configuration file (``"config.json"``).

     Raises:
         requests.HTTPError: If the download returns an error status.
         json.JSONDecodeError: If the configuration is not valid JSON.
     """
     config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
     # NOTE(review): the cache file name does not depend on model_name, so a
     # copy downloaded for one model is reused for every other model.
     config_path = "config.json"
     if os.path.exists(config_path):
         with open(config_path, "r", encoding="utf-8") as f:
             config = json.load(f)
     else:
         import requests  # only needed on the download path

         # Bound the request so a stalled connection cannot hang app start-up.
         response = requests.get(config_url, timeout=30)
         response.raise_for_status()
         # Parse before writing so an invalid payload never becomes the cached copy.
         config = json.loads(response.text)
     rope_scaling = config.get("rope_scaling")
     # The key may be present but JSON null (None); only a dict can be rewritten.
     if isinstance(rope_scaling, dict):
         config["rope_scaling"] = {
             "type": "linear",
             "factor": rope_scaling.get("factor", 1.0),
         }
     with open(config_path, "w", encoding="utf-8") as f:
         json.dump(config, f)
     return config_path
-# Load the model pipeline
 @st.cache_resource
 def load_pipeline():
     """Build the text-generation pipeline for the Lexi Llama-3.1 8B model.

     Decorated with ``st.cache_resource`` so the returned object is cached by
     Streamlit instead of being rebuilt on every call.

     Returns:
         A ``transformers`` text-generation pipeline wrapping the loaded model
         and tokenizer.
     """
     from transformers import AutoConfig  # not part of the file-level import list

     model_name = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
     fixed_config_path = fix_model_config(model_name)
     # AutoModelForCausalLM.from_pretrained only honours ``config`` when it is a
     # PretrainedConfig instance; a bare path string is discarded and the hub
     # config is loaded instead, so the patched file has to be parsed first.
     config = AutoConfig.from_pretrained(fixed_config_path)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         config=config,
         device_map="auto",  # let the loader place weights on the available devices
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 # Module-level pipeline handle, created when the script runs; load_pipeline is
 # wrapped in st.cache_resource, so the result is cached by Streamlit.
 pipe = load_pipeline()

 import streamlit as st
 from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+import torch
 import json
 import os
 # Streamlit page configuration: page title "AI Chatbot", centered layout.
 # NOTE(review): Streamlit has historically required set_page_config to be the
 # first st.* call in the script - keep it above any other Streamlit usage.
 st.set_page_config(page_title="AI Chatbot", layout="centered")
+# Fix the model's configuration dynamically
 def fix_model_config(model_name):
     """Fetch a model's ``config.json`` and rewrite its ``rope_scaling`` entry.

     The file is downloaded from the Hugging Face hub only when ``config.json``
     is not already present in the current working directory; otherwise the
     local copy is reused.  A dict-valued ``rope_scaling`` entry is replaced by
     ``{"type": "linear", "factor": <original factor, or 1.0 if absent>}`` and
     the configuration is written back to the same file.

     Args:
         model_name: Hub repository id, e.g. ``"org/model"``.

     Returns:
         The path of the local, patched configuration file (``"config.json"``).

     Raises:
         requests.HTTPError: If the download returns an error status.
         json.JSONDecodeError: If the configuration is not valid JSON.
     """
     config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
     # NOTE(review): the cache file name does not depend on model_name, so a
     # copy downloaded for one model is reused for every other model.
     config_path = "config.json"
     if os.path.exists(config_path):
         with open(config_path, "r", encoding="utf-8") as f:
             config = json.load(f)
     else:
         import requests  # only needed on the download path

         # Bound the request so a stalled connection cannot hang app start-up.
         response = requests.get(config_url, timeout=30)
         response.raise_for_status()
         # Parse before writing so an invalid payload never becomes the cached copy.
         config = json.loads(response.text)
     rope_scaling = config.get("rope_scaling")
     # The key may be present but JSON null (None); only a dict can be rewritten.
     if isinstance(rope_scaling, dict):
         config["rope_scaling"] = {
             "type": "linear",
             "factor": rope_scaling.get("factor", 1.0),
         }
     with open(config_path, "w", encoding="utf-8") as f:
         json.dump(config, f)
     return config_path
+# Load the pipeline
 @st.cache_resource
 def load_pipeline():
     """Build the text-generation pipeline for the Lexi Llama-3.1 8B model.

     Decorated with ``st.cache_resource`` so the returned object is cached by
     Streamlit instead of being rebuilt on every call.

     Returns:
         A ``transformers`` text-generation pipeline wrapping the loaded model
         and tokenizer.
     """
     from transformers import AutoConfig  # not part of the file-level import list

     model_name = "Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2"
     fixed_config_path = fix_model_config(model_name)
     # AutoModelForCausalLM.from_pretrained only honours ``config`` when it is a
     # PretrainedConfig instance; a bare path string is discarded and the hub
     # config is loaded instead, so the patched file has to be parsed first.
     config = AutoConfig.from_pretrained(fixed_config_path)
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         config=config,
         torch_dtype=torch.float16,  # load the weights in half precision
         device_map="auto",  # let the loader place weights on the available devices
     )
     return pipeline("text-generation", model=model, tokenizer=tokenizer)
 # Module-level pipeline handle, created when the script runs; load_pipeline is
 # wrapped in st.cache_resource, so the result is cached by Streamlit.
 pipe = load_pipeline()