Allahbux committed on
Commit
2033596
·
verified ·
1 Parent(s): 27e4736

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -1,20 +1,20 @@
1
  import streamlit as st
2
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
  import json
5
  import os
 
 
6
 
7
  # Streamlit app configuration
8
  st.set_page_config(page_title="AI Chatbot", layout="centered")
9
 
10
- # Fix the model's configuration dynamically
11
  def fix_model_config(model_name):
12
- # Load the configuration file directly from the Hugging Face hub
13
  config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
14
  config_path = "config.json"
15
 
16
  if not os.path.exists(config_path):
17
- import requests
18
  response = requests.get(config_url)
19
  response.raise_for_status() # Ensure the request is successful
20
  with open(config_path, "w") as f:
@@ -27,7 +27,7 @@ def fix_model_config(model_name):
27
  # Fix the `rope_scaling` field
28
  if "rope_scaling" in config:
29
  config["rope_scaling"] = {
30
- "type": "linear", # Replace the problematic structure with supported format
31
  "factor": config["rope_scaling"].get("factor", 1.0)
32
  }
33
 
@@ -45,12 +45,12 @@ def load_pipeline():
45
  # Fix the model configuration
46
  fixed_config_path = fix_model_config(model_name)
47
 
48
- # Load the tokenizer and model with the fixed configuration
49
  tokenizer = AutoTokenizer.from_pretrained(model_name)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_name,
52
  config=fixed_config_path,
53
- torch_dtype=torch.float16, # Use mixed precision for faster inference
54
  device_map="auto" # Automatically allocate to GPU if available
55
  )
56
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, LlamaConfig
 
3
  import json
4
  import os
5
+ import requests
6
+ import torch
7
 
8
  # Streamlit app configuration
9
  st.set_page_config(page_title="AI Chatbot", layout="centered")
10
 
11
+ # Fix and modify the model configuration dynamically
12
  def fix_model_config(model_name):
13
+ # Download the configuration file from the Hugging Face hub
14
  config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
15
  config_path = "config.json"
16
 
17
  if not os.path.exists(config_path):
 
18
  response = requests.get(config_url)
19
  response.raise_for_status() # Ensure the request is successful
20
  with open(config_path, "w") as f:
 
27
  # Fix the `rope_scaling` field
28
  if "rope_scaling" in config:
29
  config["rope_scaling"] = {
30
+ "type": "linear", # Only keep 'type' and 'factor'
31
  "factor": config["rope_scaling"].get("factor", 1.0)
32
  }
33
 
 
45
  # Fix the model configuration
46
  fixed_config_path = fix_model_config(model_name)
47
 
48
+ # Use the fixed configuration to load the model
49
  tokenizer = AutoTokenizer.from_pretrained(model_name)
50
  model = AutoModelForCausalLM.from_pretrained(
51
  model_name,
52
  config=fixed_config_path,
53
+ torch_dtype=torch.float16, # Mixed precision for efficiency
54
  device_map="auto" # Automatically allocate to GPU if available
55
  )
56