Allahbux committed
Commit 1236115 · verified · 1 Parent(s): 2033596

Update app.py

Files changed (1):
  1. app.py (+20 -24)
app.py CHANGED
@@ -1,5 +1,5 @@
 import streamlit as st
-from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, LlamaConfig
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
 import json
 import os
 import requests
@@ -10,32 +10,27 @@ st.set_page_config(page_title="AI Chatbot", layout="centered")
 
 # Fix and modify the model configuration dynamically
 def fix_model_config(model_name):
-    # Download the configuration file from the Hugging Face hub
     config_url = f"https://huggingface.co/{model_name}/resolve/main/config.json"
-    config_path = "config.json"
+    fixed_config_path = "fixed_config.json"
 
-    if not os.path.exists(config_path):
+    # Download and modify config.json
+    if not os.path.exists(fixed_config_path):
         response = requests.get(config_url)
-        response.raise_for_status()  # Ensure the request is successful
-        with open(config_path, "w") as f:
-            f.write(response.text)
+        response.raise_for_status()
+        config = response.json()
 
-    # Load the configuration JSON
-    with open(config_path, "r") as f:
-        config = json.load(f)
+        # Fix the `rope_scaling` field
+        if "rope_scaling" in config:
+            config["rope_scaling"] = {
+                "type": "linear",
+                "factor": config["rope_scaling"].get("factor", 1.0)
+            }
 
-    # Fix the `rope_scaling` field
-    if "rope_scaling" in config:
-        config["rope_scaling"] = {
-            "type": "linear",  # Only keep 'type' and 'factor'
-            "factor": config["rope_scaling"].get("factor", 1.0)
-        }
+        # Save the fixed config
+        with open(fixed_config_path, "w") as f:
+            json.dump(config, f)
 
-    # Save the fixed configuration locally
-    with open(config_path, "w") as f:
-        json.dump(config, f)
-
-    return config_path
+    return fixed_config_path
 
 # Load the pipeline
 @st.cache_resource
@@ -45,15 +40,16 @@ def load_pipeline():
     # Fix the model configuration
     fixed_config_path = fix_model_config(model_name)
 
-    # Use the fixed configuration to load the model
+    # Load tokenizer and model
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         config=fixed_config_path,
-        torch_dtype=torch.float16,  # Mixed precision for efficiency
-        device_map="auto"  # Automatically allocate to GPU if available
+        torch_dtype=torch.float16,
+        device_map="auto"
     )
 
+    # Return the text generation pipeline
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 pipe = load_pipeline()
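
The substantive change in this commit is that fix_model_config now parses the downloaded config in memory and reduces the rope_scaling block to the two keys (type and factor) that older transformers releases accept, instead of round-tripping the raw file through disk. A minimal standalone sketch of that reduction is below; the sample rope_scaling values are illustrative assumptions in the style of newer Llama-family configs, not values read from the actual model repo.

# Standalone sketch of the rope_scaling reduction performed by
# fix_model_config(). The input dict is an illustrative,
# Llama-3.1-style rope_scaling block (assumed values, not from the repo).
sample_config = {
    "model_type": "llama",
    "rope_scaling": {
        "rope_type": "llama3",
        "factor": 8.0,
        "low_freq_factor": 1.0,
        "high_freq_factor": 4.0,
        "original_max_position_embeddings": 8192,
    },
}

# Same logic as the commit: keep only 'type' and 'factor'.
if "rope_scaling" in sample_config:
    sample_config["rope_scaling"] = {
        "type": "linear",
        "factor": sample_config["rope_scaling"].get("factor", 1.0),
    }

print(sample_config["rope_scaling"])
# -> {'type': 'linear', 'factor': 8.0}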
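
Once load_pipeline() has run, pipe behaves like any transformers text-generation pipeline. A short usage sketch follows; the prompt and max_new_tokens value are arbitrary illustrations. Note that torch_dtype=torch.float16 assumes torch is imported in the lines the diff does not show.

# Illustrative call into the cached pipeline; prompt and parameters are arbitrary.
result = pipe("Hello! What can you do?", max_new_tokens=64)
print(result[0]["generated_text"])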