willwade committed
Commit 238c097 · 1 Parent(s): 35169ba

migrate to qt models

Files changed (3):
  1. app.py +4 -4
  2. requirements.txt +2 -0
  3. utils.py +36 -3
app.py CHANGED
@@ -22,8 +22,8 @@ AVAILABLE_MODELS = {
 # Initialize the social graph manager
 social_graph = SocialGraphManager("social_graph.json")
 
-# Initialize the suggestion generator with Gemma 3 4B (default)
-suggestion_generator = SuggestionGenerator("google/gemma-3-4b-it")
+# Initialize the suggestion generator with Gemma 3 1B (default - smaller model to save memory)
+suggestion_generator = SuggestionGenerator("google/gemma-3-1b-it")
 
 # Test the model to make sure it's working
 test_result = suggestion_generator.test_model()
@@ -153,7 +153,7 @@ def generate_suggestions(
     user_input,
     suggestion_type,
     selected_topic=None,
-    model_name="google/gemma-3-4b-it",
+    model_name="google/gemma-3-1b-it",
     temperature=0.7,
     mood=3,
     progress=gr.Progress(),
@@ -462,7 +462,7 @@ with gr.Blocks(title="Will's AAC Communication Aid", css="custom.css") as demo:
     with gr.Row():
         model_dropdown = gr.Dropdown(
             choices=list(AVAILABLE_MODELS.keys()),
-            value="google/gemma-3-4b-it",
+            value="google/gemma-3-1b-it",
             label="Language Model",
             info="Select which AI model to use for generating responses",
         )
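Note: the app.py changes swap the default model from google/gemma-3-4b-it to google/gemma-3-1b-it in all three places it appears: the startup initialisation, the generate_suggestions() default argument, and the Gradio dropdown default. A minimal sketch of exercising the new default, based only on the calls visible in this diff (the import path is an assumption; anything beyond the constructor and test_model() is not shown here):

# Sketch only: mirrors the startup sequence app.py uses with the new 1B default.
from utils import SuggestionGenerator  # assumes SuggestionGenerator is defined in utils.py

generator = SuggestionGenerator("google/gemma-3-1b-it")  # new, smaller default model
print(generator.test_model())  # smoke test, as app.py does right after initialisation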
requirements.txt CHANGED
@@ -4,3 +4,5 @@ sentence-transformers>=2.2.2
 torch>=2.0.0
 numpy>=1.24.0
 openai-whisper>=20231117
+bitsandbytes>=0.41.0
+accelerate>=0.21.0
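Note: the two new requirements back the quantized loading path added in utils.py below: bitsandbytes provides the 4-bit kernels behind BitsAndBytesConfig, and accelerate is what enables device_map="auto" placement. A rough environment check, as a sketch (the __version__ attributes are standard for these packages, but verify in your own setup):

# Quick sanity check that the new dependencies are importable and a GPU is visible;
# bitsandbytes 4-bit kernels generally require CUDA.
import torch
import bitsandbytes
import accelerate

print("bitsandbytes:", bitsandbytes.__version__)      # expect >= 0.41.0 per requirements.txt
print("accelerate:", accelerate.__version__)          # expect >= 0.21.0 per requirements.txt
print("CUDA available:", torch.cuda.is_available())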
utils.py CHANGED
@@ -216,6 +216,8 @@ class SuggestionGenerator:
         if is_gated_model:
             # Try to get token from environment
             import os
+            import torch
+            from transformers import BitsAndBytesConfig
 
             token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or os.environ.get(
                 "HF_TOKEN"
@@ -231,14 +233,31 @@ class SuggestionGenerator:
                 from transformers import AutoTokenizer, AutoModelForCausalLM
 
                 try:
+                    # Configure 4-bit quantization to save memory
+                    quantization_config = BitsAndBytesConfig(
+                        load_in_4bit=True,
+                        bnb_4bit_compute_dtype=torch.float16,
+                        bnb_4bit_quant_type="nf4",
+                        bnb_4bit_use_double_quant=True,
+                    )
+
                     tokenizer = AutoTokenizer.from_pretrained(
                         model_name, token=token
                     )
+
+                    # Load model with quantization
                     model = AutoModelForCausalLM.from_pretrained(
-                        model_name, token=token
+                        model_name,
+                        token=token,
+                        quantization_config=quantization_config,
+                        device_map="auto",
                     )
+
                     self.generator = pipeline(
-                        "text-generation", model=model, tokenizer=tokenizer
+                        "text-generation",
+                        model=model,
+                        tokenizer=tokenizer,
+                        torch_dtype=torch.float16,
                     )
                 except Exception as e:
                     print(f"Error loading gated model with token: {e}")
@@ -248,7 +267,21 @@ class SuggestionGenerator:
                     print(
                         "Please visit the model page on Hugging Face Hub and accept the license."
                     )
-                    raise
+                    # Try loading without quantization as fallback
+                    try:
+                        print("Trying to load model without quantization...")
+                        tokenizer = AutoTokenizer.from_pretrained(
+                            model_name, token=token
+                        )
+                        model = AutoModelForCausalLM.from_pretrained(
+                            model_name, token=token
+                        )
+                        self.generator = pipeline(
+                            "text-generation", model=model, tokenizer=tokenizer
+                        )
+                    except Exception as e2:
+                        print(f"Fallback loading also failed: {e2}")
+                        raise e
             else:
                 print("No Hugging Face token found in environment variables.")
                 print(
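Note: taken outside the class, the loading pattern this commit introduces looks roughly like the sketch below: configure NF4 4-bit quantization, load the model with device_map="auto", and wrap it in a text-generation pipeline. This is illustrative rather than a copy of the app's code; get_memory_footprint() is a stock transformers helper used only to confirm the saving, and the gated Gemma checkpoint still requires an accepted license and a Hugging Face token (or a prior huggingface-cli login).

# Standalone sketch of the 4-bit loading pattern added in utils.py (illustrative only).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

model_name = "google/gemma-3-1b-it"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Pass token=... to both from_pretrained calls if you are not already logged in to the Hub.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
)
print(f"Model footprint: ~{model.get_memory_footprint() / 1e9:.2f} GB")  # should be well below the fp16 size

generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(generator("Hello, how are you", max_new_tokens=20)[0]["generated_text"])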