akashmishra358 committed on
Commit 8cc1ee4 · verified · 1 Parent(s): 1f2501b

Update model.py


Replaced the previous code with the new suggestion from DeepSeek about the env variables.
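For reference, the new lookup prefers the environment variable and only falls back to the config entry. A minimal sketch of that precedence (the config values below are placeholders, not part of this commit):

import os

# Placeholder config dict mirroring the keys the updated model.py reads;
# the model id and token value here are illustrative only.
configs = {"model": {"generation_model": "some-org/some-model", "hf_token": None}}

# Same precedence as the new __init__: HUGGINGFACE_TOKEN wins,
# hf_token from the config is the fallback.
hf_token = os.getenv("HUGGINGFACE_TOKEN") or configs["model"].get("hf_token")
if not hf_token:
    raise ValueError("Set HUGGINGFACE_TOKEN or add hf_token to the config")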

Files changed (1)
  1. model.py +64 -41
model.py CHANGED
@@ -4,77 +4,100 @@ from transformers import BitsAndBytesConfig
 from transformers.utils import is_flash_attn_2_available
 import yaml
 import torch
+import os  # Added for environment variables
 import nltk

 def load_configs(config_file: str) -> dict:
     with open(config_file, "r") as f:
         configs = yaml.safe_load(f)
-
     return configs

-
 class RAGModel:
     def __init__(self, configs) -> None:
         self.configs = configs
-        self.device = configs["model"]["device"]
-        model_url = configs["model"]["generation_model"]
-        # quantization_config = BitsAndBytesConfig(
-        #     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
-        # )
+
+        # 1. Get Hugging Face token (critical fix)
+        self.hf_token = os.getenv("HUGGINGFACE_TOKEN") or configs["model"].get("hf_token")
+        if not self.hf_token:
+            raise ValueError(
+                "Missing Hugging Face token! Set either:\n"
+                "1. HUGGINGFACE_TOKEN environment variable\n"
+                "2. hf_token in config.yml"
+            )
+
+        # 2. Fix model URL key (typo correction)
+        model_url = configs["model"]["generation_model"]  # Fixed "genration_model" -> "generation_model"

+        # 3. Add authentication to model loading
         self.model = AutoModelForCausalLM.from_pretrained(
             model_url,
+            token=self.hf_token,  # Added authentication
             torch_dtype=torch.float16,
-            # quantization_config=quantization_config,
             low_cpu_mem_usage=False,
             attn_implementation="sdpa",
-        ).to(self.device)
+            device_map="auto"  # Better device handling
+        )
+
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_url,
+            token=self.hf_token  # Added authentication
         )

     def create_prompt(self, query, topk_items: list[str]):
-
-        context = "\n-".join(c for c in topk_items)
-
-        base_prompt = f"""You are an alternate to goole search. Your job is to answer the user query in as detailed manner as possible.
-        you have access to the internet and other relevent data related to the user's question.
-        Give time for yourself to read the context and user query and extract relevent data and then answer the query.
-        make sure your answers is as detailed as posssbile.
-        Do not return thinking process, just return the answer.
-        Give the output structured as a Wikipedia article.
-        Now use the following context items to answer the user query
-        context: {context}
-        user query : {query}
+        context = "\n-".join(c for c in topk_items)
+
+        # Improved prompt template
+        base_prompt = f"""You are an AI search assistant. Use this context to answer:
+        Context: {context}
+
+        Question: {query}
+
+        Answer in Wikipedia-style format with these requirements:
+        - Detailed technical explanations
+        - Historical context where relevant
+        - Numerical data when available
+        - Markdown formatting for structure
         """

         dialog_template = [{"role": "user", "content": base_prompt}]
-
+
+        # 4. Fix typo in apply_chat_template
         prompt = self.tokenizer.apply_chat_template(
-            conversation=dialog_template, tokenize=False, add_feneration_prompt=True
+            conversation=dialog_template,
+            tokenize=False,
+            add_generation_prompt=True  # Fixed "feneration" -> "generation"
         )
         return prompt

     def answer_query(self, query: str, topk_items: list[str]):
-
         prompt = self.create_prompt(query, topk_items)
-        input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
-        output = self.model.generate(**input_ids, temperature=0.7, max_new_tokens=512, do_sample=True)
-        text = self.tokenizer.decode(output[0])
-        text = text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "")
-
-
+        input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+
+        # Improved generation parameters
+        output = self.model.generate(
+            **input_ids,
+            temperature=0.7,
+            max_new_tokens=1024,
+            do_sample=True,
+            top_p=0.9,
+            repetition_penalty=1.1
+        )
+
+        # Better text cleanup
+        text = self.tokenizer.decode(
+            output[0],
+            skip_special_tokens=True,  # Better than manual replace
+            clean_up_tokenization_spaces=True
+        )
         return text

 if __name__ == "__main__":
-    configs = load_configs(config_file="rag.configs.yml")
-    query = "The height of burj khalifa is 1000 meters and it was built in 2023. What is the height of burgj khalifa"
-    # g = GoogleSearch(query)
-    # data = g.all_page_data
-    # d = Document(data, 512)
-    # doc_chunks = d.doc()
-    # s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
-    # topk, u = s.semantic_search(query=query, k=32)
-    r = RAGModel(configs)
-    output = r.answer_query(query=query, topk_items=[""])
-    print(output)
+    # Test with authentication
+    configs = load_configs("rag.configs.yml")
+
+    # Add temporary token check
+    if "HUGGINGFACE_TOKEN" not in os.environ:
+        raise RuntimeError("Set HUGGINGFACE_TOKEN environment variable first!")
+
+    rag = RAGModel(configs)
+    print(rag.answer_query("What's the height of Burj Khalifa?", ["Burj Khalifa is 828 meters tall"]))
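As a usage sketch (not part of the commit): with HUGGINGFACE_TOKEN exported in the shell and a rag.configs.yml along the lines below, the updated class can be driven from another script. The model id is a placeholder, and loading it will download weights from the Hub.

import os
import yaml
from model import RAGModel

# Hypothetical rag.configs.yml contents; only generation_model is required now,
# since device placement uses device_map="auto" and the token can come from the environment.
configs = yaml.safe_load("""
model:
  generation_model: some-org/some-instruct-model   # placeholder model id
  hf_token: null
""")

assert "HUGGINGFACE_TOKEN" in os.environ, "export HUGGINGFACE_TOKEN before running"
rag = RAGModel(configs)
print(rag.answer_query(
    "What's the height of Burj Khalifa?",
    ["Burj Khalifa is 828 meters tall"],
))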