Update model.py
Back to the old changes, with just the word "generation" corrected
model.py
CHANGED
@@ -4,100 +4,77 @@ from transformers import BitsAndBytesConfig
 from transformers.utils import is_flash_attn_2_available
 import yaml
 import torch
-import os  # Added for environment variables
 import nltk

 def load_configs(config_file: str) -> dict:
     with open(config_file, "r") as f:
         configs = yaml.safe_load(f)
+
     return configs

+
 class RAGModel:
     def __init__(self, configs) -> None:
         self.configs = configs
-
-
-
-
-
-                "Missing Hugging Face token! Set either:\n"
-                "1. HUGGINGFACE_TOKEN environment variable\n"
-                "2. hf_token in config.yml"
-            )
-
-        # 2. Fix model URL key (typo correction)
-        model_url = configs["model"]["generation_model"]  # Fixed "genration_model" -> "generation_model"
+        self.device = configs["model"]["device"]
+        model_url = configs["model"]["generation_model"]
+        # quantization_config = BitsAndBytesConfig(
+        #     load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
+        # )

-        # 3. Add authentication to model loading
         self.model = AutoModelForCausalLM.from_pretrained(
             model_url,
-            token=self.hf_token,  # Added authentication
             torch_dtype=torch.float16,
+            # quantization_config=quantization_config,
             low_cpu_mem_usage=False,
             attn_implementation="sdpa",
-
-        )
-
+        ).to(self.device)
         self.tokenizer = AutoTokenizer.from_pretrained(
             model_url,
-            token=self.hf_token  # Added authentication
         )

     def create_prompt(self, query, topk_items: list[str]):
-
-
-
-        base_prompt = f"""You are an
-
-
-
-
-
-
-
-
-        - Markdown formatting for structure
+
+        context = "\n-".join(c for c in topk_items)
+
+        base_prompt = f"""You are an alternative to Google search. Your job is to answer the user query in as detailed a manner as possible.
+        You have access to the internet and other relevant data related to the user's question.
+        Give yourself time to read the context and the user query, extract the relevant data, and then answer the query.
+        Make sure your answer is as detailed as possible.
+        Do not return the thinking process, just return the answer.
+        Give the output structured as a Wikipedia article.
+        Now use the following context items to answer the user query
+        context: {context}
+        user query: {query}
         """

         dialog_template = [{"role": "user", "content": base_prompt}]
-
-        # 4. Fix typo in apply_chat_template
+
         prompt = self.tokenizer.apply_chat_template(
-            conversation=dialog_template,
-            tokenize=False,
-            add_generation_prompt=True  # Fixed "feneration" -> "generation"
+            conversation=dialog_template, tokenize=False, add_generation_prompt=True
         )
         return prompt

     def answer_query(self, query: str, topk_items: list[str]):
+
         prompt = self.create_prompt(query, topk_items)
-        input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.
-
-
-
-
-
-            max_new_tokens=1024,
-            do_sample=True,
-            top_p=0.9,
-            repetition_penalty=1.1
-        )
-
-        # Better text cleanup
-        text = self.tokenizer.decode(
-            output[0],
-            skip_special_tokens=True,  # Better than manual replace
-            clean_up_tokenization_spaces=True
-        )
+        input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+        output = self.model.generate(**input_ids, temperature=0.7, max_new_tokens=512, do_sample=True)
+        text = self.tokenizer.decode(output[0])
+        text = text.replace(prompt, "").replace("<bos>", "").replace("<eos>", "")
+
+
         return text

 if __name__ == "__main__":
-
-
-
-    #
-
-
-
-
-
+    configs = load_configs(config_file="rag.configs.yml")
+    query = "The height of burj khalifa is 1000 meters and it was built in 2023. What is the height of burj khalifa"
+    # g = GoogleSearch(query)
+    # data = g.all_page_data
+    # d = Document(data, 512)
+    # doc_chunks = d.doc()
+    # s = SemanticSearch(doc_chunks, "all-mpnet-base-v2", "mps")
+    # topk, u = s.semantic_search(query=query, k=32)
+    r = RAGModel(configs)
+    output = r.answer_query(query=query, topk_items=[""])
+    print(output)