sugiv committed on
Commit 8fdb4d5 · 1 Parent(s): e29c3c7

Adding a simple monkey search for Leetcode - Darn LeetMonkey

Files changed (1): app.py +7 -2
app.py CHANGED
@@ -5,7 +5,7 @@ from pinecone_text.sparse import SpladeEncoder
 from sentence_transformers import SentenceTransformer
 import transformers
 transformers.logging.set_verbosity_error()
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig
 
 
 import os
@@ -16,6 +16,11 @@ pc = Pinecone(api_key=PINECONE_API_KEY)
 index_name = "leetmonkey-sparse-dense"
 index = pc.Index(index_name)
 
+quantization_config = GPTQConfig(
+    disable_exllama=True
+)
+
+
 
 # Initialize models
 device = 'cpu'
@@ -25,7 +30,7 @@ dense_model = SentenceTransformer('sentence-transformers/all-Mpnet-base-v2', dev
 # Load the quantized Llama 2 model and tokenizer
 model_name = "TheBloke/Llama-2-7B-Chat-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", quantization_config=quantization_config)
 
 def search_problems(query, top_k=5):
     dense_query = dense_model.encode([query])[0].tolist()
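
For context on the change: loading a GPTQ checkpoint such as TheBloke/Llama-2-7B-Chat-GPTQ through transformers normally tries to use the ExLlama kernels, which need a CUDA GPU, so passing a GPTQConfig with ExLlama disabled appears to be what lets the model load in a GPU-less Space. The snippet below is only a minimal, self-contained sketch of that load pattern, not the app's code: it assumes transformers with optimum and an auto-gptq backend installed, and it adds bits=4 (an assumption matching typical GPTQ checkpoints) because newer transformers releases require the bits argument; disable_exllama has also since been superseded by use_exllama.

# Hedged sketch: load a GPTQ-quantized Llama 2 chat model with ExLlama disabled.
# Assumes transformers + optimum + auto-gptq are installed; bits=4 is an assumption.
from transformers import AutoTokenizer, AutoModelForCausalLM, GPTQConfig

model_name = "TheBloke/Llama-2-7B-Chat-GPTQ"

quantization_config = GPTQConfig(
    bits=4,                # recent transformers versions require `bits` explicitly
    disable_exllama=True,  # skip the CUDA-only ExLlama kernels
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
)

# Quick smoke test of the loaded model.
prompt = "Explain the two-pointer technique in one sentence."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))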