Gopikanth123 committed
Commit da79ef9 · verified · 1 Parent(s): 6fe1f69

Update main.py

Files changed (1):
  main.py +25 -12
main.py CHANGED
@@ -14,21 +14,31 @@ HF_TOKEN = os.getenv("HF_TOKEN")
  if not HF_TOKEN:
      raise ValueError("HF_TOKEN environment variable not set.")
 
- repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ # repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+ repo_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
  llm_client = InferenceClient(
      model=repo_id,
      token=HF_TOKEN,
  )
 
- # Configure Llama index settings
- Settings.llm = HuggingFaceInferenceAPI(
-     model_name=repo_id,
-     tokenizer_name=repo_id,
-     context_window=3000,
-     token=HF_TOKEN,
-     max_new_tokens=512,
-     generate_kwargs={"temperature": 0.1},
- )
+ # # Configure Llama index settings
+ # Settings.llm = HuggingFaceInferenceAPI(
+ #     model_name=repo_id,
+ #     tokenizer_name=repo_id,
+ #     context_window=3000,
+ #     token=HF_TOKEN,
+ #     max_new_tokens=512,
+ #     generate_kwargs={"temperature": 0.1},
+ # )
+ # Configure Llama index settings with the new model
+ Settings.llm = HuggingFaceInferenceAPI(
+     model_name=repo_id,
+     tokenizer_name=repo_id,  # Use the same tokenizer as the model
+     context_window=3000,
+     token=HF_TOKEN,
+     max_new_tokens=512,
+     generate_kwargs={"temperature": 0.1},
+ )
  # Settings.embed_model = HuggingFaceEmbedding(
  #     model_name="BAAI/bge-small-en-v1.5"
  # )
@@ -40,9 +50,12 @@ Settings.embed_model = HuggingFaceEmbedding(
      model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
  )
 
+ # # Configure tokenizer and model if required
+ # tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
+ # model = AutoModel.from_pretrained("xlm-roberta-base")
  # Configure tokenizer and model if required
- tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
- model = AutoModel.from_pretrained("xlm-roberta-base")
+ tokenizer = AutoTokenizer.from_pretrained(repo_id)  # Use the tokenizer from the new model
+ model = AutoModel.from_pretrained(repo_id)  # Load the new model
 
  PERSIST_DIR = "db"
  PDF_DIRECTORY = 'data'
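
For reference, a minimal sketch (not part of this commit) of how the reconfigured pieces might be exercised inside main.py once the code above has run. It assumes llama-index >= 0.10 import paths, that llm_client, Settings.llm, Settings.embed_model, and PDF_DIRECTORY are defined exactly as in the diff, and that the prompt and query strings are illustrative only.

 # Illustrative sketch only -- assumes the configuration from the diff above has already executed.
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

 # Quick smoke test of the new DeepSeek endpoint through the raw InferenceClient.
 reply = llm_client.chat_completion(
     messages=[{"role": "user", "content": "Say hello in one short sentence."}],
     max_tokens=64,
 )
 print(reply.choices[0].message.content)

 # Build a small index over PDF_DIRECTORY ('data') and query it; retrieval uses
 # Settings.embed_model, generation goes through Settings.llm (the HuggingFaceInferenceAPI wrapper).
 documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
 index = VectorStoreIndex.from_documents(documents)
 answer = index.as_query_engine().query("Summarize the uploaded documents.")
 print(answer)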