Gopikanth123 committed
Commit 0edf0f8 · verified · 1 Parent(s): a6dac5d

Update main.py

Files changed (1): main.py (+17 -252)
main.py CHANGED

@@ -7,10 +7,6 @@ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer, AutoModel
 from deep_translator import GoogleTranslator
-from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
-from accelerate import infer_auto_device_map
-


 # Ensure HF_TOKEN is set
@@ -18,31 +14,21 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("HF_TOKEN environment variable not set.")

-# repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
-repo_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
 llm_client = InferenceClient(
     model=repo_id,
     token=HF_TOKEN,
 )

-# # Configure Llama index settings
-# Settings.llm = HuggingFaceInferenceAPI(
-#     model_name=repo_id,
-#     tokenizer_name=repo_id,
-#     context_window=3000,
-#     token=HF_TOKEN,
-#     max_new_tokens=512,
-#     generate_kwargs={"temperature": 0.1},
-# )
-# Configure Llama index settings with the new model
-# Settings.llm = HuggingFaceInferenceAPI(
-#     model_name=repo_id,
-#     tokenizer_name=repo_id,  # Use the same tokenizer as the model
-#     context_window=3000,
-#     token=HF_TOKEN,
-#     max_new_tokens=512,
-#     generate_kwargs={"temperature": 0.1},
-# )
+# Configure Llama index settings
+Settings.llm = HuggingFaceInferenceAPI(
+    model_name=repo_id,
+    tokenizer_name=repo_id,
+    context_window=3000,
+    token=HF_TOKEN,
+    max_new_tokens=512,
+    generate_kwargs={"temperature": 0.1},
+)
 # Settings.embed_model = HuggingFaceEmbedding(
 #     model_name="BAAI/bge-small-en-v1.5"
 # )
@@ -50,35 +36,14 @@ llm_client = InferenceClient(
 # Settings.embed_model = HuggingFaceEmbedding(
 #     model_name="xlm-roberta-base"  # XLM-RoBERTa model for multilingual support
 # )
-# Settings.embed_model = HuggingFaceEmbedding(
-#     model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
-# )
-
-# # Configure tokenizer and model if required
-# tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
-# model = AutoModel.from_pretrained("xlm-roberta-base")
-# Configure tokenizer and model if required
-tokenizer = AutoTokenizer.from_pretrained(repo_id)  # Use the tokenizer from the new model
-# model = AutoModel.from_pretrained(repo_id)  # Load the new model
-model = AutoModelForCausalLM.from_pretrained(
-    repo_id,
-    load_in_4bit=True,  # Load in 4-bit quantization
-    torch_dtype=torch.float16,
-    device_map="auto",
-)
-# Configure Llama index settings
-Settings.llm = HuggingFaceInferenceAPI(
-    model_name=repo_id,
-    tokenizer_name=repo_id,  # Use the same tokenizer as the model
-    context_window=2048,  # Reduce context window to save memory
-    token=HF_TOKEN,
-    max_new_tokens=256,  # Reduce max tokens to save memory
-    generate_kwargs={"temperature": 0.1},
-)
-# Use a smaller embedding model
 Settings.embed_model = HuggingFaceEmbedding(
-    model_name="sentence-transformers/all-MiniLM-L6-v2"  # Smaller and faster
+    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
 )
+
+# Configure tokenizer and model if required
+tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
+model = AutoModel.from_pretrained("xlm-roberta-base")
+
 PERSIST_DIR = "db"
 PDF_DIRECTORY = 'data'

@@ -279,204 +244,4 @@ def chat():
         return jsonify({"response": f"An error occurred: {str(e)}"})

 if __name__ == '__main__':
-    app.run(debug=True)
-# import os
-# import shutil
-# from flask import Flask, render_template, request, jsonify
-# from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
-# from llama_index.llms.huggingface import HuggingFaceInferenceAPI
-# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
-# from huggingface_hub import InferenceClient
-# from transformers import AutoTokenizer, AutoModelForCausalLM
-# from deep_translator import GoogleTranslator
-# import torch
-
-# # Ensure HF_TOKEN is set
-# HF_TOKEN = os.getenv("HF_TOKEN")
-# if not HF_TOKEN:
-#     raise ValueError("HF_TOKEN environment variable not set.")
-
-# # Model configuration
-# repo_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-
-# # Load tokenizer
-# tokenizer = AutoTokenizer.from_pretrained(repo_id)
-
-# # Load quantized model
-# try:
-#     model = AutoModelForCausalLM.from_pretrained(
-#         repo_id,
-#         device_map="auto",  # Automatically distribute across available devices
-#         load_in_8bit=True,  # Quantize to 8-bit precision
-#         torch_dtype=torch.float16  # Use 16-bit precision
-#     )
-# except ImportError:
-#     raise ImportError("The 'bitsandbytes' library is required for quantization. Please install it using `pip install bitsandbytes`.")
-
-# # Configure Llama index settings
-# Settings.llm = HuggingFaceInferenceAPI(
-#     model_name=repo_id,
-#     tokenizer_name=repo_id,  # Use the same tokenizer as the model
-#     context_window=2048,  # Reduce context window to save memory
-#     token=HF_TOKEN,
-#     max_new_tokens=256,  # Reduce max tokens to save memory
-#     generate_kwargs={"temperature": 0.1},
-# )
-
-# # Use a smaller embedding model
-# Settings.embed_model = HuggingFaceEmbedding(
-#     model_name="sentence-transformers/all-MiniLM-L6-v2"  # Smaller and faster
-# )
-
-# # Directories
-# PERSIST_DIR = "db"
-# PDF_DIRECTORY = 'data'
-
-# # Ensure directories exist
-# os.makedirs(PDF_DIRECTORY, exist_ok=True)
-# os.makedirs(PERSIST_DIR, exist_ok=True)
-
-# chat_history = []
-# current_chat_history = []
-
-# def data_ingestion_from_directory():
-#     # Clear previous data by removing the persist directory
-#     if os.path.exists(PERSIST_DIR):
-#         shutil.rmtree(PERSIST_DIR)  # Remove the persist directory and all its contents
-
-#     # Recreate the persist directory after removal
-#     os.makedirs(PERSIST_DIR, exist_ok=True)
-
-#     # Load new documents from the directory
-#     new_documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
-
-#     # Create a new index with the new documents
-#     index = VectorStoreIndex.from_documents(new_documents)
-
-#     # Persist the new index
-#     index.storage_context.persist(persist_dir=PERSIST_DIR)
-
-# def handle_query(query):
-#     chat_text_qa_msgs = [
-#         (
-#             "user",
-#             """
-#             You are the Hotel voice chatbot and your name is hotel helper. Your goal is to provide accurate, professional, and helpful answers to user queries based on the hotel's data. Always ensure your responses are clear and concise. Give response within 10-15 words only. You need to give an answer in the same language used by the user.
-#             {context_str}
-#             Question:
-#             {query_str}
-#             """
-#         )
-#     ]
-#     text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
-
-#     storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
-#     index = load_index_from_storage(storage_context)
-#     context_str = ""
-#     for past_query, response in reversed(current_chat_history):
-#         if past_query.strip():
-#             context_str += f"User asked: '{past_query}'\nBot answered: '{response}'\n"
-#     query_engine = index.as_query_engine(text_qa_template=text_qa_template, context_str=context_str)
-#     print(query)
-#     answer = query_engine.query(query)
-#     if hasattr(answer, 'response'):
-#         response = answer.response
-#     elif isinstance(answer, dict) and 'response' in answer:
-#         response = answer['response']
-#     else:
-#         response = "Sorry, I couldn't find an answer."
-#     current_chat_history.append((query, response))
-#     return response
-
-# app = Flask(__name__)
-
-# # Data ingestion
-# data_ingestion_from_directory()
-
-# # Generate Response
-# def generate_response(query, language):
-#     try:
-#         # Call the handle_query function to get the response
-#         bot_response = handle_query(query)
-
-#         # Map of supported languages
-#         supported_languages = {
-#             "hindi": "hi",
-#             "bengali": "bn",
-#             "telugu": "te",
-#             "marathi": "mr",
-#             "tamil": "ta",
-#             "gujarati": "gu",
-#             "kannada": "kn",
-#             "malayalam": "ml",
-#             "punjabi": "pa",
-#             "odia": "or",
-#             "urdu": "ur",
-#             "assamese": "as",
-#             "sanskrit": "sa",
-#             "arabic": "ar",
-#             "australian": "en-AU",
-#             "bangla-india": "bn-IN",
-#             "chinese": "zh-CN",
-#             "dutch": "nl",
-#             "french": "fr",
-#             "filipino": "tl",
-#             "greek": "el",
-#             "indonesian": "id",
-#             "italian": "it",
-#             "japanese": "ja",
-#             "korean": "ko",
-#             "latin": "la",
-#             "nepali": "ne",
-#             "portuguese": "pt",
-#             "romanian": "ro",
-#             "russian": "ru",
-#             "spanish": "es",
-#             "swedish": "sv",
-#             "thai": "th",
-#             "ukrainian": "uk",
-#             "turkish": "tr"
-#         }
-
-#         # Initialize the translated text
-#         translated_text = bot_response
-
-#         # Translate only if the language is supported and not English
-#         try:
-#             if language in supported_languages:
-#                 target_lang = supported_languages[language]
-#                 translated_text = GoogleTranslator(source='en', target=target_lang).translate(bot_response)
-#                 print(translated_text)
-#             else:
-#                 print(f"Unsupported language: {language}")
-#         except Exception as e:
-#             # Handle translation errors
-#             print(f"Translation error: {e}")
-#             translated_text = "Sorry, I couldn't translate the response."
-
-#         # Append to chat history
-#         chat_history.append((query, translated_text))
-#         return translated_text
-#     except Exception as e:
-#         return f"Error fetching the response: {str(e)}"
-
-# # Route for the homepage
-# @app.route('/')
-# def index():
-#     return render_template('index.html')
-
-# # Route to handle chatbot messages
-# @app.route('/chat', methods=['POST'])
-# def chat():
-#     try:
-#         user_message = request.json.get("message")
-#         language = request.json.get("language")
-#         if not user_message:
-#             return jsonify({"response": "Please say something!"})
-#         bot_response = generate_response(user_message, language)
-#         return jsonify({"response": bot_response})
-#     except Exception as e:
-#         return jsonify({"response": f"An error occurred: {str(e)}"})
-
-# if __name__ == '__main__':
-#     app.run(debug=True)
+    app.run(debug=True)
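
For reference, below is a minimal sketch of the model and embedding configuration at the top of main.py after this commit, assembled from the added and unchanged lines in the diff above. The import block and the HF_TOKEN check are assumed to match the commented-out copy of the script that this commit deletes; the data-ingestion and Flask routing code further down the file is untouched by the commit and omitted here.

# Sketch only (not part of the commit): effective top-of-file configuration after this change.
# Imports are assumed from the commented-out copy of main.py that the commit removes.
import os

from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModel

# Ensure HF_TOKEN is set
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set.")

# Back to the remote Meta-Llama-3 model (was deepseek-ai/DeepSeek-R1-Distill-Llama-8B before this commit)
repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"
llm_client = InferenceClient(model=repo_id, token=HF_TOKEN)

# The LLM is called through the Hugging Face Inference API rather than loaded locally
Settings.llm = HuggingFaceInferenceAPI(
    model_name=repo_id,
    tokenizer_name=repo_id,
    context_window=3000,
    token=HF_TOKEN,
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)

# Multilingual sentence embeddings used for indexing and retrieval
Settings.embed_model = HuggingFaceEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)

# Local XLM-RoBERTa tokenizer and model, as in the added lines of the diff
tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-base")
model = AutoModel.from_pretrained("xlm-roberta-base")

PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'

In short, the commit drops the locally loaded, 4-bit quantized DeepSeek-R1-Distill-Llama-8B setup (along with the torch and accelerate imports), points repo_id back at meta-llama/Meta-Llama-3-8B-Instruct served through the Hugging Face Inference API with a 3000-token context window and 512 max new tokens, switches the embedding model to sentence-transformers/paraphrase-multilingual-mpnet-base-v2, and deletes the long commented-out copy of the previous version of the script.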