Spaces:

tushar-r-pawar
/

internlm2_5-7b-chat_with_airllm

Sleeping

App Files Community

tushar-r-pawar committed on Jul 9, 2024

Commit

54ac477

verified ·

1 Parent(s): 2971ea5

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -8

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import streamlit as st
 import os
 from dotenv import load_dotenv
-from airllm import AutoModel
 # Load environment variables
 load_dotenv()
@@ -11,13 +11,15 @@ load_dotenv()
 # Retrieve the API token from the environment variables
 api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-# Initialize model and tokenizer using the AutoModel from AirLLM
 MAX_LENGTH = 128
-model = AutoModel.from_pretrained("internlm/internlm2_5-7b")
 # Streamlit app configuration
 st.set_page_config(
-    page_title="Conversational Chatbot with internlm2_5-7b-chat and AirLLM",
     page_icon="🤖",
     layout="wide",
     initial_sidebar_state="expanded",
@@ -67,22 +69,27 @@ user_input = st.text_input("You: ", "")
 if st.button("Send"):
     if user_input:
         # Tokenize user input
-        input_tokens = model.tokenizer(user_input,
             return_tensors="pt",
             return_attention_mask=False,
             truncation=True,
             max_length=MAX_LENGTH,
             padding=False)
         # Generate response
         generation_output = model.generate(
-            input_tokens['input_ids'].cuda(),
             max_new_tokens=20,
             use_cache=True,
             return_dict_in_generate=True)
         # Decode response
-        response = model.tokenizer.decode(generation_output.sequences[0])
         st.text_area("Bot:", value=response, height=200, max_chars=None)
     else:
         st.warning("Please enter a message.")

 import torch
 import streamlit as st
 import os
 from dotenv import load_dotenv
+from airllm import AirLLMInternLM
+from transformers import AutoTokenizer, GenerationConfig
 # Load environment variables
 load_dotenv()
 # Retrieve the API token from the environment variables
 api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+# Initialize model and tokenizer
 MAX_LENGTH = 128
+model_name = "internlm/internlm2_5-7b"
+model = AirLLMInternLM.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
 # Streamlit app configuration
 st.set_page_config(
+    page_title="Conversational Chatbot with internlm2_5-7b-chat",
     page_icon="🤖",
     layout="wide",
     initial_sidebar_state="expanded",
 if st.button("Send"):
     if user_input:
         # Tokenize user input
+        input_tokens = tokenizer(user_input,
             return_tensors="pt",
             return_attention_mask=False,
             truncation=True,
             max_length=MAX_LENGTH,
             padding=False)
+        # Check if CUDA is available and use it if possible
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        model.to(device)
+        input_tokens = input_tokens.to(device)
         # Generate response
         generation_output = model.generate(
+            input_ids=input_tokens['input_ids'],
             max_new_tokens=20,
             use_cache=True,
             return_dict_in_generate=True)
         # Decode response
+        response = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
         st.text_area("Bot:", value=response, height=200, max_chars=None)
     else:
         st.warning("Please enter a message.")