import os
import time

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModel

# Setup environment: cache directory and Hugging Face token.
# setdefault ensures HF_HOME is set before it is read again below.
os.environ.setdefault("HF_HOME", "./hf_cache")
os.makedirs(os.environ["HF_HOME"], exist_ok=True)
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise EnvironmentError("Environment variable HF_TOKEN is not set.")
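# Example (shell; the token value is a placeholder): export HF_TOKEN=hf_xxx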

# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the Nomic embedding model and tokenizer
text_tokenizer = AutoTokenizer.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"]
)
text_model = AutoModel.from_pretrained(
    "nomic-ai/nomic-embed-text-v1.5",
    trust_remote_code=True,
    token=hf_token,
    cache_dir=os.environ["HF_HOME"]
).to(device)  # Move model to GPU if available
text_model.eval()  # Inference mode: disables dropout
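# Note: the nomic-embed-text-v1.5 model card recommends a task prefix on every
# input (e.g. "search_document: " for passages, "search_query: " for queries).
# A minimal helper sketch, assuming those prefix strings from the model card
# (with_task_prefix is a hypothetical helper, not part of the original app):
def with_task_prefix(text, task="search_document"):
    """Prepend a Nomic task prefix to the input text."""
    return f"{task}: {text}"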


def get_text_embeddings(text):
    """
    Converts input text into a dense embedding (a 1-D NumPy array) using the
    Nomic embedding model. These embeddings are used to query Qdrant for
    semantically relevant document chunks.
    """
    # Tokenize and move inputs to the same device as the model
    inputs = text_tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device)
    with torch.no_grad():  # No gradients needed for inference
        outputs = text_model(**inputs)
    # Mean-pool the token embeddings into a single sentence vector
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings[0].cpu().numpy()  # detach() is redundant under no_grad
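# Note: .mean(dim=1) above also averages over padding tokens. For batched or
# padded inputs, mask-aware mean pooling (the approach shown on the Nomic
# model card) is more accurate. A minimal sketch, not wired into the app:
def masked_mean_pool(last_hidden_state, attention_mask):
    # Zero out padding positions, then average over real tokens only
    mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
    summed = (last_hidden_state * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid division by zero
    return summed / counts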

def format_embedding(embedding):
    """Formats the embedding as 'embedding: [x.xx, x.xx, ...]'.
    Currently unused; kept as an alternative plain-text output format."""
    formatted = ", ".join(f"{x:.3f}" for x in embedding)
    return f"embedding: [{formatted}]"

def embed_text_interface(text):
    start_time = time.time()
    embedding = get_text_embeddings(text)
    print(f"Total time taken by nomic to embed: {time.time() - start_time:.3f}s")

    # Convert to a plain list so Gradio can serialize it as JSON
    embedding_list = embedding.tolist()
    return {
        "embedding": embedding_list,
        "shape": len(embedding_list)  # dimensionality of the vector
    }
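# Example of the returned JSON (values illustrative; nomic-embed-text-v1.5
# produces 768-dimensional vectors by default):
# {"embedding": [0.012, -0.034, ...], "shape": 768}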

interface = gr.Interface(
    fn=embed_text_interface,
    inputs=gr.Textbox(label="Input Text", lines=5),
    outputs=gr.JSON(label="Embedding Vector"),  # JSON output: vector plus its length
    title="Nomic Text Embeddings",
    description="Enter some text and get its embedding vector (and dimensionality) as JSON.",
    examples=[
        ["This is a sample text"],
        ["Another example sentence"]
    ]
)

if __name__ == "__main__":
    interface.queue(api_open=True).launch()  # api_open exposes the HTTP API endpoint
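
# A minimal client-side sketch for calling the running app via gradio_client
# (assumes the default local URL and endpoint name; both may differ in your
# deployment):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("This is a sample text", api_name="/predict")
#   print(result["shape"])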