frankjosh committed on
Commit
62db363
·
verified ·
1 Parent(s): 1c4d662

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -52,10 +52,10 @@ def load_data():
52
  return data
53
 
54
  @st.cache_resource
55
- def precompute_embeddings(data: pd.DataFrame, _tokenizer, model, batch_size=BATCH_SIZE):
56
  """
57
  Precompute embeddings for repository metadata to optimize query performance.
58
- The tokenizer is excluded from caching as it is unhashable.
59
  """
60
  class TextDataset(Dataset):
61
  def __init__(self, texts: List[str], tokenizer, max_length=512):
@@ -108,7 +108,7 @@ def precompute_embeddings(data: pd.DataFrame, _tokenizer, model, batch_size=BATC
108
 
109
  embeddings = []
110
  for batch in dataloader:
111
- batch_embeddings = generate_embeddings_batch(model, batch, device)
112
  embeddings.extend(batch_embeddings)
113
 
114
  data['embedding'] = embeddings
 
52
  return data
53
 
54
  @st.cache_resource
55
+ def precompute_embeddings(data: pd.DataFrame, _tokenizer, _model, batch_size=BATCH_SIZE):
56
  """
57
  Precompute embeddings for repository metadata to optimize query performance.
58
+ The tokenizer and model are excluded from caching as they are unhashable.
59
  """
60
  class TextDataset(Dataset):
61
  def __init__(self, texts: List[str], tokenizer, max_length=512):
 
108
 
109
  embeddings = []
110
  for batch in dataloader:
111
+ batch_embeddings = generate_embeddings_batch(_model, batch, device)
112
  embeddings.extend(batch_embeddings)
113
 
114
  data['embedding'] = embeddings