Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,15 +13,18 @@ from PIL import Image
|
|
13 |
|
14 |
device = torch.device('cpu')
|
15 |
|
|
|
16 |
#Spacy Initialization Section
|
17 |
nlp = spacy.load("./models/en_core_web_sm")
|
18 |
|
|
|
19 |
#BERT Initialization Section
|
20 |
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
|
21 |
bert_model = BertModel.from_pretrained("bert-base-uncased")
|
22 |
bert_model.to(device)
|
23 |
bert_model.eval()
|
24 |
|
|
|
25 |
#SpaBERT Initialization Section
|
26 |
data_file_path = 'models/spabert/datasets/SpaBERTPivots.json' #Sample file otherwise this model will take too long on CPU.
|
27 |
pretrained_model_path = 'models/spabert/datasets/fine-spabert-base-uncased-finetuned-osm-mn.pth'
|
@@ -37,6 +40,7 @@ spaBERT_model.load_state_dict(pre_trained_model, strict=False)
|
|
37 |
spaBERT_model.to(device)
|
38 |
spaBERT_model.eval()
|
39 |
|
|
|
40 |
#Load data using SpatialDataset
|
41 |
spatialDataset = PbfMapDataset(data_file_path = data_file_path,
|
42 |
tokenizer = bert_tokenizer,
|
@@ -57,6 +61,7 @@ entity_index_dict = {entity['pivot_name']: i for i, entity in enumerate(spatialD
|
|
57 |
# Ensure names are stored in lowercase for case-insensitive matching
|
58 |
entity_index_dict = {name.lower(): index for name, index in entity_index_dict.items()}
|
59 |
|
|
|
60 |
#Pre-aquire the SpaBERT embeddings for all geo-entities within our dataset
|
61 |
def process_entity(batch, model, device):
|
62 |
input_ids = batch['masked_input'].to(device)
|
@@ -91,8 +96,6 @@ for batch in (data_loader):
|
|
91 |
spaBERT_embedding, input_ids = process_entity(batch, spaBERT_model, device)
|
92 |
spaBERT_embeddings.append(spaBERT_embedding)
|
93 |
|
94 |
-
#st.write("SpaBERT Embedding shape:", spaBERT_embedding[0].shape)
|
95 |
-
#st.write("SpaBERT Embedding:", spaBERT_embedding[0])
|
96 |
embedding_cache = {}
|
97 |
|
98 |
|
@@ -109,16 +112,18 @@ def get_bert_embedding(review_text):
|
|
109 |
bert_embedding = outputs.last_hidden_state[:, 0, :].detach() #CLS Token
|
110 |
return bert_embedding
|
111 |
|
|
|
112 |
#Get SpaBERT Embedding for geo-entity
|
113 |
def get_spaBert_embedding(entity):
|
114 |
entity_index = entity_index_dict.get(entity.lower(), None)
|
115 |
if entity_index is None:
|
116 |
st.write("Got Bert embedding for: ", entity)
|
117 |
-
return get_bert_embedding(entity)
|
118 |
else:
|
119 |
st.write("Got SpaBert embedding for: ", entity)
|
120 |
return spaBERT_embeddings[entity_index]
|
121 |
|
|
|
122 |
#Go through each review, identify all geo-entities, then extract their SpaBERT embedings
|
123 |
def processSpatialEntities(review, nlp):
|
124 |
doc = nlp(review)
|
@@ -131,7 +136,11 @@ def processSpatialEntities(review, nlp):
|
|
131 |
spaBert_emb = get_spaBert_embedding(text)
|
132 |
token_embeddings.append((text, spaBert_emb))
|
133 |
st.write("Geo-Entity Found in review: ", text)
|
134 |
-
|
|
|
|
|
|
|
|
|
135 |
|
136 |
# Function to read reviews from a text file
|
137 |
def load_reviews_from_file(file_path):
|
@@ -163,14 +172,6 @@ st.write("**Color Key:**")
|
|
163 |
for label, (color, description) in COLOR_MAP.items():
|
164 |
st.markdown(f"- **{label}**: <span style='color:{color}'>{color}</span> - {description}", unsafe_allow_html=True)
|
165 |
|
166 |
-
# Text input
|
167 |
-
#user_input = st.text_area("Input Text", height=200)
|
168 |
-
|
169 |
-
# Define example reviews for testing
|
170 |
-
#example_reviews = {
|
171 |
-
# "Review 1": "Meh. My brother lives near the Italian Market in South Philly. I went for a visit. Luckily for me, my brother and his girlfriend are foodies. I was able to taste many different cuisines in Philly. Coming from San Francisco, there are places I don't go due to the tourist trap aura and the non-authenticity of it all (Fisherman’s Wharf, Chinatown, etc.). But when I was in Philly, I had to have a cheesesteak... and I had to go to the two most famous places, which of course are right across the street from one another, in a big rivalry, and featured on the Food Network! How cheesy, but essential. We split two, both "wit whiz"? (cheese whiz) one from Geno's and one from Pat's. Pat's was much tastier than Geno's. The meat was seasoned, and the bun and cheese had much more flavor... better of the two... it seems.",
|
172 |
-
# "Review 2": "Google, headquartered in Mountain View, is a leading tech company in the United States.",
|
173 |
-
#}
|
174 |
review_file_path = "models/spabert/datasets/SampleReviews.txt"
|
175 |
example_reviews = load_reviews_from_file(review_file_path)
|
176 |
|
|
|
13 |
|
14 |
# Run all models on CPU (this Space has no GPU).
device = torch.device('cpu')

#Spacy Initialization Section
# NOTE(review): loads a locally bundled en_core_web_sm model from ./models,
# not the pip-installed package — confirm the path exists at deploy time.
nlp = spacy.load("./models/en_core_web_sm")

#BERT Initialization Section
# Standard uncased BERT used as a fallback embedder for unknown entities.
bert_tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
bert_model = BertModel.from_pretrained("bert-base-uncased")
bert_model.to(device)
bert_model.eval()  # inference only — disables dropout

#SpaBERT Initialization Section
data_file_path = 'models/spabert/datasets/SpaBERTPivots.json' #Sample file otherwise this model will take too long on CPU.
pretrained_model_path = 'models/spabert/datasets/fine-spabert-base-uncased-finetuned-osm-mn.pth'
|
|
|
40 |
spaBERT_model.to(device)
|
41 |
spaBERT_model.eval()
|
42 |
|
43 |
+
|
44 |
#Load data using SpatialDataset
|
45 |
spatialDataset = PbfMapDataset(data_file_path = data_file_path,
|
46 |
tokenizer = bert_tokenizer,
|
|
|
61 |
# Ensure names are stored in lowercase for case-insensitive matching
|
62 |
entity_index_dict = {name.lower(): index for name, index in entity_index_dict.items()}
|
63 |
|
64 |
+
|
65 |
#Pre-acquire the SpaBERT embeddings for all geo-entities within our dataset
|
66 |
def process_entity(batch, model, device):
|
67 |
input_ids = batch['masked_input'].to(device)
|
|
|
96 |
spaBERT_embedding, input_ids = process_entity(batch, spaBERT_model, device)
|
97 |
spaBERT_embeddings.append(spaBERT_embedding)
|
98 |
|
|
|
|
|
99 |
embedding_cache = {}
|
100 |
|
101 |
|
|
|
112 |
bert_embedding = outputs.last_hidden_state[:, 0, :].detach() #CLS Token
|
113 |
return bert_embedding
|
114 |
|
115 |
+
|
116 |
#Get SpaBERT Embedding for geo-entity
def get_spaBert_embedding(entity):
    """Return an embedding for *entity*.

    Prefers the pre-computed SpaBERT embedding, looked up by lowercased
    entity name; falls back to an on-the-fly BERT embedding when the
    entity is absent from the dataset index (rare cases only).
    """
    index = entity_index_dict.get(entity.lower())
    if index is not None:
        st.write("Got SpaBert embedding for: ", entity)
        return spaBERT_embeddings[index]
    st.write("Got Bert embedding for: ", entity)
    #Fallback in-case SpaBERT could not resolve entity to retrieve embedding. Rare-cases only.
    return get_bert_embedding(entity)
|
125 |
|
126 |
+
|
127 |
#Go through each review, identify all geo-entities, then extract their SpaBERT embeddings
|
128 |
def processSpatialEntities(review, nlp):
|
129 |
doc = nlp(review)
|
|
|
136 |
spaBert_emb = get_spaBert_embedding(text)
|
137 |
token_embeddings.append((text, spaBert_emb))
|
138 |
st.write("Geo-Entity Found in review: ", text)
|
139 |
+
|
140 |
+
processed_embedding = torch.cat(token_embeddings, dim=0)
|
141 |
+
st.write("processed embedding shape: ", processed_embedding.shape)
|
142 |
+
return processed_embedding
|
143 |
+
|
144 |
|
145 |
# Function to read reviews from a text file
|
146 |
def load_reviews_from_file(file_path):
|
|
|
172 |
# Render a legend entry for every highlight label: the label name, a swatch
# of its color (via inline HTML, hence unsafe_allow_html), and a description.
for label, (color, description) in COLOR_MAP.items():
    st.markdown(f"- **{label}**: <span style='color:{color}'>{color}</span> - {description}", unsafe_allow_html=True)

# Load the example reviews shown in the UI from a bundled text file.
review_file_path = "models/spabert/datasets/SampleReviews.txt"
example_reviews = load_reviews_from_file(review_file_path)
|
177 |
|