Spaces:

yachay
/

byt5-geotagging-es

Build error

App Files Files Community

byt5-geotagging-es / app.py

yachay

Update app.py

7cde107 about 2 years ago

raw

history blame

4.97 kB

	# Import necessary libraries
	import streamlit as st
	import pandas as pd
	from transformers import PretrainedConfig, PreTrainedModel, T5EncoderModel, AutoTokenizer
	import torch
	import torch.nn as nn
	import copy
	import pydeck as pdk

	keep_layer_count=6


	class ByT5ForTextGeotaggingConfig(PretrainedConfig):
	model_type = "byt5_for_text_geotagging"

	def __init__(self, n_clusters, model_name_or_path, class_to_location=None, **kwargs):
	super(ByT5ForTextGeotaggingConfig, self).__init__(**kwargs)
	self.n_clusters = n_clusters
	self.model_name_or_path = model_name_or_path
	self.class_to_location = class_to_location or {}


	def to_diff_dict(self):
	# Convert the configuration to a dictionary
	config_dict = self.to_dict()

	# Get the default configuration for comparison
	default_config_dict = PretrainedConfig().to_dict()

	# Return the differences
	diff_dict = {k: v for k, v in config_dict.items() if k not in default_config_dict or v != default_config_dict[k]}

	return diff_dict


	def deleteEncodingLayers(model, num_layers_to_keep): # must pass in the full bert model
	oldModuleList = model.encoder.block
	newModuleList = torch.nn.ModuleList()

	# Now iterate over all layers, only keepign only the relevant layers.
	for i in range(0, num_layers_to_keep):
	newModuleList.append(oldModuleList[i])

	# create a copy of the model, modify it with the new list, and return
	copyOfModel = copy.deepcopy(model)
	copyOfModel.encoder.block = newModuleList

	return copyOfModel

	class ByT5ForTextGeotagging(PreTrainedModel):
	config_class = ByT5ForTextGeotaggingConfig

	def __init__(self, config):
	super(ByT5ForTextGeotagging, self).__init__(config)

	self.byt5 = T5EncoderModel.from_pretrained(config.model_name_or_path)
	if keep_layer_count is not None:
	self.byt5 = deleteEncodingLayers(self.byt5, keep_layer_count)

	hidden_size = self.byt5.config.d_model
	self.fc3 = nn.Linear(hidden_size, config.n_clusters)

	def forward(self, input, return_coordinates=False):
	input = self.byt5(input[:, 0, :].squeeze(1))['last_hidden_state']
	input = input[:, 0, :].squeeze(1)
	logits = self.fc3(input)

	if return_coordinates:
	class_idx = torch.argmax(logits, dim=1).item()
	coordinates = self.config.class_to_location.get(str(class_idx))
	return logits, coordinates
	else:
	return logits


	@st.cache_data(ttl=None, persist=True)
	def load_model_and_tokenizer():
	byt5_tokenizer = AutoTokenizer.from_pretrained("yachay/byt5-geotagging-es", token="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")
	model = ByT5ForTextGeotagging.from_pretrained("yachay/byt5-geotagging-es", token="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")
	return byt5_tokenizer, model

	byt5_tokenizer, model = load_model_and_tokenizer()


	def geolocate_text_byt5(text):
	input_tensor = byt5_tokenizer(text, return_tensors="pt", truncation=True, max_length=140)['input_ids']
	logits, (lat, lon) = model(input_tensor.unsqueeze(0), return_coordinates=True)
	return lat, lon

	if 'text_input_state' not in st.session_state:
	st.session_state.text_input_state = ""

	# When an example button is clicked, update the session state
	def set_example_text(example_text):
	st.session_state.text_input_state = example_text

	example_texts = [
	"Disfrutando de una paella deliciosa en las playas de #Valencia 🥘☀️",
	"¡Barcelona es increíble! #VacacionesEnEspaña",
	"Me encantó el ceviche en Lima. #Perú",
	"Bailando tango en las calles de Buenos Aires",
	"Admirando las hermosas playas de Cancún. #México"
	]


	# Streamlit interface
	st.title('GeoTagging using ByT5')

	st.write('Examples:')
	for example in example_texts:
	if st.button(f"{example}"):
	set_example_text(example)

	# Get text input and update session state when it's modified
	text_input = st.text_input('Enter your text:', value=st.session_state.text_input_state)
	if text_input != st.session_state.text_input_state:
	st.session_state.text_input_state = text_input

	if text_input:
	location = geolocate_text_byt5(text_input)
	st.write('Predicted Location: ', location)

	# Render map with pydeck
	map_data = pd.DataFrame(
	[[location[0], location[1]]],
	columns=["lat", "lon"]
	)

	st.pydeck_chart(pdk.Deck(
	map_style='mapbox://styles/mapbox/light-v9',
	initial_view_state=pdk.ViewState(
	latitude=location[0],
	longitude=location[1],
	zoom=11,
	pitch=50,
	),
	layers=[
	pdk.Layer(
	'ScatterplotLayer',
	data=map_data,
	get_position='[lon, lat]',
	get_color='[200, 30, 0, 160]',
	get_radius=200,
	),
	],
	))