ruslanmv committed on
Commit
c28812c
·
1 Parent(s): 43ba7b5
Files changed (2) hide show
  1. backend.py +79 -243
  2. requirements.txt +1 -1
backend.py CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import pandas as pd
3
  import requests
@@ -5,183 +15,107 @@ from PIL import Image, UnidentifiedImageError
5
  from io import BytesIO
6
  import matplotlib.pyplot as plt
7
  import urllib3
8
- from transformers import pipeline
9
- from transformers import BitsAndBytesConfig
10
  import torch
11
  import textwrap
12
- import pandas as pd
13
- import numpy as np
14
- from haversine import haversine # Install haversine library: pip install haversine
15
- from transformers import AutoProcessor, LlavaForConditionalGeneration
16
- from transformers import BitsAndBytesConfig
17
- import torch
18
-
19
  from huggingface_hub import InferenceClient
20
- IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
21
- IS_SPACE = os.environ.get("SPACE_ID", None) is not None
22
 
23
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
24
  LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
25
- print(f"Using device: {device}")
26
- print(f"low memory: {LOW_MEMORY}")
27
- # Define BitsAndBytesConfig
28
 
29
- # Ensure model is on the correct device
30
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
31
 
 
32
  quantization_config = BitsAndBytesConfig(
33
  load_in_4bit=True,
34
  bnb_4bit_compute_dtype=torch.float16
35
  )
36
 
 
 
 
 
37
 
38
- model_id = "llava-hf/llava-1.5-7b-hf"
39
-
40
- processor = AutoProcessor.from_pretrained(model_id)
41
-
42
-
43
- model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, device_map="auto")
44
- model.to(device)
45
-
46
-
47
- import os
48
- import requests
49
 
50
- url = 'https://github.com/ruslanmv/watsonx-with-multimodal-llava/raw/master/geocoded_hotels.csv'
51
- filename = 'geocoded_hotels.csv'
 
 
52
 
53
- # Check if the file already exists
54
- if not os.path.isfile(filename):
 
55
  response = requests.get(url)
56
-
57
  if response.status_code == 200:
58
- with open(filename, 'wb') as f:
59
  f.write(response.content)
60
- print(f"File {filename} downloaded successfully!")
61
  else:
62
  print(f"Error downloading file. Status code: {response.status_code}")
63
  else:
64
- print(f"File {filename} already exists.")
65
-
66
- import os
67
- import pandas as pd
68
- from datasets import load_dataset
69
- import pyarrow
70
-
71
- # 1. Get the Current Directory
72
- current_directory = os.getcwd()
73
-
74
- # 2. Construct the Full Path to the CSV File
75
- csv_file_path = os.path.join(current_directory, 'hotel_multimodal.csv')
76
 
77
- # 3. Check if the file exists
78
  if not os.path.exists(csv_file_path):
79
- # If not, download the dataset
80
- print("File not found, downloading from Hugging Face...")
81
-
82
  dataset = load_dataset("ruslanmv/hotel-multimodal")
83
-
84
- # Convert the 'train' dataset to a DataFrame using .to_pandas()
85
  df_hotels = dataset['train'].to_pandas()
86
-
87
- # 4.Save to CSV
88
  df_hotels.to_csv(csv_file_path, index=False)
89
  print("Dataset downloaded and saved as CSV.")
90
-
91
-
92
- # 5. Read the CSV file
93
- df_hotels = pd.read_csv(csv_file_path)
94
-
95
- print("DataFrame loaded:")
96
- geocoded_hotels_path = os.path.join(current_directory, 'geocoded_hotels.csv')
97
- # Read the CSV file
98
- geocoded_hotels = pd.read_csv(geocoded_hotels_path)
99
-
100
- import requests
101
 
102
  def get_current_location():
103
  try:
104
  response = requests.get('https://ipinfo.io/json')
105
  data = response.json()
106
-
107
  location = data.get('loc', '')
108
  if location:
109
- latitude, longitude = map(float, location.split(','))
110
- return latitude, longitude
111
  else:
112
  return None, None
113
  except Exception as e:
114
  print(f"An error occurred: {e}")
115
  return None, None
116
 
117
- latitude, longitude = get_current_location()
118
- if latitude and longitude:
119
- print(f"Current location: Latitude = {latitude}, Longitude = {longitude}")
120
- else:
121
- print("Could not retrieve the current location.")
122
-
123
-
124
- from geopy.geocoders import Nominatim
125
-
126
  def get_coordinates(location_name):
127
- """Fetches latitude and longitude coordinates for a given location name.
128
-
129
- Args:
130
- location_name (str): The name of the location (e.g., "Rome, Italy").
131
-
132
- Returns:
133
- tuple: A tuple containing the latitude and longitude (float values),
134
- or None if the location is not found.
135
- """
136
-
137
  geolocator = Nominatim(user_agent="coordinate_finder")
138
  location = geolocator.geocode(location_name)
139
-
140
  if location:
141
  return location.latitude, location.longitude
142
  else:
143
- return None # Location not found
144
-
145
-
146
 
147
  def find_nearby(place=None):
148
- if place!=None:
149
  coordinates = get_coordinates(place)
150
  if coordinates:
151
  latitude, longitude = coordinates
152
  print(f"The coordinates of {place} are: Latitude: {latitude}, Longitude: {longitude}")
153
  else:
154
  print(f"Location not found: {place}")
 
155
  else:
156
  latitude, longitude = get_current_location()
157
- if latitude and longitude:
158
- print(f"Current location: Latitude = {latitude}, Longitude = {longitude}")
159
- # Load the geocoded_hotels DataFrame
160
- current_directory = os.getcwd()
161
- geocoded_hotels_path = os.path.join(current_directory, 'geocoded_hotels.csv')
162
- geocoded_hotels = pd.read_csv(geocoded_hotels_path)
163
-
164
- # Define input coordinates for the reference location
165
- reference_latitude = latitude
166
- reference_longitude = longitude
167
-
168
- # Haversine Distance Function
169
- def calculate_haversine_distance(lat1, lon1, lat2, lon2):
170
- """Calculates the Haversine distance between two points on the Earth's surface."""
171
- return haversine((lat1, lon1), (lat2, lon2))
172
-
173
- # Calculate distances to all other points in the DataFrame
174
  geocoded_hotels['distance_km'] = geocoded_hotels.apply(
175
- lambda row: calculate_haversine_distance(
176
- reference_latitude, reference_longitude, row['latitude'], row['longitude']
177
- ),
178
  axis=1
179
  )
180
-
181
- # Sort by distance and get the top 5 closest points
182
  closest_hotels = geocoded_hotels.sort_values(by='distance_km').head(5)
183
-
184
- # Display the results
185
  print("The 5 closest locations are:\n")
186
  print(closest_hotels)
187
  return closest_hotels
@@ -189,60 +123,16 @@ def find_nearby(place=None):
189
  @spaces.GPU
190
  # Define the respond function
191
  def search_hotel(place=None):
192
- import os
193
- import pandas as pd
194
- import requests
195
- from PIL import Image, UnidentifiedImageError
196
- from io import BytesIO
197
- import urllib3
198
- from transformers import pipeline
199
- from transformers import BitsAndBytesConfig
200
- import torch
201
-
202
- # Suppress the InsecureRequestWarning
203
- urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
204
-
205
- # 1. Get the Current Directory
206
- current_directory = os.getcwd()
207
- # 2. Construct the Full Path to the CSV File
208
- csv_file_path = os.path.join(current_directory, 'hotel_multimodal.csv')
209
- # Read the CSV file
210
- df_hotels = pd.read_csv(csv_file_path)
211
- geocoded_hotels_path = os.path.join(current_directory, 'geocoded_hotels.csv')
212
- # Read the CSV file
213
- geocoded_hotels = pd.read_csv(geocoded_hotels_path)
214
-
215
- # Assuming find_nearby function is defined elsewhere
216
  df_found = find_nearby(place)
217
-
218
- # Converting df_found[["hotel_id"]].values to a list
219
  hotel_ids = df_found["hotel_id"].values.tolist()
220
-
221
- # Extracting rows from df_hotels where hotel_id is in the list hotel_ids
222
  filtered_df = df_hotels[df_hotels['hotel_id'].isin(hotel_ids)]
223
-
224
- # Ordering filtered_df by the order of hotel_ids
225
  filtered_df['hotel_id'] = pd.Categorical(filtered_df['hotel_id'], categories=hotel_ids, ordered=True)
226
  filtered_df = filtered_df.sort_values('hotel_id').reset_index(drop=True)
227
-
228
- # Define the quantization config and model ID
229
- quantization_config = BitsAndBytesConfig(
230
- load_in_4bit=True,
231
- bnb_4bit_compute_dtype=torch.float16
232
- )
233
-
234
- model_id = "llava-hf/llava-1.5-7b-hf"
235
-
236
- # Initialize the pipeline
237
- pipe = pipeline("image-to-text", model=model_id, model_kwargs={"quantization_config": quantization_config})
238
-
239
- # Group by hotel_id and take the first 2 image URLs for each hotel
240
  grouped_df = filtered_df.groupby('hotel_id', observed=True).head(2)
241
-
242
- # Create a new DataFrame for storing image descriptions
243
  description_data = []
244
 
245
- # Download and generate descriptions for the images
246
  for index, row in grouped_df.iterrows():
247
  hotel_id = row['hotel_id']
248
  hotel_name = row['hotel_name']
@@ -250,108 +140,71 @@ def search_hotel(place=None):
250
 
251
  try:
252
  response = requests.get(image_url, verify=False)
253
- response.raise_for_status() # Check for request errors
254
  img = Image.open(BytesIO(response.content))
255
 
256
- # Generate description for the image
257
- prompt = "USER: <image>\nAnalyze this image. Give me feedback on whether this hotel is worth visiting based on the picture. Provide a summary review.\nASSISTANT:"
258
- outputs = pipe(img, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
259
  description = outputs[0]["generated_text"].split("\nASSISTANT:")[-1].strip()
260
 
261
- # Append data to the list
262
- description_data.append({
263
- 'hotel_name': hotel_name,
264
- 'hotel_id': hotel_id,
265
- 'image': img,
266
- 'description': description
267
- })
268
  except (requests.RequestException, UnidentifiedImageError):
269
  print(f"Skipping image at URL: {image_url}")
270
 
271
- # Create a DataFrame from the description data
272
- description_df = pd.DataFrame(description_data)
273
- return description_df
274
-
275
 
276
  def show_hotels(place=None):
277
  description_df = search_hotel(place)
278
-
279
- # Calculate the number of rows needed
 
280
  num_images = len(description_df)
281
- num_rows = (num_images + 1) // 2 # Two images per row
282
 
283
  fig, axs = plt.subplots(num_rows * 2, 2, figsize=(20, 10 * num_rows))
284
 
285
  current_index = 0
286
-
287
  for _, row in description_df.iterrows():
288
  img = row['image']
289
  description = row['description']
290
 
291
- if img is None: # Skip if the image is missing
292
  continue
293
 
294
  row_idx = (current_index // 2) * 2
295
  col_idx = current_index % 2
296
 
297
- # Plot the image
298
  axs[row_idx, col_idx].imshow(img)
299
  axs[row_idx, col_idx].axis('off')
300
  axs[row_idx, col_idx].set_title(f"{row['hotel_name']}\nHotel ID: {row['hotel_id']} Image {current_index + 1}", fontsize=16)
301
 
302
- # Wrap the description text
303
  wrapped_description = "\n".join(textwrap.wrap(description, width=50))
304
-
305
- # Plot the description
306
  axs[row_idx + 1, col_idx].text(0.5, 0.5, wrapped_description, ha='center', va='center', wrap=True, fontsize=14)
307
  axs[row_idx + 1, col_idx].axis('off')
308
 
309
  current_index += 1
310
 
311
- # Hide any unused subplots
312
- total_plots = (current_index + 1) // 2 * 2
313
- for j in range(current_index, total_plots * 2):
314
- row_idx = (j // 2) * 2
315
- col_idx = j % 2
316
- if row_idx < num_rows * 2:
317
- axs[row_idx, col_idx].axis('off')
318
- if row_idx + 1 < num_rows * 2:
319
- axs[row_idx + 1, col_idx].axis('off')
320
-
321
  plt.tight_layout()
322
  plt.show()
323
 
324
  def grouped_description(description_df):
325
-
326
- # Group by 'hotel_id' and aggregate descriptions
327
- grouped_descriptions = description_df.groupby('hotel_id')['description'].apply(lambda x: ' '.join(x.astype(str))).reset_index()
328
-
329
- # Merge with original DataFrame to get hotel names
330
- result_df = pd.merge(grouped_descriptions, description_df[['hotel_id', 'hotel_name']], on='hotel_id', how='left')
331
-
332
- # Drop duplicates and keep only the first occurrence of each hotel_id
333
- result_df = result_df.drop_duplicates(subset='hotel_id', keep='first')
334
-
335
- # Reorder columns
336
- result_df = result_df[['hotel_name', 'hotel_id', 'description']]
337
- return result_df
338
-
339
- # prompt: please create a new python function that given the result_df as an input create a single prompt where for given hotel_name you append the hotel_id and description , such we can use later this as context for a future llm query
340
 
341
  def create_prompt_result(result_df):
342
- prompt = ""
343
- for _, row in result_df.iterrows():
344
- hotel_name = row['hotel_name']
345
- hotel_id = row['hotel_id']
346
- description = row['description']
347
- prompt += f"Hotel Name: {hotel_name}\nHotel ID: {hotel_id}\nDescription: {description}\n\n"
348
- return prompt
349
- from transformers import pipeline, BitsAndBytesConfig
350
- import torch
351
- from langchain import PromptTemplate
352
 
353
- # Create a LangChain prompt template for the hotel recommendation
354
- hotel_recommendation_template = """
355
  <s>[INST] <<SYS>>
356
  You are a helpful and informative chatbot assistant.
357
  <</SYS>>
@@ -359,27 +212,10 @@ Based on the following hotel descriptions, recommend the best hotel:
359
  {context_result}
360
  [/INST]
361
  """
 
362
  @spaces.GPU
363
  # Define the respond function
364
- # Use LangChain to create a prompt based on the template
365
- def build_prompt(context_result):
366
- prompt_template = PromptTemplate(template=hotel_recommendation_template)
367
- return prompt_template.format(context_result=context_result)
368
-
369
- # Quantization configuration for efficient model loading
370
- quantization_config = BitsAndBytesConfig(
371
- load_in_4bit=True,
372
- bnb_4bit_compute_dtype=torch.float16
373
- )
374
-
375
- # Initialize the text generation pipeline
376
- pipe_text = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.2",
377
- model_kwargs={"quantization_config": quantization_config})
378
-
379
  def generate_text_response(prompt):
380
  outputs = pipe_text(prompt, max_new_tokens=500)
381
- # Extract only the response after the instruction token
382
  response = outputs[0]['generated_text'].split("[/INST]")[-1].strip()
383
- return response
384
- #place='Genova Italia'
385
- #show_hotels(place)
 
1
# --- Imports ---------------------------------------------------------------
# Everything is imported before its first use. Previously `os` and `torch`
# were referenced above their import statements, which raises NameError as
# soon as the module is loaded.
import os
import textwrap
from io import BytesIO

import matplotlib.pyplot as plt
import pandas as pd
import requests
import torch
import urllib3
from geopy.geocoders import Nominatim
from haversine import haversine
from huggingface_hub import InferenceClient
from PIL import Image, UnidentifiedImageError
from transformers import (
    AutoProcessor,
    BitsAndBytesConfig,
    LlavaForConditionalGeneration,
    pipeline,
)

# NOTE(review): the `@spaces.GPU` decorators further down rely on a `spaces`
# module that is not imported anywhere in the visible file -- confirm and add
# the import if it is really missing.

# --- Constants -------------------------------------------------------------
IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
IS_SPACE = os.environ.get("SPACE_ID", None) is not None

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
device = DEVICE  # lowercase alias kept for code that still reads `device`
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
MODEL_ID = "llava-hf/llava-1.5-7b-hf"
TEXT_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

# Print device and memory info
print(f"Using device: {DEVICE}")
print(f"Low memory: {LOW_MEMORY}")

# Quantization configuration for efficient model loading
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# --- Models (loaded once at import time) -------------------------------------
processor = AutoProcessor.from_pretrained(MODEL_ID)

# device_map="auto" already places the quantized weights; calling .to() on a
# bitsandbytes 4-bit model afterwards is redundant and can be rejected by
# transformers, so it is intentionally not called here.
model = LlavaForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    device_map="auto",
)

# When pipeline() receives a model instance instead of a model id it cannot
# infer the tokenizer / image processor, so both are passed explicitly.
pipe_image_to_text = pipeline(
    "image-to-text",
    model=model,
    tokenizer=processor.tokenizer,
    image_processor=processor.image_processor,
)

# Initialize the text generation pipeline
pipe_text = pipeline(
    "text-generation",
    model=TEXT_MODEL_ID,
    model_kwargs={"quantization_config": quantization_config},
)
 
 
 
 
 
 
 
 
 
48
 
49
# Ensure data files are available
current_directory = os.getcwd()
geocoded_hotels_path = os.path.join(current_directory, 'geocoded_hotels.csv')
csv_file_path = os.path.join(current_directory, 'hotel_multimodal.csv')

# Load geocoded hotels data, downloading the CSV when it is not cached locally
if not os.path.isfile(geocoded_hotels_path):
    url = 'https://github.com/ruslanmv/watsonx-with-multimodal-llava/raw/master/geocoded_hotels.csv'
    # Bounded wait so a stalled connection cannot block module import forever
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Error downloading file. Status code: {response.status_code}")
        # Fail here, with the cause attached, instead of letting pd.read_csv
        # below raise a bare FileNotFoundError for the file we never wrote.
        raise FileNotFoundError(
            f"{geocoded_hotels_path} is missing and could not be downloaded "
            f"(HTTP status {response.status_code})"
        )
    with open(geocoded_hotels_path, 'wb') as f:
        f.write(response.content)
    print(f"File {geocoded_hotels_path} downloaded successfully!")
else:
    print(f"File {geocoded_hotels_path} already exists.")
geocoded_hotels = pd.read_csv(geocoded_hotels_path)

# Load hotel dataset, exporting it to CSV on the first run
if not os.path.exists(csv_file_path):
    # Imported here because this is the only place it is needed and the
    # module-level import of `load_dataset` is not present in this file.
    from datasets import load_dataset

    dataset = load_dataset("ruslanmv/hotel-multimodal")
    df_hotels = dataset['train'].to_pandas()
    df_hotels.to_csv(csv_file_path, index=False)
    print("Dataset downloaded and saved as CSV.")
else:
    df_hotels = pd.read_csv(csv_file_path)
 
 
 
 
 
 
 
 
 
76
 
77
  def get_current_location():
78
  try:
79
  response = requests.get('https://ipinfo.io/json')
80
  data = response.json()
 
81
  location = data.get('loc', '')
82
  if location:
83
+ return map(float, location.split(','))
 
84
  else:
85
  return None, None
86
  except Exception as e:
87
  print(f"An error occurred: {e}")
88
  return None, None
89
 
 
 
 
 
 
 
 
 
 
90
  def get_coordinates(location_name):
 
 
 
 
 
 
 
 
 
 
91
  geolocator = Nominatim(user_agent="coordinate_finder")
92
  location = geolocator.geocode(location_name)
 
93
  if location:
94
  return location.latitude, location.longitude
95
  else:
96
+ return None
 
 
97
 
98
  def find_nearby(place=None):
99
+ if place:
100
  coordinates = get_coordinates(place)
101
  if coordinates:
102
  latitude, longitude = coordinates
103
  print(f"The coordinates of {place} are: Latitude: {latitude}, Longitude: {longitude}")
104
  else:
105
  print(f"Location not found: {place}")
106
+ return None
107
  else:
108
  latitude, longitude = get_current_location()
109
+ if not latitude or not longitude:
110
+ print("Could not retrieve the current location.")
111
+ return None
112
+
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  geocoded_hotels['distance_km'] = geocoded_hotels.apply(
114
+ lambda row: haversine((latitude, longitude), (row['latitude'], row['longitude'])),
 
 
115
  axis=1
116
  )
117
+
 
118
  closest_hotels = geocoded_hotels.sort_values(by='distance_km').head(5)
 
 
119
  print("The 5 closest locations are:\n")
120
  print(closest_hotels)
121
  return closest_hotels
 
123
  @spaces.GPU
124
  # Define the respond function
125
  def search_hotel(place=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  df_found = find_nearby(place)
127
+ if df_found is None:
128
+ return pd.DataFrame()
129
  hotel_ids = df_found["hotel_id"].values.tolist()
 
 
130
  filtered_df = df_hotels[df_hotels['hotel_id'].isin(hotel_ids)]
 
 
131
  filtered_df['hotel_id'] = pd.Categorical(filtered_df['hotel_id'], categories=hotel_ids, ordered=True)
132
  filtered_df = filtered_df.sort_values('hotel_id').reset_index(drop=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  grouped_df = filtered_df.groupby('hotel_id', observed=True).head(2)
 
 
134
  description_data = []
135
 
 
136
  for index, row in grouped_df.iterrows():
137
  hotel_id = row['hotel_id']
138
  hotel_name = row['hotel_name']
 
140
 
141
  try:
142
  response = requests.get(image_url, verify=False)
143
+ response.raise_for_status()
144
  img = Image.open(BytesIO(response.content))
145
 
146
+ prompt = "USER: <image>\nAnalyze this image. Give me feedback on whether this hotel is worth visiting based on the picture. Provide a summary review.\nASSISTANT:"
147
+ outputs = pipe_image_to_text(img, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
 
148
  description = outputs[0]["generated_text"].split("\nASSISTANT:")[-1].strip()
149
 
150
+ description_data.append({'hotel_name': hotel_name, 'hotel_id': hotel_id, 'image': img, 'description': description})
 
 
 
 
 
 
151
  except (requests.RequestException, UnidentifiedImageError):
152
  print(f"Skipping image at URL: {image_url}")
153
 
154
+ return pd.DataFrame(description_data)
 
 
 
155
 
156
  def show_hotels(place=None):
157
  description_df = search_hotel(place)
158
+ if description_df.empty:
159
+ print("No hotels found.")
160
+ return
161
  num_images = len(description_df)
162
+ num_rows = (num_images + 1) // 2
163
 
164
  fig, axs = plt.subplots(num_rows * 2, 2, figsize=(20, 10 * num_rows))
165
 
166
  current_index = 0
 
167
  for _, row in description_df.iterrows():
168
  img = row['image']
169
  description = row['description']
170
 
171
+ if img is None:
172
  continue
173
 
174
  row_idx = (current_index // 2) * 2
175
  col_idx = current_index % 2
176
 
 
177
  axs[row_idx, col_idx].imshow(img)
178
  axs[row_idx, col_idx].axis('off')
179
  axs[row_idx, col_idx].set_title(f"{row['hotel_name']}\nHotel ID: {row['hotel_id']} Image {current_index + 1}", fontsize=16)
180
 
 
181
  wrapped_description = "\n".join(textwrap.wrap(description, width=50))
 
 
182
  axs[row_idx + 1, col_idx].text(0.5, 0.5, wrapped_description, ha='center', va='center', wrap=True, fontsize=14)
183
  axs[row_idx + 1, col_idx].axis('off')
184
 
185
  current_index += 1
186
 
 
 
 
 
 
 
 
 
 
 
187
  plt.tight_layout()
188
  plt.show()
189
 
190
  def grouped_description(description_df):
191
+ grouped_descriptions = description_df.groupby('hotel_id')['description'].apply(lambda x: ' '.join(x.astype(str))).reset_index()
192
+ result_df = pd.merge(grouped_descriptions, description_df[['hotel_id', 'hotel_name']], on='hotel_id', how='left')
193
+ result_df = result_df.drop_duplicates(subset='hotel_id', keep='first')
194
+ result_df = result_df[['hotel_name', 'hotel_id', 'description']]
195
+ return result_df
 
 
 
 
 
 
 
 
 
 
196
 
197
  def create_prompt_result(result_df):
198
+ prompt = ""
199
+ for _, row in result_df.iterrows():
200
+ hotel_name = row['hotel_name']
201
+ hotel_id = row['hotel_id']
202
+ description = row['description']
203
+ prompt += f"Hotel Name: {hotel_name}\nHotel ID: {hotel_id}\nDescription: {description}\n\n"
204
+ return prompt
 
 
 
205
 
206
+ def build_prompt(context_result):
207
+ hotel_recommendation_template = """
208
  <s>[INST] <<SYS>>
209
  You are a helpful and informative chatbot assistant.
210
  <</SYS>>
 
212
  {context_result}
213
  [/INST]
214
  """
215
+ return hotel_recommendation_template.format(context_result=context_result)
216
  @spaces.GPU
217
  # Define the respond function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  def generate_text_response(prompt):
219
  outputs = pipe_text(prompt, max_new_tokens=500)
 
220
  response = outputs[0]['generated_text'].split("[/INST]")[-1].strip()
221
+ return response
 
 
requirements.txt CHANGED
@@ -6,4 +6,4 @@ haversine
6
  langchain
7
  accelerate
8
  bitsandbytes
9
- transformers @ git+https://github.com/huggingface/transformers.git
 
6
  langchain
7
  accelerate
8
  bitsandbytes
9
+ transformers