mkaramb committed on
Commit
60d2516
·
verified ·
1 Parent(s): 28ecb63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -99
app.py CHANGED
@@ -2,7 +2,6 @@ import os
2
  # Upload credential json file from default compute service account
3
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
4
 
5
- import pandas as pd
6
  from google.api_core.client_options import ClientOptions
7
  from google.cloud import documentai_v1 as documentai
8
  from google.cloud.documentai_v1.types import RawDocument
@@ -11,13 +10,7 @@ import zipfile
11
  import os
12
  import io
13
  import gradio as gr
14
- import pathlib
15
- import textwrap
16
- import json
17
- import google.generativeai as genai
18
- from IPython.display import display
19
- from IPython.display import Markdown
20
- import random
21
  import tempfile
22
 
23
  # Global DataFrame declaration
@@ -63,53 +56,6 @@ def unzip_and_find_jpgs(file_path):
63
  jpg_files.append(full_path)
64
  return jpg_files
65
 
66
- def get_random_pairs_list(shots, num_pairs=2):
67
- keys = random.sample(list(shots.keys()), num_pairs)
68
- return [(key, shots[key]) for key in keys]
69
-
70
- def construct_prompt(input_text, random_pairs):
71
- # Example setup based on your specified format
72
- prompt = \
73
- """
74
- Follow the examples below. Your response should contain only JSON. If you
75
- encounter two dates in an input, prefer the earliest. If the answer is not
76
- exact, try your best, but do not use excess wording. If you are completely
77
- unsure or there is no answer, insert UNKNOWN.
78
-
79
- Input 1:
80
- {random_pairs[0][0]}
81
-
82
- Output 1:
83
- {{"Collector":"{random_pairs[0][1]['Collector']}","Location":"{random_pairs[0][1]['Location']}","Taxon":"{random_pairs[0][1]['Taxon']}","Date":"{random_pairs[0][1]['Date']}"}}
84
-
85
- Input 2:
86
- {random_pairs[1][0]}
87
-
88
- Output 2:
89
- {{"Collector":"{random_pairs[1][1]['Collector']}","Location":"{random_pairs[1][1]['Location']}","Taxon":"{random_pairs[1][1]['Taxon']}","Date":"{random_pairs[1][1]['Date']}"}}
90
-
91
- Input 3:
92
- {input_text}
93
- Output 3:
94
- """
95
- return prompt
96
-
97
- def process_responses(responses):
98
- structured_responses = []
99
- for response in responses:
100
- try:
101
- # Assuming response is a string of JSON data
102
- parsed_json = json.loads(response.text)
103
- structured_responses.append(parsed_json)
104
- except json.JSONDecodeError:
105
- structured_responses.append({
106
- "Collector": "UNKNOWN",
107
- "Location": "UNKNOWN",
108
- "Taxon": "UNKNOWN",
109
- "Date": "UNKNOWN"
110
- })
111
- return structured_responses
112
-
113
  def process_images(uploaded_file):
114
  global results_df
115
  results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
@@ -130,47 +76,6 @@ def process_images(uploaded_file):
130
  "Translated Text": translated_text
131
  }])
132
  results_df = pd.concat([results_df, new_row], ignore_index=True)
133
-
134
- # Configure the generative AI model
135
- genai.configure(api_key='AIzaSyB9iHlqAgz5TEF36Kg_fJLJvoIDCJkqwJI')
136
- model = genai.GenerativeModel('gemini-pro')
137
-
138
- # Prepare data for few-shot learning
139
- shots = \
140
- {
141
- "Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6663 NCIL 14 September 2017 N° 2581259 TIBET PE CHINESE NATIONAL HERBARIUM (PE) 02334125 #PE6663 COMPOSITAE Aster albescens (DC.) Hand.-Mazz. A: it (Guo-Jin ZHANG) 01 April 2018"\
142
- :{"Collector":"Guo-Jin, Zhang",
143
- "Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
144
- "Taxon":"Aster albescens (DC.) Hand.-Mazz., Compositae (Asteraceae) family",
145
- "Date":"14 September 2017"
146
- },
147
-
148
- "PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang Spiral Leaf Green 17 May 2018"
149
- :{"Collector":"UNKNOWN",
150
- "Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
151
- "Taxon":"Spiral Leaf Green",
152
- "Date":"17 May 2018"
153
- },
154
-
155
- "Honey Plants Research Institute of the Chinese Academy of Agricultural Sciences Collection No.: 13687. May 7, 1993 Habitat Roadside Altitude: 1600 * Characters Shrub No. Herbarium of the Institute of Botany, Chinese Academy of Sciences Collector 3687 Scientific Name Height: m (cm) Diameter at breast height m (cm) Flower: White Fruit: Notes Blooming period: from January to July Honey: Scientific Name: Rosa Sericea Lindl. Appendix: Collector: cm 1 2 3 4 25 CHINESE NATIONAL HERBARUM ( 01833954 No 1479566 * Herbarium of the Institute of Botany, Chinese Academy of Sciences Sichuan SZECHUAN DET. Rosa sercea Lindl. var. Various Zhi 2009-02-16"
156
- :{"Collector":"UNKNOWN",
157
- "Location":"Sichuan, China",
158
- "Taxon":"Rosa sericea Lindl., with possible variant identification as 'var. Various Zhi'",
159
- "Date":"7 May 1993",
160
- },
161
- }
162
-
163
- responses = []
164
- for input_text in results_df["Translated Text"]:
165
- random_pairs = get_random_pairs_list(shots)
166
- prompt = construct_prompt(input_text, random_pairs)
167
- response = model.generate_content(prompt)
168
- responses.append(response)
169
-
170
- # Processing responses
171
- json_responses = process_responses(responses)
172
- results_df = pd.concat([results_df, pd.DataFrame(json_responses)], axis=1)
173
-
174
  except Exception as e:
175
  return f"An error occurred: {str(e)}"
176
 
@@ -180,7 +85,7 @@ def process_images(uploaded_file):
180
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") # Create a temp file
181
  results_df.to_csv(temp_file.name, index=False) # Save DataFrame to CSV
182
  temp_file.close() # Close the file
183
-
184
  # Return HTML and the path to the CSV file
185
  return html_output, temp_file.name
186
 
@@ -194,8 +99,8 @@ with gr.Blocks() as interface:
194
  html_output = gr.HTML()
195
  with gr.Row():
196
  file_output = gr.File()
197
-
198
  file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
199
 
200
  if __name__ == "__main__":
201
- interface.launch()
 
2
  # Upload credential json file from default compute service account
3
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
4
 
 
5
  from google.api_core.client_options import ClientOptions
6
  from google.cloud import documentai_v1 as documentai
7
  from google.cloud.documentai_v1.types import RawDocument
 
10
  import os
11
  import io
12
  import gradio as gr
13
+ import pandas as pd
 
 
 
 
 
 
14
  import tempfile
15
 
16
  # Global DataFrame declaration
 
56
  jpg_files.append(full_path)
57
  return jpg_files
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def process_images(uploaded_file):
60
  global results_df
61
  results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
 
76
  "Translated Text": translated_text
77
  }])
78
  results_df = pd.concat([results_df, new_row], ignore_index=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  except Exception as e:
80
  return f"An error occurred: {str(e)}"
81
 
 
85
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") # Create a temp file
86
  results_df.to_csv(temp_file.name, index=False) # Save DataFrame to CSV
87
  temp_file.close() # Close the file
88
+
89
  # Return HTML and the path to the CSV file
90
  return html_output, temp_file.name
91
 
 
99
  html_output = gr.HTML()
100
  with gr.Row():
101
  file_output = gr.File()
102
+
103
  file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
104
 
105
  if __name__ == "__main__":
106
+ interface.launch(debug=True)