Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,6 @@ import os
|
|
2 |
# Upload credential json file from default compute service account
|
3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
4 |
|
5 |
-
import pandas as pd
|
6 |
from google.api_core.client_options import ClientOptions
|
7 |
from google.cloud import documentai_v1 as documentai
|
8 |
from google.cloud.documentai_v1.types import RawDocument
|
@@ -11,13 +10,7 @@ import zipfile
|
|
11 |
import os
|
12 |
import io
|
13 |
import gradio as gr
|
14 |
-
import
|
15 |
-
import textwrap
|
16 |
-
import json
|
17 |
-
import google.generativeai as genai
|
18 |
-
from IPython.display import display
|
19 |
-
from IPython.display import Markdown
|
20 |
-
import random
|
21 |
import tempfile
|
22 |
|
23 |
# Global DataFrame declaration
|
@@ -63,53 +56,6 @@ def unzip_and_find_jpgs(file_path):
|
|
63 |
jpg_files.append(full_path)
|
64 |
return jpg_files
|
65 |
|
66 |
-
def get_random_pairs_list(shots, num_pairs=2):
    """Draw a random selection of entries from ``shots``.

    Args:
        shots: Mapping to sample from; its keys are the sampling population.
        num_pairs: Number of distinct keys to draw (defaults to 2).

    Returns:
        A list of ``(key, shots[key])`` tuples, in the order the keys
        were sampled.
    """
    candidates = list(shots)
    pairs = []
    for chosen in random.sample(candidates, num_pairs):
        pairs.append((chosen, shots[chosen]))
    return pairs
|
69 |
-
|
70 |
-
def construct_prompt(input_text, random_pairs):
    """Build a few-shot prompt from two worked examples and a new input.

    The prompt consists of a fixed instruction paragraph, two
    "Input N / Output N" examples taken from ``random_pairs``, and
    ``input_text`` as "Input 3" followed by an open "Output 3:" line.

    Args:
        input_text: Text inserted under "Input 3".
        random_pairs: Sequence whose first two items are
            ``(example_text, fields)`` pairs; ``fields`` must provide the
            keys "Collector", "Location", "Taxon" and "Date".

    Returns:
        The fully interpolated prompt string.

    Raises:
        IndexError: If ``random_pairs`` holds fewer than two pairs.
        KeyError: If an example is missing one of the four fields.
    """
    # Local import keeps this block self-contained.
    import json

    field_names = ("Collector", "Location", "Taxon", "Date")

    def _example_json(fields):
        # json.dumps (rather than manual string formatting) escapes quotes
        # and backslashes inside values, so the example stays valid JSON.
        # Compact separators reproduce the original template's layout.
        return json.dumps(
            {name: fields[name] for name in field_names},
            ensure_ascii=False,
            separators=(",", ":"),
        )

    first_text, first_fields = random_pairs[0][0], random_pairs[0][1]
    second_text, second_fields = random_pairs[1][0], random_pairs[1][1]

    # The template was previously a plain (non-f) string, so none of its
    # placeholders were substituted; the values are inserted explicitly here.
    prompt_lines = [
        "",
        "Follow the examples below. Your response should contain only JSON. If you",
        "encounter two dates in an input, prefer the earliest. If the answer is not",
        "exact, try your best, but do not use excess wording. If you are completely",
        "unsure or there is no answer, insert UNKNOWN.",
        "",
        "Input 1:",
        str(first_text),
        "",
        "Output 1:",
        _example_json(first_fields),
        "",
        "Input 2:",
        str(second_text),
        "",
        "Output 2:",
        _example_json(second_fields),
        "",
        "Input 3:",
        str(input_text),
        "Output 3:",
        "",
    ]
    return "\n".join(prompt_lines)
|
96 |
-
|
97 |
-
def process_responses(responses):
    """Convert model responses into one record (dict) per response.

    Each response's ``.text`` is parsed as JSON. A response falls back to
    an all-"UNKNOWN" record when:

    * reading ``.text`` raises ``ValueError`` (the generative-AI SDK does
      this for blocked or empty candidates),
    * the text is not valid JSON, or
    * the JSON is valid but is not an object (e.g. a list or bare string).

    A Markdown code fence wrapped around the JSON is removed before
    parsing.

    Args:
        responses: Iterable of objects exposing a ``text`` attribute.

    Returns:
        A list of dicts, one per response, in input order.
    """

    def _unknown_record():
        # A fresh dict per call so callers can mutate records independently.
        return {
            "Collector": "UNKNOWN",
            "Location": "UNKNOWN",
            "Taxon": "UNKNOWN",
            "Date": "UNKNOWN"
        }

    def _strip_code_fence(text):
        # Remove a surrounding ``` / ```json fence, if present.
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            if text.endswith("```"):
                text = text[:-3]
            if text.lower().startswith("json"):
                text = text[4:]
        return text.strip()

    structured_responses = []
    for response in responses:
        try:
            parsed_json = json.loads(_strip_code_fence(response.text))
        except ValueError:
            # json.JSONDecodeError subclasses ValueError, so this covers both
            # malformed JSON and the SDK refusing to expose ``.text``.
            parsed_json = None
        if not isinstance(parsed_json, dict):
            # Non-object JSON would not map onto the expected columns.
            parsed_json = _unknown_record()
        structured_responses.append(parsed_json)
    return structured_responses
|
112 |
-
|
113 |
def process_images(uploaded_file):
|
114 |
global results_df
|
115 |
results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
|
@@ -130,47 +76,6 @@ def process_images(uploaded_file):
|
|
130 |
"Translated Text": translated_text
|
131 |
}])
|
132 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
133 |
-
|
134 |
-
# Configure the generative AI model
|
135 |
-
genai.configure(api_key='AIzaSyB9iHlqAgz5TEF36Kg_fJLJvoIDCJkqwJI')
|
136 |
-
model = genai.GenerativeModel('gemini-pro')
|
137 |
-
|
138 |
-
# Prepare data for few-shot learning
|
139 |
-
shots = \
|
140 |
-
{
|
141 |
-
"Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6663 NCIL 14 September 2017 N° 2581259 TIBET PE CHINESE NATIONAL HERBARIUM (PE) 02334125 #PE6663 COMPOSITAE Aster albescens (DC.) Hand.-Mazz. A: it (Guo-Jin ZHANG) 01 April 2018"\
|
142 |
-
:{"Collector":"Guo-Jin, Zhang",
|
143 |
-
"Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
|
144 |
-
"Taxon":"Aster albescens (DC.) Hand.-Mazz., Compositae (Asteraceae) family",
|
145 |
-
"Date":"14 September 2017"
|
146 |
-
},
|
147 |
-
|
148 |
-
"PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5'37.15"N, 91°7'24.74"E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang Spiral Leaf Green 17 May 2018"
|
149 |
-
:{"Collector":"UNKNOWN",
|
150 |
-
"Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
|
151 |
-
"Taxon":"Spiral Leaf Green",
|
152 |
-
"Date":"17 May 2018"
|
153 |
-
},
|
154 |
-
|
155 |
-
"Honey Plants Research Institute of the Chinese Academy of Agricultural Sciences Collection No.: 13687. May 7, 1993 Habitat Roadside Altitude: 1600 * Characters Shrub No. Herbarium of the Institute of Botany, Chinese Academy of Sciences Collector 3687 Scientific Name Height: m (cm) Diameter at breast height m (cm) Flower: White Fruit: Notes Blooming period: from January to July Honey: Scientific Name: Rosa Sericea Lindl. Appendix: Collector: cm 1 2 3 4 25 CHINESE NATIONAL HERBARUM ( 01833954 No 1479566 * Herbarium of the Institute of Botany, Chinese Academy of Sciences Sichuan SZECHUAN DET. Rosa sercea Lindl. var. Various Zhi 2009-02-16"
|
156 |
-
:{"Collector":"UNKNOWN",
|
157 |
-
"Location":"Sichuan, China",
|
158 |
-
"Taxon":"Rosa sericea Lindl., with possible variant identification as 'var. Various Zhi'",
|
159 |
-
"Date":"7 May 1993",
|
160 |
-
},
|
161 |
-
}
|
162 |
-
|
163 |
-
responses = []
|
164 |
-
for input_text in results_df["Translated Text"]:
|
165 |
-
random_pairs = get_random_pairs_list(shots)
|
166 |
-
prompt = construct_prompt(input_text, random_pairs)
|
167 |
-
response = model.generate_content(prompt)
|
168 |
-
responses.append(response)
|
169 |
-
|
170 |
-
# Processing responses
|
171 |
-
json_responses = process_responses(responses)
|
172 |
-
results_df = pd.concat([results_df, pd.DataFrame(json_responses)], axis=1)
|
173 |
-
|
174 |
except Exception as e:
|
175 |
return f"An error occurred: {str(e)}"
|
176 |
|
@@ -180,7 +85,7 @@ def process_images(uploaded_file):
|
|
180 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") # Create a temp file
|
181 |
results_df.to_csv(temp_file.name, index=False) # Save DataFrame to CSV
|
182 |
temp_file.close() # Close the file
|
183 |
-
|
184 |
# Return HTML and the path to the CSV file
|
185 |
return html_output, temp_file.name
|
186 |
|
@@ -194,8 +99,8 @@ with gr.Blocks() as interface:
|
|
194 |
html_output = gr.HTML()
|
195 |
with gr.Row():
|
196 |
file_output = gr.File()
|
197 |
-
|
198 |
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
199 |
|
200 |
if __name__ == "__main__":
|
201 |
-
interface.launch()
|
|
|
2 |
# Upload credential json file from default compute service account
|
3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
4 |
|
|
|
5 |
from google.api_core.client_options import ClientOptions
|
6 |
from google.cloud import documentai_v1 as documentai
|
7 |
from google.cloud.documentai_v1.types import RawDocument
|
|
|
10 |
import os
|
11 |
import io
|
12 |
import gradio as gr
|
13 |
+
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
import tempfile
|
15 |
|
16 |
# Global DataFrame declaration
|
|
|
56 |
jpg_files.append(full_path)
|
57 |
return jpg_files
|
58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
def process_images(uploaded_file):
|
60 |
global results_df
|
61 |
results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
|
|
|
76 |
"Translated Text": translated_text
|
77 |
}])
|
78 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
except Exception as e:
|
80 |
return f"An error occurred: {str(e)}"
|
81 |
|
|
|
85 |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv") # Create a temp file
|
86 |
results_df.to_csv(temp_file.name, index=False) # Save DataFrame to CSV
|
87 |
temp_file.close() # Close the file
|
88 |
+
|
89 |
# Return HTML and the path to the CSV file
|
90 |
return html_output, temp_file.name
|
91 |
|
|
|
99 |
html_output = gr.HTML()
|
100 |
with gr.Row():
|
101 |
file_output = gr.File()
|
102 |
+
|
103 |
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
104 |
|
105 |
if __name__ == "__main__":
|
106 |
+
interface.launch(debug=True)
|