Spaces:

spark-ds549
/

Chinese-Label-Transcription

Sleeping

App Files Files Community

mkaramb committed on Apr 25, 2024

Commit

60d2516

verified ·

1 Parent(s): 28ecb63

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -99

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 # Upload credential json file from default compute service account
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
-import pandas as pd
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
@@ -11,13 +10,7 @@ import zipfile
 import os
 import io
 import gradio as gr
-import pathlib
-import textwrap
-import json
-import google.generativeai as genai
-from IPython.display import display
-from IPython.display import Markdown
-import random
 import tempfile
 # Global DataFrame declaration
@@ -63,53 +56,6 @@ def unzip_and_find_jpgs(file_path):
                     jpg_files.append(full_path)
     return jpg_files
-def get_random_pairs_list(shots, num_pairs=2):
-    keys = random.sample(list(shots.keys()), num_pairs)
-    return [(key, shots[key]) for key in keys]
-def construct_prompt(input_text, random_pairs):
-    # Example setup based on your specified format
-    prompt = \
-    """
-    Follow the examples below. Your response should contain only JSON. If you
-    encounter two dates in an input, prefer the earliest. If the answer is not
-    exact, try your best, but do not use excess wording. If you are completely
-    unsure or there is no answer, insert UNKNOWN.
-    Input 1:
-    {random_pairs[0][0]}
-    Output 1:
-    {{"Collector":"{random_pairs[0][1]['Collector']}","Location":"{random_pairs[0][1]['Location']}","Taxon":"{random_pairs[0][1]['Taxon']}","Date":"{random_pairs[0][1]['Date']}"}}
-    Input 2:
-    {random_pairs[1][0]}
-    Output 2:
-    {{"Collector":"{random_pairs[1][1]['Collector']}","Location":"{random_pairs[1][1]['Location']}","Taxon":"{random_pairs[1][1]['Taxon']}","Date":"{random_pairs[1][1]['Date']}"}}
-    Input 3:
-    {input_text}
-    Output 3:
-    """
-    return prompt
-def process_responses(responses):
-    structured_responses = []
-    for response in responses:
-        try:
-            # Assuming response is a string of JSON data
-            parsed_json = json.loads(response.text)
-            structured_responses.append(parsed_json)
-        except json.JSONDecodeError:
-            structured_responses.append({
-                "Collector": "UNKNOWN",
-                "Location": "UNKNOWN",
-                "Taxon": "UNKNOWN",
-                "Date": "UNKNOWN"
-            })
-    return structured_responses
 def process_images(uploaded_file):
     global results_df
     results_df = results_df.iloc[0:0]  # Clear the DataFrame if re-running this cell
@@ -130,47 +76,6 @@ def process_images(uploaded_file):
                 "Translated Text": translated_text
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
-        # Configure the generative AI model
-        genai.configure(api_key='AIzaSyB9iHlqAgz5TEF36Kg_fJLJvoIDCJkqwJI')
-        model = genai.GenerativeModel('gemini-pro')
-        # Prepare data for few-shot learning
-        shots = \
-        {
-            "Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5&#39;37.15&quot;N, 91°7&#39;24.74&quot;E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6663 NCIL 14 September 2017 N° 2581259 TIBET PE CHINESE NATIONAL HERBARIUM (PE) 02334125 #PE6663 COMPOSITAE Aster albescens (DC.) Hand.-Mazz. A: it (Guo-Jin ZHANG) 01 April 2018"\
-            :{"Collector":"Guo-Jin, Zhang",
-              "Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
-              "Taxon":"Aster albescens (DC.) Hand.-Mazz., Compositae (Asteraceae) family",
-              "Date":"14 September 2017"
-            },
-            "PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5&#39;37.15&quot;N, 91°7&#39;24.74&quot;E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang CHINA, Xizang, Lhoka City, Lhozhag County, Lhakang Town, Kharchhu Gompa vicinity 28°5&#39;37.15&quot;N, 91°7&#39;24.74&quot;E; 3934 m Herbs. Slopes near roadsides. PE-Xizang Expedition #PE6673 9 NSIT Chinese National Herbarium (PE) Plants of Xizang Spiral Leaf Green 17 May 2018"
-            :{"Collector":"UNKNOWN",
-              "Location":"Xizang, Tibet, China, Lhoka City, Lhozhag County, Lhakang Town, near Kharchhu Gompa",
-              "Taxon":"Spiral Leaf Green",
-              "Date":"17 May 2018"
-            },
-            "Honey Plants Research Institute of the Chinese Academy of Agricultural Sciences Collection No.: 13687. May 7, 1993 Habitat Roadside Altitude: 1600 * Characters Shrub No. Herbarium of the Institute of Botany, Chinese Academy of Sciences Collector 3687 Scientific Name Height: m (cm) Diameter at breast height m (cm) Flower: White Fruit: Notes Blooming period: from January to July Honey: Scientific Name: Rosa Sericea Lindl. Appendix: Collector: cm 1 2 3 4 25 CHINESE NATIONAL HERBARUM ( 01833954 No 1479566 * Herbarium of the Institute of Botany, Chinese Academy of Sciences Sichuan SZECHUAN DET. Rosa sercea Lindl. var. Various Zhi 2009-02-16"
-            :{"Collector":"UNKNOWN",
-              "Location":"Sichuan, China",
-              "Taxon":"Rosa sericea Lindl., with possible variant identification as 'var. Various Zhi'",
-              "Date":"7 May 1993",
-            },
-        }
-        responses = []
-        for input_text in results_df["Translated Text"]:
-            random_pairs = get_random_pairs_list(shots)
-            prompt = construct_prompt(input_text, random_pairs)
-            response = model.generate_content(prompt)
-            responses.append(response)
-        # Processing responses
-        json_responses = process_responses(responses)
-        results_df = pd.concat([results_df, pd.DataFrame(json_responses)], axis=1)
     except Exception as e:
         return f"An error occurred: {str(e)}"
@@ -180,7 +85,7 @@ def process_images(uploaded_file):
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")  # Create a temp file
     results_df.to_csv(temp_file.name, index=False)  # Save DataFrame to CSV
     temp_file.close()  # Close the file
     # Return HTML and the path to the CSV file
     return html_output, temp_file.name
@@ -194,8 +99,8 @@ with gr.Blocks() as interface:
         html_output = gr.HTML()
     with gr.Row():
         file_output = gr.File()
     file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
 if __name__ == "__main__":
-    interface.launch()

 # Upload credential json file from default compute service account
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
 import os
 import io
 import gradio as gr
+import pandas as pd
 import tempfile
 # Global DataFrame declaration
                     jpg_files.append(full_path)
     return jpg_files
 def process_images(uploaded_file):
     global results_df
     results_df = results_df.iloc[0:0]  # Clear the DataFrame if re-running this cell
                 "Translated Text": translated_text
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
     temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")  # Create a temp file
     results_df.to_csv(temp_file.name, index=False)  # Save DataFrame to CSV
     temp_file.close()  # Close the file
     # Return HTML and the path to the CSV file
     return html_output, temp_file.name
         html_output = gr.HTML()
     with gr.Row():
         file_output = gr.File()
     file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
 if __name__ == "__main__":
+    interface.launch(debug=True)