Update app.py
Browse files
app.py
CHANGED
@@ -3,19 +3,15 @@ import json
|
|
3 |
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer
|
4 |
from datasets import Dataset
|
5 |
|
6 |
-
# Load
|
7 |
-
def load_data(preprocessed_file, tokenized_file):
|
8 |
-
with open(preprocessed_file.name, 'r') as f:
|
9 |
-
preprocessed_data = json.load(f)
|
10 |
-
|
11 |
with open(tokenized_file.name, 'r') as f:
|
12 |
tokenized_data = json.load(f)
|
13 |
-
|
14 |
-
return preprocessed_data, tokenized_data
|
15 |
|
16 |
# Fine-tune the model
|
17 |
-
def fine_tune_model(preprocessed_file, tokenized_file, progress=gr.Progress()):
|
18 |
-
|
19 |
|
20 |
# Convert tokenized data to Dataset
|
21 |
dataset = Dataset.from_dict(tokenized_data)
|
@@ -60,12 +56,11 @@ def fine_tune_model(preprocessed_file, tokenized_file, progress=gr.Progress()):
|
|
60 |
iface = gr.Interface(
|
61 |
fn=fine_tune_model,
|
62 |
inputs=[
|
63 |
-
gr.File(label="Upload Preprocessed Data JSON"),
|
64 |
gr.File(label="Upload Tokenized Data JSON")
|
65 |
],
|
66 |
outputs=gr.Textbox(label="Processing Information"),
|
67 |
title="Fine-Tune Patent BERT Model",
|
68 |
-
description="Upload
|
69 |
live=True # Enable live updates for progress
|
70 |
)
|
71 |
|
|
|
3 |
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer
|
4 |
from datasets import Dataset
|
5 |
|
6 |
+
# Load tokenized data
|
7 |
+
def load_data(tokenized_file):
    """Load tokenized training data from an uploaded JSON file.

    Args:
        tokenized_file: The upload to read. Either a filesystem path
            (``str`` or ``os.PathLike``) or an object that exposes the path
            of the uploaded file as ``.name`` (for example a temporary-file
            wrapper). Both forms are accepted because the value handed to
            the callback differs between upload-widget configurations.

    Returns:
        The object decoded from the file's JSON content.

    Raises:
        ValueError: If ``tokenized_file`` is ``None`` (nothing uploaded).
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    import os  # local import: only needed to recognise path-like inputs

    if tokenized_file is None:
        # Fail with a readable message instead of an AttributeError on `.name`.
        raise ValueError("No tokenized data file was uploaded.")

    # Path-like objects must be checked first: pathlib.Path also has a
    # `.name` attribute, but it is only the final path component.
    if isinstance(tokenized_file, (str, os.PathLike)):
        path = tokenized_file
    else:
        path = tokenized_file.name

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
11 |
|
12 |
# Fine-tune the model
|
13 |
+
def fine_tune_model(tokenized_file, progress=gr.Progress()):
|
14 |
+
tokenized_data = load_data(tokenized_file)
|
15 |
|
16 |
# Convert tokenized data to Dataset
|
17 |
dataset = Dataset.from_dict(tokenized_data)
|
|
|
56 |
# Gradio front end for fine-tuning: a single JSON upload is passed to
# fine_tune_model and whatever it returns is shown in a text box.
iface = gr.Interface(
    fn=fine_tune_model,
    inputs=[
        gr.File(label="Upload Tokenized Data JSON"),
    ],
    outputs=gr.Textbox(label="Processing Information"),
    title="Fine-Tune Patent BERT Model",
    description="Upload tokenized JSON file to fine-tune the BERT model.",
    # live=True makes Gradio call fn automatically whenever the input
    # changes (no Submit click needed). It is not what drives the progress
    # display; that comes from the gr.Progress() parameter that
    # fine_tune_model declares.
    # NOTE(review): fine-tuning therefore starts as soon as a file is
    # uploaded -- confirm that auto-run is intended for a job this expensive.
    live=True,
)
|
66 |
|