bhlewis committed on
Commit
40f0be0
·
verified ·
1 Parent(s): 67f83c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -11
app.py CHANGED
@@ -3,19 +3,15 @@ import json
3
  from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer
4
  from datasets import Dataset
5
 
6
- # Load preprocessed and tokenized data
7
- def load_data(preprocessed_file, tokenized_file):
8
- with open(preprocessed_file.name, 'r') as f:
9
- preprocessed_data = json.load(f)
10
-
11
  with open(tokenized_file.name, 'r') as f:
12
  tokenized_data = json.load(f)
13
-
14
- return preprocessed_data, tokenized_data
15
 
16
  # Fine-tune the model
17
- def fine_tune_model(preprocessed_file, tokenized_file, progress=gr.Progress()):
18
- preprocessed_data, tokenized_data = load_data(preprocessed_file, tokenized_file)
19
 
20
  # Convert tokenized data to Dataset
21
  dataset = Dataset.from_dict(tokenized_data)
@@ -60,12 +56,11 @@ def fine_tune_model(preprocessed_file, tokenized_file, progress=gr.Progress()):
60
  iface = gr.Interface(
61
  fn=fine_tune_model,
62
  inputs=[
63
- gr.File(label="Upload Preprocessed Data JSON"),
64
  gr.File(label="Upload Tokenized Data JSON")
65
  ],
66
  outputs=gr.Textbox(label="Processing Information"),
67
  title="Fine-Tune Patent BERT Model",
68
- description="Upload preprocessed and tokenized JSON files to fine-tune the BERT model.",
69
  live=True # Enable live updates for progress
70
  )
71
 
 
3
  from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments, AutoTokenizer
4
  from datasets import Dataset
5
 
6
+ # Load tokenized data
7
+ def load_data(tokenized_file):
 
 
 
8
  with open(tokenized_file.name, 'r') as f:
9
  tokenized_data = json.load(f)
10
+ return tokenized_data
 
11
 
12
  # Fine-tune the model
13
+ def fine_tune_model(tokenized_file, progress=gr.Progress()):
14
+ tokenized_data = load_data(tokenized_file)
15
 
16
  # Convert tokenized data to Dataset
17
  dataset = Dataset.from_dict(tokenized_data)
 
56
  iface = gr.Interface(
57
  fn=fine_tune_model,
58
  inputs=[
 
59
  gr.File(label="Upload Tokenized Data JSON")
60
  ],
61
  outputs=gr.Textbox(label="Processing Information"),
62
  title="Fine-Tune Patent BERT Model",
63
+ description="Upload tokenized JSON file to fine-tune the BERT model.",
64
  live=True # Enable live updates for progress
65
  )
66