import gradio as gr
import pandas as pd
import torch
from datasets import Dataset
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)


def train_model(file, hf_token):
    """Fine-tune ``facebook/opt-125m`` on an uploaded CSV and push it to the Hub.

    Every CSV row is turned into one training string (its cells joined with
    single spaces, in column order), tokenized, and used for causal-LM
    fine-tuning on CPU.

    Args:
        file: The upload handed over by the Gradio ``File`` component. Either
            a filesystem path (``str``) or an object exposing the path as
            ``.name``.
        hf_token: Hugging Face access token used to log in and to push.

    Returns:
        A human-readable status string. Failures are reported in the returned
        string ("Error occurred: ...") rather than raised, because the result
        is rendered directly in the UI's text output.
    """
    try:
        token = hf_token.strip() if hf_token else ""
        if not token:
            return "Please provide a Hugging Face token"
        if file is None:
            return "Please upload a CSV file"

        login(token)

        # Depending on the Gradio version/configuration the upload arrives as
        # a plain path string or as a temp-file wrapper carrying `.name`.
        csv_path = file if isinstance(file, str) else file.name

        # Read every cell as text and keep missing cells as "" so that rows
        # can be joined without "nan" artifacts or type errors.
        df = pd.read_csv(csv_path, dtype=str, keep_default_na=False)
        texts = [" ".join(row) for row in df.itertuples(index=False, name=None)]
        texts = [text for text in texts if text.strip()]
        if not texts:
            return "The uploaded CSV file contains no rows"

        model_name = "facebook/opt-125m"
        device_map = "cpu"
        max_length = 512  # cap sequence length to keep CPU training tractable

        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            # The collator pads batches and therefore needs a pad token.
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map=device_map,
            torch_dtype=torch.float32,
        )

        def tokenize(batch):
            return tokenizer(batch["text"], truncation=True, max_length=max_length)

        # The Trainer needs token ids, not raw text: tokenize and drop the
        # text column so only model inputs remain.
        dataset = Dataset.from_dict({"text": texts}).map(
            tokenize,
            batched=True,
            remove_columns=["text"],
        )

        # With mlm=False the collator pads each batch and derives `labels`
        # from `input_ids`, which is what a causal LM needs to compute a loss.
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

        training_args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=3,
            per_device_train_batch_size=1,
            learning_rate=3e-5,
            save_strategy="epoch",
            push_to_hub=True,
            hub_token=token,
            no_cuda=True,
            report_to="none",
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset,
            data_collator=data_collator,
            tokenizer=tokenizer,
        )

        trainer.train()

        # NOTE(review): the target namespace is hard-coded; pushing will fail
        # for tokens without write access to it -- confirm this is intended.
        repo_id = f"cheberle/product-classifier-{pd.Timestamp.now().strftime('%Y%m%d')}"
        model.push_to_hub(repo_id)
        # Publish the tokenizer alongside the weights so the repo is loadable.
        tokenizer.push_to_hub(repo_id)

        return "Training completed successfully!"

    except Exception as e:
        # UI boundary: surface any failure as text instead of crashing the app.
        return f"Error occurred: {str(e)}"


# Gradio form: a CSV upload plus a masked token field, wired to train_model;
# the status string it returns is rendered as plain text.
_csv_input = gr.File(label="Upload your CSV file")
_token_input = gr.Textbox(label="Hugging Face Token", type="password")

demo = gr.Interface(
    fn=train_model,
    inputs=[_csv_input, _token_input],
    outputs="text",
    title="Product Classifier Training",
    description="Upload your CSV data to train a product classifier model on CPU.",
)


if __name__ == "__main__":
    # Start the web UI only when executed as a script, with sharing disabled.
    demo.launch(share=False)