Spaces:
Sleeping
Sleeping
"""Fine-tune IndoBERT for 2-class sequence classification on a local CSV dataset.

Expects `dataset.csv` with at least the columns:
  - 'pertanyaan' (question text, model input)
  - 'jawaban'    (class label; NOTE(review): must already be integer ids
                  in {0, 1} for num_labels=2 — confirm against the CSV)
"""
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Pre-trained Indonesian BERT checkpoint to fine-tune.
MODEL_NAME = "indobenchmark/indobert-base-p2"

# Load the dataset. load_dataset("csv", data_files=...) produces a DatasetDict
# with ONLY a "train" split — the original code later indexed
# dataset["validation"], which raises KeyError because that split never
# exists. Create the validation split explicitly (deterministic via seed).
dataset = load_dataset("csv", data_files="dataset.csv")
dataset = dataset["train"].train_test_split(test_size=0.2, seed=42)

# Tokenizer matching the pre-trained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def preprocess(batch):
    """Tokenize the 'pertanyaan' column with fixed-length padding/truncation."""
    return tokenizer(batch["pertanyaan"], padding="max_length", truncation=True)


# Tokenize both splits; batched=True passes lists of texts per call.
dataset = dataset.map(preprocess, batched=True)
# Trainer looks for the target under the column name "labels".
dataset = dataset.rename_column("jawaban", "labels")
dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

# Classification head with 2 output labels on top of the pre-trained encoder.
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# Training configuration: evaluate once per epoch, keep at most 2 checkpoints.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    # train_test_split names the held-out portion "test"; it serves as the
    # validation set here.
    eval_dataset=dataset["test"],
)

# Fine-tune.
trainer.train()

# Save model AND tokenizer so "./fine_tuned_model" is directly loadable with
# from_pretrained (the original saved only the model weights).
model.save_pretrained("./fine_tuned_model")
tokenizer.save_pretrained("./fine_tuned_model")
print("Model telah dilatih ulang dan disimpan ke './fine_tuned_model'.")