import os

import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
OUTPUT_DIR = "./fine_tuned_model_ul2"
DATASET_NAME = "your_dataset"  # placeholder; the local JSON files below are what is actually loaded

# QLoRA-style 4-bit quantization: NF4 weights, double quantization, fp16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
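# Note: NF4 with double quantization cuts weight memory roughly 4x relative to
# fp16, while matmuls still run in fp16 via bnb_4bit_compute_dtype.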

# Load the base model in 4-bit; fall back to 8-bit if the GPU runs out of memory.
try:
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        quantization_config=bnb_config,
    )
except torch.cuda.OutOfMemoryError:
    print("GPU out of memory. Retrying with 8-bit quantization...")
    # load_in_8bit cannot be combined with the 4-bit quantization_config above,
    # so the retry uses its own BitsAndBytesConfig.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    )

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token
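# Llama 3.1 ships without a pad token, so eos is reused; padded positions are
# masked out of the loss in tokenize_function below.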

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# Prepare the quantized model for k-bit training (enables input gradients,
# casts layer norms to fp32), then attach the LoRA adapters.
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)
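# Quick visibility into what is actually trained: only the adapter weights.
model.print_trainable_parameters()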

dataset = load_dataset(
    "json",
    data_files={
        "train": "datasets/train.json",
        "validation": "datasets/validation.json",
        "test": "datasets/test.json",
    },
)
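# Each split is expected to hold records shaped like
#   {"context": "...", "question": "...", "response": "..."}
# (the field names consumed by tokenize_function below).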

def tokenize_function(examples):
    """
    Preprocess a batch of examples:
    - concatenate context, question, and response into one training text,
      since a decoder-only model learns prompt and answer as a single sequence
    - tokenize and build labels, masking padding positions out of the loss
    """
    texts = [
        f"{context} {question} {response}"
        for context, question, response in zip(
            examples["context"], examples["question"], examples["response"]
        )
    ]

    model_inputs = tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    # For causal LM training the labels are the input ids themselves;
    # -100 at padded positions excludes them from the loss.
    model_inputs["labels"] = [
        [tok_id if mask == 1 else -100 for tok_id, mask in zip(ids, attn)]
        for ids, attn in zip(model_inputs["input_ids"], model_inputs["attention_mask"])
    ]
    return model_inputs

train_data = dataset["train"]
eval_data = dataset["validation"] if "validation" in dataset else None

# Drop the raw text columns so only tokenized fields reach the trainer.
train_dataset = train_data.map(tokenize_function, batched=True, remove_columns=train_data.column_names)
eval_dataset = eval_data.map(tokenize_function, batched=True, remove_columns=eval_data.column_names) if eval_data else None

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=2e-4,
    fp16=True,
    evaluation_strategy="epoch",  # renamed to eval_strategy in recent transformers releases
    save_strategy="epoch",
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    push_to_hub=True,
)
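# Effective batch size: 4 per device x 16 accumulation steps = 64 sequences
# per optimizer step on a single GPU.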

# The datasets are already tokenized above, so no dataset_text_field or
# max_seq_length is passed; LoRA was applied with get_peft_model, so no
# peft_config is needed here either.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)
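# Note: recent TRL releases rename tokenizer= to processing_class= and move
# max_seq_length into SFTConfig; the call above assumes an older TRL API.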

try:
    # Trainer handles model.train() and gradient state itself.
    trainer.train()

    # Merge the LoRA weights back into the base model for standalone use.
    # (Merging into a 4-bit base dequantizes the weights; some recipes reload
    # the base in fp16 before merging for higher fidelity.)
    if isinstance(model, PeftModel):
        merged_model = model.merge_and_unload()
    else:
        merged_model = model

    merged_model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)

    # Push only when HF_USERNAME is set, to avoid building a "None/..." repo id.
    hf_username = os.getenv("HF_USERNAME")
    if hf_username:
        merged_model.push_to_hub(f"{hf_username}/Meta-Llama-3.1-8B-Instruct-finetuned")
        tokenizer.push_to_hub(f"{hf_username}/Meta-Llama-3.1-8B-Instruct-finetuned")

except RuntimeError as e:
    print(f"Training error: {str(e)}")
    raise
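

# Minimal inference sketch (illustrative, not part of the training recipe):
# reload the merged model saved above and generate from a sample prompt.
# The prompt text and generation settings here are assumptions.
def run_sample_inference(prompt: str = "Answer briefly: what does LoRA fine-tuning change?") -> str:
    tok = AutoTokenizer.from_pretrained(OUTPUT_DIR)
    lm = AutoModelForCausalLM.from_pretrained(
        OUTPUT_DIR,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    inputs = tok(prompt, return_tensors="pt").to(lm.device)
    output_ids = lm.generate(**inputs, max_new_tokens=128)
    return tok.decode(output_ids[0], skip_special_tokens=True)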
|
|
|