Using C3A for sequence classification

In this example, we fine-tune RoBERTa (base) on a sequence classification task using C3A.

Imports

python

#  To run this notebook, please run `pip install evaluate` to install additional dependencies not covered by PEFT.
import torch
from torch.optim import AdamW
from torch.utils.data import DataLoader
from peft import (
    get_peft_model,
    C3AConfig,
    PeftType,
)
from peft.utils import infer_device

import evaluate
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup, set_seed, AutoConfig
from tqdm import tqdm

Parameters

python

# Run configuration and hyperparameters for the C3A fine-tuning example.
batch_size = 32
model_name_or_path = "roberta-base"
task = "mrpc"  # GLUE task name
peft_type = PeftType.C3A
device = infer_device()
num_epochs = 5 # for better results, increase this number
# NOTE(review): the earlier hint here ("for better results, increase this number") looks wrong
# for C3A. Per the PEFT C3AConfig documentation, block_size must divide both the input and the
# output size of every target layer, and a larger block_size gives fewer trainable parameters
# -- confirm against the installed PEFT version before changing this value.
block_size = 768
max_length = 512  # maximum tokenized length; longer sentence pairs are truncated
torch.manual_seed(0)  # seed torch's RNG

python

# C3A adapter configuration for sequence classification, targeting the "query" and "value" modules.
peft_config = C3AConfig(
    task_type="SEQ_CLS",
    block_size=block_size,
    target_modules=["query", "value"],
)
head_lr = 4e-6  # learning rate for the classification head (NLU tasks)
ft_lr = 3e-1   # learning rate for the C3A parameters; much larger than usual fine-tuning rates (use at least 1e-1)

Loading data

python

# Models whose name contains "gpt", "opt" or "bloom" are padded on the left; all others on the right.
padding_side = (
    "left"
    if any(family in model_name_or_path for family in ("gpt", "opt", "bloom"))
    else "right"
)

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side)
# Fall back to the EOS token id when the tokenizer has no padding token id of its own.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

python

# Load the GLUE dataset and the matching GLUE metric for the selected task.
datasets = load_dataset("glue", task)
metric = evaluate.load("glue", task)

python

def tokenize_function(examples):
    """Tokenize a batch of sentence pairs, truncating each pair to ``max_length``."""
    return tokenizer(
        examples["sentence1"],
        examples["sentence2"],
        truncation=True,
        max_length=max_length,
    )


# Tokenize in batches, drop the raw columns that are no longer needed, and rename the 'label'
# column to 'labels', which is the name the transformers models expect for labels.
tokenized_datasets = datasets.map(
    tokenize_function,
    batched=True,
    remove_columns=["idx", "sentence1", "sentence2"],
).rename_column("label", "labels")

python

def collate_fn(examples):
    """Pad a list of tokenized examples to the longest one and return PyTorch tensors."""
    padded_batch = tokenizer.pad(examples, padding="longest", return_tensors="pt")
    return padded_batch


# Build the dataloaders: the training split is shuffled, the validation split is not.
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    collate_fn=collate_fn,
    batch_size=batch_size,
)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"],
    shuffle=False,
    collate_fn=collate_fn,
    batch_size=batch_size,
)

Preparing the C3A model

python

# Load the pretrained sequence-classification model, wrap it with the C3A adapter described by
# peft_config, and print how many of its parameters are trainable.
# NOTE(review): max_length=None is passed to from_pretrained; its purpose is not evident from
# this notebook -- confirm it is still needed.
model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True, max_length=None)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

python

# Identify the classification-head parameters by object id so that every remaining parameter
# can be placed in a second optimizer group with its own learning rate.
head_param = {id(p) for p in model.classifier.parameters()}
others_param = [p for p in model.parameters() if id(p) not in head_param]

optimizer = AdamW(
    [
        {"params": model.classifier.parameters(), "lr": head_lr},
        {"params": others_param, "lr": ft_lr},
    ],
    weight_decay=0.0,
)


# Linear schedule whose warmup covers the first 6% of all optimizer steps.
num_training_steps = len(train_dataloader) * num_epochs
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer=optimizer,
    num_warmup_steps=0.06 * num_training_steps,
    num_training_steps=num_training_steps,
)

Training

python

# Fine-tune for num_epochs epochs; after each epoch, score the model on the validation dataloader.
model.to(device)
for epoch in range(num_epochs):
    # Training pass: one optimizer and scheduler step per batch.
    model.train()
    for batch in tqdm(train_dataloader):
        batch.to(device)
        loss = model(**batch).loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    # Evaluation pass: forward only, feeding predictions and labels to the metric.
    model.eval()
    for batch in tqdm(eval_dataloader):
        batch.to(device)
        with torch.no_grad():
            logits = model(**batch).logits
        metric.add_batch(
            predictions=logits.argmax(dim=-1),
            references=batch["labels"],
        )

    eval_metric = metric.compute()
    print(f"epoch {epoch}:", eval_metric)

python

# Placeholders: replace both values with your own Hub account name and access token.
# NOTE(review): avoid committing a real token to version control.
account_id = "Your-Hugging-Face-Hub-Account"
token = "Your-Hugging-Face-Hub-Token"

python

# Upload the fine-tuned model to the Hub repository under the given account.
model.push_to_hub(f"{account_id}/roberta-base-mrpc-peft-c3a", token=token)

Load adapters from the Hub

You can also directly load adapters from the Hub using the commands below:

python

import torch
from peft import PeftModel, PeftConfig
from transformers import AutoTokenizer

python

# Read the adapter's config from the Hub, then load the base model and tokenizer it names.
# Relies on account_id and AutoModelForSequenceClassification from the earlier cells.
peft_model_id = f"{account_id}/roberta-base-mrpc-peft-c3a"
config = PeftConfig.from_pretrained(peft_model_id)
inference_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

python

# Load the C3A adapter on top of the base model
inference_model = PeftModel.from_pretrained(inference_model, peft_model_id, config=config)

python

# Evaluate the reloaded model on the validation dataloader and print the resulting metric.
inference_model.to(device)
inference_model.eval()
for batch in tqdm(eval_dataloader):
    batch.to(device)
    with torch.no_grad():
        logits = inference_model(**batch).logits
    metric.add_batch(
        predictions=logits.argmax(dim=-1),
        references=batch["labels"],
    )

eval_metric = metric.compute()
print(eval_metric)

Using C3A for sequence classification

Using C3A for sequence classification

Imports

Parameters

Loading data

Preparing the C3A model

Training

Share adapters on the 🤗 Hub

Load adapters from the Hub