Instructions to use Maaac/CodeLLaMA-Linux-BugFix with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Maaac/CodeLLaMA-Linux-BugFix with PEFT:
Task type is invalid.
- Notebooks
- Google Colab
- Kaggle
| # QLoRA fine-tuning for CodeLLaMA-7B-Instruct on 1x H200 | |
| # Requirements: transformers, peft, accelerate, bitsandbytes, datasets | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForCausalLM, | |
| TrainingArguments, | |
| Trainer, | |
| BitsAndBytesConfig, | |
| DataCollatorForSeq2Seq | |
| ) | |
| from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training | |
| from datasets import load_dataset | |
| import torch | |
| import os | |
| import wandb | |
# Weights & Biases settings are passed through environment variables.
os.environ.update(
    {
        "WANDB_PROJECT": "codellama-7b-instruct-qlora-linux-bugfix",
        "WANDB_NAME": "run-v1",
    }
)

# Base checkpoint, training-data location and output directory.
BASE_MODEL = "codellama/CodeLLaMA-7b-Instruct-hf"
DATA_PATH = "../dataset/training_data_100k.jsonl"
OUTPUT_DIR = "./output/qlora-codellama-bugfix"

# Read the JSON-lines file as a single "train" split; later code reads the
# "prompt" and "completion" fields of every record.
dataset = load_dataset("json", split="train", data_files=DATA_PATH)
# QLoRA quantization: 4-bit NF4 weights with double quantization, bfloat16
# as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,  # optimized for H100/H200
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Tokenizer: EOS doubles as the padding token and padding goes on the right.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Load the quantized base model, then prepare it for k-bit training.
model = prepare_model_for_kbit_training(
    AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        device_map="auto",
        quantization_config=bnb_config,
    )
)
model.gradient_checkpointing_enable()

# Allow TF32 matmuls on CUDA.
torch.backends.cuda.matmul.allow_tf32 = True
# LoRA adapter hyper-parameters for the causal-LM task.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    bias="none",
    lora_dropout=0.1,
    lora_alpha=16,
    r=64,
)

# Wrap the quantized model with the adapters.
model = get_peft_model(model, lora_config)

# Model config for training: no KV cache, dict-style outputs, and a pad id
# that matches the tokenizer's.
model.config.pad_token_id = tokenizer.pad_token_id
model.config.return_dict = True
model.config.use_cache = False

# Report how many parameters are trainable.
model.print_trainable_parameters()

# Maximum sequence length advertised by the tokenizer, kept as a
# module-level name.
model_max_len = tokenizer.model_max_length
def format(example, max_prompt_len=1024, max_completion_len=512, tok=None):
    """Tokenize one prompt/completion record for causal-LM fine-tuning.

    The prompt and completion are tokenized separately and concatenated.
    Prompt positions are masked with -100 in ``labels`` so only completion
    tokens contribute to the loss.

    Args:
        example: Mapping with string fields ``"prompt"`` and ``"completion"``.
        max_prompt_len: Truncation limit (in tokens) for the prompt.
        max_completion_len: Truncation limit (in tokens) for the completion.
        tok: Tokenizer to use; defaults to the module-level ``tokenizer``.

    Returns:
        Dict with ``"input_ids"`` and ``"labels"``, two lists of equal
        length, truncated to the tokenizer's ``model_max_length``.

    NOTE: the name shadows the ``format`` builtin; it is kept because the
    rest of the script calls it by this name.
    """
    if tok is None:
        tok = tokenizer  # module-level tokenizer configured earlier

    prompt_ids = tok(
        example["prompt"], truncation=True, max_length=max_prompt_len
    )["input_ids"]

    # The completion continues the prompt, so the tokenizer must not add its
    # special tokens again (e.g. a second BOS in the middle of the sequence).
    completion_ids = tok(
        example["completion"],
        truncation=True,
        max_length=max_completion_len,
        add_special_tokens=False,
    )["input_ids"]

    # Close the target with EOS so the model learns where the fix ends. Skip
    # it when the completion hit the truncation limit: the text may have been
    # cut, and EOS there would teach the model to stop mid-answer.
    eos_id = tok.eos_token_id
    if eos_id is not None and len(completion_ids) < max_completion_len:
        completion_ids = completion_ids + [eos_id]

    input_ids = prompt_ids + completion_ids
    labels = [-100] * len(prompt_ids) + completion_ids

    # Truncate (not pad) both sequences to the model limit; padding is done
    # per batch by the data collator.
    max_len = min(len(input_ids), tok.model_max_length)
    return {
        "input_ids": input_ids[:max_len],
        "labels": labels[:max_len],
    }
# Sanity check: run one formatted example through the model in training mode
# and confirm the resulting loss is attached to the autograd graph before
# spending time on tokenizing the whole dataset.
print("__ Sanity checking one example...")
sample = format(dataset[0])
test_input = torch.tensor(sample["input_ids"]).unsqueeze(0).to(model.device)
test_labels = torch.tensor(sample["labels"]).unsqueeze(0).to(model.device)
model.train()
out = model(input_ids=test_input, labels=test_labels)
# Explicit raise instead of `assert`: assertions are stripped when Python
# runs with -O, which would silently disable this check.
if not out.loss.requires_grad:
    raise RuntimeError("Sanity check failed: Loss does not require grad.")
print("__ Sanity check passed. Proceeding to map()...")

# Apply formatting to the entire dataset, dropping the raw text columns so
# only "input_ids" and "labels" remain.
dataset = dataset.map(format, remove_columns=["prompt", "completion"])

# Batch collator; sequence lengths are padded to a multiple of 8.
collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    return_tensors="pt",
    pad_to_multiple_of=8,
)
# Hyper-parameters and bookkeeping options for the Trainer.
training_args = TrainingArguments(
    # Output locations and experiment tracking
    output_dir=OUTPUT_DIR,
    logging_dir=f"{OUTPUT_DIR}/logs",
    report_to="wandb",
    run_name="codellama-7b-instruct-qlora-linux-bugfix",
    push_to_hub=False,
    # Schedule and batch layout
    num_train_epochs=3,
    per_device_train_batch_size=64,
    gradient_accumulation_steps=4,
    # Optimization
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    max_grad_norm=1.0,
    # Precision and memory
    bf16=True,  # Important for H200
    fp16=False,
    gradient_checkpointing=True,
    # Checkpointing and logging cadence
    save_strategy="steps",
    save_steps=500,
    save_total_limit=2,
    logging_steps=50,
    # Dataset column handling
    label_names=["labels"],
    remove_unused_columns=False,  # Critical to prevent data loss
)
# Trainer setup
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=collator,
)

# Begin training
model.train()
# NOTE(review): this link is built only from the project and run name; W&B
# run URLs normally also contain an entity and run id -- confirm it resolves.
print(f"Track this run in Weights & Biases: https://wandb.ai/{os.environ['WANDB_PROJECT']}/{os.environ['WANDB_NAME']}")

# Resume only when a checkpoint actually exists. Passing
# resume_from_checkpoint=True unconditionally makes the very first run fail
# with "No valid checkpoint found in output directory".
from transformers.trainer_utils import get_last_checkpoint

last_checkpoint = (
    get_last_checkpoint(OUTPUT_DIR) if os.path.isdir(OUTPUT_DIR) else None
)
trainer.train(resume_from_checkpoint=last_checkpoint)

# Save the final model and tokenizer next to the checkpoints.
model.save_pretrained(OUTPUT_DIR, safe_serialization=True)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"[DONE] Model saved to {OUTPUT_DIR}")