# use-model.py
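# Sketch of QLoRA-style fine-tuning: load a 4-bit quantized causal LM with
# bitsandbytes, attach LoRA adapters via PEFT, run a quick generation check,
# then fine-tune on a small instruction-style dataset with the Hugging Face
# Trainer and save the resulting adapter.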
import accelerate
import bitsandbytes as bnb
from datasets import load_dataset
# from llama.hf import LLaMATokenizer  # only needed for the commented-out LLaMATokenizer path below
import os
import peft
from peft import (
    get_peft_model,
    LoraConfig,
    PeftConfig,
    PeftModel,
    prepare_model_for_kbit_training,
)
import sys
import torch
import transformers
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
from transformers import DataCollatorWithPadding
from torch.utils.data import DataLoader
# VISIBLE_DEVICE = 3
# print(str(VISIBLE_DEVICE))
# os.environ["CUDA_VISIBLE_DEVICES"] = str(VISIBLE_DEVICE)
# DEVICE = f"cuda:{VISIBLE_DEVICE}"
DEVICE = "cuda"
# MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
MODEL_NAME = "tiiuae/falcon-7b"
# DATASET = ("HiTZ/euscrawl",)
DATASET = ("HiTZ/alpaca_mt", "eu")
MAX_EMB_SIZE = 256
TAGS = {
    "human": "human",
    "assistant": "assistant",
    "input": "question",
    "output": "answer",
}
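# TAGS maps the role markers used in the prompt template ("human"/"assistant")
# and the dataset column names ("question"/"answer") consumed by generate_prompt().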
print(f"Transformers version: {transformers.__version__}")
print(f"Accelerate version: {accelerate.__version__}")
print(f"PEFT version: {peft.__version__}")
def print_trainable_parameters(model):
    """Print the number of trainable parameters vs. the total parameter count."""
    all_param = 0
    trainable_params = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params {all_param} || trainable% {100 * trainable_params / all_param}"
    )
def generate_prompt(data_point):
    """Format one dataset row as a single-turn <human>/<assistant> prompt."""
    return f"""
<{TAGS["human"]}>: {data_point[TAGS["input"]]}
<{TAGS["assistant"]}>: {data_point[TAGS["output"]]}
""".strip()


def generate_and_tokenize_prompt(data_point, tokenizer):
    """Build the prompt for a row and tokenize it for causal-LM training."""
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt
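# With the TAGS above, a row like {"question": "Q?", "answer": "A."} becomes:
#   <human>: Q?
#   <assistant>: A.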
if __name__ == '__main__':
    print("run")
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        llm_int8_enable_fp32_cpu_offload=True,
    )
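    # Weights are loaded in 4-bit NF4 with double quantization to cut memory,
    # while matmuls are computed in bfloat16; fp32 CPU offload is enabled in
    # case some non-quantized modules do not fit on the GPU.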
# "lm_head": "cpu",
device_map = {
"transformer.word_embeddings": 0,
"transformer.word_embeddings_layernorm": 0,
"lm_head": 0,
"transformer.h": 0,
"transformer.ln_f": 0,
}
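    # Explicit device map pinning every Falcon submodule (embeddings, decoder
    # blocks, final layer norm and lm_head) to GPU 0 instead of device_map="auto".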
    # device_map="auto",
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        # device_map="auto",
        device_map=device_map,
        trust_remote_code=True,
        quantization_config=bnb_config,
    )
    # tokenizer = LLaMATokenizer.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    '''
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        max_length=MAX_EMB_SIZE,
        padding=True,
        truncation=True,
        add_eos_token=True,
        add_bos_token=True,
    )
    '''
    tokenizer.pad_token = tokenizer.eos_token
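    # The Falcon tokenizer ships without a pad token, so the EOS token is
    # reused for padding when batches are collated.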
    # print(model)
    # print_trainable_parameters(model)
    model.gradient_checkpointing_enable()
    model = prepare_model_for_kbit_training(model)
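    # prepare_model_for_kbit_training readies the quantized model for training:
    # base weights are frozen, the remaining small layers (e.g. layer norms) are
    # cast to fp32 for stability, and input gradients are enabled so the LoRA
    # adapters added below can backpropagate through the frozen base.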
    config = LoraConfig(
        r=16,
        lora_alpha=32,
        # FIXME: llama2
        # target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"],
        target_modules=["query_key_value"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )
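    # "query_key_value" is Falcon's fused attention projection; LLaMA-style
    # models would instead target the per-projection names commented out above.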
    model = get_peft_model(model, config)
    print_trainable_parameters(model)
    # Alternative Basque example prompt (roughly: "Have a balanced diet and eat
    # plenty of fruit and vegetables?"):
    '''
    prompt = f"""
<{TAGS["human"]}>: Elikadura orekatu bat eduki eta fruta eta barazki ugari eduki?
<{TAGS["assistant"]}>:
""".strip()
    '''
    prompt = f"""
<{TAGS["human"]}>: How can I create an account?
<{TAGS["assistant"]}>:
""".strip()
    print(prompt)
    generation_config = model.generation_config
    generation_config.max_new_tokens = 150
    generation_config.temperature = 0.7
    generation_config.top_p = 0.7
    generation_config.num_return_sequences = 1
    generation_config.pad_token_id = tokenizer.eos_token_id
    generation_config.eos_token_id = tokenizer.eos_token_id
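    # Note: temperature/top_p only take effect when sampling is enabled;
    # without generation_config.do_sample = True, generate() decodes greedily.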
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
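    # This generation runs before fine-tuning, as a quick check of the
    # quantized base model with the still-untrained LoRA adapters attached.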
    # train_dataset, test_dataset = load_dataset("HiTZ/euscrawl", split=['train[:10%]', 'test[:10%]'])
    # dataset, = load_dataset(*DATASET, split=['train[:1%]'])
    dataset = load_dataset("json", data_files="Ecommerce_FAQ_Chatbot_dataset.json")
    # dataset = dataset.remove_columns(["input", "prompt"])
    # dataset = dataset.rename_column("instruction", "input")
    # dataset = dataset.rename_column("instruction", TAGS["input"])
    # dataset = dataset.rename_column("output", TAGS["output"])
    # print(next(iter(dataset['train'])))
    print(dataset["train"][0])
    dataset = dataset["train"].shuffle().map(generate_and_tokenize_prompt, fn_kwargs={"tokenizer": tokenizer})
    print(dataset)
    training_args = transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        fp16=True,
        save_total_limit=3,
        logging_steps=1,
        output_dir="llama2-eu-output",
        max_steps=80,
        optim="paged_adamw_8bit",
        lr_scheduler_type="cosine",
        warmup_ratio=0.05,
        remove_unused_columns=False,
    )
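    # Effective batch size is 1 x 4 gradient-accumulation steps; max_steps=80
    # caps the run regardless of num_train_epochs, and paged_adamw_8bit is the
    # memory-efficient bitsandbytes optimizer commonly paired with QLoRA.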
    trainer = transformers.Trainer(
        model=model,
        train_dataset=dataset,
        args=training_args,
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    model.config.use_cache = False
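    # The KV cache is disabled because it is incompatible with gradient
    # checkpointing during training; it can be re-enabled for inference.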
    trainer.train()
    print("End training.")
    model.save_pretrained("trained-model")
    print("END")