use-model.py
    
    import accelerate
    import bitsandbytes as bnb
    from datasets import load_dataset
    # from llama.hf import LLaMATokenizer  # only needed for the commented-out LLaMA tokenizer path below
    import os
    import peft
    from peft import (
        get_peft_model,
        LoraConfig,
        PeftConfig,
        PeftModel,
        prepare_model_for_kbit_training,
    )
    import sys
    import torch
    import transformers
    from transformers import (
        AutoConfig,
        AutoModelForCausalLM,
        AutoTokenizer,
        BitsAndBytesConfig,
    )
    
    from transformers import DataCollatorWithPadding
    from torch.utils.data import DataLoader
    
    # VISIBLE_DEVICE = 3
    # print(str(VISIBLE_DEVICE))
    # os.environ["CUDA_VISIBLE_DEVICES"] = str(VISIBLE_DEVICE)
    
    # DEVICE = f"cuda:{VISIBLE_DEVICE}"
    DEVICE = "cuda"
    # MODEL_NAME = "meta-llama/Llama-2-7b-chat-hf"
    MODEL_NAME = "tiiuae/falcon-7b"
    
    # DATASET = ("HiTZ/euscrawl",)
    DATASET = ("HiTZ/alpaca_mt", "eu")
    
    MAX_EMB_SIZE = 256
    
    TAGS = {
        "human": "human",
        "assistant": "assistant",
        "input": "question",
        "output": "answer",
    }
    
    print(f"Transformers version: {transformers.__version__}")
    print(f"Accelerate version: {accelerate.__version__}")
    print(f"PEFT version: {peft.__version__}")
    
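    # Helper: report how many parameters remain trainable (the LoRA adapters)
    # versus the full model's parameter count.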
    def print_trainable_parameters(model):
        all_param = 0
        trainable_params = 0
        for _, param in model.named_parameters():
            all_param += param.numel()
            if param.requires_grad:
                trainable_params += param.numel()
    
        print(
            f"trainable params: {trainable_params} || all params {all_param} || trainable% {100 * trainable_params / all_param}"
        )
    
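    # Build a "<human>: question / <assistant>: answer" prompt from one dataset
    # row, using the field names defined in TAGS.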
    def generate_prompt(data_point):
        return f"""
    <{TAGS["human"]}>: {data_point[TAGS["input"]]}
    <{TAGS["assistant"]}>: {data_point[TAGS["output"]]}
    """.strip()
    
    def generate_and_tokenize_prompt(data_point, tokenizer):
        full_prompt = generate_prompt(data_point)
        # Cap prompts at MAX_EMB_SIZE tokens (the constant is otherwise unused).
        tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True, max_length=MAX_EMB_SIZE)
        return tokenized_full_prompt
    
    
    if __name__ == '__main__':
        print("run")
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            llm_int8_enable_fp32_cpu_offload=True,
        )
    
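        # Explicit device map: pin every Falcon submodule to GPU 0 instead of
        # relying on device_map="auto".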
        # "lm_head": "cpu",
        device_map = {
            "transformer.word_embeddings": 0,
            "transformer.word_embeddings_layernorm": 0,
            "lm_head": 0,
            "transformer.h": 0,
            "transformer.ln_f": 0,
        }
    
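        # Load the quantized Falcon-7B base model; trust_remote_code allows the
        # repository's custom modelling code to run.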
        # device_map="auto",
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            # device_map="auto",
            device_map=device_map,
            trust_remote_code=True,
            quantization_config=bnb_config,
        )
    
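        # Load the tokenizer; Falcon defines no pad token, so EOS is reused for padding below.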
        # tokenizer = LLaMATokenizer.from_pretrained(MODEL_NAME)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        '''
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            max_length=MAX_EMB_SIZE,
            padding=True,
            truncation=True,
            add_eos_token=True,
            add_bos_token=True,
        )
        '''
        tokenizer.pad_token = tokenizer.eos_token
    
        # print(model)
        # print_trainable_parameters(model)
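        # Enable gradient checkpointing to save activation memory;
        # prepare_model_for_kbit_training() readies the quantized model for
        # fine-tuning (upcasts norms, enables input gradients).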
        model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(model)
    
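        # LoRA config: rank-16 adapters on Falcon's fused "query_key_value"
        # projection; the commented target_modules list is the Llama-2 equivalent.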
        config = LoraConfig(
            r=16,
            lora_alpha=32,
            # FIXME: llama2
            # target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"],
            target_modules=["query_key_value"],
            lora_dropout=0.05,
            bias="none",
            task_type="CAUSAL_LM",
        )
    
        model = get_peft_model(model, config)
        print_trainable_parameters(model)
    
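        # Sanity check: generate an answer with the (not yet fine-tuned) model.
        # The commented-out variant is a Basque prompt ("Have a balanced diet with
        # plenty of fruit and vegetables?") for the HiTZ/alpaca_mt "eu" setup.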
        '''
        prompt = f"""
        <{TAGS["human"]}>: Elikadura orekatu bat eduki eta fruta eta barazki ugari eduki?
        <{TAGS["assistant"]}>:
        """.strip()
        '''
        prompt = f"""
        <{TAGS["human"]}>: How can I create an account?
        <{TAGS["assistant"]}>:
        """.strip()
        print(prompt)
    
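        # Generation settings for the sanity check; note that temperature/top_p
        # only take effect if sampling (do_sample=True) is enabled on this config.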
        generation_config = model.generation_config
        generation_config.max_new_tokens = 150
        generation_config.temperature = 0.7
        generation_config.top_p = 0.7
        generation_config.num_return_sequences = 1
        generation_config.pad_token_id = tokenizer.eos_token_id
        generation_config.eos_token_id = tokenizer.eos_token_id
    
        encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
        with torch.inference_mode():
            outputs = model.generate(
                input_ids=encoding.input_ids,
                attention_mask=encoding.attention_mask,
                generation_config=generation_config,
            )
        print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    
        # train_dataset, test_dataset = load_dataset("HiTZ/euscrawl", split=['train[:10%]', 'test[:10%]'])
        # dataset, = load_dataset(*DATASET, split=['train[:1%]'])
    
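        # Load the local e-commerce FAQ dataset (question/answer pairs matching
        # the TAGS field names); the commented lines are the earlier HiTZ variants.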
        dataset = load_dataset("json", data_files="Ecommerce_FAQ_Chatbot_dataset.json")
        # dataset = dataset.remove_columns(["input", "prompt"])
        # dataset = dataset.rename_column("instruction", "input")
        # dataset = dataset.rename_column("instruction", TAGS["input"])
        # dataset = dataset.rename_column("output", TAGS["output"])
        # print(next(iter(dataset['train'])))
        print(dataset["train"][0])
    
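        # Shuffle the training split and tokenize each row's prompt.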
        dataset = dataset["train"].shuffle().map(generate_and_tokenize_prompt, fn_kwargs={"tokenizer": tokenizer})
        print(dataset)
    
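        # Short QLoRA training run: effective batch size 4 (1 x 4 gradient
        # accumulation), at most 80 steps, paged 8-bit AdamW, cosine LR schedule,
        # fp16 mixed precision.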
        training_args = transformers.TrainingArguments(
            per_device_train_batch_size=1,
            gradient_accumulation_steps=4,
            num_train_epochs=1,
            learning_rate=2e-4,
            fp16=True,
            save_total_limit=3,
            logging_steps=1,
            output_dir="llama2-eu-output",
            max_steps=80,
            optim="paged_adamw_8bit",
            lr_scheduler_type="cosine",
            warmup_ratio=0.05,
            remove_unused_columns=False,
        )
    
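        # Trainer with a causal-LM collator: mlm=False makes it build
        # next-token-prediction labels from input_ids.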
        trainer = transformers.Trainer(
            model=model,
            train_dataset=dataset,
            args=training_args,
            data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
        )
    
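        # Disable the KV cache during training; it is incompatible with gradient
        # checkpointing.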
        model.config.use_cache = False
        trainer.train()
    
        print("End training.")
    
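        # For a PEFT-wrapped model this saves only the LoRA adapter weights,
        # not the full base model.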
        model.save_pretrained("trained-model")
    
        print("END")