llama3b runnable version

黄子寒 committed on 2025-03-09 22:05:53 +08:00
parent ebea43bbb3
commit 45b7a67876
12 changed files with 201097 additions and 10 deletions


@@ -40,9 +40,9 @@ Due to the size of the data, you need to download and unzip the data file data.z
 export WANDB_DISABLED=true
 wandb offline
 CUDA_VISIBLE_DEVICES=0 nohup python finetune_kopa.py \
-    --base_model 'YOUR LLM PATH' \
+    --base_model 'models/Llama-3.2-3B-Instruct' \
     --data_path 'data/CoDeX-S-train.json' \
-    --output_dir 'YOUR SAVE PATH' \
+    --output_dir 'output' \
     --num_epochs 3 \
     --lora_r 64 \
     --learning_rate 3e-4 \
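The updated command assumes the Llama-3.2-3B-Instruct checkpoint has already been downloaded to models/Llama-3.2-3B-Instruct. A minimal pre-flight check along these lines (illustrative, not part of this commit) can catch a wrong path or tokenizer problem before the long nohup run:

    # Illustrative pre-flight check, not part of this commit.
    import os
    from transformers import AutoConfig, AutoTokenizer

    base_model = "models/Llama-3.2-3B-Instruct"  # path used in the README command

    assert os.path.isdir(base_model), f"model directory not found: {base_model}"

    config = AutoConfig.from_pretrained(base_model)
    tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)

    # Llama-3.2-3B has hidden_size 3072, which must match dim_llm in kopa.py.
    print("hidden_size:", config.hidden_size)
    print("eos token  :", tokenizer.eos_token)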

New data files added in this commit:

    data/CoDeX-S-rotate.pth       BIN             Binary file not shown.
    data/CoDeX-S-test.json        36562 lines     Diff suppressed because the file is too large.
    data/CoDeX-S-train.json       1 line          Diff suppressed because one or more lines are too long.
    data/CoDeX-S-valid.json       1 line          Diff suppressed because one or more lines are too long.
    data/FB15K-237N-rotate.pth    BIN             Binary file not shown.
    data/FB15K-237N-test.json     164522 lines    Diff suppressed because the file is too large.

Two further data files (names not shown in this view) have their diffs suppressed because one or more lines are too long.
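The two *-rotate.pth files hold the pretrained structural (RotatE) knowledge-graph embeddings consumed by the adapter, and the JSON files are the instruction-formatted train/valid/test splits for CoDeX-S and FB15K-237N. A quick way to inspect an embedding file is sketched below; the internal layout of the .pth file is an assumption, since the diff does not show it:

    # Illustrative inspection of a pretrained-embedding checkpoint.
    # The key/tensor layout inside the .pth file is assumed, not confirmed by the diff.
    import torch

    ckpt = torch.load("data/CoDeX-S-rotate.pth", map_location="cpu")

    if isinstance(ckpt, dict):
        for name, value in ckpt.items():
            shape = tuple(value.shape) if torch.is_tensor(value) else type(value)
            print(name, shape)
    else:
        print(type(ckpt))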


@@ -21,16 +21,16 @@ from peft import (
     prepare_model_for_int8_training,
     set_peft_model_state_dict,
 )
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import LlamaForCausalLM, AutoTokenizer
 from utils.prompter import Prompter


 def train(
     # model/data params
-    base_model: str = "",  # the only required argument
-    data_path: str = "YOUR LLM PATH",
-    output_dir: str = "./lora-alpaca",
+    base_model = "models/Llama-3.2-3B-Instruct",
+    data_path: str = "data/CoDeX-S-train.json",
+    output_dir: str = "output",
     # training hyperparams
     batch_size: int = 16,
     micro_batch_size: int = 16,
@@ -110,7 +110,7 @@ def train(
         device_map=device_map,
     )
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
     tokenizer.pad_token_id = (
         0  # unk. we want this to be different from the eos token
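These hunks (from the training script, finetune_kopa.py judging by the README command above) swap LlamaTokenizer for AutoTokenizer: Llama 3.x ships a tiktoken-style fast tokenizer rather than the SentencePiece model that LlamaTokenizer expects, so AutoTokenizer is the reliable entry point. The surrounding code still hard-codes pad_token_id = 0; for Llama-3 tokenizers, which define no dedicated pad or unk token, a common alternative (an assumption, not what the commit does) is to reuse the eos token:

    # Illustrative padding setup for Llama-3-family tokenizers.
    # This is an alternative to the hard-coded pad_token_id = 0 above, not the commit's code.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("models/Llama-3.2-3B-Instruct", use_fast=True)

    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token  # reuse eos as the padding token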


@@ -17,7 +17,7 @@ class KoPA(nn.Module):
         self.embeddings = PrefixKGEmbedding(
             num_ent=2034,
             num_rel=42,
-            dim_llm=4096,
+            dim_llm=3072,
             num_prefix=1
         )
@@ -73,7 +73,7 @@ class KoPAWithAdapter(nn.Module):
             self.embeddings = PretrainKGEmbedding(
                 pretrain_ent_embs=ent_embs,
                 pretrain_rel_embs=rel_embs,
-                dim_llm=4096,
+                dim_llm=3072,
                 num_prefix=num_prefix
             )
         else:
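Both dim_llm changes (in the KoPA and KoPAWithAdapter modules) follow from the model swap: dim_llm must equal the LLM's hidden size so the projected knowledge-graph prefix embeddings line up with the token embeddings, and Llama-3.2-3B uses hidden_size 3072 where Llama-2-7B used 4096. A hedged sketch of deriving the value from the checkpoint config instead of hard-coding it:

    # Illustrative: read dim_llm from the model config instead of hard-coding 3072.
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("models/Llama-3.2-3B-Instruct")
    dim_llm = config.hidden_size  # 3072 for Llama-3.2-3B, 4096 for Llama-2-7B

    # dim_llm would then be passed to PrefixKGEmbedding / PretrainKGEmbedding as above.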


@@ -6,7 +6,6 @@ black
 black[jupyter]
 datasets
 fire
-git+https://github.com/huggingface/peft@smangrul/release-v0.3.0
 transformers>=4.28.0
 sentencepiece
 gradio
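With the pin to the peft 0.3.0 release branch removed from the requirements file, pip will resolve a current peft release, in which prepare_model_for_int8_training (still imported by the training script above) has been renamed to prepare_model_for_kbit_training. A small compatibility shim along these lines (an illustrative suggestion, not part of the commit) keeps the import working across peft versions:

    # Illustrative compatibility shim for the peft rename; not part of this commit.
    try:
        from peft import prepare_model_for_int8_training
    except ImportError:
        # Newer peft releases expose the same functionality under a new name.
        from peft import prepare_model_for_kbit_training as prepare_model_for_int8_training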