llama3b runnable version

This commit is contained in:
黄子寒 2025-03-09 22:05:53 +08:00
parent ebea43bbb3
commit 45b7a67876
12 changed files with 201097 additions and 10 deletions

README.md

@@ -40,9 +40,9 @@ Due to the size of the data, you need to download and unzip the data file data.z
 export WANDB_DISABLED=true
 wandb offline
 CUDA_VISIBLE_DEVICES=0 nohup python finetune_kopa.py \
-    --base_model 'YOUR LLM PATH' \
+    --base_model 'models/Llama-3.2-3B-Instruct' \
     --data_path 'data/CoDeX-S-train.json' \
-    --output_dir 'YOUR SAVE PATH' \
+    --output_dir 'output' \
     --num_epochs 3 \
     --lora_r 64 \
     --learning_rate 3e-4 \
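The README placeholders are now concrete local paths. Before launching, it can help to confirm they exist relative to the repo root; a minimal sketch (the paths are the ones substituted in this commit, the check itself is not part of the repo):

```python
# Minimal sanity check before launching finetune_kopa.py.
# Paths are the ones substituted in this commit; adjust to your checkout.
from pathlib import Path

for p in ["models/Llama-3.2-3B-Instruct", "data/CoDeX-S-train.json", "output"]:
    print(f"{p}: {'found' if Path(p).exists() else 'missing'}")
```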

New files added in this commit (diffs suppressed by the viewer):

data/CoDeX-S-rotate.pth        new file (binary, not shown)
data/CoDeX-S-test.json         new file, 36562 lines (diff suppressed: too large)
data/CoDeX-S-train.json        new file, 1 line (diff suppressed: line too long)
data/CoDeX-S-valid.json        new file, 1 line (diff suppressed: line too long)
data/FB15K-237N-rotate.pth     new file (binary, not shown)
data/FB15K-237N-test.json      new file, 164522 lines (diff suppressed: too large)
(two further new files, names not shown by the viewer)  (diff suppressed: lines too long)
finetune_kopa.py

@@ -21,16 +21,16 @@ from peft import (
     prepare_model_for_int8_training,
     set_peft_model_state_dict,
 )
-from transformers import LlamaForCausalLM, LlamaTokenizer
+from transformers import LlamaForCausalLM, AutoTokenizer
 from utils.prompter import Prompter


 def train(
     # model/data params
-    base_model: str = "",  # the only required argument
-    data_path: str = "YOUR LLM PATH",
-    output_dir: str = "./lora-alpaca",
+    base_model: str = "models/Llama-3.2-3B-Instruct",
+    data_path: str = "data/CoDeX-S-train.json",
+    output_dir: str = "output",
     # training hyperparams
     batch_size: int = 16,
     micro_batch_size: int = 16,
@@ -110,7 +110,7 @@ def train(
         device_map=device_map,
     )
-    tokenizer = LlamaTokenizer.from_pretrained(base_model)
+    tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
     tokenizer.pad_token_id = (
         0  # unk. we want this to be different from the eos token
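The tokenizer switch is what makes the 3B model loadable: Llama-3 checkpoints ship only a fast tokenizer (no SentencePiece model file), so the slow LlamaTokenizer class cannot read them, while AutoTokenizer picks the right backend from the checkpoint. A small sketch of the loading step as changed here (the pad-token choice mirrors the existing code in finetune_kopa.py):

```python
# Sketch of the tokenizer loading as changed in this commit.
# Llama-3.2 checkpoints only ship a fast tokenizer, so AutoTokenizer is required;
# LlamaTokenizer (slow, SentencePiece-based) cannot load them.
from transformers import AutoTokenizer

base_model = "models/Llama-3.2-3B-Instruct"  # path used in this commit
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token_id = 0  # same choice as finetune_kopa.py: distinct from eos
print(type(tokenizer).__name__, tokenizer.eos_token_id)
```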

kopa.py

@@ -17,7 +17,7 @@ class KoPA(nn.Module):
         self.embeddings = PrefixKGEmbedding(
             num_ent=2034,
             num_rel=42,
-            dim_llm=4096,
+            dim_llm=3072,
             num_prefix=1
         )
@@ -73,7 +73,7 @@ class KoPAWithAdapter(nn.Module):
             self.embeddings = PretrainKGEmbedding(
                 pretrain_ent_embs=ent_embs,
                 pretrain_rel_embs=rel_embs,
-                dim_llm=4096,
+                dim_llm=3072,
                 num_prefix=num_prefix
             )
         else:
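dim_llm drops from 4096 to 3072 because it must match the base model's hidden size: 4096 is the hidden size of the 7B LLaMA-family models the code originally targeted, while Llama-3.2-3B uses 3072. A hedged alternative to hardcoding would be to read the value from the checkpoint config (a sketch, not what this commit does):

```python
# Sketch (not what this commit does): derive dim_llm from the base model's config
# instead of hardcoding it, so the prefix/adapter embeddings always match.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("models/Llama-3.2-3B-Instruct")
dim_llm = config.hidden_size  # 3072 for Llama-3.2-3B, 4096 for 7B LLaMA models
print(dim_llm)
```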

requirements.txt

@@ -6,7 +6,6 @@ black
 black[jupyter]
 datasets
 fire
-git+https://github.com/huggingface/peft@smangrul/release-v0.3.0
 transformers>=4.28.0
 sentencepiece
 gradio
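Dropping the pinned peft git branch presumably means peft now comes from PyPI, and the installed version matters: finetune_kopa.py still imports prepare_model_for_int8_training, which later peft releases renamed to prepare_model_for_kbit_training and eventually removed. A quick hedged compatibility check (not part of the repo):

```python
# Hedged compatibility check (assumption: peft is installed from PyPI rather than
# the pinned git branch). Newer peft releases dropped prepare_model_for_int8_training
# in favour of prepare_model_for_kbit_training, which finetune_kopa.py does not use.
import peft

try:
    from peft import prepare_model_for_int8_training  # noqa: F401
    print(f"peft {peft.__version__}: prepare_model_for_int8_training is available")
except ImportError:
    print(f"peft {peft.__version__}: only prepare_model_for_kbit_training is available")
```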