From 6e7bd5e5d4954cf11cb7659dafdab9ef209df877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E5=8F=8B=E6=98=89?= Date: Tue, 19 Mar 2024 18:03:26 +0800 Subject: [PATCH] GLM-6B ft --- xtuner_config/ChatGLM3-6b-ft.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xtuner_config/ChatGLM3-6b-ft.md b/xtuner_config/ChatGLM3-6b-ft.md index 37015c8..a1867be 100644 --- a/xtuner_config/ChatGLM3-6b-ft.md +++ b/xtuner_config/ChatGLM3-6b-ft.md @@ -65,8 +65,7 @@ LLM 的微调一般指指令微调过程。所谓指令微调,是说我们使 def process_func(example): MAX_LENGTH = 512 input_ids, labels = [], [] - instruction = tokenizer.encode(text="\n".join(["<|system|>", "现在你是一个心理专家,我有一些心理问题,请你用专业的知识帮我解决。", "<|user|>", - example["system"] + example["input"] + "<|assistant|>"]).strip() + "\n", + instruction = tokenizer.encode(text="\n".join(["<|system|>", example["system"], "<|user|>", example["input"] + "<|assistant|>"]).strip() + "\n", add_special_tokens=True, truncation=True, max_length=MAX_LENGTH) response = tokenizer.encode(text=example["output"], add_special_tokens=False, truncation=True,