Merge branch 'SmartFlowAI:main' into main

2024-03-19 20:14:33 +08:00 · 2024-03-19 20:14:33 +08:00 · feb252d5b5
commit feb252d5b5
parent b542929c1d af53e9744d
3 changed files with 4 additions and 3 deletions
--- a/README.md
+++ b/README.md
@@ -230,6 +230,7 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git
 |         [SantiagoTOP](https://github.com/santiagoTOP)         |                     太原理工大学在读硕士                     | | |
 |         [zealot52099](https://github.com/zealot52099)         |                       AI搬用工                        | |清洗数据、RAG|
 |             [wwwyfff](https://github.com/wwwyfff)             |                      复旦大学在读硕士                      | ||
 |             [jkhumor](https://github.com/jkhumor)             |                      南开大学在读硕士                      | |RAG|
 ### 版权说明
--- a/README_EN.md
+++ b/README_EN.md
@@ -246,6 +246,7 @@ This project uses Git for version control. You can see the currently available v
 |         [SantiagoTOP](https://github.com/santiagoTOP)         |          Taiyuan University of Technology, Master's student          | | |
 |         [zealot52099](https://github.com/zealot52099)         |                               AI Mover                               | |Data Processing and RAG|
 |             [wwwyfff](https://github.com/wwwyfff)             |                  FuDan University, Master's student                  | ||
 |             [jkhumor](https://github.com/jkhumor)             |                 Nankai University, Master's student                  | |RAG|
 ### Copyright Notice
--- a/xtuner_config/ChatGLM3-6b-ft.md
+++ b/xtuner_config/ChatGLM3-6b-ft.md
@@ -65,8 +65,7 @@ LLM 的微调一般指指令微调过程。所谓指令微调，是说我们使
 def process_func(example):
    MAX_LENGTH = 512
    input_ids, labels = [], []
-    instruction = tokenizer.encode(text="\n".join(["<|system|>", "现在你是一个心理专家，我有一些心理问题，请你用专业的知识帮我解决。", "<|user|>", 
+    instruction = tokenizer.encode(text="\n".join(["<|system|>", example["system"], "<|user|>", example["input"] + "<|assistant|>"]).strip() + "\n",
-                                   example["system"] + example["input"] + "<|assistant|>"]).strip() + "\n",
                                    add_special_tokens=True, truncation=True, max_length=MAX_LENGTH)
    response = tokenizer.encode(text=example["output"], add_special_tokens=False, truncation=True,