From bee025ca0f87fe370ada55e3d8300d59c02bf686 Mon Sep 17 00:00:00 2001
From: MING_X <ming_zch@foxmail.com>
Date: Thu, 9 May 2024 02:18:07 +0800
Subject: [PATCH] Update README.md

---
 README.md                                     | 18 ++++++++--------
 README_EN.md                                  | 20 +++++++++---------
 datasets/LICENSE                              | 21 -------------------
 datasets/README.md                            |  2 +-
 datasets/README_EN.md                         |  2 +-
 datasets/processed/Book_QA_Process.md         | 16 ++++++++++----
 .../ruozhiba_raw_data_process.py              |  2 +-
 datasets/{ => processed}/split_dataset.py     |  0
 datasets/{ => processed}/split_shuffle.py     |  0
 .../Baby_EmoLLM.ipynb                         |  0
 {docs => quick_start}/quick_start.md          |  0
 {docs => quick_start}/quick_start_EN.md       |  0
 12 files changed, 34 insertions(+), 47 deletions(-)
 delete mode 100644 datasets/LICENSE
 rename datasets/{ => processed}/ruozhiba_raw_data_process.py (97%)
 rename datasets/{ => processed}/split_dataset.py (100%)
 rename datasets/{ => processed}/split_shuffle.py (100%)
 rename Baby_EmoLLM.ipynb => quick_start/Baby_EmoLLM.ipynb (100%)
 rename {docs => quick_start}/quick_start.md (100%)
 rename {docs => quick_start}/quick_start_EN.md (100%)
diff --git a/README.md b/README.md
index bfe0e99..813d44e 100644
--- a/README.md
+++ b/README.md
@@ -172,12 +172,12 @@
   - [🔗框架图](#框架图)
   - [目录](#目录)
           - [开发前的配置要求](#开发前的配置要求)
-          - [**使用指南**](#使用指南)
+          - [使用指南](#使用指南)
     - [🍪快速体验](#快速体验)
     - [📌数据构建](#数据构建)
     - [🎨微调指南](#微调指南)
     - [🔧部署指南](#部署指南)
-    - [⚙RAG(检索增强生成)Pipeline](#rag检索增强生成pipeline)
+    - [⚙RAG(检索增强生成)](#rag检索增强生成)
     - [使用到的框架](#使用到的框架)
       - [如何参与本项目](#如何参与本项目)
     - [作者（排名不分先后）](#作者排名不分先后)
@@ -192,7 +192,7 @@
 
 - 硬件：A100 40G（仅针对InternLM2_7B_chat+qlora微调+deepspeed zero2优化）
 
-###### **使用指南**
+###### 使用指南
 
 1. Clone the repo
 
@@ -211,7 +211,8 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git
 
 ### 🍪快速体验
 
-- 请阅读[快速体验](docs/quick_start.md)查阅
+- 详见[快速体验](quick_start/quick_start.md)
+- 快速上手：[Baby EmoLLM](quick_start/Baby_EmoLLM.ipynb)
 
 
 ### 📌数据构建
@@ -229,9 +230,9 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git
 - Demo部署：详见[部署指南](demo/README.md)
 - 基于[LMDeploy](https://github.com/InternLM/lmdeploy/)的量化部署：详见[deploy](./deploy/lmdeploy.md)
 
-### ⚙RAG(检索增强生成)Pipeline
+### ⚙RAG(检索增强生成)
 
-- 详见[RAG](./rag/)
+- 详见[RAG](rag/README.md)
 
 <details>
 <summary>更多详情</summary>
@@ -307,11 +308,10 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git
 
 ### 特别鸣谢
 
-- [Sanbu](https://github.com/sanbuphy)
 - [上海人工智能实验室](https://www.shlab.org.cn/)
-- [闻星大佬（小助手）](https://github.com/vansin)
-- [扫地升（公众号宣传）](https://mp.weixin.qq.com/s/78lrRl2tlXEKUfElnkVx4A)
+- [闻星（浦语小助手）](https://github.com/vansin)
 - 阿布（北大心理学硕士）
+- [Sanbu](https://github.com/sanbuphy)
 - [HatBoy](https://github.com/hatboy)
 
 <!-- links -->
diff --git a/README_EN.md b/README_EN.md
index dad2c09..0954449 100644
--- a/README_EN.md
+++ b/README_EN.md
@@ -173,12 +173,12 @@ The Model aims to fully understand and promote the mental health of individuals,
   - [Roadmap](#roadmap)
   - [Contents](#contents)
           - [Pre-development Configuration Requirements.](#pre-development-configuration-requirements)
-          - [**User Guide**](#user-guide)
+          - [User Guide](#user-guide)
     - [🍪Quick start](#quick-start)
     - [📌Data Construction](#data-construction)
     - [🎨Fine-tuning Guide](#fine-tuning-guide)
     - [🔧Deployment Guide](#deployment-guide)
-    - [⚙RAG (Retrieval Augmented Generation) Pipeline](#rag-retrieval-augmented-generation-pipeline)
+    - [⚙RAG (Retrieval Augmented Generation)](#rag-retrieval-augmented-generation)
     - [Frameworks Used](#frameworks-used)
       - [How to participate in this project](#how-to-participate-in-this-project)
     - [Version control](#version-control)
@@ -193,7 +193,7 @@ The Model aims to fully understand and promote the mental health of individuals,
 
 - A100 40G (specifically for InternLM2_7B_chat + qlora fine-tuning + deepspeed zero2 optimization)
 
-###### **User Guide**
+###### User Guide
 
 1. Clone the repo
 
@@ -211,7 +211,8 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git
 
 
 ### 🍪Quick start
-- Please read [Quick Start](docs/quick_start_EN.md) to see.
+- Please read the [Quick Start](quick_start/quick_start_EN.md) guide.
+- Hands-on notebook: [Baby EmoLLM](quick_start/Baby_EmoLLM.ipynb)
 
 ### 📌Data Construction
 
@@ -228,9 +229,9 @@ For details, see the [fine-tuning guide](xtuner_config/README_EN.md)
 - Demo deployment: see [deployment guide](./demo/README_EN.md) for details.
 - Quantitative deployment based on [LMDeploy](https://github.com/InternLM/lmdeploy/): see [deploy](./deploy/lmdeploy_EN.md)
 
-### ⚙RAG (Retrieval Augmented Generation) Pipeline
+### ⚙RAG (Retrieval Augmented Generation)
 
-- See [RAG](./rag/)
+- See [RAG](rag/README_EN.md)
 
 <details>
 <summary>Additional Details</summary>
@@ -297,11 +298,10 @@ The project is licensed under the MIT License. Please refer to the details
 
 ### Acknowledgments
 
-- [Sanbu](https://github.com/sanbuphy)
 - [Shanghai Artificial Intelligence Laboratory](https://www.shlab.org.cn/)
-- [Vanin](https://github.com/vansin)
-- [Bloom up (WeChat Official Account Promotion)](https://mp.weixin.qq.com/s/78lrRl2tlXEKUfElnkVx4A)
-- Abu (M.A. in Psychology, Peking University)
+- [Vansin](https://github.com/vansin)
+- A.bu (M.A. in Psychology, Peking University)
+- [Sanbuphy](https://github.com/sanbuphy)
 - [HatBoy](https://github.com/hatboy)
 
 <!-- links -->
diff --git a/datasets/LICENSE b/datasets/LICENSE
deleted file mode 100644
index e5ebd65..0000000
--- a/datasets/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2024 SmartFlowAI
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/datasets/README.md b/datasets/README.md
index f9390f4..bc43551 100644
--- a/datasets/README.md
+++ b/datasets/README.md
@@ -2,7 +2,7 @@
 
 * 数据集按用处分为两种类型：**General** 和 **Role-play**
 * 数据按格式分为两种类型：**QA** 和 **Conversation**
-* 数据汇总：General（**6个数据集**）；Role-play（**5个数据集**）
+* 数据汇总：General（**8个数据集**）；Role-play（**5个数据集**）
 
 ## 数据集类型
 
diff --git a/datasets/README_EN.md b/datasets/README_EN.md
index 7180468..e21482b 100644
--- a/datasets/README_EN.md
+++ b/datasets/README_EN.md
@@ -2,7 +2,7 @@
 
 * Category of dataset: **General** and **Role-play**
 * Type of data: **QA** and **Conversation**
-* Summary: General(**6 datasets**), Role-play(**5 datasets**)
+* Summary: General(**8 datasets**), Role-play(**5 datasets**)
 
  ## Category
 * **General**: generic dataset, including psychological Knowledge, counseling technology, etc.
diff --git a/datasets/processed/Book_QA_Process.md b/datasets/processed/Book_QA_Process.md
index 7ca8c1c..f4e7897 100755
--- a/datasets/processed/Book_QA_Process.md
+++ b/datasets/processed/Book_QA_Process.md
@@ -1,8 +1,15 @@
-## 一共有两个 .py 文件，分别为Book_QA_process_Step_1.py和Book_QA_process_Step_2.py
+# Book_QA_process
+
+共两个 Python 文件，分别为 Book_QA_process_Step_1.py 和 Book_QA_process_Step_2.py
+
 ### Book_QA_process_Step_1.py
-    该代码是将我们生成的QA对jsonl数据转换为json格式
+
+* 该代码是将我们生成的QA对jsonl数据转换为json格式
+
 ### Book_QA_process_Step_2.py
-    该代码是将第一步生成的json格式数据转化为可用于指令微调的数据格式，并添加system，即：
+* 该代码是将第一步生成的json格式数据转化为可用于指令微调的数据格式，并添加system，即：
+
+  ```json
     {
         "conversation": [
             {
@@ -11,4 +18,5 @@
                 "output": "Answer"
             }
         ]
-    }
\ No newline at end of file
+    }
+  ```
\ No newline at end of file
diff --git a/datasets/ruozhiba_raw_data_process.py b/datasets/processed/ruozhiba_raw_data_process.py
similarity index 97%
rename from datasets/ruozhiba_raw_data_process.py
rename to datasets/processed/ruozhiba_raw_data_process.py
index 9f49353..ecfa425 100644
--- a/datasets/ruozhiba_raw_data_process.py
+++ b/datasets/processed/ruozhiba_raw_data_process.py
@@ -2,7 +2,7 @@ import json
 
 # 打开JSON文件并读取其内容
 
-file_name = 'ruozhiba_raw.jsonl' 
+file_name = '../ruozhiba_raw.jsonl' 
 
 # with open(f'data/{file_name}', 'r', encoding='utf-8') as file:
 #     data = json.load(file)
diff --git a/datasets/split_dataset.py b/datasets/processed/split_dataset.py
similarity index 100%
rename from datasets/split_dataset.py
rename to datasets/processed/split_dataset.py
diff --git a/datasets/split_shuffle.py b/datasets/processed/split_shuffle.py
similarity index 100%
rename from datasets/split_shuffle.py
rename to datasets/processed/split_shuffle.py
diff --git a/Baby_EmoLLM.ipynb b/quick_start/Baby_EmoLLM.ipynb
similarity index 100%
rename from Baby_EmoLLM.ipynb
rename to quick_start/Baby_EmoLLM.ipynb
diff --git a/docs/quick_start.md b/quick_start/quick_start.md
similarity index 100%
rename from docs/quick_start.md
rename to quick_start/quick_start.md
diff --git a/docs/quick_start_EN.md b/quick_start/quick_start_EN.md
similarity index 100%
rename from docs/quick_start_EN.md
rename to quick_start/quick_start_EN.md