From b3c2607677c9b7cce55eeb398dc046841c083258 Mon Sep 17 00:00:00 2001
From: jupyter <ajupyter@163.com>
Date: Thu, 18 Jan 2024 22:45:55 +0800
Subject: [PATCH 1/2] ADD RE @a

---
 data/qwen_gen_data.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/data/qwen_gen_data.py b/data/qwen_gen_data.py
index 7c6086b..f2dddb8 100644
--- a/data/qwen_gen_data.py
+++ b/data/qwen_gen_data.py
@@ -1,6 +1,7 @@
 import json
 import random
 import argparse
+import re
 
 from tqdm import tqdm
 
@@ -109,36 +110,38 @@ if __name__ == '__main__':
         print(res)
 
         # 一次会话
-        for itm in res.split('\n'):
-            if itm.startswith("病人："):
-                dia_tuple.append(itm.split("：")[1])
-            elif itm.startswith("医生："):
-                dia_tuple.append(itm.split("：")[1])
+        doctor_pattern = r'医生：(.*?)(病人：|$)'
 
-            if len(dia_tuple) == 2 and len(one_conversation['conversation']) == 0:
+        doctor_matches = re.findall(doctor_pattern, res, re.DOTALL)
+        doctor_conversations = [match[0] for match in doctor_matches]
+
+        patient_pattern = r'病人：(.*?)医生：'
+        patient_matches = re.findall(patient_pattern, res, re.DOTALL)
+        patient_conversations = [match for match in patient_matches]
+
+        for doc, pat in zip(doctor_conversations, patient_conversations):
+            if len(one_conversation['conversation']) == 0:
                 one_conversation['conversation'].append(
                     {
                         "system": "现在你是一个心理专家，我有一些心理问题，请你用专业的知识帮我解决。",
-                        "input": dia_tuple[0],
-                        "output": dia_tuple[1]
+                        "input": pat,
+                        "output": doc
                     },
                 )
-                dia_tuple = []
 
-            elif len(dia_tuple) == 2:
+            else:
                 one_conversation['conversation'].append(
                     {
-                        "input": dia_tuple[0],
-                        "output": dia_tuple[1]
+                        "input": pat,
+                        "output": doc
                     },
                 )
-                dia_tuple = []
         conversation_lis.append(one_conversation)
 
         idx += 1
 
-        # 每生成2条数据存储一次
-        if (idx % 2 == 0):
+        # 每生成10条数据存储一次
+        if (idx % 10 == 0):
             path = f'./{args.data}.jsonl'
             save_jsonl(data_lis=conversation_lis, file_path=path)
             conversation_lis = []  # 清空

From e89aa0630007adbb1c676ca14efa42efabe2530a Mon Sep 17 00:00:00 2001
From: jupyter <ajupyter@163.com>
Date: Thu, 18 Jan 2024 22:50:04 +0800
Subject: [PATCH 2/2] ADD run_qwen.bash @aJupyter

---
 data/run_qwen.bash | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 data/run_qwen.bash

diff --git a/data/run_qwen.bash b/data/run_qwen.bash
new file mode 100644
index 0000000..cf07df9
--- /dev/null
+++ b/data/run_qwen.bash
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+# Life areas; qwen_gen_data.py is run once per entry, with the entry passed as --data.
+areas_of_life=(
+    "工作"
+    "学业"
+    "生活"
+    "身体"
+    "家人"
+    "朋友"
+    "社交"
+    "恋爱"
+    "就业"
+    "责任"
+    "爱好"
+    "环境"
+    "隐私"
+    "安全"
+    "梦想"
+    "自由"
+)
+
+# Iterate over the array; "$area" is quoted so each value reaches python as exactly one argument.
+for area in "${areas_of_life[@]}"; do
+    echo "当前生活领域: $area"
+    python qwen_gen_data.py --data "$area"
+done