merge dev (#250)

This commit is contained in:
xzw 2024-06-02 18:06:22 +08:00 committed by GitHub
commit 965bd185e6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 4925 additions and 0 deletions

117
agents/code.py Normal file
View File

@ -0,0 +1,117 @@
import asyncio
import re
from metagpt.actions.write_tutorial import WriteDirectory, WriteContent
from metagpt.logs import logger
from metagpt.roles.role import Role
from metagpt.schema import Message
from metagpt.actions import Action
from metagpt.prompts.tutorial_assistant import DIRECTORY_PROMPT, CONTENT_PROMPT
from metagpt.utils.common import OutputParser
from datetime import datetime
from pathlib import Path
import os
import time
import yaml
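
# EmoLLM counseling agent built on MetaGPT: it loads a persona prompt from
# config.yml, asks the LLM to answer the user's question in that persona,
# asks it again to decorate the answer with emojis, and appends the whole
# dialogue to a transcript file under Interlocution/.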

# Create a new transcript file for this conversation
def CreateDir():
    path = 'Interlocution'
    if not os.path.exists(path):
        os.makedirs(path)
    files = os.listdir(path)
    file_num = str(len(files))
    # the Interlocution folder must exist by this point (created above if missing)
    path = Path('Interlocution').joinpath(file_num + '.txt')
    with open(path, 'a') as f:
        f.write(f'创建时间{datetime.fromtimestamp(int(time.time()))}\n')
    return path


# Append one question/answer pair to the transcript
def Recording(question: str, answer: str, path: str):
    with open(path, 'a') as f:
        question = '病人:{}\n'.format(question)
        answer = '医生:{}\n'.format(answer)
        f.write(question)
        f.write(answer)

class EmoLLM(Action):
    def __init__(self, question: str, choice: str):
        super().__init__()
        with open('config.yml', 'r', encoding='utf-8') as f:
            configs = yaml.load(f.read(), Loader=yaml.FullLoader)
        self.question = question
        self.choice = choice
        self.PROMPT_TEMPLATE = configs['PROMPT'][choice]
        self.PROMPT = configs['Emoji_PROMPT']
        self.name = choice

    async def run(self, question):
        prompt = self.PROMPT_TEMPLATE.format(question=question)
        rsp = await self._aask(prompt)
        # Post-process the answer for professionalism -- not implemented yet
        # process_rsp =
        # Add emojis to the answer
        prompt = self.PROMPT.format(answer=rsp)
        process_rsp = await self._aask(prompt)
        text = EmoLLM.parse_code(process_rsp)
        return text

    @staticmethod
    def parse_code(rsp):
        # Extract the part wrapped in ```处理之后的回答 ... ``` (see Emoji_PROMPT in config.yml)
        pattern = r'```处理之后的回答(.*?)```'
        match = re.search(pattern, rsp, re.DOTALL)
        text = match.group(1) if match else rsp
        return text


# Persona definition
class ch_aiwei(Role):
    """Role class, inherits from the MetaGPT Role base class."""

    def __init__(self, question: str, choice: str):
        """Initialize the aiwei role."""
        super().__init__()  # call the base-class constructor
        self.question = question
        self.choice = choice
        self.set_actions([EmoLLM(question=self.question, choice=self.choice)])  # only one action for now
        self._set_react_mode(react_mode='by_order')  # execute actions in order

    async def _act(self) -> Message:
        """The role's acting logic."""
        logger.info(f"{self._setting}: about to run {self.rc.todo}")  # log the upcoming action
        todo = self.rc.todo  # take the next action in order
        msg = self.get_memories(k=1)[0]
        # Produce the styled answer
        result = await todo.run(msg.content)
        # Wrap the result in a Message object
        msg = Message(content=result, role=self.profile, cause_by=type(todo))
        self.rc.memory.add(msg)  # add the result to memory
        return msg  # return the final Message


async def main():
    with open('config.yml', 'r', encoding='utf-8') as f:
        configs = yaml.load(f.read(), Loader=yaml.FullLoader)
    path = CreateDir()
    question = input('你好,请问您需要什么帮助吗?')
    role = ch_aiwei(question, '爹系男友')
    logger.info(question)
    while question != 'exit':
        result = await role.run(question)
        logger.info(result)
        Recording(question, result, path)
        question = input()


asyncio.run(main())

4
agents/config.yml Normal file
View File

@ -0,0 +1,4 @@
# The persona names can be changed
PROMPT : {'aiwei':'你是一个专业的心理医生,请你用专业的知识帮我解决心理问题。但是要求你用委婉温柔的大姐姐一样的语气回答并解决我的问题:```{question}```并且将字数限制在100字以内', '爹系男友':'你是一个专业的心理医生,请你用专业的知识帮我解决心理问题。但是要求你用细心温柔、阳光开朗男友一样的语气回答并解决我的问题:```{question}```并且将字数限制在100字以内', 'mom':'你是一个专业的心理医生,请你用专业的知识帮我解决心理问题。但是要求你用妈妈一样长辈关心孩子的语气回答并解决我的问题:```{question}```并且将字数限制在100字以内', '科学家':'你是一个专业的心理医生,请你用专业的知识帮我解决心理问题。但是要求你用规格严格、严谨的科学家一样的语气回答并解决我的问题:```{question}```并且将字数限制在100字以内'}
Emoji_PROMPT : '将句子```{answer}```,在合适的位置添加合适表情(如🤔💖😊📚💪🔍🌸🌟🎉🌈😢💡,但不仅限于此)\n返回的格式为```处理之后的回答\n```,请不要包含其他文本。\n```处理之后的回答\n```\n'
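# How these templates are used (see agents/code.py above): EmoLLM reads PROMPT[<persona>]
# and Emoji_PROMPT from this file; run() first fills {question} into the chosen persona
# template, then sends the model's answer through Emoji_PROMPT, which asks for the
# emoji-decorated reply wrapped in ```处理之后的回答 ... ``` so that parse_code can strip it.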

269
swift/README.md Normal file
View File

@ -0,0 +1,269 @@
# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
## 📖 目录
- [简介](#-简介)
- [新闻](#-新闻)
- [swift微调](#%EF%B8%8F-swift微调框架的安装与使用)
- [swift量化](#-量化大模型)
- [模型推理推送](#-模型推理)
## 📝 简介
SWIFT支持近**200种LLM和MLLM**多模态大模型的训练、推理、评测和部署。开发者可以直接将SWIFT框架应用到自己的Research和生产环境中，实现模型训练评测到应用的完整链路。除支持了[PEFT](https://github.com/huggingface/peft)提供的轻量训练方案外，SWIFT也提供了一个完整的Adapters库，以支持最新的训练技术，如NEFTune、LoRA+、LLaMA-PRO等，这个适配器库可以脱离训练脚本直接使用在自己的自定义流程中。同时，SWIFT也在拓展其他模态的能力，目前SWIFT支持了AnimateDiff的全参数训练和LoRA训练。
现在我们使用本项目自定义的[数据集](https://github.com/SmartFlowAI/EmoLLM/blob/main/datasets)，并将其转化成合适的json格式（见SWIFT代码部分），使用SWIFT进行微调（目前项目已完成对Qwen-7b的微调）。
SWIFT具有丰富的文档体系，如有使用问题，请查看[这里](https://github.com/modelscope/swift/tree/main/docs/source/LLM)。
大家可以在[Huggingface space](https://huggingface.co/spaces/tastelikefeet/swift) 和 [ModelScope创空间](https://www.modelscope.cn/studios/iic/Scalable-lightWeight-Infrastructure-for-Fine-Tuning/summary) 中体验SWIFT web-ui功能。
## 🎉 新闻
- 🔥2024.04.26: 完成对qwen-7b-chat模型的SWIFT微调并且上传到[Modelscope](https://www.modelscope.cn/models/monbear/qwen-7b-chat-lora/summary).
- 🔥2024.04.27: 完成对qwen-7b-chat微调模型的量化并且上传到[Modelscope](https://www.modelscope.cn/models/monbear/qwen1half-7b-chat-lora/summary).
- 🔥2024.04.29: 获得[AI 赋能大学计划“全国高校行”](https://mp.weixin.qq.com/s/yyaulQ1wBzKq5cXaGl2Wag)一等奖
## 🛠️ swift微调框架的安装与使用
### <u>环境准备</u>
GPU设备: A10, 3090, V100, A100均可.
SWIFT在Python环境中运行。请确保您的Python版本高于3.8。
这里我们对实验环境进行安装其中包含了虚拟环境的创建、ms-swift以及相关依赖的安装。
```bash
# 设置pip全局镜像 (加速下载)
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
# 安装ms-swift
git clone https://github.com/modelscope/swift.git
cd swift
pip install -e '.[llm]'
# 如果你想要使用deepspeed.
pip install deepspeed -U
# 如果你想要使用基于auto_gptq的qlora训练. (推荐, 效果优于bnb)
# 支持auto_gptq的模型: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U
# 如果你想要使用基于bnb的qlora训练.
pip install bitsandbytes -U
# 环境对齐 (通常不需要运行. 如果你运行错误, 可以跑下面的代码, 仓库使用最新环境测试)
pip install -r requirements/framework.txt -U
pip install -r requirements/llm.txt -U
```
### <u>微调大模型</u>
#### 使用python进行微调
```python
# Experimental environment: A10, 3090, V100, ...
# 20GB GPU memory
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
from swift.llm import (
    DatasetName, InferArguments, ModelType, SftArguments,
    infer_main, sft_main, app_ui_main
)

model_type = ModelType.qwen_7b_chat
sft_args = SftArguments(
    model_type=model_type,
    dataset=[f'{DatasetName.blossom_math_zh}#2000'],
    output_dir='output')
result = sft_main(sft_args)
best_model_checkpoint = result['best_model_checkpoint']
print(f'best_model_checkpoint: {best_model_checkpoint}')
torch.cuda.empty_cache()

infer_args = InferArguments(
    ckpt_dir=best_model_checkpoint,
    load_dataset_config=True)
# merge_lora(infer_args, device_map='cpu')
result = infer_main(infer_args)
torch.cuda.empty_cache()

app_ui_main(infer_args)
```
#### 使用CLI命令进行微调
```bash
# Experimental environment: A10, 3090, V100, ...
# 20GB GPU memory
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# 使用自己的数据集(我们这里使用了自己的对话数据集 aiwei.jsonl)
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \
--output_dir output
# 使用DDP
# Experimental environment: 2 * 3090
# 2 * 23GB GPU memory
CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=2 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# 多机多卡
# node0
CUDA_VISIBLE_DEVICES=0,1,2,3 \
NNODES=2 \
NODE_RANK=0 \
MASTER_ADDR=127.0.0.1 \
NPROC_PER_NODE=4 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# node1
CUDA_VISIBLE_DEVICES=0,1,2,3 \
NNODES=2 \
NODE_RANK=1 \
MASTER_ADDR=xxx.xxx.xxx.xxx \
NPROC_PER_NODE=4 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
```
为了降低使用门槛，swift还贴心地增加了[界面训练推理](https://github.com/modelscope/swift/blob/main/docs/source/GetStarted/%E7%95%8C%E9%9D%A2%E8%AE%AD%E7%BB%83%E6%8E%A8%E7%90%86.md "界面训练推理")的方式。另外还有[sh脚本](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_awq/lora "sh脚本")的使用方式。大家可以在Github上查阅swift的[官方文档](https://github.com/modelscope/swift/blob/main/docs/source "官方文档")去了解。
## 📃 量化大模型
swift支持使用awq、gptq、bnb、hqq、eetq技术对模型进行量化。其中awq、gptq量化技术支持vllm进行推理加速，需要使用校准数据集，量化性能更好但量化速度较慢；而bnb、hqq、eetq无需校准数据，量化速度较快。这五种量化方法都支持qlora微调。
awq、gptq需要使用`swift export`进行量化，而bnb、hqq、eetq可以直接在sft和infer时进行快速量化。
从vllm推理加速支持的角度来看，更推荐使用awq和gptq进行量化；从量化效果的角度来看，更推荐使用awq、hqq和gptq进行量化；而从量化速度的角度来看，更推荐使用hqq进行量化。
这里我们推荐使用awq量化技术进行qlora微调。
### 环境准备
GPU设备: A10, 3090, V100, A100均可.
```bash
# 使用awq量化:
# autoawq和cuda版本有对应关系请按照`https://github.com/casper-hansen/AutoAWQ`选择版本
pip install autoawq -U
# 使用gptq量化:
# auto_gptq和cuda版本有对应关系请按照`https://github.com/PanQiWei/AutoGPTQ#quick-installation`选择版本
pip install auto_gptq -U
# 使用bnb量化
pip install bitsandbytes -U
# 使用hqq量化
# 需要transformers版本>4.40,从源码安装
pip install git+https://github.com/huggingface/transformers
pip install hqq
# 如果要兼容训练需要从源码安装peft
pip install git+https://github.com/huggingface/peft.git
# 使用eetq量化
# 需要transformers版本>4.40,从源码安装
pip install git+https://github.com/huggingface/transformers
# 参考https://github.com/NetEase-FuXi/EETQ
git clone https://github.com/NetEase-FuXi/EETQ.git
cd EETQ/
git submodule update --init --recursive
pip install .
# 如果要兼容训练需要从源码安装peft
pip install git+https://github.com/huggingface/peft.git
# 环境对齐 (通常不需要运行. 如果你运行错误, 可以跑下面的代码, 仓库使用最新环境测试)
pip install -r requirements/framework.txt -U
pip install -r requirements/llm.txt -U
```
### <u>量化微调后模型</u>
```bash
# 使用`alpaca-zh alpaca-en sharegpt-gpt4-mini`作为量化数据集
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx' \
--merge_lora true --quant_bits 4 \
--dataset alpaca-zh alpaca-en sharegpt-gpt4-mini --quant_method awq
# 使用微调时使用的数据集作为量化数据集
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx' \
--merge_lora true --quant_bits 4 \
--load_dataset_config true --quant_method awq
```
## 🔥 模型推理
### 推理微调后大模型
```bash
# awq/gptq量化模型支持vllm推理加速. 也支持模型部署.
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx-merged-awq-int4'
```
### 推理效果
```text
<<< 我真的能改变自己吗
当然可以💖!每个人都有改变自己生活轨迹的能力,这需要我们有决心和行动力。首先,你可以尝试从小事做起,比如设定一个健康的生活习惯目标,如每天定时运动或保持良好的饮食习惯。然后,你可以尝试加入一些支持性的社交群体,与他人分享你的进步和挑战,这有助于建立自信并获得他人的鼓励与支持。
--------------------------------------------------
<<< xiexieni
亲爱的,你的感谢💖让我感到温暖。你的积极态度让我深信你有能力去改变和提升自己。请记住,每个人都有自己的节奏和成长过程,不必与他人比较。我们可以一起设定一些小目标,并在实现它们的过程中互相鼓励💪。
--------------------------------------------------
<<< 你叫什么
我是心理健康小分队艾薇知心大姐姐💖。我是一个基于人工智能的聊天机器人,可以提供信息、建议和陪伴。如果你有任何疑问或需要帮助,随时可以向我提问或者分享你的感受🌈。
--------------------------------------------------
```
### 模型推送
- 如果你想将你调试好的模型推送到自己的魔搭社区上，可以使用下面的命令。之后你可以在魔搭社区首页的`我创建的`中找到你的模型。如果想要发布给他人使用，记得写`README`文档。
```bash
# 推送原始量化模型
CUDA_VISIBLE_DEVICES=0 swift export \
--model_type qwen1half-7b-chat \
--model_id_or_path qwen1half-7b-chat-gptq-int4 \
--push_to_hub true \
--hub_model_id qwen1half-7b-chat-gptq-int4 \
--hub_token '<your-sdk-token>'
# 推送lora增量模型
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>'
# 推送merged模型
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>' \
--merge_lora true
# 推送量化后模型
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>' \
--merge_lora true \
--quant_bits 4
```

269
swift/README_EN.md Normal file
View File

@ -0,0 +1,269 @@
# SWIFT (Scalable lightWeight Infrastructure for Fine-Tuning)
## 📖 Table of Contents
- [Introduction](#-introduction)
- [News](#-news)
- [Swift finetune](#%EF%B8%8F-installation-and-use-of-the-swift-finetune-framework)
- [Swift quantification](#-quantify-large-models)
- [Model inference and pushes](#-model-inference)
## 📝 Introduction
SWIFT supports the training, inference, evaluation and deployment of nearly 200 LLMs and MLLMs (multimodal large models). Developers can directly apply the SWIFT framework to their own research and production environments, covering the complete pipeline from model training and evaluation to application. In addition to the lightweight training solutions provided by [PEFT](https://github.com/huggingface/peft), SWIFT also provides a complete library of adapters to support the latest training techniques, such as NEFTune, LoRA+, LLaMA-PRO, etc.; this adapter library can be used directly in your own custom workflows without the training scripts. At the same time, SWIFT is also expanding to other modalities, and currently supports AnimateDiff full-parameter training and LoRA training.
Our project uses its own custom [dataset](https://github.com/SmartFlowAI/EmoLLM/blob/main/datasets), converts it into the jsonl format sketched below (see the SWIFT code section), and fine-tunes with SWIFT (fine-tuning of Qwen-7b-chat is already complete).
SWIFT has a rich documentation system; if you have any questions about using it, please check [here](https://github.com/modelscope/swift/tree/main/docs/source/LLM).
You can try the SWIFT web-ui in the [Huggingface space](https://huggingface.co/spaces/tastelikefeet/swift) and the [ModelScope studio](https://www.modelscope.cn/studios/iic/Scalable-lightWeight-Infrastructure-for-Fine-Tuning/summary).
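For reference, each line of the converted jsonl used for fine-tuning follows the shape produced by `swift/src/revert.py` in this commit; the snippet below is only a sketch with placeholder values, not the canonical SWIFT dataset schema:
```python
# One record of the converted jsonl (field names follow swift/src/revert.py; values are placeholders)
record = {
    "query": "<current user message>",
    "response": "<assistant reply to this turn>",
    "history": [
        {"query": "<earlier user message>", "response": "<earlier assistant reply>"},
    ],
}
```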
## 🎉 News
- 🔥2024.04.26: Completed SWIFT fine-tuning of the qwen-7b-chat model and uploaded it to [ModelScope](https://www.modelscope.cn/models/monbear/qwen-7b-chat-lora/summary).
- 🔥2024.04.27: Completed quantization of the fine-tuned qwen-7b-chat model and uploaded it to [ModelScope](https://www.modelscope.cn/models/monbear/qwen1half-7b-chat-lora/summary).
- 🔥2024.04.29: Won first prize in the [AI 赋能大学计划“全国高校行”](https://mp.weixin.qq.com/s/yyaulQ1wBzKq5cXaGl2Wag) program.
## 🛠️ Installation and use of the SWIFT finetune framework
### <u>Environment preparation</u>
GPU devices: A10, 3090, V100, A100 are acceptable.
SWIFT runs in a Python environment. Please make sure your Python version is higher than 3.8.
Here we set up the experimental environment: create a virtual environment and install ms-swift and its related dependencies.
```bash
# Set the global pip index mirror (to speed up downloads)
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/
# Install ms-swift
git clone https://github.com/modelscope/swift.git
cd swift
pip install -e '.[llm]'
# If you want to use DeepSpeed.
pip install deepspeed -U
# If you want to use qlora training based on auto_gptq. (Recommended; works better than bnb)
# Models that support auto_gptq: `https://github.com/modelscope/swift/blob/main/docs/source/LLM/支持的模型和数据集.md#模型`
# auto_gptq versions must match your CUDA version; pick the right one following `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U
# If you want to use BNB-based Qlora training.
pip install bitsandbytes -U
# Environment alignment (usually not needed; if you hit errors, you can run the following commands, the repository is tested against the latest environment)
pip install -r requirements/framework.txt -U
pip install -r requirements/llm.txt -U
```
### <u>Fine-tune large models</u>
#### Fine-tuning using python
```python
# Experimental environment: A10, 3090, V100, ...
# 20GB GPU memory
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
from swift.llm import (
    DatasetName, InferArguments, ModelType, SftArguments,
    infer_main, sft_main, app_ui_main
)

model_type = ModelType.qwen_7b_chat
sft_args = SftArguments(
    model_type=model_type,
    dataset=[f'{DatasetName.blossom_math_zh}#2000'],
    output_dir='output')
result = sft_main(sft_args)
best_model_checkpoint = result['best_model_checkpoint']
print(f'best_model_checkpoint: {best_model_checkpoint}')
torch.cuda.empty_cache()

infer_args = InferArguments(
    ckpt_dir=best_model_checkpoint,
    load_dataset_config=True)
# merge_lora(infer_args, device_map='cpu')
result = infer_main(infer_args)
torch.cuda.empty_cache()

app_ui_main(infer_args)
```
#### Use CLI commands to fine-tune
```bash
# Experimental environment: A10, 3090, V100, ...
# 20GB GPU memory
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# Use your own dataset (we use our own conversation dataset aiwei.jsonl here)
CUDA_VISIBLE_DEVICES=0 swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset chatml.jsonl \
--output_dir output
# Use DDP
# Experimental environment: 2 * 3090
# 2 * 23GB GPU memory
CUDA_VISIBLE_DEVICES=0,1 \
NPROC_PER_NODE=2 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# Multi-machine multi-card
# node0
CUDA_VISIBLE_DEVICES=0,1,2,3 \
NNODES=2 \
NODE_RANK=0 \
MASTER_ADDR=127.0.0.1 \
NPROC_PER_NODE=4 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
# node1
CUDA_VISIBLE_DEVICES=0,1,2,3 \
NNODES=2 \
NODE_RANK=1 \
MASTER_ADDR=xxx.xxx.xxx.xxx \
NPROC_PER_NODE=4 \
swift sft \
--model_id_or_path qwen/Qwen-7B-Chat \
--dataset AI-ModelScope/blossom-math-v2 \
--output_dir output
```
To lower the barrier to entry, swift also provides a [web UI for training and inference](https://github.com/modelscope/swift/blob/main/docs/source/GetStarted/%E7%95%8C%E9%9D%A2%E8%AE%AD%E7%BB%83%E6%8E%A8%E7%90%86.md). There are also ready-made [sh scripts](https://github.com/modelscope/swift/blob/main/examples/pytorch/llm/scripts/qwen1half_7b_chat_awq/lora). For more details, see swift's [official documentation](https://github.com/modelscope/swift/blob/main/docs/source) on GitHub.
## 📃 Quantify large models
SWIFT supports quantizing models with AWQ, GPTQ, BNB, HQQ, and EETQ. AWQ and GPTQ quantization supports vLLM inference acceleration; it requires a calibration dataset and gives better quantization quality, but quantizes more slowly. BNB, HQQ, and EETQ need no calibration data and quantize faster. All five methods support QLoRA fine-tuning.
AWQ and GPTQ quantization is performed with `swift export`, while BNB, HQQ, and EETQ can be applied on the fly during sft and infer.
In terms of vLLM inference-acceleration support, AWQ and GPTQ are recommended. In terms of quantization quality, AWQ, HQQ, and GPTQ are recommended. In terms of quantization speed, HQQ is recommended.
Here we recommend AWQ quantization for QLoRA fine-tuning.
### Environment preparation
GPU devices: A10, 3090, V100, A100 are acceptable.
```bash
# Quantize with awq:
# autoawq versions must match your CUDA version; pick the right one following `https://github.com/casper-hansen/AutoAWQ`
pip install autoawq -U
# Quantize with gptq:
# auto_gptq versions must match your CUDA version; pick the right one following `https://github.com/PanQiWei/AutoGPTQ#quick-installation`
pip install auto_gptq -U
# Quantize with bnb:
pip install bitsandbytes -U
# Quantize with hqq:
# Requires transformers > 4.40; install from source
pip install git+https://github.com/huggingface/transformers
pip install hqq
# If you want it to be compatible with training, install peft from source
pip install git+https://github.com/huggingface/peft.git
# Quantize with eetq:
# Requires transformers > 4.40; install from source
pip install git+https://github.com/huggingface/transformers
# Refer to https://github.com/NetEase-FuXi/EETQ
git clone https://github.com/NetEase-FuXi/EETQ.git
cd EETQ/
git submodule update --init --recursive
pip install .
# If you want to be compatible with training, you need to install PEFT from the source code
pip install git+https://github.com/huggingface/peft.git
# Environment alignment (usually not needed; if you hit errors, you can run the following commands, the repository is tested against the latest environment)
pip install -r requirements/framework.txt -U
pip install -r requirements/llm.txt -U
```
### <u>Quantize the fine-tuned model</u>
```bash
# Use 'alpaca-zh alpaca-en sharegpt-gpt4-mini' as the calibration dataset
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx' \
--merge_lora true --quant_bits 4 \
--dataset alpaca-zh alpaca-en sharegpt-gpt4-mini --quant_method awq
# Use the same dataset as was used for fine-tuning as the calibration dataset
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx' \
--merge_lora true --quant_bits 4 \
--load_dataset_config true --quant_method awq
```
## 🔥 Model inference
### Run inference with the fine-tuned model
```bash
# The AWQ/GPTQ quantization model supports VLLM inference acceleration. Model deployment is also supported.
CUDA_VISIBLE_DEVICES=0 swift infer --ckpt_dir 'output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx-merged-awq-int4'
```
### Inference examples
```text
<<< 我真的能改变自己吗
当然可以💖!每个人都有改变自己生活轨迹的能力,这需要我们有决心和行动力。首先,你可以尝试从小事做起,比如设定一个健康的生活习惯目标,如每天定时运动或保持良好的饮食习惯。然后,你可以尝试加入一些支持性的社交群体,与他人分享你的进步和挑战,这有助于建立自信并获得他人的鼓励与支持。
--------------------------------------------------
<<< xiexieni
亲爱的,你的感谢💖让我感到温暖。你的积极态度让我深信你有能力去改变和提升自己。请记住,每个人都有自己的节奏和成长过程,不必与他人比较。我们可以一起设定一些小目标,并在实现它们的过程中互相鼓励💪。
--------------------------------------------------
<<< 你叫什么
我是心理健康小分队艾薇知心大姐姐💖。我是一个基于人工智能的聊天机器人,可以提供信息、建议和陪伴。如果你有任何疑问或需要帮助,随时可以向我提问或者分享你的感受🌈。
--------------------------------------------------
```
### Model push
- If you want to push your fine-tuned model to your own ModelScope hub, you can use the commands below. Afterwards, you can find your model under `我创建的` (created by me) on the ModelScope homepage. If you want to publish it for others to use, remember to write a README for it.
```bash
# Push the original quantization model
CUDA_VISIBLE_DEVICES=0 swift export \
--model_type qwen1half-7b-chat \
--model_id_or_path qwen1half-7b-chat-gptq-int4 \
--push_to_hub true \
--hub_model_id qwen1half-7b-chat-gptq-int4 \
--hub_token '<your-sdk-token>'
# Push the LoRA incremental model
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>'
# Push the merged model
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>' \
--merge_lora true
# Push the quantized model
CUDA_VISIBLE_DEVICES=0 swift export \
--ckpt_dir output/qwen1half-4b-chat/vx-xxx/checkpoint-xxx \
--push_to_hub true \
--hub_model_id qwen1half-4b-chat-lora \
--hub_token '<your-sdk-token>' \
--merge_lora true \
--quant_bits 4
```

4217
swift/data/aiwei.jsonl Normal file

File diff suppressed because it is too large

36
swift/src/revert.py Normal file
View File

@ -0,0 +1,36 @@
import json
import jsonlines
def convert_json_to_jsonl(json_file, jsonl_file):
    with open(json_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Convert the raw data into the query/response/history format
    converted_data = []
    for conversation_data in data:
        for i, conversation in enumerate(conversation_data["conversation"]):
            history = []
            if i == 0:
                query = conversation["input"]
                response = conversation["output"]
                # history = []
            else:
                history.append({"query": conversation["input"], "response": conversation["output"]})
            converted_data.append({
                "query": query,
                "response": response,
                "history": history
            })
    # Earlier variant writing to a fixed file name, kept commented out:
    # with open('converted.jsonl', 'w', encoding='utf-8') as f:
    #     for item in converted_data:
    #         f.write(json.dumps(item, ensure_ascii=False) + '\n')

    # Write the converted records to the requested JSON Lines file
    with open(jsonl_file, 'w', encoding='utf-8') as f:
        for item in converted_data:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
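
# For reference: the aiwei.json consumed by convert_json_to_jsonl is expected to be a
# list of conversations shaped roughly as follows (inferred from the field accesses
# above; the values are placeholders):
# [
#     {"conversation": [
#         {"input": "<user message, turn 1>", "output": "<assistant reply, turn 1>"},
#         {"input": "<user message, turn 2>", "output": "<assistant reply, turn 2>"}
#     ]}
# ]
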
convert_json_to_jsonl('aiwei.json','aiwei.jsonl')

View File

@ -0,0 +1,10 @@
from modelscope.hub.api import HubApi

YOUR_ACCESS_TOKEN = ''  # fill in your ModelScope access token
api = HubApi()
api.login(YOUR_ACCESS_TOKEN)
api.push_model(
    model_id="",  # your_name/model_id
    model_dir="./merged"  # local model directory; it must contain configuration.json
)

View File

@ -0,0 +1,3 @@
import os
os.system("openxlab model create --model-repo='' -s ./metafile.yml")  # fill in your model-repo name; -s points to your metafile.yml