2024-01-23 21:25:12 +08:00
|
|
|
|
import SparkApi
|
|
|
|
|
from prompt import *
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
import os

# Credentials for the Spark service; the values are obtained from the console.
# NOTE(review): these credentials are committed in plain text. They are kept
# only as fallbacks so existing runs keep working; they should be rotated and
# supplied through the environment variables read below.
appid = os.environ.get("SPARK_APPID", "f0f73de5")  # APPID from the console
api_secret = os.environ.get(
    "SPARK_API_SECRET", "YzkyYjQwMTU0MGZjMmUzMGE1Y2ZjYzBk"
)  # APISecret from the console
api_key = os.environ.get(
    "SPARK_API_KEY", "5773f6f95563708de994d17b7ea5d414"
)  # APIKey from the console

# Selects the model version; it has to match the endpoint chosen below.
domain = "4.0Ultra"  # pairs with the v4.0 endpoint
# domain = "generalv2"  # v2.0 model

# Address of the cloud service endpoint.
Spark_url = "wss://spark-api.xf-yun.com/v4.0/chat"  # v4.0 endpoint
# Spark_url = "ws://spark-api.xf-yun.com/v2.1/chat"  # v2.0 endpoint
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Shared conversation history. getText() appends {"role": ..., "content": ...}
# dicts to this list and the main script sends it as the request context.
text = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# length = 0
|
|
|
|
|
|
|
|
|
|
def getText(role, content):
    """Append one chat message to the shared history and return the history.

    Args:
        role: Speaker label stored under the "role" key (this script passes
            "user" and "assistant").
        content: Message body stored under the "content" key.

    Returns:
        The module-level ``text`` list itself (not a copy), now ending with
        the newly added message.
    """
    message = {"role": role, "content": content}
    text.append(message)
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getlength(text):
    """Return the total number of characters across all message contents.

    Args:
        text: Iterable of message dicts, each with a "content" entry that
            supports ``len()``.

    Returns:
        The sum of ``len(message["content"])`` over every message; 0 when
        ``text`` is empty.

    Raises:
        KeyError: If a message has no "content" key.
    """
    return sum(len(message["content"]) for message in text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def checklen(text, max_length=8000):
    """Drop the oldest messages until the history fits within ``max_length``.

    The history size is the summed length of every message's "content".
    Messages are removed from the front (oldest first) and the list is
    modified in place.

    Args:
        text: List of message dicts, each with a "content" entry.
        max_length: Largest total content length allowed to remain.
            Defaults to 8000.

    Returns:
        The same ``text`` list object, trimmed. If even the newest message
        alone exceeds ``max_length`` the list ends up empty.
    """
    # Measure each message once, then work out how many leading messages have
    # to go, instead of re-measuring the whole history after every deletion.
    lengths = [len(message["content"]) for message in text]
    total = sum(lengths)
    drop = 0
    while drop < len(lengths) and total > max_length:
        total -= lengths[drop]
        drop += 1
    # A single slice deletion keeps the caller's list object and avoids
    # shifting the remaining elements once per removed message.
    del text[:drop]
    return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Generate 200 samples by sending randomly chosen prompts to the Spark
    # service and appending each converted answer to a JSONL file.
    #
    # NOTE(review): `prompt`, `random`, `ChatGLM3_6B` and `save_jsonl` are not
    # defined or imported by name in this file; presumably they are provided
    # by `from prompt import *` — confirm.
    # NOTE(review): the shared history is cleared only once, so every request
    # carries the earlier prompts and answers (trimmed by checklen) as
    # context. Confirm that this carry-over between samples is intended.
    text.clear()
    file_name = 'train3.jsonl'

    for _ in tqdm(range(200)):
        # Build a prompt from a random template index in 0..16 (inclusive).
        user_prompt = prompt(random.randint(0, 16))
        # Record the request in the history and trim the history to size.
        question = checklen(getText("user", user_prompt))

        # Reset the reply holder before the call; SparkApi.main presumably
        # fills SparkApi.answer with the model's reply.
        SparkApi.answer = ""
        SparkApi.main(appid, api_key, api_secret, Spark_url, domain, question)
        getText("assistant", SparkApi.answer)

        # Persist each sample immediately so a failure part-way through the
        # run does not lose the samples that were already generated.
        save_jsonl(ChatGLM3_6B(SparkApi.answer), file_name)
|
|
|
|
|
|