diff --git a/IOTLLM/SensorAPI/LLMscript/script.py b/IOTLLM/SensorAPI/LLMscript/script.py new file mode 100644 index 0000000..8c023ba --- /dev/null +++ b/IOTLLM/SensorAPI/LLMscript/script.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# @Time : 2024/11/12 16:37 +# @Author : 黄子寒 +# @Email : 1064071566@qq.com +# @File : script.py +# @Project : EmoLLM +import requests + + +def query_sensor_data(area_code: str, parameter_type: str) -> dict: + """ + Query the sensor API for the given area and data type. + :param area_code: str, area code (e.g. "A" or "B") + :param parameter_type: str, data type (e.g. "moisture", "temperature", "conductivity") + :return: dict, the decoded JSON response, or {"error": "<message>"} when the request fails + """ + url = f"http://127.0.0.1:8000/sensors/api/{area_code}/{parameter_type}/" + try: + response = requests.get(url, timeout=10) # bounded wait: requests has no default timeout, so a stalled server would hang the caller + response.raise_for_status() # raise on 4xx/5xx so HTTP errors take the error path below + return response.json() # decoded JSON payload + except requests.exceptions.RequestException as e: + return {"error": str(e)} + + +# Example usage: can be invoked through LLM function calling +if __name__ == "__main__": + import sys + + # Read the arguments from the command line + if len(sys.argv) < 3: + print("请提供区域代码和数据类型参数") + else: + area_code = sys.argv[1] + parameter_type = sys.argv[2] + result = query_sensor_data(area_code, parameter_type) + print(result) diff --git a/IOTLLM/SensorAPI/SensorAPI/__init__.py b/IOTLLM/SensorAPI/SensorAPI/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/IOTLLM/SensorAPI/SensorAPI/asgi.py b/IOTLLM/SensorAPI/SensorAPI/asgi.py new file mode 100644 index 0000000..52809bb --- /dev/null +++ b/IOTLLM/SensorAPI/SensorAPI/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for SensorAPI project. + +It exposes the ASGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SensorAPI.settings") + +application = get_asgi_application() diff --git a/IOTLLM/SensorAPI/SensorAPI/settings.py b/IOTLLM/SensorAPI/SensorAPI/settings.py new file mode 100644 index 0000000..e3fbf4d --- /dev/null +++ b/IOTLLM/SensorAPI/SensorAPI/settings.py @@ -0,0 +1,124 @@ +""" +Django settings for SensorAPI project. + +Generated by 'django-admin startproject' using Django 5.1.2. + +For more information on this file, see +https://docs.djangoproject.com/en/5.1/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.1/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = "django-insecure-0+#$1#@+&8$+y#f%0q!^kcz-+5&nkqhaluu*3mv8fa9t793u=z" + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = [] + + +# Application definition + +INSTALLED_APPS = [ + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", + 'sensors', +] + +MIDDLEWARE = [ + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", +] + +ROOT_URLCONF = "SensorAPI.urls" + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ], + }, + }, +] + +WSGI_APPLICATION = "SensorAPI.wsgi.application" + + +# Database +# https://docs.djangoproject.com/en/5.1/ref/settings/#databases + +DATABASES = { + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", + } +} + + +# Password validation +# https://docs.djangoproject.com/en/5.1/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", + }, + { + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.1/topics/i18n/ + +LANGUAGE_CODE = "en-us" + +TIME_ZONE = "UTC" + +USE_I18N = True + +USE_TZ = 
True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.1/howto/static-files/ + +STATIC_URL = "static/" + +# Default primary key field type +# https://docs.djangoproject.com/en/5.1/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/IOTLLM/SensorAPI/SensorAPI/urls.py b/IOTLLM/SensorAPI/SensorAPI/urls.py new file mode 100644 index 0000000..331affe --- /dev/null +++ b/IOTLLM/SensorAPI/SensorAPI/urls.py @@ -0,0 +1,25 @@ +""" +URL configuration for SensorAPI project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.1/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" + +from django.contrib import admin +from django.urls import path + +from django.urls import include, path + +urlpatterns = [ + path('sensors/', include('sensors.urls')), +] diff --git a/IOTLLM/SensorAPI/SensorAPI/wsgi.py b/IOTLLM/SensorAPI/SensorAPI/wsgi.py new file mode 100644 index 0000000..ddda36d --- /dev/null +++ b/IOTLLM/SensorAPI/SensorAPI/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for SensorAPI project. + +It exposes the WSGI callable as a module-level variable named ``application``. 
+ +For more information on this file, see +https://docs.djangoproject.com/en/5.1/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SensorAPI.settings") + +application = get_wsgi_application() diff --git a/IOTLLM/SensorAPI/__init__.py b/IOTLLM/SensorAPI/__init__.py new file mode 100644 index 0000000..edbffc0 --- /dev/null +++ b/IOTLLM/SensorAPI/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# @Time : 2024/11/11 19:25 +# @Author : 黄子寒 +# @Email : 1064071566@qq.com +# @File : __init__.py +# @Project : EmoLLM diff --git a/IOTLLM/SensorAPI/manage.py b/IOTLLM/SensorAPI/manage.py new file mode 100644 index 0000000..60339f2 --- /dev/null +++ b/IOTLLM/SensorAPI/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "SensorAPI.settings") + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" + ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/IOTLLM/SensorAPI/sensors/__init__.py b/IOTLLM/SensorAPI/sensors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/IOTLLM/SensorAPI/sensors/admin.py b/IOTLLM/SensorAPI/sensors/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. 
diff --git a/IOTLLM/SensorAPI/sensors/apps.py b/IOTLLM/SensorAPI/sensors/apps.py new file mode 100644 index 0000000..a9870f8 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class SensorsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "sensors" diff --git a/IOTLLM/SensorAPI/sensors/management/__init__.py b/IOTLLM/SensorAPI/sensors/management/__init__.py new file mode 100644 index 0000000..dd291da --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/management/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# @Time : 2024/11/11 20:03 +# @Author : 黄子寒 +# @Email : 1064071566@qq.com +# @File : __init__.py +# @Project : EmoLLM diff --git a/IOTLLM/SensorAPI/sensors/management/commands/__init__.py b/IOTLLM/SensorAPI/sensors/management/commands/__init__.py new file mode 100644 index 0000000..dd291da --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/management/commands/__init__.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 -*- +# @Time : 2024/11/11 20:03 +# @Author : 黄子寒 +# @Email : 1064071566@qq.com +# @File : __init__.py +# @Project : EmoLLM diff --git a/IOTLLM/SensorAPI/sensors/management/commands/populate_data.py b/IOTLLM/SensorAPI/sensors/management/commands/populate_data.py new file mode 100644 index 0000000..a5c88f6 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/management/commands/populate_data.py @@ -0,0 +1,32 @@ +# sensors/management/commands/populate_data.py + +import random +from django.core.management.base import BaseCommand +from sensors.models import Sensor, SensorData +from django.utils import timezone + +class Command(BaseCommand): + help = "Populate database with sample sensor data for areas A to Z" + + def handle(self, *args, **kwargs): + Sensor.objects.all().delete() + SensorData.objects.all().delete() + + areas = [chr(i) for i in range(ord('A'), ord('Z') + 1)] + parameter_types = ['moisture', 'temperature', 'conductivity'] + + for area in areas: + num_sensors = 
random.randint(2, 5) # 每个区域 2 到 5 个传感器 + for _ in range(num_sensors): + sensor = Sensor.objects.create(area_code=area) + # 为每个传感器创建参数数据 + for param in parameter_types: + value = round(random.uniform(0.1, 100.0), 2) + SensorData.objects.create( + sensor=sensor, + parameter_type=param, + value=value, + timestamp=timezone.now() + ) + + self.stdout.write(self.style.SUCCESS('Successfully populated sample data for areas A to Z')) diff --git a/IOTLLM/SensorAPI/sensors/migrations/0001_initial.py b/IOTLLM/SensorAPI/sensors/migrations/0001_initial.py new file mode 100644 index 0000000..d2e1395 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/migrations/0001_initial.py @@ -0,0 +1,32 @@ +# Generated by Django 5.1.2 on 2024-11-11 12:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [] + + operations = [ + migrations.CreateModel( + name="SensorData", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("area_code", models.CharField(max_length=10)), + ("moisture", models.DecimalField(decimal_places=2, max_digits=5)), + ("temperature", models.DecimalField(decimal_places=2, max_digits=5)), + ("conductivity", models.DecimalField(decimal_places=2, max_digits=5)), + ("timestamp", models.DateTimeField(auto_now_add=True)), + ], + ), + ] diff --git a/IOTLLM/SensorAPI/sensors/migrations/__init__.py b/IOTLLM/SensorAPI/sensors/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/IOTLLM/SensorAPI/sensors/models.py b/IOTLLM/SensorAPI/sensors/models.py new file mode 100644 index 0000000..02cd6a3 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/models.py @@ -0,0 +1,18 @@ +from django.db import models +import uuid + +class Sensor(models.Model): + uuid = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + area_code = models.CharField(max_length=10) + + def __str__(self): + return f"Sensor 
{self.uuid} in Area {self.area_code}" + +class SensorData(models.Model): + sensor = models.ForeignKey(Sensor, on_delete=models.CASCADE, related_name='data', null=True) # 暂时允许 null + parameter_type = models.CharField(max_length=20, default='moisture') # 提供默认值 + value = models.DecimalField(max_digits=10, decimal_places=2) + timestamp = models.DateTimeField(auto_now_add=True) + + def __str__(self): + return f"Data from Sensor {self.sensor.uuid if self.sensor else 'N/A'} at {self.timestamp}" diff --git a/IOTLLM/SensorAPI/sensors/tests.py b/IOTLLM/SensorAPI/sensors/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/IOTLLM/SensorAPI/sensors/urls.py b/IOTLLM/SensorAPI/sensors/urls.py new file mode 100644 index 0000000..b404787 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/urls.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# @Time : 2024/11/11 18:56 +# @Author : 黄子寒 +# @Email : 1064071566@qq.com +# @File : urls.py +# @Project : EmoLLM +from django.urls import path +from .views import get_sensor_data + +urlpatterns = [ + path('api/<str:area_code>/<str:parameter_type>/', get_sensor_data, name='get_sensor_data'), +] \ No newline at end of file diff --git a/IOTLLM/SensorAPI/sensors/views.py b/IOTLLM/SensorAPI/sensors/views.py new file mode 100644 index 0000000..39e5a22 --- /dev/null +++ b/IOTLLM/SensorAPI/sensors/views.py @@ -0,0 +1,36 @@ +# sensors/views.py + +from django.http import JsonResponse + +from .models import Sensor + + +def get_sensor_data(request, area_code, parameter_type): + try: + sensors = Sensor.objects.filter(area_code=area_code) + if not sensors.exists(): + return JsonResponse({'error': 'No sensors found for this area code'}, status=404) + + data_list = [] + for sensor in sensors: + # 获取该传感器最新的指定参数的数据 + data = sensor.data.filter(parameter_type=parameter_type).order_by('-timestamp').first() + if data: + data_list.append({ + 'sensor_uuid': 
str(sensor.uuid), + 'value': float(data.value), + 'timestamp': data.timestamp.isoformat() + }) + + if not data_list: + return JsonResponse({'error': f'No {parameter_type} data available for this area'}, status=404) + + response = { + 'area_code': area_code, + 'parameter_type': parameter_type, + 'sensors': data_list + } + + return JsonResponse(response) + except Exception as e: + return JsonResponse({'error': str(e)}, status=500) diff --git a/generate_data/EC_process/Embedding_merge.py b/IOTLLM/generate_data/EC_process/Embedding_merge.py similarity index 100% rename from generate_data/EC_process/Embedding_merge.py rename to IOTLLM/generate_data/EC_process/Embedding_merge.py diff --git a/generate_data/EC_process/Embedding_similarity.py b/IOTLLM/generate_data/EC_process/Embedding_similarity.py similarity index 100% rename from generate_data/EC_process/Embedding_similarity.py rename to IOTLLM/generate_data/EC_process/Embedding_similarity.py diff --git a/generate_data/EC_process/LDArec.py b/IOTLLM/generate_data/EC_process/LDArec.py similarity index 100% rename from generate_data/EC_process/LDArec.py rename to IOTLLM/generate_data/EC_process/LDArec.py diff --git a/generate_data/EC_process/Sensor_QA.py b/IOTLLM/generate_data/EC_process/Sensor_QA.py similarity index 100% rename from generate_data/EC_process/Sensor_QA.py rename to IOTLLM/generate_data/EC_process/Sensor_QA.py diff --git a/generate_data/EC_process/SparkApi.py b/IOTLLM/generate_data/EC_process/SparkApi.py similarity index 100% rename from generate_data/EC_process/SparkApi.py rename to IOTLLM/generate_data/EC_process/SparkApi.py diff --git a/generate_data/EC_process/api_test.py b/IOTLLM/generate_data/EC_process/api_test.py similarity index 100% rename from generate_data/EC_process/api_test.py rename to IOTLLM/generate_data/EC_process/api_test.py diff --git a/generate_data/EC_process/chinese_stopwords.txt b/IOTLLM/generate_data/EC_process/chinese_stopwords.txt similarity index 100% rename from 
generate_data/EC_process/chinese_stopwords.txt rename to IOTLLM/generate_data/EC_process/chinese_stopwords.txt diff --git a/generate_data/EC_process/custom_dict.txt b/IOTLLM/generate_data/EC_process/custom_dict.txt similarity index 100% rename from generate_data/EC_process/custom_dict.txt rename to IOTLLM/generate_data/EC_process/custom_dict.txt diff --git a/generate_data/EC_process/extend_QA.py b/IOTLLM/generate_data/EC_process/extend_QA.py similarity index 100% rename from generate_data/EC_process/extend_QA.py rename to IOTLLM/generate_data/EC_process/extend_QA.py diff --git a/generate_data/EC_process/gen_QA.py b/IOTLLM/generate_data/EC_process/gen_QA.py similarity index 97% rename from generate_data/EC_process/gen_QA.py rename to IOTLLM/generate_data/EC_process/gen_QA.py index 4314422..70a59bc 100644 --- a/generate_data/EC_process/gen_QA.py +++ b/IOTLLM/generate_data/EC_process/gen_QA.py @@ -30,7 +30,7 @@ with open("../processPDF/cleaned_data.txt", "r", encoding="utf-8") as f: cleaned_text = f.read() -# 自定义分割函数,按最大100字以内的句子段落 +# 自定义分割函数,按最大300字以内的句子段落 def split_text_to_sentences(text, max_length=300): sentences = re.split('(?<=。)', text) grouped_sentences = [] @@ -100,7 +100,7 @@ def parse_multiple_qa(answer_text): # 迭代限制,防止API额度过大 def checklen(text): - while len(text) > 8000: # 限制在8000字符以内 + while len(text) > 80000: del text[0] return text diff --git a/generate_data/EC_process/jsonl2json.py b/IOTLLM/generate_data/EC_process/jsonl2json.py similarity index 100% rename from generate_data/EC_process/jsonl2json.py rename to IOTLLM/generate_data/EC_process/jsonl2json.py diff --git a/generate_data/EC_process/output/train_expanded_part2.jsonl b/IOTLLM/generate_data/EC_process/output/train_expanded_part2.jsonl similarity index 100% rename from generate_data/EC_process/output/train_expanded_part2.jsonl rename to IOTLLM/generate_data/EC_process/output/train_expanded_part2.jsonl diff --git a/generate_data/EC_process/processPDF/OCR.py 
b/IOTLLM/generate_data/EC_process/processPDF/OCR.py similarity index 94% rename from generate_data/EC_process/processPDF/OCR.py rename to IOTLLM/generate_data/EC_process/processPDF/OCR.py index d3ffb56..8684dac 100644 --- a/generate_data/EC_process/processPDF/OCR.py +++ b/IOTLLM/generate_data/EC_process/processPDF/OCR.py @@ -45,6 +45,6 @@ for img_path in image_list: f.write(f"{word}\n") - print(f"Word: {word}, Confidence: {confidence}") + print(f" {word}, C: {confidence}") print(f"{txt_file_path}") diff --git a/generate_data/EC_process/processPDF/PDF2Pic.py b/IOTLLM/generate_data/EC_process/processPDF/PDF2Pic.py similarity index 96% rename from generate_data/EC_process/processPDF/PDF2Pic.py rename to IOTLLM/generate_data/EC_process/processPDF/PDF2Pic.py index 5b6e79a..c47180d 100644 --- a/generate_data/EC_process/processPDF/PDF2Pic.py +++ b/IOTLLM/generate_data/EC_process/processPDF/PDF2Pic.py @@ -33,7 +33,7 @@ for page_number in range(len(pdf_document)): image_path = os.path.join(output_image_dir, f"{page_number + 1}.png") pix.save(image_path) - print(f"Saved {image_path}") + print(f" {image_path}") pdf_document.close() diff --git a/generate_data/EC_process/processPDF/mergeTXT.py b/IOTLLM/generate_data/EC_process/processPDF/mergeTXT.py similarity index 100% rename from generate_data/EC_process/processPDF/mergeTXT.py rename to IOTLLM/generate_data/EC_process/processPDF/mergeTXT.py diff --git a/generate_data/EC_process/process_missing_QA.py b/IOTLLM/generate_data/EC_process/process_missing_QA.py similarity index 100% rename from generate_data/EC_process/process_missing_QA.py rename to IOTLLM/generate_data/EC_process/process_missing_QA.py diff --git a/generate_data/EC_process/topic_model.py b/IOTLLM/generate_data/EC_process/topic_model.py similarity index 97% rename from generate_data/EC_process/topic_model.py rename to IOTLLM/generate_data/EC_process/topic_model.py index 0bb1825..d8b6d21 100644 --- a/generate_data/EC_process/topic_model.py +++ 
b/IOTLLM/generate_data/EC_process/topic_model.py @@ -21,7 +21,7 @@ def load_qa_data(file_path): # 文本预处理 def preprocess_text(text): - stop_words = set(stopwords.words('english')) + stop_words = set(stopwords.words('chinese')) tokens = word_tokenize(text.lower()) tokens = [word for word in tokens if word.isalnum() and word not in stop_words] return tokens diff --git a/generate_data/OCR.md b/IOTLLM/generate_data/OCR.md similarity index 100% rename from generate_data/OCR.md rename to IOTLLM/generate_data/OCR.md diff --git a/generate_data/check.py b/IOTLLM/generate_data/check.py similarity index 99% rename from generate_data/check.py rename to IOTLLM/generate_data/check.py index 2557be4..2418d31 100644 --- a/generate_data/check.py +++ b/IOTLLM/generate_data/check.py @@ -39,7 +39,7 @@ def check(filepath): if __name__ == '__main__': - dir_path = '.' + dir_path = '.' paths = get_all_file_paths(dir_path, suffix='.json') for path in paths: print(check(filepath=path)) \ No newline at end of file diff --git a/generate_data/config.yml b/IOTLLM/generate_data/config.yml similarity index 100% rename from generate_data/config.yml rename to IOTLLM/generate_data/config.yml diff --git a/generate_data/ernie_gen_data.py b/IOTLLM/generate_data/ernie_gen_data.py similarity index 100% rename from generate_data/ernie_gen_data.py rename to IOTLLM/generate_data/ernie_gen_data.py diff --git a/generate_data/main.ipynb b/IOTLLM/generate_data/main.ipynb similarity index 100% rename from generate_data/main.ipynb rename to IOTLLM/generate_data/main.ipynb diff --git a/generate_data/merge_json.py b/IOTLLM/generate_data/merge_json.py similarity index 100% rename from generate_data/merge_json.py rename to IOTLLM/generate_data/merge_json.py diff --git a/generate_data/merge_jsonl.py b/IOTLLM/generate_data/merge_jsonl.py similarity index 97% rename from generate_data/merge_jsonl.py rename to IOTLLM/generate_data/merge_jsonl.py index 7887ab0..444117c 100644 --- a/generate_data/merge_jsonl.py +++ 
b/IOTLLM/generate_data/merge_jsonl.py @@ -23,7 +23,7 @@ def get_all_file_paths(folder_path, file_type='.jsonl'): if __name__ == '__main__': conversion_lis = [] - folder_path = r'./' # python merge_jsonl.py > curr.txt + folder_path = r'./' # python merge_jsonl.py > curr.txt merge_path = folder_path.split('/')[-1] try: diff --git a/generate_data/merge_jsonl_r.py b/IOTLLM/generate_data/merge_jsonl_r.py similarity index 100% rename from generate_data/merge_jsonl_r.py rename to IOTLLM/generate_data/merge_jsonl_r.py diff --git a/generate_data/qwen_gen_data.py b/IOTLLM/generate_data/qwen_gen_data.py similarity index 100% rename from generate_data/qwen_gen_data.py rename to IOTLLM/generate_data/qwen_gen_data.py diff --git a/generate_data/qwen_gen_data_NoBash.py b/IOTLLM/generate_data/qwen_gen_data_NoBash.py similarity index 100% rename from generate_data/qwen_gen_data_NoBash.py rename to IOTLLM/generate_data/qwen_gen_data_NoBash.py diff --git a/generate_data/requirements.txt b/IOTLLM/generate_data/requirements.txt similarity index 100% rename from generate_data/requirements.txt rename to IOTLLM/generate_data/requirements.txt diff --git a/generate_data/run_qwen.bash b/IOTLLM/generate_data/run_qwen.bash similarity index 100% rename from generate_data/run_qwen.bash rename to IOTLLM/generate_data/run_qwen.bash diff --git a/generate_data/tutorial.md b/IOTLLM/generate_data/tutorial.md similarity index 100% rename from generate_data/tutorial.md rename to IOTLLM/generate_data/tutorial.md diff --git a/generate_data/tutorial_EN.md b/IOTLLM/generate_data/tutorial_EN.md similarity index 100% rename from generate_data/tutorial_EN.md rename to IOTLLM/generate_data/tutorial_EN.md diff --git a/generate_data/xinghuo/Readme.md b/IOTLLM/generate_data/xinghuo/Readme.md similarity index 100% rename from generate_data/xinghuo/Readme.md rename to IOTLLM/generate_data/xinghuo/Readme.md diff --git a/generate_data/xinghuo/Readme_EN.md b/IOTLLM/generate_data/xinghuo/Readme_EN.md similarity index 100% 
rename from generate_data/xinghuo/Readme_EN.md rename to IOTLLM/generate_data/xinghuo/Readme_EN.md diff --git a/generate_data/xinghuo/SparkApi.py b/IOTLLM/generate_data/xinghuo/SparkApi.py similarity index 100% rename from generate_data/xinghuo/SparkApi.py rename to IOTLLM/generate_data/xinghuo/SparkApi.py diff --git a/generate_data/xinghuo/gen_Chat.py b/IOTLLM/generate_data/xinghuo/gen_Chat.py similarity index 100% rename from generate_data/xinghuo/gen_Chat.py rename to IOTLLM/generate_data/xinghuo/gen_Chat.py diff --git a/generate_data/xinghuo/gen_data.py b/IOTLLM/generate_data/xinghuo/gen_data.py similarity index 93% rename from generate_data/xinghuo/gen_data.py rename to IOTLLM/generate_data/xinghuo/gen_data.py index 70dbfcd..d6ed950 100644 --- a/generate_data/xinghuo/gen_data.py +++ b/IOTLLM/generate_data/xinghuo/gen_data.py @@ -54,8 +54,8 @@ if __name__ == '__main__': for j in tqdm(range(10)): Input = prompt(i) question = checklen(getText("user",Input)) - SparkApi.answer ="" - SparkApi.main(appid,api_key, api_secret, Spark_url, domain, question) + SparkApi.answer = "" + SparkApi.main(appid, api_key, api_secret, Spark_url, domain, question) getText("assistant", SparkApi.answer) conversations.append(xinghuo_api(SparkApi.answer)) if i % 2 == 0 : diff --git a/generate_data/xinghuo/prompt.py b/IOTLLM/generate_data/xinghuo/prompt.py similarity index 100% rename from generate_data/xinghuo/prompt.py rename to IOTLLM/generate_data/xinghuo/prompt.py diff --git a/generate_data/zhipuai_gen_data.py b/IOTLLM/generate_data/zhipuai_gen_data.py similarity index 100% rename from generate_data/zhipuai_gen_data.py rename to IOTLLM/generate_data/zhipuai_gen_data.py diff --git a/README.md b/README.md index 6d30043..a2d53e9 100644 --- a/README.md +++ b/README.md @@ -234,7 +234,7 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git ### 📌数据构建 -- 请阅读[数据构建指南](generate_data/tutorial.md)查阅 +- 请阅读[数据构建指南](IOTLLM/generate_data/tutorial.md)查阅 - 微调用到的数据集见[datasets](datasets/data.json) ### 
🎨增量预训练、微调指南 diff --git a/README_EN.md b/README_EN.md index 976b339..3eb29da 100644 --- a/README_EN.md +++ b/README_EN.md @@ -235,7 +235,7 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git - Quick coding: [Baby EmoLLM](quick_start/Baby_EmoLLM.ipynb) ### 📌Data Construction -- Please read the [Data Construction Guide ](generate_data/tutorial_EN.md) for reference. +- Please read the [Data Construction Guide ](IOTLLM/generate_data/tutorial_EN.md) for reference. - The dataset used for this fine-tuning can be found at [datasets](datasets/data.json) ### 🎨Incremental Pre-training and Fine-tuning Guide diff --git a/README_JP.md b/README_JP.md index bc488ae..dd0ec90 100644 --- a/README_JP.md +++ b/README_JP.md @@ -225,7 +225,7 @@ git clone https://github.com/SmartFlowAI/EmoLLM.git ### 📌データ構築 -- [データ構築ガイド](generate_data/tutorial_EN.md)を参照してください。 +- [データ構築ガイド](IOTLLM/generate_data/tutorial_EN.md)を参照してください。 - この微調整に使用されたデータセットは[datasets](datasets/data.json)にあります。 diff --git a/requirements.txt b/requirements.txt index 356176c..f469e12 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,4 +35,7 @@ websocket~=0.2.1 websocket-client~=1.6.2 gensim~=4.3.3 pillow~=9.5.0 -natsort~=8.4.0 \ No newline at end of file +natsort~=8.4.0 +jsonlines~=4.0.0 +django~=5.1.2 +scikit-learn~=1.3.2 \ No newline at end of file