前置唤醒词识别优化

2026-01-03 22:24:02 +08:00 · 2026-01-03 22:24:02 +08:00 · 84551d59eb
commit 84551d59eb
parent 14c95ab079
1 changed files with 103 additions and 33 deletions
--- a/core/recorder.py
+++ b/core/recorder.py
@ -16,11 +16,68 @@ import tempfile
 import wave
 from core import fay_core
 from core import interact
+# ===== 新增：用于前置唤醒词句首容错 =====
+import re
+import unicodedata
+
 # 启动时间 (秒)
-_ATTACK = 0.2
+_ATTACK = 0.08 # Lowered from 0.2 so pickup starts earlier and the first part of the wake word is not swallowed

 # 释放时间 (秒)
-_RELEASE = 0.7
+_RELEASE = 0.55 # Slightly shortened from 0.7; stated intent: avoid one utterance being cut into two segments. NOTE(review): a shorter release would normally end a segment sooner, which seems to work against that intent - confirm against where _RELEASE is consumed
+
+# ===== Added: head-of-sentence normalization and matching for the front wake word =====
+_PUNCS = "，。！？!?,.、:：；;“”\"'()（）[]【】<>《》-—…"  # Common punctuation, CJK and ASCII forms alike
+_FILLER_PREFIX = ("嗯", "啊", "呃", "欸", "诶", "喂", "那个", "就是", "然后")  # Common sentence-initial filler words (ASR output tends to prepend them)
+
+def _norm_head(s: str) -> str:
+    """Normalize only the head of a transcript so wake-word matching is tolerant.
+
+    The text is NFKC-normalized and stripped of surrounding whitespace. Then,
+    from the front only, any mix of whitespace, punctuation listed in
+    ``_PUNCS`` and invisible format characters (Unicode category ``Cf``, such
+    as a zero-width space or a BOM) is removed, followed by any stacked filler
+    words from ``_FILLER_PREFIX``. Nothing after the head is rearranged.
+
+    Args:
+        s: Recognized text; ``None`` or an empty string is accepted.
+
+    Returns:
+        The text with its head cleaned up, or ``""`` for empty input.
+    """
+    if not s:
+        return ""
+
+    def _skip_leading_noise(value: str) -> str:
+        # Whitespace, punctuation and zero-width characters can be interleaved
+        # (e.g. "， ，"), so consume them in one pass rather than one class
+        # after another, which would stop at the first change of class.
+        pos = 0
+        while pos < len(value):
+            ch = value[pos]
+            if not (ch.isspace() or ch in _PUNCS or unicodedata.category(ch) == "Cf"):
+                break
+            pos += 1
+        return value[pos:]
+
+    s = _skip_leading_noise(unicodedata.normalize("NFKC", s).strip())
+
+    # Fillers can stack ("嗯，那个…"), so keep peeling until none is left.
+    while True:
+        filler = next((fp for fp in _FILLER_PREFIX if fp and s.startswith(fp)), None)
+        if filler is None:
+            return s
+        s = _skip_leading_noise(s[len(filler):])
+
+def _front_wake_match(text: str, wake_words):
+    """Match a wake word strictly at the start of the normalized ``text``.
+
+    The head of ``text`` is cleaned with ``_norm_head`` first; a wake word that
+    only appears later in the sentence never matches. When several configured
+    wake words match, the longest one wins so that a short word cannot shadow
+    a longer one sharing its prefix.
+
+    Args:
+        text: Recognized text from the ASR engine.
+        wake_words: Iterable of configured wake words; blank entries are
+            ignored and ``None`` is treated as empty.
+
+    Returns:
+        ``(True, wake_word, question)`` on a match, where ``wake_word`` is the
+        configured word (stripped) and ``question`` is the text that follows
+        it with leading separators removed; ``(False, None, "")`` otherwise.
+    """
+    t = _norm_head(text)
+    separators = " \t\r\n" + _PUNCS
+    particles = ("啊", "呀", "呢", "吧", "哈", "哎", "诶", "欸")
+
+    candidates = []
+    for w in wake_words or ():
+        w = w.strip()
+        # _norm_head NFKC-normalizes the text, so compare against the wake
+        # word in the same form while still reporting the configured spelling.
+        key = unicodedata.normalize("NFKC", w)
+        if key:
+            candidates.append((key, w))
+    # Longest first; the sort is stable, so ties keep configuration order.
+    candidates.sort(key=lambda pair: len(pair[0]), reverse=True)
+
+    for key, w in candidates:
+        if not t.startswith(key):
+            continue
+        # Example inputs: "小橄榄，帮我..."  "小橄榄啊 帮我..."
+        rest = t[len(key):].lstrip(separators)
+        # Drop a modal particle only when it stands alone (followed by a
+        # separator or the end of the text). Removing it unconditionally would
+        # eat the first character of real questions such as "哈尔滨天气".
+        if rest[:1] in particles and (len(rest) == 1 or rest[1] in separators):
+            rest = rest[1:].lstrip(separators)
+        return True, w, rest
+
+    return False, None, ""
+


 class Recorder:
@ -141,35 +198,45 @@ class Recorder:
                        self.timer.cancel()  # 取消之前的计时器任务
                        self.timer = threading.Timer(60, self.reset_wakeup_status)  # 重设计时器为60秒
                        self.timer.start()
-                
-                #前置唤醒词模式
-                elif  cfg.config['source']['wake_word_type'] == 'front':
-                    wake_word =  cfg.config['source']['wake_word']
-                    wake_word_list = wake_word.split(',')
-                    wake_up = False
-                    for word in wake_word_list:
-                        if text.startswith(word):
-                            wake_up_word = word
-                            wake_up = True
-                            break
-                    if wake_up:
+
+                # 前置唤醒词模式（严格前置，但句首做容错）
+                elif cfg.config['source']['wake_word_type'] == 'front':
+                    # 读取配置的唤醒词（支持多个）
+                    wake_word = cfg.config['source']['wake_word']
+                    wake_word_list = [w.strip() for w in wake_word.split(',') if w.strip()]
+
+                    matched, wake_up_word, question = _front_wake_match(text, wake_word_list)
+
+                    if matched:
                        util.printInfo(1, self.username, "唤醒成功！")
                        if wsa_server.get_web_instance().is_connected(self.username):
-                            wsa_server.get_web_instance().add_cmd({"panelMsg": "唤醒成功！", "Username" : self.username , 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
+                            wsa_server.get_web_instance().add_cmd({"panelMsg": "唤醒成功！", "Username": self.username,
+                                                                   'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'})
                        if wsa_server.get_instance().is_connected(self.username):
-                            content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "唤醒成功！"}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
+                            content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "唤醒成功！"},
+                                       'Username': self.username,
+                                       'robot': f'http://{cfg.fay_url}:5000/robot/Listening.jpg'}
                            wsa_server.get_instance().add_cmd(content)
-                        #去除唤醒词后语句
-                        question = text#[len(wake_up_word):].lstrip()
-                        self.on_speaking(question)
+
+                        # 在识别到【前置唤醒词】后，发送“去掉唤醒词后的问题”
+                        if question:
+                            self.on_speaking(question)
+                        else:
+                            intt = interact.Interact("auto_play", 2, {'user': self.username, 'text': "在呢，你说？"})
+                            self.__fay.on_interact(intt)
+
                        self.processing = False
                    else:
                        util.printInfo(1, self.username, "[!] 待唤醒！")
                        if wsa_server.get_web_instance().is_connected(self.username):
-                            wsa_server.get_web_instance().add_cmd({"panelMsg": "[!] 待唤醒！", "Username" : self.username , 'robot': f'http://{cfg.fay_url}:5000/robot/Normal.jpg'})
+                            wsa_server.get_web_instance().add_cmd({"panelMsg": "[!] 待唤醒！", "Username": self.username,
+                                                                   'robot': f'http://{cfg.fay_url}:5000/robot/Normal.jpg'})
                        if wsa_server.get_instance().is_connected(self.username):
-                            content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "[!] 待唤醒！"}, 'Username' : self.username, 'robot': f'http://{cfg.fay_url}:5000/robot/Normal.jpg'}
+                            content = {'Topic': 'Unreal', 'Data': {'Key': 'log', 'Value': "[!] 待唤醒！"},
+                                       'Username': self.username,
+                                       'robot': f'http://{cfg.fay_url}:5000/robot/Normal.jpg'}
                            wsa_server.get_instance().add_cmd(content)
+                        self.processing = False

            #非唤醒模式
            else:
@ -220,12 +287,8 @@ class Recorder:
                continue 
            #是否可以拾音,不可以就掉弃录音
            can_listen = True
-            #没有开唤醒，但面板或数字人正在播音时不能拾音
-            if cfg.config['source']['wake_word_enabled'] == False and self.__fay.speaking == True:
-                can_listen = False
-            
-            #普通唤醒模式已经激活，并且面板或数字人正在输出声音时不能拾音
-            if cfg.config['source']['wake_word_enabled'] == True and cfg.config['source']['wake_word_type'] == 'common' and self.wakeup_matched == True and self.__fay.speaking == True:
+            if self.__fay.speaking == True:
+                # 只要数字人/面板在播放TTS，就禁拾音，避免把自己的声音识别成用户输入
                can_listen = False

            if can_listen == False:#掉弃录音
@ -234,7 +297,7 @@ class Recorder:

            #计算音量是否满足激活拾音
            level = audioop.rms(data, 2)
-            if len(self.__history_data) >= 10:#保存激活前的音频，以免信息掉失
+            if len(self.__history_data) >= 20:#保存激活前的音频，以免信息掉失
                self.__history_data.pop(0)
            if len(self.__history_level) >= 500:
                self.__history_level.pop(0)
@ -242,12 +305,19 @@ class Recorder:
            self.__history_level.append(level)
            percentage = level / self.__MAX_LEVEL
            history_percentage = self.__get_history_percentage(30)
+
+            # ===== 改进：阈值平滑变化，避免断句导致唤醒词被截断 =====
+            up_alpha = 0.01  # 环境变吵：慢慢升
+            down_alpha = 0.05   # 环境变安静：也不要瞬间掉
+
            if history_percentage > self.__dynamic_threshold:
-                self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 0.0025
-            elif history_percentage < self.__dynamic_threshold:
-                self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * 1
-            
-           
+                self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * up_alpha
+            else:
+                self.__dynamic_threshold += (history_percentage - self.__dynamic_threshold) * down_alpha
+
+            # 给阈值一个下限，防止过度灵敏
+            self.__dynamic_threshold = max(self.__dynamic_threshold, 0.02)
+
            #激活拾音
            if percentage > self.__dynamic_threshold:
                last_speaking_time = time.time()