il y a 4 jours · 8cb8c36e38
--- a/dual_camera_system/__pycache__/ocr_recognizer.cpython-313.pyc
+++ b/dual_camera_system/__pycache__/ocr_recognizer.cpython-313.pyc
--- a/dual_camera_system/config/__init__.py
+++ b/dual_camera_system/config/__init__.py
@@ -11,7 +11,7 @@ from .detection import (
 
				     DETECTION_CONFIG, SAFETY_DETECTION_CONFIG
			
 
				 )
			
 
				 from .ptz import PTZ_CONFIG
			
 
				-from .ocr import OCR_CONFIG
			
 
				+from .ocr import OCR_CONFIG, SEGMENTATION_CONFIG
			
 
				 from .coordinator import COORDINATOR_CONFIG, CALIBRATION_CONFIG
			
 
				 from .event import EVENT_PUSHER_CONFIG, EVENT_LISTENER_CONFIG
			
 
				 from .voice import TTS_CONFIG, AUDIO_PLAYER_CONFIG, VOICE_ANNOUNCER_CONFIG
			
@@ -30,7 +30,7 @@ __all__ = [
 
				     # PTZ
			
 
				     'PTZ_CONFIG',
			
 
				     # OCR
			
 
				-    'OCR_CONFIG',
			
 
				+    'OCR_CONFIG', 'SEGMENTATION_CONFIG',
			
 
				     # 联动与校准
			
 
				     'COORDINATOR_CONFIG', 'CALIBRATION_CONFIG',
			
 
				     # 事件
			
--- a/dual_camera_system/config/__pycache__/__init__.cpython-313.pyc
+++ b/dual_camera_system/config/__pycache__/__init__.cpython-313.pyc
--- a/dual_camera_system/config/__pycache__/ocr.cpython-313.pyc
+++ b/dual_camera_system/config/__pycache__/ocr.cpython-313.pyc
--- a/dual_camera_system/config/ocr.py
+++ b/dual_camera_system/config/ocr.py
@@ -12,3 +12,13 @@ OCR_CONFIG = {
 
				     'temperature': 0.3,             # 温度参数
			
 
				     'timeout': 30,                  # 超时时间(秒)
			
 
				 }
			
 
				+
			
 
				+# 人体分割模型配置
			
 
				+SEGMENTATION_CONFIG = {
			
 
				+    # 模型路径 - 支持 RKNN 格式 (RK3588 平台)
			
 
				+    'model_path': '/home/admin/dsh/testrk3588/yolov8n-seg.rknn',
			
 
				+    'model_type': 'rknn',           # 模型类型: 'rknn'
			
 
				+    'input_size': (640, 640),       # 模型输入尺寸
			
 
				+    'conf_threshold': 0.5,          # 分割置信度阈值
			
 
				+    'use_npu': True,                # 使用 NPU 加速
			
 
				+}
			
--- a/dual_camera_system/ocr_recognizer.py
+++ b/dual_camera_system/ocr_recognizer.py
@@ -8,7 +8,7 @@ import numpy as np
 
				 from typing import List, Optional, Tuple, Dict
			
 
				 from dataclasses import dataclass
			
 
				 
			
 
				-from config import OCR_CONFIG
			
 
				+from config import OCR_CONFIG, SEGMENTATION_CONFIG
			
 
				 
			
 
				 
			
 
				 @dataclass
			
@@ -33,7 +33,7 @@ class PersonInfo:
 
				 
			
 
				 class PersonSegmenter:
			
 
				     """
			
 
				-    人体分割器
			
 
				+    人体分割器 - 使用 RKNN YOLOv8 分割模型
			
 
				     将人体从背景中分割出来
			
 
				     """
			
 
				     
			
@@ -41,22 +41,127 @@ class PersonSegmenter:
 
				         """
			
 
				         初始化分割器
			
 
				         Args:
			
 
				-            use_gpu: 是否使用GPU
			
 
				+            use_gpu: 是否使用GPU (RKNN使用NPU，此参数保留用于兼容)
			
 
				         """
			
 
				         self.use_gpu = use_gpu
			
 
				-        self.segmentor = None
			
 
				+        self.config = SEGMENTATION_CONFIG
			
 
				+        self.input_size = self.config.get('input_size', (640, 640))
			
 
				+        self.conf_threshold = self.config.get('conf_threshold', 0.5)
			
 
				+        self.rknn = None
			
 
				         self._load_model()
			
 
				     
			
 
				     def _load_model(self):
			
 
				-        """加载分割模型"""
			
 
				+        """加载 RKNN 分割模型"""
			
 
				         try:
			
 
				-            # 使用YOLO11分割模型
			
 
				-            from ultralytics import YOLO
			
 
				-            self.segmentor = YOLO('yolo11n-seg.pt')  # YOLO11分割模型
			
 
				-            print("成功加载YOLO11人体分割模型")
			
 
				+            from rknnlite.api import RKNNLite
			
 
				+            
			
 
				+            model_path = self.config.get('model_path', '/home/admin/dsh/testrk3588/yolov8n-seg.rknn')
			
 
				+            self.rknn = RKNNLite()
			
 
				+            
			
 
				+            ret = self.rknn.load_rknn(model_path)
			
 
				+            if ret != 0:
			
 
				+                print(f"[错误] 加载 RKNN 分割模型失败: {model_path}")
			
 
				+                self.rknn = None
			
 
				+                return
			
 
				+            
			
 
				+            # 初始化运行时，使用所有NPU核心
			
 
				+            ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
			
 
				+            if ret != 0:
			
 
				+                print("[错误] 初始化 RKNN 运行时失败")
			
 
				+                self.rknn = None
			
 
				+                return
			
 
				+            
			
 
				+            print(f"成功加载 RKNN 人体分割模型: {model_path}")
			
 
				+        except ImportError:
			
 
				+            print("未安装 rknnlite，无法使用 RKNN 分割模型")
			
 
				+            self.rknn = None
			
 
				         except Exception as e:
			
 
				             print(f"加载分割模型失败: {e}")
			
 
				-            self.segmentor = None
			
 
				+            self.rknn = None
			
 
				+    
			
 
    def _letterbox(self, image: np.ndarray) -> tuple:
        """Letterbox ``image`` into the model input size, keeping aspect ratio.

        The image is scaled by a single factor so that it fits inside
        ``self.input_size`` (unpacked here as ``(height, width)``), then
        centred on a canvas filled with the value 114.

        Args:
            image: Source image; ``image.shape[:2]`` is read as (height, width).
                A 2-D (single-channel) image is broadcast to three channels.

        Returns:
            Tuple ``(canvas, scale, pad_w, pad_h, h0, w0)`` where ``canvas`` is
            the uint8 letterboxed image, ``scale`` the resize factor,
            ``pad_w``/``pad_h`` the left/top padding in pixels and
            ``h0``/``w0`` the original image height/width.
        """
        h0, w0 = image.shape[:2]
        ih, iw = self.input_size
        scale = min(iw / w0, ih / h0)

        # Clamp to one pixel: for an extremely thin ROI the truncated size
        # would be 0, which cv2.resize rejects.
        new_w = max(1, int(w0 * scale))
        new_h = max(1, int(h0 * scale))
        pad_w = (iw - new_w) // 2
        pad_h = (ih - new_h) // 2

        resized = cv2.resize(image, (new_w, new_h))
        if resized.ndim == 2:
            # Grayscale input: add a channel axis so it broadcasts to 3 channels.
            resized = resized[:, :, None]

        canvas = np.full((ih, iw, 3), 114, dtype=np.uint8)
        canvas[pad_h:pad_h + new_h, pad_w:pad_w + new_w] = resized
        return canvas, scale, pad_w, pad_h, h0, w0
			
 
				+    
			
 
				+    def _postprocess_segmentation(self, outputs, scale, pad_w, pad_h, w0, h0):
			
 
				+        """
			
 
				+        处理 YOLOv8 分割模型输出
			
 
				+        YOLOv8-seg 输出格式: [检测输出, 分割输出]
			
 
				+        - 检测输出: (1, 116, 8400) - 包含边界框、类别、掩码系数
			
 
				+        - 分割输出: (1, 32, 160, 160) - 原型掩码
			
 
				+        """
			
 
				+        if not outputs or len(outputs) < 2:
			
 
				+            return None
			
 
				+        
			
 
				+        # 解析输出
			
 
				+        det_output = outputs[0]  # (1, 116, 8400) - 检测输出
			
 
				+        seg_output = outputs[1]  # (1, 32, 160, 160) - 分割原型
			
 
				+        
			
 
				+        # 处理检测输出
			
 
				+        if len(det_output.shape) == 3:
			
 
				+            det_output = det_output[0]  # (116, 8400)
			
 
				+        
			
 
				+        # YOLOv8-seg: 前 84 维是检测 (4 box + 80 classes)，后 32 维是掩码系数
			
 
				+        num_anchors = det_output.shape[1]
			
 
				+        
			
 
				+        best_idx = -1
			
 
				+        best_conf = 0
			
 
				+        
			
 
				+        # 寻找最佳人体检测 (class 0 = person)
			
 
				+        for i in range(num_anchors):
			
 
				+            # 类别概率 (索引 4-84 是80个类别)
			
 
				+            class_probs = det_output[4:84, i]
			
 
				+            person_conf = float(class_probs[0])  # class 0 = person
			
 
				+            
			
 
				+            if person_conf > self.conf_threshold and person_conf > best_conf:
			
 
				+                best_conf = person_conf
			
 
				+                best_idx = i
			
 
				+        
			
 
				+        if best_idx < 0:
			
 
				+            return None
			
 
				+        
			
 
				+        # 获取掩码系数 (后32维)
			
 
				+        mask_coeffs = det_output[84:116, best_idx]  # (32,)
			
 
				+        
			
 
				+        # 处理分割原型 (1, 32, 160, 160) -> (32, 160, 160)
			
 
				+        if len(seg_output.shape) == 4:
			
 
				+            seg_output = seg_output[0]
			
 
				+        
			
 
				+        # 计算最终掩码: mask = coeffs @ prototypes
			
 
				+        # seg_output: (32, 160, 160), mask_coeffs: (32,)
			
 
				+        mask = np.zeros((160, 160), dtype=np.float32)
			
 
				+        for i in range(32):
			
 
				+            mask += mask_coeffs[i] * seg_output[i]
			
 
				+        
			
 
				+        # Sigmoid 激活
			
 
				+        mask = 1 / (1 + np.exp(-mask))
			
 
				+        
			
 
				+        # 移除 padding 并缩放到原始尺寸
			
 
				+        mask = (mask > 0.5).astype(np.uint8) * 255
			
 
				+        
			
 
				+        # 裁剪掉 letterbox 添加的 padding
			
 
				+        mask_h, mask_w = mask.shape
			
 
				+        pad_h_mask = int(pad_h * mask_h / self.input_size[0])  # 160/640 = 0.25
			
 
				+        pad_w_mask = int(pad_w * mask_w / self.input_size[1])
			
 
				+        new_h_mask = int((mask_h - 2 * pad_h_mask))
			
 
				+        new_w_mask = int((mask_w - 2 * pad_w_mask))
			
 
				+        
			
 
				+        if new_h_mask > 0 and new_w_mask > 0:
			
 
				+            mask = mask[pad_h_mask:pad_h_mask+new_h_mask, pad_w_mask:pad_w_mask+new_w_mask]
			
 
				+        
			
 
				+        # 缩放到原始 ROI 尺寸
			
 
				+        mask = cv2.resize(mask, (w0, h0))
			
 
				+        
			
 
				+        return mask
			
 
				     
			
 
				     def segment_person(self, frame: np.ndarray, 
			
 
				                        person_bbox: Tuple[int, int, int, int]) -> Optional[np.ndarray]:
			
@@ -66,33 +171,44 @@ class PersonSegmenter:
 
				             frame: 输入图像
			
 
				             person_bbox: 人体边界框 (x, y, w, h)
			
 
				         Returns:
			
 
				-            人体分割掩码 (或分割后的人体图像)
			
 
				+            人体分割掩码
			
 
				         """
			
 
				-        if self.segmentor is None:
			
 
				+        if self.rknn is None:
			
 
				             return None
			
 
				         
			
 
				         x, y, w, h = person_bbox
			
 
				         
			
 
				         # 裁剪人体区域
			
 
				         person_roi = frame[y:y+h, x:x+w]
			
 
				+        if person_roi.size == 0:
			
 
				+            return None
			
 
				         
			
 
				         try:
			
 
				-            # 使用分割模型
			
 
				-            results = self.segmentor(person_roi, classes=[0], verbose=False)  # class 0 = person
			
 
				+            # 预处理
			
 
				+            canvas, scale, pad_w, pad_h, h0, w0 = self._letterbox(person_roi)
			
 
				+            
			
 
				+            # RKNN 输入: NHWC (1, H, W, C), RGB, float32 normalized 0-1
			
 
				+            img = canvas[..., ::-1].astype(np.float32) / 255.0
			
 
				+            blob = img[None, ...]  # (1, 640, 640, 3)
			
 
				+            
			
 
				+            # 推理
			
 
				+            outputs = self.rknn.inference(inputs=[blob])
			
 
				+            
			
 
				+            # 后处理
			
 
				+            mask = self._postprocess_segmentation(outputs, scale, pad_w, pad_h, w0, h0)
			
 
				+            return mask
			
 
				             
			
 
				-            if results and len(results) > 0 and results[0].masks is not None:
			
 
				-                masks = results[0].masks.data
			
 
				-                if len(masks) > 0:
			
 
				-                    # 获取第一个掩码
			
 
				-                    mask = masks[0].cpu().numpy()
			
 
				-                    mask = cv2.resize(mask, (w, h))
			
 
				-                    mask = (mask > 0.5).astype(np.uint8) * 255
			
 
				-                    return mask
			
 
				         except Exception as e:
			
 
				             print(f"分割错误: {e}")
			
 
				         
			
 
				         return None
			
 
				     
			
 
				+    def release(self):
			
 
				+        """释放 RKNN 资源"""
			
 
				+        if self.rknn is not None:
			
 
				+            self.rknn.release()
			
 
				+            self.rknn = None
			
 
				+    
			
 
				     def extract_person_region(self, frame: np.ndarray,
			
 
				                                person_bbox: Tuple[int, int, int, int],
			
 
				                                padding: float = 0.1) -> Tuple[np.ndarray, Tuple[int, int]]:
			
@@ -495,6 +611,11 @@ class NumberDetector:
 
				             person_info.person_id = i
			
 
				             results.append(person_info)
			
 
				         return results
			
 
				+    
			
 
    def release(self):
        """Release resources held through ``self.segmenter``.

        Calls ``self.segmenter.release()`` when the segmenter has a
        ``release`` attribute; otherwise does nothing.
        """
        segmenter = self.segmenter
        try:
            release_fn = segmenter.release
        except AttributeError:
            # Segmenter has nothing to release.
            return
        release_fn()
			
 
				 
			
 
				 
			
 
				 def preprocess_for_ocr(image: np.ndarray) -> np.ndarray:
			
--- a/dual_camera_system/test_ptz.py
+++ b/dual_camera_system/test_ptz.py
@@ -200,6 +200,57 @@ def interactive_test(ptz: PTZCamera, sdk: DahuaSDK):
 
				             print(f"错误: {e}")
			
 
				 
			
 
				 
			
 
				+def test_channels(sdk: DahuaSDK):
			
 
				+    """测试不同通道号的 PTZ 控制"""
			
 
				+    print("\n" + "=" * 50)
			
 
				+    print("测试不同通道号")
			
 
				+    print("=" * 50)
			
 
				+    
			
 
				+    config = PTZ_CAMERA.copy()
			
 
				+    
			
 
				+    # 连接设备
			
 
				+    login_handle, error = sdk.login(
			
 
				+        config['ip'], config['port'],
			
 
				+        config['username'], config['password']
			
 
				+    )
			
 
				+    
			
 
				+    if login_handle is None:
			
 
				+        print(f"连接失败: 错误码={error}")
			
 
				+        return
			
 
				+    
			
 
				+    print(f"连接成功: handle={login_handle}")
			
 
				+    
			
 
				+    # 测试通道 0-3
			
 
				+    for channel in range(4):
			
 
				+        print(f"\n--- 测试通道 {channel} ---")
			
 
				+        
			
 
				+        # 测试 LEFT 命令
			
 
				+        print(f"  测试 LEFT 命令...")
			
 
				+        result = sdk.ptz_control(login_handle, channel, PTZCommand.LEFT, 0, 4, 0, False)
			
 
				+        if result:
			
 
				+            print(f"  通道 {channel}: LEFT 成功 ✓")
			
 
				+            time.sleep(1)
			
 
				+            # 发送停止
			
 
				+            sdk.ptz_control(login_handle, channel, PTZCommand.LEFT, 0, 0, 0, True)
			
 
				+        else:
			
 
				+            print(f"  通道 {channel}: LEFT 失败 ✗")
			
 
				+        
			
 
				+        time.sleep(0.5)
			
 
				+        
			
 
				+        # 测试 EXACTGOTO
			
 
				+        print(f"  测试 EXACTGOTO...")
			
 
				+        result = sdk.ptz_control(login_handle, channel, PTZCommand.EXACTGOTO, 900, 0, 1, False)
			
 
				+        if result:
			
 
				+            print(f"  通道 {channel}: EXACTGOTO 成功 ✓")
			
 
				+        else:
			
 
				+            print(f"  通道 {channel}: EXACTGOTO 失败 ✗")
			
 
				+        
			
 
				+        time.sleep(1)
			
 
				+    
			
 
				+    sdk.logout(login_handle)
			
 
				+    print("\n通道测试完成")
			
 
				+
			
 
				+
			
 
				 def main():
			
 
				     print("PTZ 控制测试脚本")
			
 
				     print("=" * 50)
			
@@ -215,6 +266,13 @@ def main():
 
				     
			
 
				     print("SDK 初始化成功")
			
 
				     
			
 
				+    # 询问是否先测试通道
			
 
				+    print("\n是否先测试不同通道号? (y/n): ", end="")
			
 
				+    if input().strip().lower() == 'y':
			
 
				+        test_channels(sdk)
			
 
				+        sdk.cleanup()
			
 
				+        return 0
			
 
				+    
			
 
				     # 连接球机
			
 
				     ptz = PTZCamera(sdk, PTZ_CAMERA)
			
 
				     if not ptz.connect():