Browse Source

refactor(coordinator): 优化联动控制器架构及目标选择策略

- 引入TargetSelector类实现目标综合评分和选择策略,支持面积、置信度及混合模式
- 扩展TrackingTarget数据结构,增加面积、置信度、中心距离及综合得分字段
- 精化目标选择逻辑,增加目标切换粘性和优先级控制
- 集成配置支持动态调整目标选择策略和参数
- 保留原联动控制器核心功能,改进代码结构便于后续扩展与维护
wenhongquan committed 3 days ago
parent
commit
97ab7fd5ea

BIN
dual_camera_system/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/__pycache__/dahua_sdk.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ocr_recognizer.cpython-310.pyc


BIN
dual_camera_system/__pycache__/panorama_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_person_tracker.cpython-310.pyc


BIN
dual_camera_system/__pycache__/video_lock.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/__init__.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/camera.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/detection.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/ocr.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/ptz.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/system.cpython-310.pyc


+ 12 - 0
dual_camera_system/config/coordinator.py

@@ -9,6 +9,18 @@ COORDINATOR_CONFIG = {
     'detection_interval': 1.0,
     'ptz_command_cooldown': 0.5,
     'ptz_position_threshold': 0.03,
+    
+    # 目标选择策略配置
+    'target_selection': {
+        'strategy': 'area',           # 策略: 'area'(面积优先), 'confidence'(置信度优先), 'hybrid'(混合)
+        'area_weight': 0.6,           # 混合模式下面积权重
+        'confidence_weight': 0.4,     # 混合模式下置信度权重
+        'min_area_threshold': 5000,   # 最小面积阈值(像素²),小于此值的目标优先级降低
+        'prefer_center': True,        # 是否优先选择靠近画面中心的目标
+        'center_weight': 0.2,         # 中心位置权重
+        'switch_on_lost': True,       # 当前目标丢失时是否切换到次优目标
+        'stickiness': 0.3,            # 目标粘性(0-1),值越大越不容易切换目标
+    },
 }
 
 CALIBRATION_CONFIG = {

+ 1 - 1
dual_camera_system/config/detection.py

@@ -6,7 +6,7 @@
 DETECTION_CONFIG = {
     'target_classes': ['person', '人'],   # 检测目标类别 (支持中英文)
     'confidence_threshold': 0.5,     # 置信度阈值
-    'detection_interval': 0.1,       # 检测间隔(秒)
+    'detection_interval': 0.05,       # 检测间隔(秒)
     
     # 检测图片保存配置
     'save_detection_image': False,   # 是否保存检测到人的图片

+ 12 - 0
dual_camera_system/config/ptz.py

@@ -25,4 +25,16 @@ PTZ_CONFIG = {
     # tilt_flip: 如果俯仰方向相反,设为True(吸顶安装通常需要True)
     'pan_flip': True,
     'tilt_flip': False,              # 由 mount_type='ceiling' 时自动生效
+    
+    # 球机端人体检测与自动对焦配置
+    'enable_ptz_detection': True,    # 是否启用球机端人体检测
+    'auto_zoom': {
+        'enabled': True,             # 是否启用自动变焦
+        'target_size_ratio': 0.4,    # 目标人体占画面比例 (0.3-0.5)
+        'min_zoom': 3,               # 最小变倍
+        'max_zoom': 20,              # 最大变倍
+        'zoom_step': 2,              # 变焦调整步长
+        'center_threshold': 0.1,     # 居中阈值 (人体中心偏离画面中心的比例)
+        'max_adjust_attempts': 3,    # 最大调整次数
+    },
 }

+ 263 - 16
dual_camera_system/coordinator.py

@@ -13,10 +13,11 @@ from enum import Enum
 
 import numpy as np
 
-from config import COORDINATOR_CONFIG, SYSTEM_CONFIG
+from config import COORDINATOR_CONFIG, SYSTEM_CONFIG, PTZ_CONFIG, DETECTION_CONFIG
 from panorama_camera import PanoramaCamera, ObjectDetector, PersonTracker, DetectedObject
 from ptz_camera import PTZCamera, PTZController
 from ocr_recognizer import NumberDetector, PersonInfo
+from ptz_person_tracker import PTZPersonDetector, PTZAutoZoomController
 
 logger = logging.getLogger(__name__)
 
@@ -38,6 +39,160 @@ class TrackingTarget:
     last_update: float              # 最后更新时间
     person_info: Optional[PersonInfo] = None  # 人员信息
     priority: int = 0               # 优先级
+    area: int = 0                   # 目标面积(像素²)
+    confidence: float = 0.0         # 置信度
+    center_distance: float = 1.0    # 到画面中心的距离比例(0-1)
+    score: float = 0.0              # 综合得分
+
+
+class TargetSelector:
+    """
+    目标选择策略类
+    支持按面积、置信度、混合模式排序,支持优先级切换
+    """
+    
+    def __init__(self, config: Dict = None):
+        """
+        初始化目标选择器
+        Args:
+            config: 目标选择配置
+        """
+        self.config = config or {
+            'strategy': 'area',
+            'area_weight': 0.6,
+            'confidence_weight': 0.4,
+            'min_area_threshold': 5000,
+            'prefer_center': True,
+            'center_weight': 0.2,
+            'switch_on_lost': True,
+            'stickiness': 0.3,
+        }
+        self.current_target_id: Optional[int] = None
+        self.current_target_score: float = 0.0
+    
+    def calculate_score(self, target: TrackingTarget, frame_size: Tuple[int, int] = None) -> float:
+        """
+        计算目标综合得分
+        Args:
+            target: 跟踪目标
+            frame_size: 帧尺寸(w, h),用于计算中心距离
+        Returns:
+            综合得分(0-1)
+        """
+        strategy = self.config.get('strategy', 'area')
+        area_weight = self.config.get('area_weight', 0.6)
+        conf_weight = self.config.get('confidence_weight', 0.4)
+        min_area = self.config.get('min_area_threshold', 5000)
+        prefer_center = self.config.get('prefer_center', False)
+        center_weight = self.config.get('center_weight', 0.2)
+        
+        # 归一化面积得分 (对数缩放,避免大目标得分过高)
+        import math
+        area_score = min(1.0, math.log10(max(target.area, 1)) / 5.0)  # 100000像素² ≈ 1.0
+        
+        # 小面积惩罚
+        if target.area < min_area:
+            area_score *= 0.5
+        
+        # 置信度得分直接使用
+        conf_score = target.confidence
+        
+        # 中心距离得分 (距离中心越近得分越高)
+        center_score = 1.0 - target.center_distance
+        
+        # 根据策略计算综合得分
+        if strategy == 'area':
+            score = area_score * 0.8 + conf_score * 0.2
+        elif strategy == 'confidence':
+            score = conf_score * 0.8 + area_score * 0.2
+        else:  # hybrid
+            score = area_score * area_weight + conf_score * conf_weight
+        
+        # 加入中心距离权重
+        if prefer_center:
+            score = score * (1 - center_weight) + center_score * center_weight
+        
+        return score
+    
+    def select_target(self, targets: Dict[int, TrackingTarget], 
+                       frame_size: Tuple[int, int] = None) -> Optional[TrackingTarget]:
+        """
+        从多个目标中选择最优目标
+        Args:
+            targets: 目标字典 {track_id: TrackingTarget}
+            frame_size: 帧尺寸
+        Returns:
+            最优目标
+        """
+        if not targets:
+            self.current_target_id = None
+            return None
+        
+        stickiness = self.config.get('stickiness', 0.3)
+        switch_on_lost = self.config.get('switch_on_lost', True)
+        
+        # 计算所有目标得分
+        scored_targets = []
+        for track_id, target in targets.items():
+            target.score = self.calculate_score(target, frame_size)
+            scored_targets.append((track_id, target, target.score))
+        
+        # 按得分排序
+        scored_targets.sort(key=lambda x: x[2], reverse=True)
+        
+        # 检查当前目标是否仍在列表中
+        if self.current_target_id is not None:
+            current_exists = self.current_target_id in targets
+            if current_exists:
+                # 应用粘性:当前目标得分需要显著低于最优目标才切换
+                best_id, best_target, best_score = scored_targets[0]
+                current_target = targets[self.current_target_id]
+                
+                # 粘性阈值: 当前目标得分 > 最优得分 * (1 - stickiness) 时保持
+                stickiness_threshold = best_score * (1 - stickiness)
+                if current_target.score > stickiness_threshold:
+                    return current_target
+        
+        
+        # 选择得分最高的目标
+        best_id, best_target, best_score = scored_targets[0]
+        self.current_target_id = best_id
+        self.current_target_score = best_score
+        
+        logger.debug(
+            f"[目标选择] 选择目标ID={best_id} 得分={best_score:.3f} "
+            f"面积={best_target.area} 置信度={best_target.confidence:.2f}"
+        )
+        
+        return best_target
+    
+    def get_sorted_targets(self, targets: Dict[int, TrackingTarget],
+                            frame_size: Tuple[int, int] = None) -> List[Tuple[TrackingTarget, float]]:
+        """
+        获取按得分排序的目标列表
+        Args:
+            targets: 目标字典
+            frame_size: 帧尺寸
+        Returns:
+            排序后的目标列表 [(target, score), ...]
+        """
+        scored = []
+        for target in targets.values():
+            target.score = self.calculate_score(target, frame_size)
+            scored.append((target, target.score))
+        
+        scored.sort(key=lambda x: x[1], reverse=True)
+        return scored
+    
+    def set_strategy(self, strategy: str):
+        """设置选择策略"""
+        self.config['strategy'] = strategy
+        logger.info(f"[目标选择] 策略已切换为: {strategy}")
+    
+    def set_stickiness(self, stickiness: float):
+        """设置目标粘性"""
+        self.config['stickiness'] = max(0.0, min(1.0, stickiness))
+        logger.info(f"[目标选择] 粘性已设置为: {self.config['stickiness']}")
 
 
 class Coordinator:
@@ -75,6 +230,12 @@ class Coordinator:
         self.enable_detection = SYSTEM_CONFIG.get('enable_detection', True)
         self.enable_ocr = SYSTEM_CONFIG.get('enable_ocr', True)
         
+        # 球机端人体检测与自动对焦
+        self.enable_ptz_detection = PTZ_CONFIG.get('enable_ptz_detection', False)
+        self.auto_zoom_config = PTZ_CONFIG.get('auto_zoom', {})
+        self.ptz_detector = None
+        self.auto_zoom_controller = None
+        
         # 跟踪器
         self.tracker = PersonTracker()
         
@@ -107,6 +268,11 @@ class Coordinator:
         self.last_ptz_position = None
         self.ptz_position_threshold = self.config.get('ptz_position_threshold', 0.03)
         
+        # 目标选择器
+        self.target_selector = TargetSelector(
+            self.config.get('target_selection', {})
+        )
+        
         # 结果队列
         self.result_queue = queue.Queue()
         
@@ -292,6 +458,8 @@ class Coordinator:
                                   frame_size: Tuple[int, int]):
         """更新跟踪目标"""
         current_time = time.time()
+        frame_w, frame_h = frame_size
+        center_x, center_y = frame_w / 2, frame_h / 2
         
         with self.targets_lock:
             # 更新现有目标
@@ -299,21 +467,36 @@ class Coordinator:
                 if det.track_id is None:
                     continue
                 
-                x_ratio = det.center[0] / frame_size[0]
-                y_ratio = det.center[1] / frame_size[1]
+                x_ratio = det.center[0] / frame_w
+                y_ratio = det.center[1] / frame_h
+                
+                # 计算面积
+                _, _, width, height = det.bbox
+                area = width * height
+                
+                # 计算到画面中心的距离比例
+                dx = abs(det.center[0] - center_x) / center_x
+                dy = abs(det.center[1] - center_y) / center_y
+                center_distance = (dx + dy) / 2  # 归一化到0-1
                 
                 if det.track_id in self.tracking_targets:
                     # 更新位置
                     target = self.tracking_targets[det.track_id]
                     target.position = (x_ratio, y_ratio)
                     target.last_update = current_time
+                    target.area = area
+                    target.confidence = det.confidence
+                    target.center_distance = center_distance
                 else:
                     # 新目标
                     if len(self.tracking_targets) < self.config['max_tracking_targets']:
                         self.tracking_targets[det.track_id] = TrackingTarget(
                             track_id=det.track_id,
                             position=(x_ratio, y_ratio),
-                            last_update=current_time
+                            last_update=current_time,
+                            area=area,
+                            confidence=det.confidence,
+                            center_distance=center_distance
                         )
     
     def _process_detections(self, detections: List[DetectedObject],
@@ -331,12 +514,10 @@ class Coordinator:
                 self.current_target = None
                 return
             
-            # 选择优先级最高的目标(这里选择最新的)
-            if self.current_target is None or \
-               self.current_target.track_id not in self.tracking_targets:
-                # 选择一个新目标
-                target_id = list(self.tracking_targets.keys())[0]
-                self.current_target = self.tracking_targets[target_id]
+            # 使用目标选择器选择最优目标
+            self.current_target = self.target_selector.select_target(
+                self.tracking_targets, frame_size
+            )
         
         if self.current_target:
             # 移动球机到目标位置 (仅在 PTZ 跟踪启用时)
@@ -603,6 +784,15 @@ class AsyncCoordinator(Coordinator):
                 print("连接球机失败")
                 self.panorama.disconnect()
                 return False
+            
+            # 启动球机RTSP流(用于球机端人体检测)
+            if self.enable_ptz_detection:
+                if not self.ptz.start_stream_rtsp():
+                    print("球机RTSP流启动失败,禁用球机端检测功能")
+                    self.enable_ptz_detection = False
+                else:
+                    # 初始化球机端人体检测器
+                    self._init_ptz_detector()
         else:
             print("PTZ球机功能已禁用")
         
@@ -738,10 +928,12 @@ class AsyncCoordinator(Coordinator):
                             # tracked 是 DetectedObject,使用 center 计算位置
                             x_ratio = t.center[0] / frame_size[0]
                             y_ratio = t.center[1] / frame_size[1]
+                            _, _, w, h = t.bbox
+                            area = w * h
                             logger.info(
                                 f"[检测] ✓ 目标ID={t.track_id} "
                                 f"位置=({x_ratio:.3f}, {y_ratio:.3f}) "
-                                f"置信度={t.confidence:.2f}"
+                                f"面积={area} 置信度={t.confidence:.2f}"
                             )
                     elif detections:
                         # 有检测但没跟踪上
@@ -775,6 +967,32 @@ class AsyncCoordinator(Coordinator):
                 logger.error(f"检测线程错误: {e}")
                 time.sleep(0.1)
     
+    def _init_ptz_detector(self):
+        """初始化球机端人体检测器"""
+        try:
+            model_path = DETECTION_CONFIG.get('model_path')
+            model_type = DETECTION_CONFIG.get('model_type', 'auto')
+            conf_threshold = DETECTION_CONFIG.get('person_threshold', 0.5)
+            
+            if model_path:
+                self.ptz_detector = PTZPersonDetector(
+                    model_path=model_path,
+                    model_type=model_type,
+                    confidence_threshold=conf_threshold
+                )
+                self.auto_zoom_controller = PTZAutoZoomController(
+                    ptz_camera=self.ptz,
+                    detector=self.ptz_detector,
+                    config=self.auto_zoom_config
+                )
+                print(f"[AsyncCoordinator] 球机端人体检测器初始化成功")
+            else:
+                print("[AsyncCoordinator] 未配置球机检测模型路径,禁用球机端检测")
+                self.enable_ptz_detection = False
+        except Exception as e:
+            print(f"[AsyncCoordinator] 球机端检测器初始化失败: {e}")
+            self.enable_ptz_detection = False
+
     def _ptz_worker(self):
         """PTZ控制线程:从队列接收命令并控制球机"""
         while self.running:
@@ -798,10 +1016,10 @@ class AsyncCoordinator(Coordinator):
                 self.current_target = None
                 return None
             
-            if self.current_target is None or \
-               self.current_target.track_id not in self.tracking_targets:
-                target_id = list(self.tracking_targets.keys())[0]
-                self.current_target = self.tracking_targets[target_id]
+            # 使用目标选择器选择最优目标
+            self.current_target = self.target_selector.select_target(
+                self.tracking_targets
+            )
             
             return self.current_target
     
@@ -904,11 +1122,40 @@ class AsyncCoordinator(Coordinator):
         
         if success:
             time.sleep(self.PTZ_CONFIRM_WAIT)
-            self._confirm_ptz_position(cmd.x_ratio, cmd.y_ratio)
+            
+            # 球机端人体检测与自动对焦
+            if self.enable_ptz_detection and self.auto_zoom_config.get('enabled', False):
+                self._auto_zoom_person(pan, tilt, zoom)
+            else:
+                self._confirm_ptz_position(cmd.x_ratio, cmd.y_ratio)
+            
             logger.info(f"[PTZ] 到位确认完成: pan={pan:.1f}° tilt={tilt:.1f}°")
         else:
             logger.warning(f"[PTZ] 命令执行失败: pan={pan:.1f}° tilt={tilt:.1f}° zoom={zoom}")
     
+    def _auto_zoom_person(self, initial_pan: float, initial_tilt: float, initial_zoom: int):
+        """
+        自动对焦人体
+        在球机画面中检测人体,自动调整zoom使人体居中且大小合适
+        """
+        if self.auto_zoom_controller is None:
+            return
+        
+        logger.info("[AutoZoom] 开始自动对焦...")
+        
+        try:
+            success, final_zoom = self.auto_zoom_controller.auto_focus_loop(
+                get_frame_func=self.ptz.get_frame,
+                max_attempts=self.auto_zoom_config.get('max_adjust_attempts', 3)
+            )
+            
+            if success:
+                logger.info(f"[AutoZoom] 自动对焦成功: zoom={final_zoom}")
+            else:
+                logger.warning("[AutoZoom] 自动对焦未能定位人体")
+        except Exception as e:
+            logger.error(f"[AutoZoom] 自动对焦异常: {e}")
+    
     def _confirm_ptz_position(self, x_ratio: float, y_ratio: float):
         """PTZ位置确认:读取球机帧验证目标是否可见"""
         if not hasattr(self.ptz, 'get_frame') or self.ptz.get_frame() is None:

+ 426 - 0
dual_camera_system/ptz_person_tracker.py

@@ -0,0 +1,426 @@
+"""
+球机端人体检测与自动对焦模块
+在球机移动到位后,检测人体并自动调整焦距,使人体居中并占据合适比例
+"""
+
+import time
+import cv2
+import numpy as np
+from typing import Optional, List, Tuple, Dict
+from dataclasses import dataclass
+
+try:
+    from ultralytics import YOLO
+    HAS_YOLO = True
+except ImportError:
+    HAS_YOLO = False
+
+try:
+    from rknnlite.api import RKNNLite
+    HAS_RKNN = True
+except ImportError:
+    HAS_RKNN = False
+
+
+@dataclass
+class DetectedPerson:
+    """检测到的人体"""
+    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    center: Tuple[float, float]       # 中心点 (x, y)
+    width: int
+    height: int
+    confidence: float
+    
+    @property
+    def area(self) -> int:
+        return self.width * self.height
+    
+    @property
+    def size_ratio(self) -> float:
+        """人体宽高比"""
+        return self.width / self.height if self.height > 0 else 0
+
+
+@dataclass
+class ZoomAdjustResult:
+    """变焦调整结果"""
+    success: bool
+    new_zoom: int
+    pan_adjust: float      # pan调整量(度)
+    tilt_adjust: float     # tilt调整量(度)
+    person_detected: bool
+    person_centered: bool
+    message: str
+
+
+class PTZPersonDetector:
+    """
+    球机端人体检测器
+    支持YOLO(PT)和RKNN两种模型格式
+    """
+    
+    def __init__(self, model_path: str = None, model_type: str = 'auto',
+                 confidence_threshold: float = 0.5, use_gpu: bool = False):
+        """
+        初始化检测器
+        Args:
+            model_path: 模型路径
+            model_type: 模型类型 ('yolo', 'rknn', 'auto')
+            confidence_threshold: 置信度阈值
+            use_gpu: 是否使用GPU
+        """
+        self.model_path = model_path
+        self.model_type = model_type
+        self.confidence_threshold = confidence_threshold
+        self.use_gpu = use_gpu
+        self.model = None
+        self.person_class_id = 0  # YOLO默认person类别ID
+        
+        if model_path:
+            self._load_model(model_path, model_type)
+    
+    def _load_model(self, model_path: str, model_type: str = 'auto'):
+        """加载模型"""
+        if model_type == 'auto':
+            if model_path.endswith('.rknn'):
+                model_type = 'rknn'
+            elif model_path.endswith('.onnx'):
+                model_type = 'onnx'
+            else:
+                model_type = 'yolo'
+        
+        self.model_type = model_type
+        
+        if model_type == 'rknn':
+            self._load_rknn_model(model_path)
+        elif model_type == 'yolo':
+            self._load_yolo_model(model_path)
+        else:
+            print(f"[PTZPersonDetector] 不支持的模型类型: {model_type}")
+    
+    def _load_yolo_model(self, model_path: str):
+        """加载YOLO模型"""
+        if not HAS_YOLO:
+            print("[PTZPersonDetector] ultralytics未安装,无法使用YOLO模型")
+            return
+        
+        try:
+            self.model = YOLO(model_path)
+            print(f"[PTZPersonDetector] YOLO模型加载成功: {model_path}")
+        except Exception as e:
+            print(f"[PTZPersonDetector] YOLO模型加载失败: {e}")
+    
+    def _load_rknn_model(self, model_path: str):
+        """加载RKNN模型"""
+        if not HAS_RKNN:
+            print("[PTZPersonDetector] rknnlite未安装,无法使用RKNN模型")
+            return
+        
+        try:
+            self.model = RKNNLite()
+            ret = self.model.load_rknn(model_path)
+            if ret != 0:
+                raise RuntimeError(f"加载RKNN模型失败: ret={ret}")
+            
+            ret = self.model.init_runtime(target=None)  # 自动选择NPU核心
+            if ret != 0:
+                raise RuntimeError(f"初始化RKNN运行时失败: ret={ret}")
+            
+            # RKNN安全模型的person类别ID
+            self.person_class_id = 3
+            print(f"[PTZPersonDetector] RKNN模型加载成功: {model_path}")
+        except Exception as e:
+            print(f"[PTZPersonDetector] RKNN模型加载失败: {e}")
+            self.model = None
+    
+    def detect(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """
+        检测人体
+        Args:
+            frame: BGR图像
+        Returns:
+            检测到的人体列表
+        """
+        if self.model is None:
+            return []
+        
+        if self.model_type == 'rknn':
+            return self._detect_rknn(frame)
+        elif self.model_type == 'yolo':
+            return self._detect_yolo(frame)
+        
+        return []
+    
+    def _detect_yolo(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """YOLO检测"""
+        persons = []
+        
+        try:
+            results = self.model(frame, verbose=False)
+            
+            for r in results:
+                if r.boxes is None:
+                    continue
+                
+                for box in r.boxes:
+                    cls_id = int(box.cls[0])
+                    if cls_id != self.person_class_id:
+                        continue
+                    
+                    conf = float(box.conf[0])
+                    if conf < self.confidence_threshold:
+                        continue
+                    
+                    x1, y1, x2, y2 = map(int, box.xyxy[0])
+                    center_x = (x1 + x2) / 2
+                    center_y = (y1 + y2) / 2
+                    width = x2 - x1
+                    height = y2 - y1
+                    
+                    persons.append(DetectedPerson(
+                        bbox=(x1, y1, x2, y2),
+                        center=(center_x, center_y),
+                        width=width,
+                        height=height,
+                        confidence=conf
+                    ))
+        except Exception as e:
+            print(f"[PTZPersonDetector] YOLO检测错误: {e}")
+        
+        return persons
+    
+    def _detect_rknn(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """RKNN检测"""
+        persons = []
+        
+        try:
+            # 预处理
+            img = cv2.resize(frame, (640, 640))
+            img = img.astype(np.float32) / 255.0
+            img = np.expand_dims(img, 0)
+            
+            # 推理
+            outputs = self.model.inference(inputs=[img])
+            
+            # 后处理 (YOLO格式输出)
+            # outputs shape: [1, 84, 8400] 或类似
+            if outputs is None or len(outputs) == 0:
+                return []
+            
+            output = outputs[0]
+            
+            # 解析检测结果
+            h, w = frame.shape[:2]
+            
+            for i in range(output.shape[-1]):
+                data = output[0, :, i]
+                
+                # 获取类别和置信度
+                class_scores = data[4:]
+                class_id = np.argmax(class_scores)
+                confidence = class_scores[class_id]
+                
+                if confidence < self.confidence_threshold:
+                    continue
+                
+                if class_id != self.person_class_id:
+                    continue
+                
+                # 获取边界框
+                cx, cy, bw, bh = data[:4]
+                
+                # 转换为原图坐标
+                x1 = int((cx - bw/2) * w / 640)
+                y1 = int((cy - bh/2) * h / 640)
+                x2 = int((cx + bw/2) * w / 640)
+                y2 = int((cy + bh/2) * h / 640)
+                
+                persons.append(DetectedPerson(
+                    bbox=(x1, y1, x2, y2),
+                    center=((x1+x2)/2, (y1+y2)/2),
+                    width=x2-x1,
+                    height=y2-y1,
+                    confidence=float(confidence)
+                ))
+        except Exception as e:
+            print(f"[PTZPersonDetector] RKNN检测错误: {e}")
+        
+        return persons
+    
+    def detect_largest_person(self, frame: np.ndarray) -> Optional[DetectedPerson]:
+        """检测最大的人体"""
+        persons = self.detect(frame)
+        if not persons:
+            return None
+        return max(persons, key=lambda p: p.area)
+
+
+class PTZAutoZoomController:
+    """
+    球机自动变焦控制器
+    根据检测到的人体大小和位置,自动调整PTZ角度和变焦
+    """
+    
+    def __init__(self, ptz_camera, detector: PTZPersonDetector = None, config: dict = None):
+        """
+        初始化控制器
+        Args:
+            ptz_camera: PTZCamera实例
+            detector: 人体检测器
+            config: 自动变焦配置
+        """
+        self.ptz = ptz_camera
+        self.detector = detector
+        self.config = config or {}
+        
+        # 默认配置
+        self.target_size_ratio = self.config.get('target_size_ratio', 0.4)
+        self.min_zoom = self.config.get('min_zoom', 3)
+        self.max_zoom = self.config.get('max_zoom', 20)
+        self.zoom_step = self.config.get('zoom_step', 2)
+        self.center_threshold = self.config.get('center_threshold', 0.1)
+        self.max_adjust_attempts = self.config.get('max_adjust_attempts', 3)
+    
+    def adjust_to_person(self, frame: np.ndarray, current_pan: float, 
+                         current_tilt: float, current_zoom: int) -> ZoomAdjustResult:
+        """
+        调整PTZ使检测到的人体居中并占据合适比例
+        
+        Args:
+            frame: 球机画面
+            current_pan: 当前pan角度
+            current_tilt: 当前tilt角度
+            current_zoom: 当前变倍
+            
+        Returns:
+            ZoomAdjustResult: 调整结果
+        """
+        if frame is None:
+            return ZoomAdjustResult(
+                success=False, new_zoom=current_zoom,
+                pan_adjust=0, tilt_adjust=0,
+                person_detected=False, person_centered=False,
+                message="无法获取球机画面"
+            )
+        
+        # 检测人体
+        person = self.detector.detect_largest_person(frame) if self.detector else None
+        
+        if person is None:
+            return ZoomAdjustResult(
+                success=False, new_zoom=current_zoom,
+                pan_adjust=0, tilt_adjust=0,
+                person_detected=False, person_centered=False,
+                message="球机画面中未检测到人体"
+            )
+        
+        h, w = frame.shape[:2]
+        
+        # 计算人体中心偏离画面中心的程度
+        frame_center_x = w / 2
+        frame_center_y = h / 2
+        
+        offset_x = (person.center[0] - frame_center_x) / w  # -0.5 ~ 0.5
+        offset_y = (person.center[1] - frame_center_y) / h  # -0.5 ~ 0.5
+        
+        # 计算人体占画面比例
+        person_size_ratio = max(person.width / w, person.height / h)
+        
+        print(f"[AutoZoom] 检测到人体: 中心=({person.center[0]:.0f}, {person.center[1]:.0f}), "
+              f"尺寸={person.width}x{person.height}, 占比={person.size_ratio:.2f}")
+        print(f"[AutoZoom] 偏移: x={offset_x:.3f}, y={offset_y:.3f}")
+        
+        # 判断是否居中
+        is_centered = abs(offset_x) < self.center_threshold and abs(offset_y) < self.center_threshold
+        
+        # 计算PTZ角度调整量 (根据视场角估算)
+        # 假设当前zoom下的水平视场角约 60/zoom 度
+        fov_per_pixel = (60.0 / current_zoom) / w
+        pan_adjust = -offset_x * 60.0 / current_zoom  # 简化计算
+        tilt_adjust = -offset_y * 45.0 / current_zoom
+        
+        # 计算新的zoom
+        if person_size_ratio < self.target_size_ratio * 0.8:
+            # 人体太小,放大
+            zoom_factor = self.target_size_ratio / person_size_ratio
+            new_zoom = min(int(current_zoom * zoom_factor), self.max_zoom)
+        elif person_size_ratio > self.target_size_ratio * 1.2:
+            # 人体太大,缩小
+            zoom_factor = self.target_size_ratio / person_size_ratio
+            new_zoom = max(int(current_zoom * zoom_factor), self.min_zoom)
+        else:
+            # 大小合适
+            new_zoom = current_zoom
+        
+        # 限制调整量
+        if is_centered:
+            pan_adjust = 0
+            tilt_adjust = 0
+        
+        return ZoomAdjustResult(
+            success=True,
+            new_zoom=new_zoom,
+            pan_adjust=pan_adjust,
+            tilt_adjust=tilt_adjust,
+            person_detected=True,
+            person_centered=is_centered,
+            message=f"人体居中={is_centered}, zoom调整={current_zoom}→{new_zoom}"
+        )
+    
+    def auto_focus_loop(self, get_frame_func, max_attempts: int = None) -> Tuple[bool, int]:
+        """
+        自动对焦循环
+        持续调整PTZ直到人体居中且大小合适
+        
+        Args:
+            get_frame_func: 获取球机画面的函数
+            max_attempts: 最大调整次数
+            
+        Returns:
+            (是否成功, 最终zoom)
+        """
+        max_attempts = max_attempts or self.max_adjust_attempts
+        
+        current_pos = self.ptz.get_current_position()
+        current_pan = current_pos.pan
+        current_tilt = current_pos.tilt
+        current_zoom = current_pos.zoom
+        
+        for attempt in range(max_attempts):
+            print(f"[AutoZoom] 调整轮次 {attempt + 1}/{max_attempts}")
+            
+            # 等待画面稳定
+            time.sleep(0.3)
+            
+            # 获取球机画面
+            frame = get_frame_func()
+            
+            # 分析并计算调整
+            result = self.adjust_to_person(frame, current_pan, current_tilt, current_zoom)
+            
+            if not result.person_detected:
+                print(f"[AutoZoom] 未检测到人体,停止调整")
+                return False, current_zoom
+            
+            if result.person_centered and result.new_zoom == current_zoom:
+                print(f"[AutoZoom] 人体已居中且大小合适,调整完成")
+                return True, current_zoom
+            
+            # 执行调整
+            if result.pan_adjust != 0 or result.tilt_adjust != 0:
+                new_pan = current_pan + result.pan_adjust
+                new_tilt = current_tilt + result.tilt_adjust
+                print(f"[AutoZoom] 调整角度: pan {current_pan:.1f}→{new_pan:.1f}, "
+                      f"tilt {current_tilt:.1f}→{new_tilt:.1f}")
+                self.ptz.goto_exact_position(new_pan, new_tilt, result.new_zoom)
+                current_pan = new_pan
+                current_tilt = new_tilt
+            elif result.new_zoom != current_zoom:
+                print(f"[AutoZoom] 调整变焦: {current_zoom}→{result.new_zoom}")
+                self.ptz.goto_exact_position(current_pan, current_tilt, result.new_zoom)
+            
+            current_zoom = result.new_zoom
+        
+        print(f"[AutoZoom] 达到最大调整次数,当前zoom={current_zoom}")
+        return True, current_zoom
+