Browse Source

refactor(coordinator): 优化联动控制器架构及目标选择策略

- 引入TargetSelector类实现目标综合评分和选择策略,支持面积、置信度及混合模式
- 扩展TrackingTarget数据结构,增加面积、置信度、中心距离及综合得分字段
- 精化目标选择逻辑,增加目标切换粘性和优先级控制
- 集成配置支持动态调整目标选择策略和参数
- 保留原联动控制器核心功能,改进代码结构便于后续扩展与维护
wenhongquan committed 3 days ago
parent
commit
97ab7fd5ea

BIN
dual_camera_system/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/__pycache__/dahua_sdk.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ocr_recognizer.cpython-310.pyc


BIN
dual_camera_system/__pycache__/panorama_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_person_tracker.cpython-310.pyc


BIN
dual_camera_system/__pycache__/video_lock.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/__init__.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/camera.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/detection.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/ocr.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/ptz.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/system.cpython-310.pyc


+ 12 - 0
dual_camera_system/config/coordinator.py

@@ -9,6 +9,18 @@ COORDINATOR_CONFIG = {
     'detection_interval': 1.0,
     'ptz_command_cooldown': 0.5,
     'ptz_position_threshold': 0.03,
+    
+    # 目标选择策略配置
+    'target_selection': {
+        'strategy': 'area',           # 策略: 'area'(面积优先), 'confidence'(置信度优先), 'hybrid'(混合)
+        'area_weight': 0.6,           # 混合模式下面积权重
+        'confidence_weight': 0.4,     # 混合模式下置信度权重
+        'min_area_threshold': 5000,   # 最小面积阈值(像素²),小于此值的目标优先级降低
+        'prefer_center': True,        # 是否优先选择靠近画面中心的目标
+        'center_weight': 0.2,         # 中心位置权重
+        'switch_on_lost': True,       # 当前目标丢失时是否切换到次优目标
+        'stickiness': 0.3,            # 目标粘性(0-1),值越大越不容易切换目标
+    },
 }
 
 CALIBRATION_CONFIG = {

+ 1 - 1
dual_camera_system/config/detection.py

@@ -6,7 +6,7 @@
 DETECTION_CONFIG = {
     'target_classes': ['person', '人'],   # 检测目标类别 (支持中英文)
     'confidence_threshold': 0.5,     # 置信度阈值
-    'detection_interval': 0.1,       # 检测间隔(秒)
+    'detection_interval': 0.05,       # 检测间隔(秒)
     
     # 检测图片保存配置
     'save_detection_image': False,   # 是否保存检测到人的图片

+ 12 - 0
dual_camera_system/config/ptz.py

@@ -25,4 +25,16 @@ PTZ_CONFIG = {
     # tilt_flip: 如果俯仰方向相反,设为True(吸顶安装通常需要True)
     'pan_flip': True,
     'tilt_flip': False,              # 由 mount_type='ceiling' 时自动生效
+    
+    # 球机端人体检测与自动对焦配置
+    'enable_ptz_detection': True,    # 是否启用球机端人体检测
+    'auto_zoom': {
+        'enabled': True,             # 是否启用自动变焦
+        'target_size_ratio': 0.4,    # 目标人体占画面比例 (0.3-0.5)
+        'min_zoom': 3,               # 最小变倍
+        'max_zoom': 20,              # 最大变倍
+        'zoom_step': 2,              # 变焦调整步长
+        'center_threshold': 0.1,     # 居中阈值 (人体中心偏离画面中心的比例)
+        'max_adjust_attempts': 3,    # 最大调整次数
+    },
 }

+ 263 - 16
dual_camera_system/coordinator.py

@@ -13,10 +13,11 @@ from enum import Enum
 
 import numpy as np
 
-from config import COORDINATOR_CONFIG, SYSTEM_CONFIG
+from config import COORDINATOR_CONFIG, SYSTEM_CONFIG, PTZ_CONFIG, DETECTION_CONFIG
 from panorama_camera import PanoramaCamera, ObjectDetector, PersonTracker, DetectedObject
 from ptz_camera import PTZCamera, PTZController
 from ocr_recognizer import NumberDetector, PersonInfo
+from ptz_person_tracker import PTZPersonDetector, PTZAutoZoomController
 
 logger = logging.getLogger(__name__)
 
@@ -38,6 +39,160 @@ class TrackingTarget:
     last_update: float              # 最后更新时间
     person_info: Optional[PersonInfo] = None  # 人员信息
     priority: int = 0               # 优先级
+    area: int = 0                   # 目标面积(像素²)
+    confidence: float = 0.0         # 置信度
+    center_distance: float = 1.0    # 到画面中心的距离比例(0-1)
+    score: float = 0.0              # 综合得分
+
+
+class TargetSelector:
+    """
+    目标选择策略类
+    支持按面积、置信度、混合模式排序,支持优先级切换
+    """
+    
+    def __init__(self, config: Dict = None):
+        """
+        初始化目标选择器
+        Args:
+            config: 目标选择配置
+        """
+        self.config = config or {
+            'strategy': 'area',
+            'area_weight': 0.6,
+            'confidence_weight': 0.4,
+            'min_area_threshold': 5000,
+            'prefer_center': True,
+            'center_weight': 0.2,
+            'switch_on_lost': True,
+            'stickiness': 0.3,
+        }
+        self.current_target_id: Optional[int] = None
+        self.current_target_score: float = 0.0
+    
+    def calculate_score(self, target: TrackingTarget, frame_size: Tuple[int, int] = None) -> float:
+        """
+        计算目标综合得分
+        Args:
+            target: 跟踪目标
+            frame_size: 帧尺寸(w, h),用于计算中心距离
+        Returns:
+            综合得分(0-1)
+        """
+        strategy = self.config.get('strategy', 'area')
+        area_weight = self.config.get('area_weight', 0.6)
+        conf_weight = self.config.get('confidence_weight', 0.4)
+        min_area = self.config.get('min_area_threshold', 5000)
+        prefer_center = self.config.get('prefer_center', False)
+        center_weight = self.config.get('center_weight', 0.2)
+        
+        # 归一化面积得分 (对数缩放,避免大目标得分过高)
+        import math
+        area_score = min(1.0, math.log10(max(target.area, 1)) / 5.0)  # 100000像素² ≈ 1.0
+        
+        # 小面积惩罚
+        if target.area < min_area:
+            area_score *= 0.5
+        
+        # 置信度得分直接使用
+        conf_score = target.confidence
+        
+        # 中心距离得分 (距离中心越近得分越高)
+        center_score = 1.0 - target.center_distance
+        
+        # 根据策略计算综合得分
+        if strategy == 'area':
+            score = area_score * 0.8 + conf_score * 0.2
+        elif strategy == 'confidence':
+            score = conf_score * 0.8 + area_score * 0.2
+        else:  # hybrid
+            score = area_score * area_weight + conf_score * conf_weight
+        
+        # 加入中心距离权重
+        if prefer_center:
+            score = score * (1 - center_weight) + center_score * center_weight
+        
+        return score
+    
+    def select_target(self, targets: Dict[int, TrackingTarget], 
+                       frame_size: Tuple[int, int] = None) -> Optional[TrackingTarget]:
+        """
+        从多个目标中选择最优目标
+        Args:
+            targets: 目标字典 {track_id: TrackingTarget}
+            frame_size: 帧尺寸
+        Returns:
+            最优目标
+        """
+        if not targets:
+            self.current_target_id = None
+            return None
+        
+        stickiness = self.config.get('stickiness', 0.3)
+        switch_on_lost = self.config.get('switch_on_lost', True)
+        
+        # 计算所有目标得分
+        scored_targets = []
+        for track_id, target in targets.items():
+            target.score = self.calculate_score(target, frame_size)
+            scored_targets.append((track_id, target, target.score))
+        
+        # 按得分排序
+        scored_targets.sort(key=lambda x: x[2], reverse=True)
+        
+        # 检查当前目标是否仍在列表中
+        if self.current_target_id is not None:
+            current_exists = self.current_target_id in targets
+            if current_exists:
+                # 应用粘性:当前目标得分需要显著低于最优目标才切换
+                best_id, best_target, best_score = scored_targets[0]
+                current_target = targets[self.current_target_id]
+                
+                # 粘性阈值: 当前目标得分 > 最优得分 * (1 - stickiness) 时保持
+                stickiness_threshold = best_score * (1 - stickiness)
+                if current_target.score > stickiness_threshold:
+                    return current_target
+        
+        
+        # 选择得分最高的目标
+        best_id, best_target, best_score = scored_targets[0]
+        self.current_target_id = best_id
+        self.current_target_score = best_score
+        
+        logger.debug(
+            f"[目标选择] 选择目标ID={best_id} 得分={best_score:.3f} "
+            f"面积={best_target.area} 置信度={best_target.confidence:.2f}"
+        )
+        
+        return best_target
+    
+    def get_sorted_targets(self, targets: Dict[int, TrackingTarget],
+                            frame_size: Tuple[int, int] = None) -> List[Tuple[TrackingTarget, float]]:
+        """
+        获取按得分排序的目标列表
+        Args:
+            targets: 目标字典
+            frame_size: 帧尺寸
+        Returns:
+            排序后的目标列表 [(target, score), ...]
+        """
+        scored = []
+        for target in targets.values():
+            target.score = self.calculate_score(target, frame_size)
+            scored.append((target, target.score))
+        
+        scored.sort(key=lambda x: x[1], reverse=True)
+        return scored
+    
+    def set_strategy(self, strategy: str):
+        """设置选择策略"""
+        self.config['strategy'] = strategy
+        logger.info(f"[目标选择] 策略已切换为: {strategy}")
+    
+    def set_stickiness(self, stickiness: float):
+        """设置目标粘性"""
+        self.config['stickiness'] = max(0.0, min(1.0, stickiness))
+        logger.info(f"[目标选择] 粘性已设置为: {self.config['stickiness']}")
 
 
 class Coordinator:
@@ -75,6 +230,12 @@ class Coordinator:
         self.enable_detection = SYSTEM_CONFIG.get('enable_detection', True)
         self.enable_ocr = SYSTEM_CONFIG.get('enable_ocr', True)
         
+        # 球机端人体检测与自动对焦
+        self.enable_ptz_detection = PTZ_CONFIG.get('enable_ptz_detection', False)
+        self.auto_zoom_config = PTZ_CONFIG.get('auto_zoom', {})
+        self.ptz_detector = None
+        self.auto_zoom_controller = None
+        
         # 跟踪器
         self.tracker = PersonTracker()
         
@@ -107,6 +268,11 @@ class Coordinator:
         self.last_ptz_position = None
         self.ptz_position_threshold = self.config.get('ptz_position_threshold', 0.03)
         
+        # 目标选择器
+        self.target_selector = TargetSelector(
+            self.config.get('target_selection', {})
+        )
+        
         # 结果队列
         self.result_queue = queue.Queue()
         
@@ -292,6 +458,8 @@ class Coordinator:
                                   frame_size: Tuple[int, int]):
         """更新跟踪目标"""
         current_time = time.time()
+        frame_w, frame_h = frame_size
+        center_x, center_y = frame_w / 2, frame_h / 2
         
         with self.targets_lock:
             # 更新现有目标
@@ -299,21 +467,36 @@ class Coordinator:
                 if det.track_id is None:
                     continue
                 
-                x_ratio = det.center[0] / frame_size[0]
-                y_ratio = det.center[1] / frame_size[1]
+                x_ratio = det.center[0] / frame_w
+                y_ratio = det.center[1] / frame_h
+                
+                # 计算面积
+                _, _, width, height = det.bbox
+                area = width * height
+                
+                # 计算到画面中心的距离比例
+                dx = abs(det.center[0] - center_x) / center_x
+                dy = abs(det.center[1] - center_y) / center_y
+                center_distance = (dx + dy) / 2  # 归一化到0-1
                 
                 if det.track_id in self.tracking_targets:
                     # 更新位置
                     target = self.tracking_targets[det.track_id]
                     target.position = (x_ratio, y_ratio)
                     target.last_update = current_time
+                    target.area = area
+                    target.confidence = det.confidence
+                    target.center_distance = center_distance
                 else:
                     # 新目标
                     if len(self.tracking_targets) < self.config['max_tracking_targets']:
                         self.tracking_targets[det.track_id] = TrackingTarget(
                             track_id=det.track_id,
                             position=(x_ratio, y_ratio),
-                            last_update=current_time
+                            last_update=current_time,
+                            area=area,
+                            confidence=det.confidence,
+                            center_distance=center_distance
                         )
     
     def _process_detections(self, detections: List[DetectedObject],
@@ -331,12 +514,10 @@ class Coordinator:
                 self.current_target = None
                 return
             
-            # 选择优先级最高的目标(这里选择最新的)
-            if self.current_target is None or \
-               self.current_target.track_id not in self.tracking_targets:
-                # 选择一个新目标
-                target_id = list(self.tracking_targets.keys())[0]
-                self.current_target = self.tracking_targets[target_id]
+            # 使用目标选择器选择最优目标
+            self.current_target = self.target_selector.select_target(
+                self.tracking_targets, frame_size
+            )
         
         if self.current_target:
             # 移动球机到目标位置 (仅在 PTZ 跟踪启用时)
@@ -603,6 +784,15 @@ class AsyncCoordinator(Coordinator):
                 print("连接球机失败")
                 self.panorama.disconnect()
                 return False
+            
+            # 启动球机RTSP流(用于球机端人体检测)
+            if self.enable_ptz_detection:
+                if not self.ptz.start_stream_rtsp():
+                    print("球机RTSP流启动失败,禁用球机端检测功能")
+                    self.enable_ptz_detection = False
+                else:
+                    # 初始化球机端人体检测器
+                    self._init_ptz_detector()
         else:
             print("PTZ球机功能已禁用")
         
@@ -738,10 +928,12 @@ class AsyncCoordinator(Coordinator):
                             # tracked 是 DetectedObject,使用 center 计算位置
                             x_ratio = t.center[0] / frame_size[0]
                             y_ratio = t.center[1] / frame_size[1]
+                            _, _, w, h = t.bbox
+                            area = w * h
                             logger.info(
                                 f"[检测] ✓ 目标ID={t.track_id} "
                                 f"位置=({x_ratio:.3f}, {y_ratio:.3f}) "
-                                f"置信度={t.confidence:.2f}"
+                                f"面积={area} 置信度={t.confidence:.2f}"
                             )
                     elif detections:
                         # 有检测但没跟踪上
@@ -775,6 +967,32 @@ class AsyncCoordinator(Coordinator):
                 logger.error(f"检测线程错误: {e}")
                 time.sleep(0.1)
     
+    def _init_ptz_detector(self):
+        """初始化球机端人体检测器"""
+        try:
+            model_path = DETECTION_CONFIG.get('model_path')
+            model_type = DETECTION_CONFIG.get('model_type', 'auto')
+            conf_threshold = DETECTION_CONFIG.get('person_threshold', 0.5)
+            
+            if model_path:
+                self.ptz_detector = PTZPersonDetector(
+                    model_path=model_path,
+                    model_type=model_type,
+                    confidence_threshold=conf_threshold
+                )
+                self.auto_zoom_controller = PTZAutoZoomController(
+                    ptz_camera=self.ptz,
+                    detector=self.ptz_detector,
+                    config=self.auto_zoom_config
+                )
+                print(f"[AsyncCoordinator] 球机端人体检测器初始化成功")
+            else:
+                print("[AsyncCoordinator] 未配置球机检测模型路径,禁用球机端检测")
+                self.enable_ptz_detection = False
+        except Exception as e:
+            print(f"[AsyncCoordinator] 球机端检测器初始化失败: {e}")
+            self.enable_ptz_detection = False
+
     def _ptz_worker(self):
         """PTZ控制线程:从队列接收命令并控制球机"""
         while self.running:
@@ -798,10 +1016,10 @@ class AsyncCoordinator(Coordinator):
                 self.current_target = None
                 return None
             
-            if self.current_target is None or \
-               self.current_target.track_id not in self.tracking_targets:
-                target_id = list(self.tracking_targets.keys())[0]
-                self.current_target = self.tracking_targets[target_id]
+            # 使用目标选择器选择最优目标
+            self.current_target = self.target_selector.select_target(
+                self.tracking_targets
+            )
             
             return self.current_target
     
@@ -904,11 +1122,40 @@ class AsyncCoordinator(Coordinator):
         
         if success:
             time.sleep(self.PTZ_CONFIRM_WAIT)
-            self._confirm_ptz_position(cmd.x_ratio, cmd.y_ratio)
+            
+            # 球机端人体检测与自动对焦
+            if self.enable_ptz_detection and self.auto_zoom_config.get('enabled', False):
+                self._auto_zoom_person(pan, tilt, zoom)
+            else:
+                self._confirm_ptz_position(cmd.x_ratio, cmd.y_ratio)
+            
             logger.info(f"[PTZ] 到位确认完成: pan={pan:.1f}° tilt={tilt:.1f}°")
         else:
             logger.warning(f"[PTZ] 命令执行失败: pan={pan:.1f}° tilt={tilt:.1f}° zoom={zoom}")
     
+    def _auto_zoom_person(self, initial_pan: float, initial_tilt: float, initial_zoom: int):
+        """
+        自动对焦人体
+        在球机画面中检测人体,自动调整zoom使人体居中且大小合适
+        """
+        if self.auto_zoom_controller is None:
+            return
+        
+        logger.info("[AutoZoom] 开始自动对焦...")
+        
+        try:
+            success, final_zoom = self.auto_zoom_controller.auto_focus_loop(
+                get_frame_func=self.ptz.get_frame,
+                max_attempts=self.auto_zoom_config.get('max_adjust_attempts', 3)
+            )
+            
+            if success:
+                logger.info(f"[AutoZoom] 自动对焦成功: zoom={final_zoom}")
+            else:
+                logger.warning("[AutoZoom] 自动对焦未能定位人体")
+        except Exception as e:
+            logger.error(f"[AutoZoom] 自动对焦异常: {e}")
+    
     def _confirm_ptz_position(self, x_ratio: float, y_ratio: float):
         """PTZ位置确认:读取球机帧验证目标是否可见"""
         if not hasattr(self.ptz, 'get_frame') or self.ptz.get_frame() is None:

+ 426 - 0
dual_camera_system/ptz_person_tracker.py

@@ -0,0 +1,426 @@
+"""
+球机端人体检测与自动对焦模块
+在球机移动到位后,检测人体并自动调整焦距,使人体居中并占据合适比例
+"""
+
+import time
+import cv2
+import numpy as np
+from typing import Optional, List, Tuple, Dict
+from dataclasses import dataclass
+
+try:
+    from ultralytics import YOLO
+    HAS_YOLO = True
+except ImportError:
+    HAS_YOLO = False
+
+try:
+    from rknnlite.api import RKNNLite
+    HAS_RKNN = True
+except ImportError:
+    HAS_RKNN = False
+
+
+@dataclass
+class DetectedPerson:
+    """检测到的人体"""
+    bbox: Tuple[int, int, int, int]  # (x1, y1, x2, y2)
+    center: Tuple[float, float]       # 中心点 (x, y)
+    width: int
+    height: int
+    confidence: float
+    
+    @property
+    def area(self) -> int:
+        return self.width * self.height
+    
+    @property
+    def size_ratio(self) -> float:
+        """人体宽高比"""
+        return self.width / self.height if self.height > 0 else 0
+
+
+@dataclass
+class ZoomAdjustResult:
+    """变焦调整结果"""
+    success: bool
+    new_zoom: int
+    pan_adjust: float      # pan调整量(度)
+    tilt_adjust: float     # tilt调整量(度)
+    person_detected: bool
+    person_centered: bool
+    message: str
+
+
+class PTZPersonDetector:
+    """
+    球机端人体检测器
+    支持YOLO(PT)和RKNN两种模型格式
+    """
+    
+    def __init__(self, model_path: str = None, model_type: str = 'auto',
+                 confidence_threshold: float = 0.5, use_gpu: bool = False):
+        """
+        初始化检测器
+        Args:
+            model_path: 模型路径
+            model_type: 模型类型 ('yolo', 'rknn', 'auto')
+            confidence_threshold: 置信度阈值
+            use_gpu: 是否使用GPU
+        """
+        self.model_path = model_path
+        self.model_type = model_type
+        self.confidence_threshold = confidence_threshold
+        self.use_gpu = use_gpu
+        self.model = None
+        self.person_class_id = 0  # YOLO默认person类别ID
+        
+        if model_path:
+            self._load_model(model_path, model_type)
+    
+    def _load_model(self, model_path: str, model_type: str = 'auto'):
+        """加载模型"""
+        if model_type == 'auto':
+            if model_path.endswith('.rknn'):
+                model_type = 'rknn'
+            elif model_path.endswith('.onnx'):
+                model_type = 'onnx'
+            else:
+                model_type = 'yolo'
+        
+        self.model_type = model_type
+        
+        if model_type == 'rknn':
+            self._load_rknn_model(model_path)
+        elif model_type == 'yolo':
+            self._load_yolo_model(model_path)
+        else:
+            print(f"[PTZPersonDetector] 不支持的模型类型: {model_type}")
+    
+    def _load_yolo_model(self, model_path: str):
+        """加载YOLO模型"""
+        if not HAS_YOLO:
+            print("[PTZPersonDetector] ultralytics未安装,无法使用YOLO模型")
+            return
+        
+        try:
+            self.model = YOLO(model_path)
+            print(f"[PTZPersonDetector] YOLO模型加载成功: {model_path}")
+        except Exception as e:
+            print(f"[PTZPersonDetector] YOLO模型加载失败: {e}")
+    
+    def _load_rknn_model(self, model_path: str):
+        """加载RKNN模型"""
+        if not HAS_RKNN:
+            print("[PTZPersonDetector] rknnlite未安装,无法使用RKNN模型")
+            return
+        
+        try:
+            self.model = RKNNLite()
+            ret = self.model.load_rknn(model_path)
+            if ret != 0:
+                raise RuntimeError(f"加载RKNN模型失败: ret={ret}")
+            
+            ret = self.model.init_runtime(target=None)  # 自动选择NPU核心
+            if ret != 0:
+                raise RuntimeError(f"初始化RKNN运行时失败: ret={ret}")
+            
+            # RKNN安全模型的person类别ID
+            self.person_class_id = 3
+            print(f"[PTZPersonDetector] RKNN模型加载成功: {model_path}")
+        except Exception as e:
+            print(f"[PTZPersonDetector] RKNN模型加载失败: {e}")
+            self.model = None
+    
+    def detect(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """
+        检测人体
+        Args:
+            frame: BGR图像
+        Returns:
+            检测到的人体列表
+        """
+        if self.model is None:
+            return []
+        
+        if self.model_type == 'rknn':
+            return self._detect_rknn(frame)
+        elif self.model_type == 'yolo':
+            return self._detect_yolo(frame)
+        
+        return []
+    
+    def _detect_yolo(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """YOLO检测"""
+        persons = []
+        
+        try:
+            results = self.model(frame, verbose=False)
+            
+            for r in results:
+                if r.boxes is None:
+                    continue
+                
+                for box in r.boxes:
+                    cls_id = int(box.cls[0])
+                    if cls_id != self.person_class_id:
+                        continue
+                    
+                    conf = float(box.conf[0])
+                    if conf < self.confidence_threshold:
+                        continue
+                    
+                    x1, y1, x2, y2 = map(int, box.xyxy[0])
+                    center_x = (x1 + x2) / 2
+                    center_y = (y1 + y2) / 2
+                    width = x2 - x1
+                    height = y2 - y1
+                    
+                    persons.append(DetectedPerson(
+                        bbox=(x1, y1, x2, y2),
+                        center=(center_x, center_y),
+                        width=width,
+                        height=height,
+                        confidence=conf
+                    ))
+        except Exception as e:
+            print(f"[PTZPersonDetector] YOLO检测错误: {e}")
+        
+        return persons
+    
+    def _detect_rknn(self, frame: np.ndarray) -> List[DetectedPerson]:
+        """RKNN检测"""
+        persons = []
+        
+        try:
+            # 预处理
+            img = cv2.resize(frame, (640, 640))
+            img = img.astype(np.float32) / 255.0
+            img = np.expand_dims(img, 0)
+            
+            # 推理
+            outputs = self.model.inference(inputs=[img])
+            
+            # 后处理 (YOLO格式输出)
+            # outputs shape: [1, 84, 8400] 或类似
+            if outputs is None or len(outputs) == 0:
+                return []
+            
+            output = outputs[0]
+            
+            # 解析检测结果
+            h, w = frame.shape[:2]
+            
+            for i in range(output.shape[-1]):
+                data = output[0, :, i]
+                
+                # 获取类别和置信度
+                class_scores = data[4:]
+                class_id = np.argmax(class_scores)
+                confidence = class_scores[class_id]
+                
+                if confidence < self.confidence_threshold:
+                    continue
+                
+                if class_id != self.person_class_id:
+                    continue
+                
+                # 获取边界框
+                cx, cy, bw, bh = data[:4]
+                
+                # 转换为原图坐标
+                x1 = int((cx - bw/2) * w / 640)
+                y1 = int((cy - bh/2) * h / 640)
+                x2 = int((cx + bw/2) * w / 640)
+                y2 = int((cy + bh/2) * h / 640)
+                
+                persons.append(DetectedPerson(
+                    bbox=(x1, y1, x2, y2),
+                    center=((x1+x2)/2, (y1+y2)/2),
+                    width=x2-x1,
+                    height=y2-y1,
+                    confidence=float(confidence)
+                ))
+        except Exception as e:
+            print(f"[PTZPersonDetector] RKNN检测错误: {e}")
+        
+        return persons
+    
+    def detect_largest_person(self, frame: np.ndarray) -> Optional[DetectedPerson]:
+        """检测最大的人体"""
+        persons = self.detect(frame)
+        if not persons:
+            return None
+        return max(persons, key=lambda p: p.area)
+
+
+class PTZAutoZoomController:
+    """
+    球机自动变焦控制器
+    根据检测到的人体大小和位置,自动调整PTZ角度和变焦
+    """
+    
+    def __init__(self, ptz_camera, detector: PTZPersonDetector = None, config: dict = None):
+        """
+        初始化控制器
+        Args:
+            ptz_camera: PTZCamera实例
+            detector: 人体检测器
+            config: 自动变焦配置
+        """
+        self.ptz = ptz_camera
+        self.detector = detector
+        self.config = config or {}
+        
+        # 默认配置
+        self.target_size_ratio = self.config.get('target_size_ratio', 0.4)
+        self.min_zoom = self.config.get('min_zoom', 3)
+        self.max_zoom = self.config.get('max_zoom', 20)
+        self.zoom_step = self.config.get('zoom_step', 2)
+        self.center_threshold = self.config.get('center_threshold', 0.1)
+        self.max_adjust_attempts = self.config.get('max_adjust_attempts', 3)
+    
+    def adjust_to_person(self, frame: np.ndarray, current_pan: float, 
+                         current_tilt: float, current_zoom: int) -> ZoomAdjustResult:
+        """
+        调整PTZ使检测到的人体居中并占据合适比例
+        
+        Args:
+            frame: 球机画面
+            current_pan: 当前pan角度
+            current_tilt: 当前tilt角度
+            current_zoom: 当前变倍
+            
+        Returns:
+            ZoomAdjustResult: 调整结果
+        """
+        if frame is None:
+            return ZoomAdjustResult(
+                success=False, new_zoom=current_zoom,
+                pan_adjust=0, tilt_adjust=0,
+                person_detected=False, person_centered=False,
+                message="无法获取球机画面"
+            )
+        
+        # 检测人体
+        person = self.detector.detect_largest_person(frame) if self.detector else None
+        
+        if person is None:
+            return ZoomAdjustResult(
+                success=False, new_zoom=current_zoom,
+                pan_adjust=0, tilt_adjust=0,
+                person_detected=False, person_centered=False,
+                message="球机画面中未检测到人体"
+            )
+        
+        h, w = frame.shape[:2]
+        
+        # 计算人体中心偏离画面中心的程度
+        frame_center_x = w / 2
+        frame_center_y = h / 2
+        
+        offset_x = (person.center[0] - frame_center_x) / w  # -0.5 ~ 0.5
+        offset_y = (person.center[1] - frame_center_y) / h  # -0.5 ~ 0.5
+        
+        # 计算人体占画面比例
+        person_size_ratio = max(person.width / w, person.height / h)
+        
+        print(f"[AutoZoom] 检测到人体: 中心=({person.center[0]:.0f}, {person.center[1]:.0f}), "
+              f"尺寸={person.width}x{person.height}, 占比={person.size_ratio:.2f}")
+        print(f"[AutoZoom] 偏移: x={offset_x:.3f}, y={offset_y:.3f}")
+        
+        # 判断是否居中
+        is_centered = abs(offset_x) < self.center_threshold and abs(offset_y) < self.center_threshold
+        
+        # 计算PTZ角度调整量 (根据视场角估算)
+        # 假设当前zoom下的水平视场角约 60/zoom 度
+        fov_per_pixel = (60.0 / current_zoom) / w
+        pan_adjust = -offset_x * 60.0 / current_zoom  # 简化计算
+        tilt_adjust = -offset_y * 45.0 / current_zoom
+        
+        # 计算新的zoom
+        if person_size_ratio < self.target_size_ratio * 0.8:
+            # 人体太小,放大
+            zoom_factor = self.target_size_ratio / person_size_ratio
+            new_zoom = min(int(current_zoom * zoom_factor), self.max_zoom)
+        elif person_size_ratio > self.target_size_ratio * 1.2:
+            # 人体太大,缩小
+            zoom_factor = self.target_size_ratio / person_size_ratio
+            new_zoom = max(int(current_zoom * zoom_factor), self.min_zoom)
+        else:
+            # 大小合适
+            new_zoom = current_zoom
+        
+        # 限制调整量
+        if is_centered:
+            pan_adjust = 0
+            tilt_adjust = 0
+        
+        return ZoomAdjustResult(
+            success=True,
+            new_zoom=new_zoom,
+            pan_adjust=pan_adjust,
+            tilt_adjust=tilt_adjust,
+            person_detected=True,
+            person_centered=is_centered,
+            message=f"人体居中={is_centered}, zoom调整={current_zoom}→{new_zoom}"
+        )
+    
+    def auto_focus_loop(self, get_frame_func, max_attempts: int = None) -> Tuple[bool, int]:
+        """
+        自动对焦循环
+        持续调整PTZ直到人体居中且大小合适
+        
+        Args:
+            get_frame_func: 获取球机画面的函数
+            max_attempts: 最大调整次数
+            
+        Returns:
+            (是否成功, 最终zoom)
+        """
+        max_attempts = max_attempts or self.max_adjust_attempts
+        
+        current_pos = self.ptz.get_current_position()
+        current_pan = current_pos.pan
+        current_tilt = current_pos.tilt
+        current_zoom = current_pos.zoom
+        
+        for attempt in range(max_attempts):
+            print(f"[AutoZoom] 调整轮次 {attempt + 1}/{max_attempts}")
+            
+            # 等待画面稳定
+            time.sleep(0.3)
+            
+            # 获取球机画面
+            frame = get_frame_func()
+            
+            # 分析并计算调整
+            result = self.adjust_to_person(frame, current_pan, current_tilt, current_zoom)
+            
+            if not result.person_detected:
+                print(f"[AutoZoom] 未检测到人体,停止调整")
+                return False, current_zoom
+            
+            if result.person_centered and result.new_zoom == current_zoom:
+                print(f"[AutoZoom] 人体已居中且大小合适,调整完成")
+                return True, current_zoom
+            
+            # 执行调整
+            if result.pan_adjust != 0 or result.tilt_adjust != 0:
+                new_pan = current_pan + result.pan_adjust
+                new_tilt = current_tilt + result.tilt_adjust
+                print(f"[AutoZoom] 调整角度: pan {current_pan:.1f}→{new_pan:.1f}, "
+                      f"tilt {current_tilt:.1f}→{new_tilt:.1f}")
+                self.ptz.goto_exact_position(new_pan, new_tilt, result.new_zoom)
+                current_pan = new_pan
+                current_tilt = new_tilt
+            elif result.new_zoom != current_zoom:
+                print(f"[AutoZoom] 调整变焦: {current_zoom}→{result.new_zoom}")
+                self.ptz.goto_exact_position(current_pan, current_tilt, result.new_zoom)
+            
+            current_zoom = result.new_zoom
+        
+        print(f"[AutoZoom] 达到最大调整次数,当前zoom={current_zoom}")
+        return True, current_zoom
+