Explorar el Código

chore: 清理废弃代码与配置,优化项目结构

1. 移除OCR相关配置、代码和依赖
2. 删除语音播报相关模块和配置
3. 优化OSS上传器、第三方推送器的线程安全实现
4. 简化摄像头组和联动控制器的初始化参数
5. 新增系统服务配置文件和OSS清理脚本
6. 移除废弃的本地快照保存逻辑
7. 清理测试代码和无用配置项
wenhongquan hace 2 semanas
padre
commit
46c8a87392
Se han modificado 46 ficheros con 1719 adiciones y 2721 borrados
  1. 14 1
      .claude/settings.local.json
  2. BIN
      dual_camera_system/__pycache__/calibration.cpython-310.pyc
  3. BIN
      dual_camera_system/__pycache__/camera_group.cpython-310.pyc
  4. BIN
      dual_camera_system/__pycache__/coordinator.cpython-310.pyc
  5. BIN
      dual_camera_system/__pycache__/event_pusher.cpython-310.pyc
  6. BIN
      dual_camera_system/__pycache__/multi_group_system.cpython-310.pyc
  7. BIN
      dual_camera_system/__pycache__/paired_image_saver.cpython-310.pyc
  8. BIN
      dual_camera_system/__pycache__/panorama_camera.cpython-310.pyc
  9. BIN
      dual_camera_system/__pycache__/ptz_camera.cpython-310.pyc
  10. BIN
      dual_camera_system/__pycache__/ptz_person_tracker.cpython-310.pyc
  11. BIN
      dual_camera_system/__pycache__/safety_coordinator.cpython-310.pyc
  12. BIN
      dual_camera_system/__pycache__/safety_detector.cpython-310.pyc
  13. BIN
      dual_camera_system/__pycache__/third_party_pusher.cpython-310.pyc
  14. 5 13
      dual_camera_system/camera_group.py
  15. 1 7
      dual_camera_system/config.py
  16. 0 6
      dual_camera_system/config/__init__.py
  17. BIN
      dual_camera_system/config/__pycache__/__init__.cpython-310.pyc
  18. BIN
      dual_camera_system/config/__pycache__/camera.cpython-310.pyc
  19. BIN
      dual_camera_system/config/__pycache__/coordinator.cpython-310.pyc
  20. BIN
      dual_camera_system/config/__pycache__/detection.cpython-310.pyc
  21. BIN
      dual_camera_system/config/__pycache__/device.cpython-310.pyc
  22. BIN
      dual_camera_system/config/__pycache__/oss.cpython-310.pyc
  23. BIN
      dual_camera_system/config/__pycache__/ptz.cpython-310.pyc
  24. BIN
      dual_camera_system/config/__pycache__/system.cpython-310.pyc
  25. 0 24
      dual_camera_system/config/ocr.py
  26. 10 13
      dual_camera_system/config/system.py
  27. 0 50
      dual_camera_system/config/voice.py
  28. 18 124
      dual_camera_system/coordinator.py
  29. 983 0
      dual_camera_system/docs/技术实现架构.md
  30. 6 4
      dual_camera_system/event_pusher.py
  31. 18 114
      dual_camera_system/llm_service.py
  32. 82 63
      dual_camera_system/main.py
  33. 1 17
      dual_camera_system/multi_group_system.py
  34. 0 648
      dual_camera_system/ocr_recognizer.py
  35. 13 9
      dual_camera_system/oss_uploader.py
  36. 77 48
      dual_camera_system/paired_image_saver.py
  37. 36 25
      dual_camera_system/ptz_camera.py
  38. 47 33
      dual_camera_system/ptz_person_tracker.py
  39. 12 73
      dual_camera_system/safety_coordinator.py
  40. 5 13
      dual_camera_system/safety_detector.py
  41. 0 822
      dual_camera_system/safety_main.py
  42. 238 0
      dual_camera_system/scripts/cleanup_oss.py
  43. 38 0
      dual_camera_system/scripts/dsh.service
  44. 13 12
      dual_camera_system/third_party_pusher.py
  45. 0 602
      dual_camera_system/voice_announcer.py
  46. 102 0
      test_oss_upload.py

+ 14 - 1
.claude/settings.local.json

@@ -16,7 +16,20 @@
       "Bash(python -c \"import py_compile; py_compile.compile\\('main.py', doraise=True\\)\")",
       "Bash(python -c \"import py_compile; py_compile.compile\\('main.py', doraise=True\\); py_compile.compile\\('coordinator.py', doraise=True\\); py_compile.compile\\('calibration.py', doraise=True\\)\")",
       "Bash(scp *)",
-      "Bash(python3 *)"
+      "Bash(python3 *)",
+      "Bash(git log *)",
+      "Bash(git check-ignore *)",
+      "Bash(git ls-tree *)",
+      "Bash(pip list *)",
+      "Bash(ruff check *)",
+      "Bash(mypy *)",
+      "Bash(black *)",
+      "Bash(awk '/^class / {class=$2} /def _get_clear_ptz_frame/ {print class\": \"$0}' coordinator.py)",
+      "Bash(awk '/^class / {class=$0} /def _update_tracking/ {print NR\": \"class\" -> \"$0}' coordinator.py)",
+      "Bash(awk '/^class / {class=$0} /with self._track_id_lock/ {print NR\": \"class\" -> \"$0}' coordinator.py)",
+      "Bash(python -c \"import config\")",
+      "Bash(python -c \"from coordinator import Coordinator, AsyncCoordinator, SequentialCoordinator; print\\('OK'\\)\")",
+      "Bash(python *)"
     ]
   }
 }

BIN
dual_camera_system/__pycache__/calibration.cpython-310.pyc


BIN
dual_camera_system/__pycache__/camera_group.cpython-310.pyc


BIN
dual_camera_system/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/__pycache__/event_pusher.cpython-310.pyc


BIN
dual_camera_system/__pycache__/multi_group_system.cpython-310.pyc


BIN
dual_camera_system/__pycache__/paired_image_saver.cpython-310.pyc


BIN
dual_camera_system/__pycache__/panorama_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_camera.cpython-310.pyc


BIN
dual_camera_system/__pycache__/ptz_person_tracker.cpython-310.pyc


BIN
dual_camera_system/__pycache__/safety_coordinator.cpython-310.pyc


BIN
dual_camera_system/__pycache__/safety_detector.cpython-310.pyc


BIN
dual_camera_system/__pycache__/third_party_pusher.cpython-310.pyc


+ 5 - 13
dual_camera_system/camera_group.py

@@ -13,7 +13,6 @@ import numpy as np
 
 from panorama_camera import PanoramaCamera, ObjectDetector, DetectedObject
 from ptz_camera import PTZCamera
-from ocr_recognizer import NumberDetector, PersonInfo
 from coordinator import SequentialCoordinator
 from calibration import CameraCalibrator, CalibrationManager
 from paired_image_saver import PairedImageSaver, get_paired_saver
@@ -46,20 +45,18 @@ class CameraGroup:
     - 配对图片保存器
     """
     
-    def __init__(self, 
+    def __init__(self,
                  group_config: Dict[str, Any],
                  sdk,
                  detector: ObjectDetector,
-                 number_detector: Optional[NumberDetector] = None,
                  shared_config: Optional[Dict[str, Any]] = None):
         """
         初始化摄像头组
-        
+
         Args:
             group_config: 组配置字典
             sdk: 大华SDK实例(共享)
             detector: 检测器实例(共享)
-            number_detector: 编号检测器实例(可选,共享)
             shared_config: 共享配置(校准配置、联动配置等)
         """
         self.group_id = group_config.get('group_id', 'unknown')
@@ -67,7 +64,6 @@ class CameraGroup:
         self.config = group_config
         self.sdk = sdk
         self.detector = detector
-        self.number_detector = number_detector
         self.shared_config = shared_config or {}
         
         # 组件实例
@@ -131,6 +127,7 @@ class CameraGroup:
             self.paired_saver = get_paired_saver(
                 base_dir=self.paired_image_dir,
                 time_window=5.0,
+                enable_oss=True,  # 启用 OSS 上传
                 device_config=device_config
             )
             
@@ -152,8 +149,7 @@ class CameraGroup:
         self.coordinator = SequentialCoordinator(
             self.panorama_camera,
             self.ptz_camera,
-            self.detector,
-            self.number_detector
+            self.detector
         )
         
         # 应用顺序模式配置
@@ -280,13 +276,9 @@ class CameraGroup:
         """设置回调函数"""
         def on_person_detected(person: DetectedObject, frame: np.ndarray):
             logger.info(f"[{self.group_id}] 检测到人体: 位置={person.center}, 置信度={person.confidence:.2f}")
-        
-        def on_number_recognized(person_info: PersonInfo):
-            logger.info(f"[{self.group_id}] 识别到编号: {person_info.number_text}")
-        
+
         if self.coordinator:
             self.coordinator.on_person_detected = on_person_detected
-            self.coordinator.on_number_recognized = on_number_recognized
     
     def start(self) -> bool:
         """

+ 1 - 7
dual_camera_system/config.py

@@ -15,10 +15,8 @@ from config.detection import (
     DETECTION_CONFIG, SAFETY_DETECTION_CONFIG,
 )
 from config.ptz import PTZ_CONFIG
-from config.ocr import OCR_CONFIG
 from config.coordinator import COORDINATOR_CONFIG, CALIBRATION_CONFIG
 from config.event import EVENT_PUSHER_CONFIG, EVENT_LISTENER_CONFIG
-from config.voice import TTS_CONFIG, AUDIO_PLAYER_CONFIG, VOICE_ANNOUNCER_CONFIG
 from config.llm import LLM_CONFIG, LLM_SAFETY_CONFIG
 from config.system import SYSTEM_CONFIG
 
@@ -32,16 +30,12 @@ __all__ = [
     'DETECTION_CONFIG', 'SAFETY_DETECTION_CONFIG',
     # PTZ
     'PTZ_CONFIG',
-    # OCR
-    'OCR_CONFIG',
     # 联动与校准
     'COORDINATOR_CONFIG', 'CALIBRATION_CONFIG',
     # 事件
     'EVENT_PUSHER_CONFIG', 'EVENT_LISTENER_CONFIG',
-    # 语音
-    'TTS_CONFIG', 'AUDIO_PLAYER_CONFIG', 'VOICE_ANNOUNCER_CONFIG',
     # LLM
     'LLM_CONFIG', 'LLM_SAFETY_CONFIG',
     # 系统
     'SYSTEM_CONFIG',
-]
+]

+ 0 - 6
dual_camera_system/config/__init__.py

@@ -11,10 +11,8 @@ from .detection import (
     DETECTION_CONFIG, SAFETY_DETECTION_CONFIG
 )
 from .ptz import PTZ_CONFIG
-from .ocr import OCR_CONFIG, SEGMENTATION_CONFIG
 from .coordinator import COORDINATOR_CONFIG, CALIBRATION_CONFIG
 from .event import EVENT_PUSHER_CONFIG, EVENT_LISTENER_CONFIG
-from .voice import TTS_CONFIG, AUDIO_PLAYER_CONFIG, VOICE_ANNOUNCER_CONFIG
 from .llm import LLM_CONFIG, LLM_SAFETY_CONFIG
 from .system import SYSTEM_CONFIG
 from .oss import S3_COMPATIBLE_CONFIG
@@ -33,14 +31,10 @@ __all__ = [
     'DETECTION_CONFIG', 'SAFETY_DETECTION_CONFIG',
     # PTZ
     'PTZ_CONFIG',
-    # OCR
-    'OCR_CONFIG', 'SEGMENTATION_CONFIG',
     # 联动与校准
     'COORDINATOR_CONFIG', 'CALIBRATION_CONFIG',
     # 事件
     'EVENT_PUSHER_CONFIG', 'EVENT_LISTENER_CONFIG',
-    # 语音
-    'TTS_CONFIG', 'AUDIO_PLAYER_CONFIG', 'VOICE_ANNOUNCER_CONFIG',
     # LLM
     'LLM_CONFIG', 'LLM_SAFETY_CONFIG',
     # 系统

BIN
dual_camera_system/config/__pycache__/__init__.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/camera.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/coordinator.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/detection.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/device.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/oss.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/ptz.cpython-310.pyc


BIN
dual_camera_system/config/__pycache__/system.cpython-310.pyc


+ 0 - 24
dual_camera_system/config/ocr.py

@@ -1,24 +0,0 @@
-"""
-OCR识别配置
-"""
-
-# OCR配置 - 使用llama-server API
-OCR_CONFIG = {
-    'use_llama_api': True,          # 使用llama-server API
-    'api_host': 'localhost',        # API服务器地址
-    'api_port': 8111,               # API端口
-    'model': 'PaddleOCR-VL-1.5-GGUF.gguf',  # 模型名称
-    'prompt': '请识别图片中的数字编号,只返回数字,不要其他内容',  # OCR提示词
-    'temperature': 0.3,             # 温度参数
-    'timeout': 30,                  # 超时时间(秒)
-}
-
-# 人体分割模型配置
-SEGMENTATION_CONFIG = {
-    # 模型路径 - 支持 RKNN 格式 (RK3588 平台)
-    'model_path': '/home/admin/dsh/testrk3588/yolov8n-seg.rknn',
-    'model_type': 'rknn',           # 模型类型: 'rknn'
-    'input_size': (640, 640),       # 模型输入尺寸
-    'conf_threshold': 0.5,          # 分割置信度阈值
-    'use_npu': True,                # 使用 NPU 加速
-}

+ 10 - 13
dual_camera_system/config/system.py

@@ -2,38 +2,35 @@
 系统配置
 """
 
-from .voice import TTS_CONFIG, AUDIO_PLAYER_CONFIG
 from .llm import LLM_CONFIG, LLM_SAFETY_CONFIG
 
 # 系统配置
 SYSTEM_CONFIG = {
     'name': '施工现场安全行为智能识别系统',
     'version': '2.0.0',
-    
+
     # === 功能开关 ===
     # 摄像头模块
     'enable_panorama_camera': True,      # 启用全景摄像头
     'enable_ptz_camera': True,           # 启用 PTZ 球机
-    
+
     # 检测模块
     'enable_detection': True,            # 启用人体检测 (YOLO)
     'enable_safety_detection': False,     # 启用安全检测 (安全帽/反光衣)
-    
+
     # 联动与校准
     'enable_calibration': True,          # 启用自动校准
     'enable_ptz_tracking': True,         # 启用 PTZ 跟踪联动
-    
-    # OCR 与大模型
-    'enable_ocr': False,                  # 启用 OCR 编号识别
+
+    # 大模型
     'enable_llm': False,                  # 启用大模型判断
-    
-    # 事件与播报
+
+    # 事件推送
     'enable_event_push': False,           # 启用事件推送
-    'enable_voice_announce': False,       # 启用语音播报
-    
+
     # === 工作模式 ===
-    'mode': 'safety',                    # 工作模式: 'safety'(安全检测), 'ocr'(编号识别)
-    
+    'mode': 'safety',                    # 工作模式: 'safety'(安全检测)
+
     # === 安全判断策略 ===
     'safety_strategy': 'hybrid',         # 'llm'(仅大模型), 'rule'(仅规则), 'hybrid'(混合)
 }

+ 0 - 50
dual_camera_system/config/voice.py

@@ -1,50 +0,0 @@
-"""
-语音播报配置
-"""
-
-# TTS 服务配置
-TTS_CONFIG = {
-    # 服务类型: 'api', 'edge-tts', 'piper', 'local'
-    'service_type': 'edge-tts',
-    
-    # Edge-TTS 配置 (推荐)
-    'edge_voice': 'zh-CN-XiaoxiaoNeural',  # 中文女声
-    
-    # API 配置 (如果使用 API 方式)
-    'api_url': '',
-    'api_key': '',
-    
-    # Piper 配置 (如果使用 Piper)
-    'piper_model': 'zh_CN-huayan-medium',
-    
-    # 本地命令 (如果使用本地命令)
-    'local_command': '',
-    
-    # 缓存配置
-    'cache_dir': '/tmp/tts_cache',
-    'cache_enabled': True,
-}
-
-# 音频播放器配置
-AUDIO_PLAYER_CONFIG = {
-    'player_command': 'auto',        # 'auto' 自动检测, 或指定: 'mpg123', 'aplay', 'ffplay'
-    'volume': 1.0,                   # 默认音量
-}
-
-# 语音播报配置
-VOICE_ANNOUNCER_CONFIG = {
-    'enabled': True,                 # 是否启用语音播报
-    
-    # 默认参数
-    'default_speed': 1.0,            # 默认语速
-    'default_volume': 1.0,           # 默认音量
-    'default_repeat': 1,             # 默认重复次数
-    
-    # 违规播报
-    'violation_repeat': 3,           # 违规播报重复次数
-    'violation_interval': 0.5,       # 违规播报间隔(秒)
-    
-    # TTS 和播放器配置 (引用)
-    'tts': TTS_CONFIG,
-    'player': AUDIO_PLAYER_CONFIG,
-}

+ 18 - 124
dual_camera_system/coordinator.py

@@ -18,9 +18,8 @@ import cv2
 from config import COORDINATOR_CONFIG, SYSTEM_CONFIG, PTZ_CONFIG, DETECTION_CONFIG
 from panorama_camera import PanoramaCamera, ObjectDetector, DetectedObject
 from ptz_camera import PTZCamera, PTZController
-from ocr_recognizer import NumberDetector, PersonInfo
 from ptz_person_tracker import PTZPersonDetector, PTZAutoZoomController
-from paired_image_saver import PairedImageSaver, get_paired_saver
+from paired_image_saver import PairedImageSaver, get_paired_saver, PersonInfo
 
 logger = logging.getLogger(__name__)
 
@@ -31,7 +30,6 @@ class TrackingState(Enum):
     SEARCHING = 1      # 搜索目标
     TRACKING = 2       # 跟踪中
     ZOOMING = 3        # 变焦中
-    OCR_PROCESSING = 4 # OCR处理中
 
 
 @dataclass
@@ -40,7 +38,7 @@ class TrackingTarget:
     track_id: int                    # 跟踪ID
     position: Tuple[float, float]    # 位置比例 (x_ratio, y_ratio)
     last_update: float              # 最后更新时间
-    person_info: Optional[PersonInfo] = None  # 人员信息
+    person_info: Optional[dict] = None  # 人员信息
     priority: int = 0               # 优先级
     area: int = 0                   # 目标面积(像素²)
     confidence: float = 0.0         # 置信度
@@ -204,10 +202,9 @@ class Coordinator:
     协调全景摄像头和球机实现联动抓拍
     """
     
-    def __init__(self, panorama_camera: PanoramaCamera, 
+    def __init__(self, panorama_camera: PanoramaCamera,
                  ptz_camera: PTZCamera,
                  detector: ObjectDetector = None,
-                 number_detector: NumberDetector = None,
                  calibrator = None):
         """
         初始化联动控制器
@@ -215,13 +212,11 @@ class Coordinator:
             panorama_camera: 全景摄像头
             ptz_camera: 球机
             detector: 物体检测器
-            number_detector: 编号检测器
             calibrator: 校准器 (用于坐标转换)
         """
         self.panorama = panorama_camera
         self.ptz = ptz_camera
         self.detector = detector
-        self.number_detector = number_detector
         self.calibrator = calibrator
         
         self.config = COORDINATOR_CONFIG
@@ -231,7 +226,6 @@ class Coordinator:
         self.enable_ptz_tracking = SYSTEM_CONFIG.get('enable_ptz_tracking', True)
         self.enable_calibration = SYSTEM_CONFIG.get('enable_calibration', True)
         self.enable_detection = SYSTEM_CONFIG.get('enable_detection', True)
-        self.enable_ocr = SYSTEM_CONFIG.get('enable_ocr', True)
         
         # 球机端人体检测与自动对焦
         self.enable_ptz_detection = PTZ_CONFIG.get('enable_ptz_detection', False)
@@ -252,7 +246,6 @@ class Coordinator:
         
         # 回调函数
         self.on_person_detected: Optional[Callable] = None
-        self.on_number_recognized: Optional[Callable] = None
         self.on_tracking_started: Optional[Callable] = None
         self.on_tracking_stopped: Optional[Callable] = None
         
@@ -263,10 +256,6 @@ class Coordinator:
         self._paused_event.set()  # 默认非暂停状态
         self.coordinator_thread = None
         
-        # OCR频率控制
-        self.last_ocr_time = 0
-        self.ocr_interval = 1.0  # OCR间隔(秒),避免过于频繁调用API
-        
         # PTZ优化 - 避免频繁发送相同位置的命令
         self.last_ptz_position = None
         self.ptz_position_threshold = self.config.get('ptz_position_threshold', 0.03)
@@ -279,12 +268,14 @@ class Coordinator:
         # 结果队列
         self.result_queue = queue.Queue()
         
+        # 跨帧跟踪:全局track_id计数器
+        self._next_track_id = 1
+        self._track_id_lock = threading.Lock()
+
         # 性能统计
         self.stats = {
             'frames_processed': 0,
             'persons_detected': 0,
-            'ocr_attempts': 0,
-            'ocr_success': 0,
             'start_time': None,
             'last_frame_time': None,
         }
@@ -401,8 +392,6 @@ class Coordinator:
                 print(f"处理帧数: {self.stats['frames_processed']}")
                 print(f"平均帧率: {fps:.1f} fps")
                 print(f"检测人体: {self.stats['persons_detected']}次")
-                print(f"OCR尝试: {self.stats['ocr_attempts']}次")
-                print(f"OCR成功: {self.stats['ocr_success']}次")
                 print("================\n")
     
     def get_stats(self) -> dict:
@@ -638,71 +627,6 @@ class Coordinator:
                     else:
                         self.ptz.track_target(x_ratio, y_ratio)
                     self.last_ptz_position = (x_ratio, y_ratio)
-            
-            # 执行OCR识别 (仅在 OCR 启用时)
-            if self.enable_ocr:
-                self._perform_ocr(frame, self.current_target)
-    
-    def _perform_ocr(self, frame: np.ndarray, target: TrackingTarget):
-        """执行OCR识别"""
-        if not self.enable_ocr or self.number_detector is None:
-            return
-        
-        # 频率控制 - 避免过于频繁调用OCR API
-        current_time = time.time()
-        if current_time - self.last_ocr_time < self.ocr_interval:
-            return
-        self.last_ocr_time = current_time
-        
-        # 更新OCR尝试统计
-        self._update_stats('ocr_attempts')
-        
-        # 计算人体边界框 (基于位置估算)
-        frame_h, frame_w = frame.shape[:2]
-        
-        # 人体占画面比例 (可配置,默认宽20%、高40%)
-        person_width_ratio = self.config.get('person_width_ratio', 0.2)
-        person_height_ratio = self.config.get('person_height_ratio', 0.4)
-        
-        person_width = int(frame_w * person_width_ratio)
-        person_height = int(frame_h * person_height_ratio)
-        
-        x_ratio, y_ratio = target.position
-        center_x = int(x_ratio * frame_w)
-        center_y = int(y_ratio * frame_h)
-        
-        # 计算边界框,确保不超出画面范围
-        x1 = max(0, center_x - person_width // 2)
-        y1 = max(0, center_y - person_height // 2)
-        x2 = min(frame_w, x1 + person_width)
-        y2 = min(frame_h, y1 + person_height)
-        
-        # 更新实际宽高 (可能因边界裁剪而变小)
-        actual_width = x2 - x1
-        actual_height = y2 - y1
-        
-        person_bbox = (x1, y1, actual_width, actual_height)
-        
-        # 检测编号
-        self._set_state(TrackingState.OCR_PROCESSING)
-        person_info = self.number_detector.detect_number(frame, person_bbox)
-        person_info.person_id = target.track_id
-        
-        # 更新OCR成功统计
-        if person_info.number_text:
-            self._update_stats('ocr_success')
-        
-        # 更新目标信息
-        with self.targets_lock:
-            if target.track_id in self.tracking_targets:
-                self.tracking_targets[target.track_id].person_info = person_info
-        
-        # 回调
-        if self.on_number_recognized and person_info.number_text:
-            self.on_number_recognized(person_info)
-        
-        # 放入结果队列
-        self.result_queue.put(person_info)
     
     def _cleanup_expired_targets(self):
         """清理过期目标"""
@@ -870,11 +794,14 @@ class AsyncCoordinator(Coordinator):
         # 上次PTZ命令时间(添加线程锁保护)
         self._last_ptz_time = 0.0
         self._last_ptz_time_lock = threading.Lock()
-        
-        # 跨帧跟踪:全局track_id计数器
-        self._next_track_id = 1
-        self._track_id_lock = threading.Lock()
-        
+
+        # 帧获取配置
+        self._frame_config = {
+            'wait_interval': PTZ_CONFIG.get('frame_wait_interval', 0.2),
+            'max_attempts': PTZ_CONFIG.get('frame_max_attempts', 8),
+            'min_clarity': PTZ_CONFIG.get('min_clarity', 200),
+        }
+
         # 配对图片保存器
         self._enable_paired_saving = DETECTION_CONFIG.get('enable_paired_saving', False)
         self._paired_saver: Optional[PairedImageSaver] = None
@@ -1737,12 +1664,12 @@ class SequentialCoordinator(AsyncCoordinator):
             'default_zoom': 1,              # 默认zoom(广角)
         }
 
-        # 帧获取配置
-        self._frame_config = {
+        # 帧获取配置(覆盖父类默认值)
+        self._frame_config.update({
             'wait_interval': ptz_capture_config.get('frame_wait_interval', 0.2),
             'max_attempts': ptz_capture_config.get('frame_max_attempts', 8),
             'min_clarity': ptz_capture_config.get('min_clarity', 200),
-        }
+        })
         
         # 覆盖父类的PTZ冷却时间(顺序模式下可以更短)
         self.PTZ_COMMAND_COOLDOWN = 0.1
@@ -2074,9 +2001,6 @@ class SequentialCoordinator(AsyncCoordinator):
                         (pan, tilt, final_zoom), ptz_frame_marked=ptz_frame_marked
                     )
 
-                # 保存到本地(无论是否启用配对保存)
-                self._save_local_snapshot(ptz_frame, current_idx, pan, tilt, final_zoom)
-
                 logger.info(f"[顺序模式] 目标 {current_idx + 1} 抓拍完成")
             else:
                 logger.warning(f"[顺序模式] 获取球机画面失败")
@@ -2127,37 +2051,7 @@ class SequentialCoordinator(AsyncCoordinator):
         with self.targets_lock:
             self.tracking_targets.clear()
             logger.info("[顺序模式] 已清空跟踪目标列表")
-    
-    def _save_local_snapshot(self, frame: np.ndarray, index: int,
-                              pan: float, tilt: float, zoom: int):
-        """保存本地快照,返回文件路径"""
-        try:
-            import os
-            from datetime import datetime
-
-            # 创建保存目录
-            save_dir = '/home/admin/dsh/captures'
-            os.makedirs(save_dir, exist_ok=True)
-
-            # 生成文件名 - 使用 PNG 无损格式
-            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-3]
-            filename = f"capture_{timestamp}_person{index}_p{int(pan)}_t{int(tilt)}_z{zoom}.png"
-            filepath = os.path.join(save_dir, filename)
 
-            # 保存图片 - 使用 PNG 无损格式
-            cv2.imwrite(filepath, frame)
-            logger.info(f"[顺序模式] 快照已保存: {filepath}")
-
-            # 记录到配对保存器,批次完成时删除
-            if self._paired_saver is not None and self._current_batch_id:
-                self._paired_saver.add_capture_path(self._current_batch_id, filepath)
-
-            return filepath
-
-        except Exception as e:
-            logger.error(f"[顺序模式] 保存快照失败: {e}")
-            return None
-    
     def set_capture_config(self, **kwargs):
         """设置抓拍配置"""
         self._capture_config.update(kwargs)

+ 983 - 0
dual_camera_system/docs/技术实现架构.md

@@ -0,0 +1,983 @@
+# 施工现场安全行为智能识别系统 v2.0 — 技术实现架构
+
+> 基于 Python 的施工现场安全行为智能识别系统,支持全景+球机双摄像头联动、YOLO11 实时检测、大模型安全分析、事件推送。
+
+---
+
+## 目录
+
+1. [系统概览](#1-系统概览)
+2. [架构分层与模块职责](#2-架构分层与模块职责)
+3. [核心工作流](#3-核心工作流)
+4. [模块详解](#4-模块详解)
+5. [数据流图](#5-数据流图)
+6. [部署架构](#6-部署架构)
+7. [模型量化与 RKNN 部署](#7-模型量化与-rknn-部署)
+8. [关键技术决策](#8-关键技术决策)
+9. [配置体系](#9-配置体系)
+10. [扩展设计](#10-扩展设计)
+
+---
+
+## 1. 系统概览
+
+### 1.1 系统上下文 (C4 Context)
+
+```mermaid
+graph TB
+    subgraph "系统边界"
+        DSS["双摄像头联动系统<br/>Dual-Camera System"]
+    end
+
+    PanoramaCamera["📷 全景摄像头<br/>(Panorama)"]
+    PTZCamera["📷 球机<br/>(PTZ / Dome)"]
+    
+    DSS -->|RTSP / SDK| PanoramaCamera
+    DSS -->|RTSP / SDK / PTZ| PTZCamera
+
+    subgraph "外部依赖"
+        LLM["🧠 llama-server<br/>Qwen2.5-VL-7B<br/>安全分析"]
+        BIZ["🏢 业务平台<br/>jtjai.device.wenhq.top:8583"]
+        YOLO["🎯 YOLO11<br/>Ultralytics"]
+    end
+
+    DSS -->|base64 image| LLM
+    DSS -->|HTTP POST| BIZ
+
+    subgraph "操作者"
+        Admin["👤 运维人员<br/>(交互命令行)"]
+    end
+
+    Admin -->|键盘命令| DSS
+```
+
+### 1.2 工作模式
+
+| 入口 | 核心功能 |
+|------|----------|
+| `safety_main.py` | 实时视频监控 → YOLO11 检测(安全帽/反光衣/人) → LLM/规则安全判断 → 事件推送 |
+
+---
+
+## 2. 架构分层与模块职责
+
+```mermaid
+graph TB
+    subgraph "Entry Layer 入口层"
+        Safety_Main["safety_main.py<br/>安全模式入口"]
+    end
+
+    subgraph "Orchestration Layer 编排层"
+        SafetyMonitor["SafetyMonitorSystem<br/>安全监控系统"]
+        SafetyCoord["SafetyCoordinator<br/>安全联动控制器"]
+    end
+
+    subgraph "Core Domain Layer 核心领域层"
+        Camera["SimpleCamera<br/>摄像头管理"]
+        ObjectDetector["ObjectDetector / LLMSafetyDetector<br/>YOLO11 安全检测"]
+        PTZCam["PTZCamera<br/>球机 PTZ 控制(可选)"]
+        Calibrator["CameraCalibrator<br/>坐标校准器"]
+    end
+
+    subgraph "Infrastructure Layer 基础设施层"
+        SDK["DahuaSDK<br/>大华SDK ctypes 封装"]
+        EventPusher["EventPusher<br/>事件推送"]
+        LLMService["llm_service.py<br/>大模型服务"]
+        ThirdParty["third_party_pusher.py<br/>第三方推送"]
+    end
+
+    subgraph "Config Layer 配置层"
+        Config["config/ 模块化配置"]
+    end
+
+    %% 编排关系
+    Safety_Main --> SafetyMonitor
+    Safety_Main --> SafetyCoord
+
+    %% 核心依赖
+    SafetyMonitor --> Camera
+    SafetyMonitor --> ObjectDetector
+    SafetyMonitor --> PTZCam
+    SafetyMonitor --> SDK
+
+    %% 基础设施
+    EventPusher -.->|HTTP| BIZ
+    LLMService -.->|HTTP| LLM
+
+    %% 配置
+    SafetyMonitor -.-> Config
+```
+
+### 2.1 各层职责
+
+| 层 | 职责 | 关键设计原则 |
+|----|------|-------------|
+| **入口层** | CLI 参数解析、信号处理、模式选择 | 统一配置体系 |
+| **编排层** | 组件初始化、生命周期管理、功能开关 | 先 YOLO 后 SDK(避免内存映射冲突) |
+| **核心领域层** | 摄像头控制、YOLO 安全检测推理、PTZ 控制、坐标校准 | 依赖倒置,面向接口 |
+| **基础设施层** | SDK 封装、网络通信、文件系统 | 函数可选绑定、自动降级 |
+| **配置层** | 按功能拆分的模块化配置 | 命令行参数可覆盖配置项 |
+
+---
+
+## 3. 核心工作流
+
+### 3.1 安全模式完整流程
+
+```mermaid
+sequenceDiagram
+    participant Main as safety_main.py
+    participant System as SafetyMonitorSystem
+    participant Camera as SimpleCamera
+    participant Detector as LLMSafetyDetector
+    participant Pusher as EventPusher
+    participant LLM as llama-server
+    participant BIZ as 业务平台
+
+    Main->>System: initialize(camera_source)
+    System->>Camera: connect()
+    System->>Detector: LLMSafetyDetector(yolo, llm_config)
+    System->>Pusher: EventPusher(config)
+    
+    Main->>System: start()
+    System->>Pusher: start()
+    System->>System: _detection_worker()
+
+    loop 每 0.5 秒
+        System->>Camera: get_frame()
+        Camera-->>System: frame
+        System->>Detector: detect(frame)
+        Detector->>Detector: YOLO11 inference (安全帽/反光衣/人)
+        Detector-->>System: [SafetyDetection, ...]
+        System->>Detector: check_safety(frame, detections)
+        
+        alt 规则模式 (rule)
+            Detector->>Detector: 规则判断: 人和安全帽/反光衣的 IoU 匹配
+        else LLM 模式 (llm)
+            System->>LLM: 发送裁剪后的人体图像
+            LLM-->>System: 安全状态分析结果
+        else 混合模式 (hybrid)
+            Detector->>Detector: 规则初步判断 → LLM 二次验证
+        end
+        
+        Detector-->>System: [PersonSafetyStatus, ...]
+
+        loop 每个违规
+            System->>Pusher: push_safety_violation(description, image, ...)
+            Pusher->>BIZ: POST /api/resource/oss/upload (图片)
+            BIZ-->>Pusher: image_url
+            Pusher->>BIZ: POST /api/system/event (事件)
+            BIZ-->>Pusher: ✓
+        end
+    end
+```
+
+### 3.2 校准流程
+
+```mermaid
+sequenceDiagram
+    participant System as DualCameraSystem
+    participant Coordinator as AsyncCoordinator
+    participant Calibrator as CameraCalibrator
+    participant Panorama as PanoramaCamera
+    participant PTZ as PTZCamera
+
+    System->>System: _auto_calibrate(force)
+    System->>Coordinator: pause_detection()
+    
+    System->>Panorama: connect()
+    System->>PTZ: connect()
+    System->>Panorama: start_stream_rtsp()
+    System->>PTZ: start_stream_rtsp()
+
+    Note over System: 等待视频流稳定 (最长 15s)
+
+    System->>Calibrator: CameraCalibrator(ptz, get_frame, ptz_capture)
+
+    Note over Calibrator: 阶段 1: 视野重叠区域发现
+    Calibrator->>PTZ: 按 (pan_step, tilt_step) 扫描
+    PTZ->>PTZ: goto_exact_position(pan, tilt, 1)
+    Note over PTZ: 等待 stabilize_time(2.0s)
+    Calibrator->>Panorama: get_frame() (移动前)
+    Calibrator->>PTZ: get_frame() (移动后)
+    Calibrator->>Panorama: 帧差法 → 运动区域中心
+    Calibrator->>Calibrator: SIFT/ORB 特征匹配 → 匹配点中心
+    Calibrator->>Calibrator: 加权融合 → 保存校准点
+
+    Note over Calibrator: 阶段 2: 建立坐标映射查找表
+    Calibrator->>Calibrator: 插值生成 (x_ratio, y_ratio) → (pan, tilt) 映射
+    Calibrator->>Calibrator: 保存 calibration.json
+    
+    Calibrator-->>System: CalibrationResult
+    alt 成功
+        System->>Coordinator: set_calibrator(calibrator)
+    else 失败
+        System->>System: 日志告警,回退到线性映射
+    end
+
+    System->>Coordinator: resume_detection()
+```
+
+---
+
+## 4. 模块详解
+
+### 4.1 联动控制器体系 (coordinator.py)
+
+联动控制器采用**继承+策略模式**,形成清晰的控制器家族:
+
+```mermaid
+graph TB
+    Coordinator["Coordinator<br/>联动控制器基类"] -->|继承| EventDriven["EventDrivenCoordinator<br/>事件驱动模式"]
+    Coordinator -->|继承| AsyncCoord["AsyncCoordinator<br/>异步双线程模式"]
+    AsyncCoord -->|继承| SeqCoord["SequentialCoordinator<br/>顺序抓拍模式"]
+
+    Coordinator -->|组合| TargetSelector["TargetSelector<br/>目标选择策略"]
+
+    subgraph "TargetSelector 策略"
+        TS_area["面积优先<br/>strategy='area'"]
+        TS_conf["置信度优先<br/>strategy='confidence'"]
+        TS_hybrid["混合模式<br/>strategy='hybrid'"]
+    end
+
+    TargetSelector --- TS_area
+    TargetSelector --- TS_conf
+    TargetSelector --- TS_hybrid
+
+    subgraph "TrackingState 状态机"
+        IDLE["IDLE 空闲"]
+        SEARCHING["SEARCHING<br/>搜索目标"]
+        TRACKING["TRACKING 跟踪中"]
+        ZOOMING["ZOOMING 变焦中"]
+        IDLE --> SEARCHING
+        SEARCHING --> TRACKING
+        TRACKING --> ZOOMING
+        ZOOMING --> IDLE
+    end
+```
+
+#### 三种控制器模式对比
+
+| 特性 | `Coordinator` (单线程) | `AsyncCoordinator` (异步) | `SequentialCoordinator` (顺序) |
+|------|----------------------|--------------------------|-------------------------------|
+| 线程模型 | 1 个工作线程 | 检测线程 + PTZ 线程 | 检测线程 + PTZ 线程 |
+| 通信方式 | 直接调用 | queue.Queue 命令队列 | queue.Queue + 状态锁 |
+| 目标处理 | 每次选最优目标 | 所有有效目标入队 | 批量捕获后逐个抓拍 |
+| 停顿 | 无 | 无 | PTZ 抓拍时检测暂停 |
+| 适用场景 | 简单联动 | 持续多目标跟踪 | 高质量抓拍 |
+
+#### AsyncCoordinator 关键设计
+
+```
+┌─────────────────────────────────────────────────────┐
+│                  AsyncCoordinator                     │
+│                                                       │
+│  ┌─────────────────────┐   ┌──────────────────────┐  │
+│  │  detection_worker   │   │    ptz_worker         │  │
+│  │  (检测线程)         │   │  (PTZ控制线程)        │  │
+│  │                     │   │                       │  │
+│  │  1. 读全景帧        │   │  1. 等待 PTZ 命令     │  │
+│  │  2. YOLO 推理       │───┼──> 2. 坐标变换        │  │
+│  │  3. 目标跟踪更新    │   │  3. goto_exact_position│  │
+│  │  4. 发 PTZ 命令     │   │  4. 等待到位(0.3s)    │  │
+│  │  5. 性能日志        │   │  5. 球机端自检测      │  │
+│  └─────────────────────┘   │  6. 自动变焦          │  │
+│                            │  7. 获取清晰帧        │  │
+│                            │  8. 保存配对图片      │  │
+│                            └──────────────────────┘  │
+│                                                       │
+│  ┌────────────────────────────────────────────────┐   │
+│  │  共享状态:                                      │   │
+│  │  - tracking_targets: Dict[int, TrackingTarget]  │   │
+│  │  - ptz_queue: Queue(maxsize=10)                │   │
+│  │  - state: TrackingState (带锁访问)              │   │
+│  │  - _paused_event: threading.Event (暂停同步)   │   │
+│  └────────────────────────────────────────────────┘   │
+└─────────────────────────────────────────────────────┘
+```
+
+#### 目标选择策略 (TargetSelector)
+
+```python
+# 支持三种选择策略
+strategies = {
+    'area':       # 面积优先 — 选择画面中最大目标(默认)
+    'confidence': # 置信度优先 — 选择检测最可靠目标
+    'hybrid':     # 混合模式 — 面积×权重 + 置信度×权重
+}
+
+# 核心特性
+- 粘性(stickiness): 当前目标得分需显著低于最优目标才切换,避免抖动
+- 中心偏好(prefer_center): 靠近画面中心的目标获得额外分数
+- 面积归一化: log10 对数缩放,避免大目标压倒小目标
+```
+
+### 4.2 大华 SDK 封装 (dahua_sdk.py)
+
+```mermaid
+graph TB
+    SDK["DahuaSDK"] -->|ctypes.CDLL| lib["libdhnetsdk.so"]
+    SDK --> Login["CLIENT_LoginWithHighLevelSecurity"]
+    SDK --> RealPlay["CLIENT_RealPlay / RealPlayEx"]
+    SDK --> PTZCtrl["CLIENT_DHPTZControlEx"]
+    SDK --> Snap["CLIENT_SnapPicture"]
+    SDK --> VideoCallback["CLIENT_SetVideoProcCallBack<br/>(可选绑定)"]
+
+    subgraph "类型映射 (Linux)"
+        DWORD["c_uint32 (4B)"]
+        LONG["c_int (4B)"]
+        LLONG["c_long (8B, LP64)"]
+        BOOL["c_int (非0=真, 0=假)"]
+    end
+
+    SDK -.-> DWORD
+    SDK -.-> LONG
+    SDK -.-> LLONG
+    SDK -.-> BOOL
+
+    subgraph "内存安全"
+        InitOrder["初始化顺序: YOLO → SDK"]
+        OptionalBind["可选函数: 缺失时设为 None"]
+        CBRef["回调引用: 防止 GC"]
+    end
+
+    SDK -.-> InitOrder
+    SDK -.-> OptionalBind
+    SDK -.-> CBRef
+```
+
+**关键设计决策:**
+
+1. **初始化顺序**:先加载 YOLO/PyTorch,再初始化大华 SDK。因为 `CLIENT_Init` 会修改进程内存映射,如果先于 PyTorch 加载会导致 segfault。
+2. **类型映射严格对齐**:Linux 上 `BOOL = int (4 bytes)` 而非 `c_bool (1 byte)`,结构体字段顺序和大小必须与 SDK 头文件完全一致。
+3. **可选函数绑定**:ARM64 版 SDK 缺少 `CLIENT_SetVideoProcCallBack` 等函数,用 `_bind_optional` 处理,缺失时设为 `None` 而非崩溃。
+4. **断线重连**:`CLIENT_Init` 注册断线回调,配合 `auto_reconnect=True` 重连逻辑。
+
+### 4.3 校准模块 (calibration.py)
+
+```mermaid
+graph TB
+    subgraph "校准方法"
+        Motion["运动检测法<br/>帧差定位球机移动区域"]
+        Feature["特征匹配法<br/>SIFT/ORB 匹配球机与全景画面"]
+    end
+
+    subgraph "融合逻辑"
+        Weighted["加权融合<br/>动态权重"]
+        Fallback["降级方案<br/>→ 角度估算"]
+    end
+
+    subgraph "校准输出"
+        LUT["校准查找表<br/>[(x_ratio, y_ratio) → (pan, tilt)]"]
+        Interp["双线性插值<br/>非网格点插值"]
+    end
+
+    Motion --> Weighted
+    Feature --> Weighted
+    Weighted -->|成功| LUT
+    Weighted -->|失败| Fallback
+
+    LUT --> Interp
+```
+
+**视野重叠发现机制:**
+
+校准时球机会按 `pan_step=20°, tilt_step=15°` 在 `pan_range=(0,360), tilt_range=(-20,50)` 范围内逐格扫描,在每个位置抓拍并与全景画面做特征匹配,自动发现两台摄像头的视野重叠区域。避免球机指向无重叠区域导致校准失败。
+
+### 4.4 安全检测器 (safety_detector.py)
+
+三种判断策略(`config/system.py → safety_strategy`):
+
+| 策略 | 描述 | 延迟 | 准确率 |
+|------|------|------|--------|
+| `rule` | 规则判断:人和安全帽/反光衣的 IoU 匹配 | 低 | 中等 |
+| `llm` | 仅大模型:发送裁剪图像给 llama-server 分析 | 高 | 高 |
+| `hybrid` | 混合模式:规则初筛 → LLM 二次验证 | 中等 | 最高 |
+
+```mermaid
+graph LR
+    Frame["全景帧"] --> YOLO["YOLO11<br/>推理"]
+    YOLO -->|person class| Person["人员检测"]
+    YOLO -->|hat class| Hat["安全帽检测"]
+    YOLO -->|reflective class| Reflective["反光衣检测"]
+
+    Person --> Rule["规则引擎<br/>IoU匹配"]
+
+    Hat --> Rule
+    Reflective --> Rule
+
+    Rule -->|有违规嫌疑| LLM["LLM 二次验证<br/>(hybrid模式)"]
+    Rule -->|明确合规| OK["✓ 合规"]
+    Rule -->|明确违规| Violation["⚠️ 违规"]
+
+    LLM -->|确认违规| Violation
+    LLM -->|确认合规| OK
+
+    Violation --> EventPusher["事件推送"]
+```
+
+**类别映射(YOLO11 安全模型):** `0=安全帽, 3=人, 4=反光衣`
+
+### 4.5 事件推送
+
+```mermaid
+sequenceDiagram
+    participant System as SafetyMonitorSystem
+    participant Pusher as EventPusher
+    participant oSS as OSS Uploader
+    participant BIZ as 业务平台
+
+    System->>Pusher: push_safety_violation(desc, image)
+
+    Pusher->>Pusher: 频率控制 (upload_interval=2.0s)
+    
+    Pusher->>oSS: POST /api/resource/oss/upload
+    oSS->>BIZ: 图片二进制
+    BIZ-->>oSS: {url: "https://...jpg"}
+    oSS-->>Pusher: image_url
+
+    Pusher->>BIZ: POST /api/system/event
+    Note over Pusher, BIZ: {eventType, description, imageUrl, trackId, timestamp}
+
+    BIZ-->>Pusher: 200 OK
+
+    alt 推送失败
+        Pusher->>Pusher: 重试 (retry_count=3, retry_delay=1.0s)
+    end
+```
+
+### 4.6 多组摄像头并行管理
+
+```mermaid
+graph TB
+    subgraph "MultiGroupSystem"
+        SDK_Shared["SDK 共享实例"]
+        Detector_Shared["Detector 共享实例"]
+        Groups["摄像头组列表"]
+    end
+
+    subgraph G1["CameraGroup #1"]
+        P1["全景 #1"]
+        Z1["球机 #1"]
+        C1["校准器 #1"]
+        CO1["协调器 #1"]
+        PS1["配对保存器 #1"]
+    end
+
+    subgraph G2["CameraGroup #2"]
+        P2["全景 #2"]
+        Z2["球机 #2"]
+        C2["校准器 #2"]
+        CO2["协调器 #2"]
+        PS2["配对保存器 #2"]
+    end
+
+    Groups --> G1
+    Groups --> G2
+
+    SDK_Shared --> P1
+    SDK_Shared --> Z1
+    SDK_Shared --> P2
+    SDK_Shared --> Z2
+
+    Detector_Shared --> CO1
+    Detector_Shared --> CO2
+```
+
+每组(全景+球机)在独立线程中运行,共享 SDK 实例(只需初始化一次)和 YOLO 检测器(模型只加载一次)。
+
+---
+
+## 5. 数据流图
+
+### 5.0 图例说明
+
+```mermaid
+graph LR
+    subgraph "图例"
+        A["实体/模块"] -->|"数据流<br/>箭头方向=流向"| B["另一实体"]
+        C[(数据库/存储)]
+        D{"判断/分流"}
+        E["外部服务"]
+    end
+```
+
+### 5.1 安全模式数据流
+
+```mermaid
+graph TB
+    Camera["摄像头源<br/>(索引/RTSP/视频)"] -->|帧| Frame["当前帧"]
+
+    Frame -->|每 0.5s| YOLO["YOLO11 安全推理"]
+    YOLO --> dets["检测列表<br/>安全帽/人/反光衣"]
+
+    dets --> SafetyCheck{"安全判断"}
+    SafetyCheck -->|规则模式| Rule["IoU 匹配<br/>人和 PPE 重合度"]
+    SafetyCheck -->|LLM 模式| LLM["llama-server<br/>图像分析"]
+    SafetyCheck -->|混合模式| Hybrid["规则初筛 → LLM 验证"]
+
+    Rule --> status["PersonSafetyStatus"]
+    LLM --> status
+    Hybrid --> status
+
+    status -->|is_violation| Violation["违规处理"]
+    Violation --> Cooldown{"冷却检查<br/>(3s/违规类型)"}
+    Cooldown -->|通过| Push["推送队列"]
+
+    Push --> PersonCrop["裁剪人体区域"]
+    PersonCrop --> Image["缩放到 640x640"]
+
+    Image --> Upload["OSS 上传<br/>POST /oss/upload"]
+    Upload -->|image_url| Event["事件推送<br/>POST /system/event"]
+
+    Event -->|失败| Retry["重试<br/>(最多3次)"]
+    Event -->|成功| Log["日志记录"]
+```
+
+---
+
+## 6. 部署架构
+
+```mermaid
+graph TB
+    subgraph "服务器端 (x86_64 Linux / RK3588 ARM64)"
+        subgraph "进程边界"
+            subgraph "Python 主进程"
+                Main["safety_main.py<br/>安全模式"]
+                SDK[DahuaSDK<br/>ctypes]
+                YOLO[YOLO11<br/>GPU/NPU]
+                Pusher[事件推送线程]
+                Calib[校准管理线程]
+            end
+
+            subgraph "LLM 推理进程"
+                LlamaServer["llama-server<br/>Qwen2.5-VL-7B-Instruct"]
+            end
+        end
+
+        subgraph "文件系统"
+            CalibFile["calibration.json"]
+            Images["paired_images/"]
+            Models["yolo11m_safety.pt/rknn"]
+            Logs["日志文件<br/>(RotatingFileHandler)"]
+        end
+
+        Main -->|ctypes| SDK
+        Main -->|ultralytics| YOLO
+        Main -->|file I/O| CalibFile
+        Main -->|file I/O| Images
+        Main -->|cv2.imwrite| Images
+        Main -->|httpx| LlamaServer
+        Main -->|logging| Logs
+    end
+
+    subgraph "网络设备"
+        Pan["📷 全景摄像头<br/>SDK:37777 / RTSP:554"]
+        Dome["📷 球机<br/>SDK:37777 / RTSP:554"]
+    end
+
+    SDK -->|CLIENT_Login / RealPlay / PTZControl| Pan
+    SDK -->|CLIENT_Login / RealPlay / PTZControl| Dome
+    Main -->|RTSP| Pan
+    Main -->|RTSP| Dome
+
+    Main ---->|"场景 1 (有网)"| BIZ["业务平台<br/>jtjai.device.wenhq.top:8583"]
+
+    LlamaServer -->|GPU| GPU["NVIDIA GPU<br/>/ NPU (RK3588)"]
+    YOLO -->|GPU| GPU
+```
+
+### 6.1 硬件规格
+
+| 部署场景 | 处理器 | 加速 | 内存 | SDK 路径 |
+|----------|--------|------|------|----------|
+| 开发机 | macOS (x86_64) | - | - | `dh/Bin` (参考用, .so 不可用) |
+| 服务器 | Linux x86_64 | NVIDIA GPU | ≥16GB | `/home/wen/dsh/dh/Bin` |
+| 测试设备 | Orange Pi (aarch64) | NPU | ≥8GB | `/home/admin/dsh/dh/arm/Bin` |
+
+### 6.2 进程间关系
+
+| 进程 | 通信方式 | 端口 | 职责 |
+|------|----------|------|------|
+| `safety_main.py` | - | - | 主程序(检测→安全判断→推送全链路) |
+| `llama-server` | HTTP REST | 8111 | 视觉语言模型推理(安全分析) |
+
+---
+
+## 7. 模型量化与 RKNN 部署
+
+> ⚠️ 本节内容涉及 RK3588 平台专属的模型量化与 NPU 部署。在 x86_64 服务器上使用 YOLO (.pt) 或 ONNX 推理时,只需参考 [7.4 推理性能](#74-推理性能)。
+
+### 7.1 部署流水线
+
+```mermaid
+graph LR
+    subgraph "PC 开发环境"
+        PT["YOLO11 .pt 权重<br/>ultralytics 训练"] -->|export| ONNX["ONNX 模型<br/>(float32 NCHW)"]
+        ONNX -->|onnx2rknn.py| RKNN_tool["RKNN 工具链<br/>rknn-toolkit2"]
+        RKNN_tool -->|build| RKNN["RKNN 模型<br/>(w8a8 量化)"]
+    end
+
+    subgraph "RK3588 目标板"
+        RKNN -->|scp 部署| RKNN_Deploy["yolo11m_safety.rknn<br/>~50MB"]
+        RKNN_Deploy -->|rknnlite| Runtime["RKNNLite 运行时"]
+        Runtime -->|NPU_CORE_0_1_2| NPU["NPU 推理<br/>3 核并行"]
+    end
+
+    subgraph "推理流程"
+        Frame["摄像头帧<br/>BGR uint8"] -->|letterbox| Resized["640×640<br/>保持宽高比"]
+        Resized -->|BGR→RGB /255.0| Normalized["float32 [0,1]<br/>NHWC (1,640,640,3)"]
+        Normalized -->|rknn.inference| NPU
+        NPU -->|output (1,84,8400)| Post["后处理<br/>NMS + 坐标映射"]
+        Post --> dets["检测列表<br/>安全帽/人/反光衣"]
+    end
+```
+
+### 7.2 模型导出与量化
+
+YOLO11 安全检测模型(`yolo11m_safety.pt`)需从 ultralytics 格式转换为 RKNN 格式,使用 `testrk3588/onnx2rknn.py`:
+
+**第一步:ONNX 导出(在 PC 上)**
+
+```bash
+yolo export model=yolo11m_safety.pt format=onnx
+# → 输出 yolo11m_safety.onnx (NCHW, float32)
+```
+
+**第二步:RKNN 量化构建(在 PC 上)**
+
+```python
+from rknn.api import RKNN
+
+rknn = RKNN(verbose=True)
+rknn.config(
+    target_platform='rk3588',
+    mean_values=[[0, 0, 0]],      # 不做均值归一化(由应用层处理)
+    std_values=[[1, 1, 1]],        # 不做标准差归一化
+    quantized_dtype='w8a8',        # 权重量化 int8,激活量化 int8
+    optimization_level=3           # 最高优化等级
+)
+rknn.load_onnx(model='yolo11m_safety.onnx', input_size_list=[[3, 640, 640]])
+rknn.build(dataset='dataset.txt', do_quantization=False)  # float32 推理
+rknn.export_rknn('./yolo11m_safety.rknn')
+```
+
+**量化选项决策:**
+
+| 选项 | 配置 | 推理精度 | NPU 加速 | 适用场景 |
+|------|------|----------|----------|----------|
+| float32 (当前) | `do_quantization=False` | 最高 | 中等 | 精度优先 |
+| int8 混合量化 | `do_quantization=True, asymmetric_quantized-u8` | 略低 | 最高 | 性能优先 |
+
+当前采用 **非量化浮点推理**(`do_quantization=False`):此时 `quantized_dtype='w8a8'` 不会生效,权重和激活均不做 int8 量化,以保证安全检测的精度。
+
+### 7.3 RK3588 板端推理
+
+**模型加载:**
+
+```python
+from rknnlite.api import RKNNLite
+
+rknn = RKNNLite()
+rknn.load_rknn('yolo11m_safety.rknn')
+rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)  # 3 核并行
+```
+
+| 参数 | 值 | 说明 |
+|------|-----|------|
+| `core_mask` | `NPU_CORE_0_1_2` | 使用全部 3 个 NPU 核心并行推理 |
+| 模型大小 | ~50MB | yolo11m 级别,约 25M 参数 |
+| 输入尺寸 | 640×640 | letterbox 保持宽高比,不足补 114 灰边 |
+| 输入格式 | NHWC (1,H,W,C) | RKNN 统一 NHWC,与 ONNX 的 NCHW 由工具链自动转换 |
+| 输入类型 | float32 [0,1] | 应用层负责归一化 |
+
+**预处理(`BaseDetector.letterbox`):**
+1. 按比例缩放图像到 640×640 画布内
+2. 剩余区域用 114 灰度填充(保持宽高比)
+3. BGR → RGB 通道转换
+4. `/ 255.0` 归一化到 [0, 1]
+
+**后处理(`BaseDetector.postprocess`):**
+1. 解析输出张量 `(1, 84, 8400)` → 8400 个候选框
+2. 类别过滤(仅保留 class_id ∈ {0, 3, 4})
+3. 置信度阈值过滤(人 ≥ 0.8,安全帽/反光衣 ≥ 0.5)
+4. 非极大值抑制 NMS(IoU 阈值 0.45)
+5. 移除 letterbox padding,缩放到原始图像尺寸
+
+### 7.4 推理性能
+
+| 部署方式 | 设备 | 吞吐量 | 备注 |
+|----------|------|--------|------|
+| YOLO (.pt) | x86_64 + NVIDIA GPU | ~30 FPS | CUDA 加速 |
+| YOLO (.pt) | x86_64 CPU | ~3-5 FPS | 仅 CPU 推理 |
+| RKNN | RK3588 NPU (3 核) | ~15-20 FPS | 实测值,与模型复杂度相关 |
+| ONNX | RK3588 CPU | ~2-3 FPS | 不推荐,仅作为备选 |
+
+系统在 `safety_detector.py` 中通过 `SafetyDetector` 统一封装三种推理后端:
+
+```python
+class SafetyDetector:
+    def __init__(self, model_path, model_type='auto'):
+        # model_type 自动检测:.rknn → rknn, .onnx → onnx, 其他 → yolo
+        if model_type == 'rknn':
+            self.rknn_detector = RKNNDetector(model_path)
+        elif model_type == 'onnx':
+            self.rknn_detector = ONNXDetector(model_path)
+        else:
+            self.model = YOLO(model_path)
+```
+
+### 7.5 测试与验证
+
+测试脚本位于 `testrk3588/` 目录:
+
+| 脚本 | 用途 |
+|------|------|
+| `onnx2rknn.py` | ONNX → RKNN 量化导出 |
+| `test_detection.py` | 单图检测验证 |
+| `rtsp_person_detection.py` | RTSP 实时流检测(含安全判断) |
+| `test_model.py` | 模型加载与推理正确性测试 |
+
+连接到 RK3588 测试设备:
+```bash
+ssh admin@192.168.20.84
+conda activate rknn
+cd /home/admin/dsh/testrk3588
+python rtsp_person_detection.py
+```
+
+---
+
+## 8. 关键技术决策
+
+### 8.1 线程模型
+
+```mermaid
+graph TB
+    subgraph "线程模型"
+        MainThread["主线程<br/>CLI / 信号处理"] -->|启动| DetThread["检测线程<br/>(detection_worker)"]
+        MainThread -->|启动| PTZThread["PTZ 线程<br/>(ptz_worker)"]
+        MainThread -->|启动| CalibThread["定时校准线程<br/>(每日 08:00)"]
+        MainThread -->|启动| LogCleanup["日志清理线程<br/>(每 6 小时)"]
+
+        DetThread -->|queue.Queue| PTZThread
+        DetThread -.->|_paused_event| PTZThread
+        MainThread -->|pause/resume| DetThread
+    end
+
+    subgraph "同步原语"
+        Lock["threading.Lock<br/>(targets_lock, state_lock)"]
+        Event["threading.Event<br/>(_paused_event)"]
+        Queue["queue.Queue<br/>(ptz_queue, result_queue)"]
+    end
+```
+
+| 原语 | 用途 | 保护对象 |
+|------|------|---------|
+| `threading.Lock` | 互斥访问 | `tracking_targets`, `state`, `stats`, `frame` |
+| `threading.Event` | 暂停/恢复同步 | `_paused_event` — 校准时暂停检测 |
+| `queue.Queue` | 线程间通信 | `_ptz_queue` (检测→PTZ 命令) |
+
+### 8.2 暂停/恢复协议
+
+校准期间需要暂停检测线程,避免校准移动球机时检测线程也发 PTZ 命令造成争抢:
+
+```
+pause_detection() → _paused = True, _paused_event.clear()
+  → 检测线程: _paused_event.wait() 阻塞
+  → PTZ 线程: 检查 _paused 后跳过处理
+
+resume_detection() → _paused = False, _paused_event.set()
+  → 检测线程: 恢复运行
+  → PTZ 线程: 恢复处理命令
+```
+
+### 8.3 坐标映射体系
+
+```
+全景画面坐标 (x_ratio, y_ratio)      球机 PTZ 角度 (pan, tilt, zoom)
+         │                                        ▲
+         │                                        │
+         ▼                                        │
+┌──────────────────────┐     ┌────────────────────┐
+│   标定阶段:           │     │  运行时:            │
+│  - 运动检测           │────>│  - 查找表查询       │
+│  - 特征匹配           │     │  - 双线性插值       │
+│  - 加权融合           │     │  - tilt 线性重映射  │
+│  → 校准查找表         │     │  - pan 边缘曲线补偿  │
+└──────────────────────┘     └────────────────────┘
+```
+
+**PTZ 坐标计算(`PTZ_CONFIG` 中的关键参数):**
+
+| 参数 | 默认值 | 作用 |
+|------|--------|------|
+| `tilt_linear_enabled` | `True` | 使用线性 tilt 映射替代查找表(查找表 tilt 不稳定) |
+| `tilt_y0 / tilt_y1` | `15 / 55` | y_ratio=0 和 y_ratio=1 对应的 tilt 角度 |
+| `tilt_curve_power` | `0.8` | tilt 曲线指数,控制中间区域 tilt 变化率 |
+| `pan_flip` | `False` | 球机与全景朝向相反时翻转 pan 方向 |
+| `pan_edge_offset` | `25` | 画面边缘处 pan 额外补偿度数 |
+| `pan_curve_power` | `2.0` | pan 边缘曲线指数,>1 时边缘补偿更大 |
+
+### 8.4 图像清晰度评估
+
+PTZ 到位后获取球机帧时,使用**拉普拉斯算子方差**评估清晰度:
+
+```python
+gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
+# min_clarity=200, 高于此值认为清晰
+```
+
+获取策略:最多尝试 8 次(间隔 0.2s),返回清晰度最高的帧。
+
+---
+
+## 9. 配置体系
+
+### 9.1 配置层级
+
+```mermaid
+graph TB
+    Default["config/*.py<br/>默认配置"] -->|被覆盖| CLI["命令行参数<br/>--panorama-ip ..."]
+    CLI -->|构建| ConfigDict["运行期配置字典"]
+    ConfigDict -->|读取| Modules["各个模块"]
+
+    subgraph "配置模块"
+        camera["camera.py<br/>摄像头 + SDK + 日志"]
+        detection["detection.py<br/>YOLO + 安全检测参数"]
+        ptz["ptz.py<br/>PTZ 控制 + 自动变焦"]
+        coordinator["coordinator.py<br/>联动 + 校准"]
+        event["event.py<br/>事件推送"]
+        llm["llm.py<br/>大模型"]
+        system["system.py<br/>系统开关 + 工作模式"]
+        oss["oss.py<br/>OSS 上传"]
+        device["device.py<br/>第三方平台 + 设备"]
+    end
+
+    Modules --> camera
+    Modules --> detection
+    Modules --> ptz
+    Modules --> coordinator
+    Modules --> event
+    Modules --> llm
+    Modules --> system
+    Modules --> oss
+    Modules --> device
+```
+
+### 9.2 功能开关矩阵 (`config/system.py`)
+
+| 开关 | 安全模式 | 说明 |
+|------|----------|------|
+| `enable_panorama_camera` | ✓ | 全景摄像头启用 |
+| `enable_ptz_camera` | ✓ (可选) | 球机 PTZ 启用 |
+| `enable_detection` | ✓ | 人体/安全检测 |
+| `enable_safety_detection` | ✓ | 安全帽/反光衣检测 |
+| `enable_calibration` | ✗ | 全景-球机校准 |
+| `enable_ptz_tracking` | ✓ (可选) | PTZ 联动跟踪 |
+| `enable_llm` | ✓ | 大模型安全判断 |
+| `enable_event_push` | ✓ | 事件推送 |
+
+---
+
+## 10. 扩展设计
+
+### 10.1 多组摄像头扩展
+
+通过 `CameraGroup` 和 `MultiGroupSystem` 支持 N 组全景+球机并行运行:
+
+```python
+# config/camera.py 中配置多组
+CAMERA_GROUPS = [
+    {
+        'group_id': 'group_1',
+        'enabled': True,
+        'panorama': { ... },
+        'ptz': { ... },
+        'calibration_file': '/home/admin/dsh/calibration_group1.json',
+        'paired_image_dir': '/home/admin/dsh/paired_images_group1',
+    },
+    # 可继续添加 group_2, group_3, ...
+]
+```
+
+### 10.2 扩展新联动控制器
+
+继承 `Coordinator` 基类,重写 `_coordinator_worker()` 和 `start()/stop()`:
+
+```python
+class CustomCoordinator(Coordinator):
+    def _coordinator_worker(self):
+        # 自定义联动逻辑
+        pass
+```
+
+### 10.3 扩展新安全检测策略
+
+`safety_strategy` 支持 `rule`/`llm`/`hybrid` 三种,可扩展:
+
+```python
+# config/system.py
+'safety_strategy': 'hybrid'  # 可扩展为 'custom'
+```
+
+### 10.4 模型推理后端扩展
+
+`model_type` 支持 `auto`/`yolo`/`rknn`/`onnx`,可添加新类型:
+
+```python
+# DETECTION_CONFIG.model_type
+'auto':    # 自动检测(.pt→yolo, .rknn→rknn, .onnx→onnxruntime)
+'yolo':    # Ultralytics YOLO
+'rknn':    # RK3588 NPU 推理
+'onnx':    # ONNX Runtime
+```
+
+---
+
+## 附录
+
+### A. 项目文件清单
+
+| 文件 | 行数 | 职责 |
+|------|------|------|
+| `safety_main.py` | 822 | 安全模式主程序入口 |
+| `coordinator.py` | 2177 | 联动控制器(三种策略) |
+| `dahua_sdk.py` | 527 | 大华 SDK ctypes 封装 |
+| `panorama_camera.py` | 909 | 全景摄像头 + YOLO 检测 |
+| `ptz_camera.py` | ~600 | 球机 PTZ 控制 |
+| `calibration.py` | 1668 | 全景-球机坐标校准(校准器 + 视野发现) |
+| `safety_detector.py` | ~500 | 安全检测 + LLM 混合 + RKNN推理 |
+| `safety_coordinator.py` | ~400 | 安全联动控制器 |
+| `event_pusher.py` | 460 | 事件推送 + OSS 上传 |
+| `llm_service.py` | ~300 | 大模型服务封装 |
+| `camera_group.py` | 419 | 摄像头组封装 |
+| `multi_group_system.py` | 308 | 多组并行管理 |
+| `paired_image_saver.py` | ~200 | 配对图片保存 |
+| `third_party_pusher.py` | ~200 | 第三方平台推送 |
+| `ptz_person_tracker.py` | ~300 | 球机端人体检测 + 自动变焦 |
+| `config/__init__.py` | 52 | 配置汇总导出 |
+| `config/camera.py` | 113 | 摄像头 + SDK + 日志 |
+| `config/detection.py` | 69 | 检测参数 |
+| `config/ptz.py` | 58 | PTZ 控制参数 |
+| `config/coordinator.py` | 59 | 联动 + 校准配置 |
+| `config/event.py` | 36 | 事件推送配置 |
+| `config/llm.py` | ~40 | 大模型配置 |
+| `config/system.py` | 39 | 系统开关 + 工作模式 |
+| `config/oss.py` | ~20 | OSS 上传配置 |
+| `config/device.py` | ~30 | 第三方平台配置 |
+
+### B. 外部依赖清单
+
+| 依赖 | 用途 | 安装方式 |
+|------|------|----------|
+| `ultralytics` | YOLO11 检测推理 | `pip install ultralytics` |
+| `opencv-python` | 图像处理、RTSP、显示 | `pip install opencv-python` |
+| `opencv-contrib-python` | SIFT 特征匹配 | `pip install opencv-contrib-python` |
+| `numpy` | 数值计算 | `pip install numpy` |
+| `requests` / `httpx` | HTTP API 调用 | `pip install requests httpx` |
+| `llama-server` | 视觉语言模型推理(安全分析) | 单独部署(GGUF 模型) |
+| 大华 NetSDK | 摄像头 SDK | 厂商提供 `.so` 文件 |
+| `rknn-toolkit-lite2` / `rknn-toolkit2` | RK3588 NPU 推理 / 模型量化 | 板载:`pip install rknn-toolkit-lite2` |
+| `onnxruntime` | ONNX 推理(备选) | `pip install onnxruntime` |

+ 6 - 4
dual_camera_system/event_pusher.py

@@ -249,9 +249,10 @@ class EventPusher:
             return None
         
         for attempt in range(self.retry_count):
+            conn = None
             try:
                 filename = os.path.basename(image_path)
-                
+
                 # 创建连接
                 if self.use_https:
                     conn = http.client.HTTPSConnection(self.api_host, self.api_port)
@@ -289,8 +290,7 @@ class EventPusher:
                 conn.request("POST", self.upload_url, body, headers)
                 res = conn.getresponse()
                 data = res.read()
-                conn.close()
-                
+
                 if res.status == 200:
                     result = json.loads(data.decode("utf-8"))
                     if result.get('code') == 200:
@@ -299,11 +299,13 @@ class EventPusher:
                         print(f"上传失败: {result.get('msg', '未知错误')}")
                 else:
                     print(f"上传失败: HTTP {res.status}")
-                    
             except Exception as e:
                 print(f"上传异常 (尝试 {attempt + 1}/{self.retry_count}): {e}")
                 if attempt < self.retry_count - 1:
                     time.sleep(self.retry_delay)
+            finally:
+                if conn:
+                    conn.close()
         
         return None
     

+ 18 - 114
dual_camera_system/llm_service.py

@@ -103,45 +103,47 @@ class LLMClient:
         last_error = None
         
         for attempt in range(self.max_retries):
+            conn = None
             try:
                 start_time = time.time()
-                
+
                 # 创建连接
                 conn_class = http.client.HTTPSConnection if self.use_https else http.client.HTTPConnection
                 conn = conn_class(self.api_host, self.api_port, timeout=self.timeout)
-                
-                conn.request("POST", "/v1/chat/completions", 
+
+                conn.request("POST", "/v1/chat/completions",
                             json.dumps(payload), headers)
-                
+
                 res = conn.getresponse()
                 data = res.read()
-                conn.close()
-                
+
                 latency = time.time() - start_time
-                
+
                 if res.status != 200:
                     error_msg = f"HTTP {res.status}: {data.decode('utf-8', errors='ignore')}"
                     return LLMResponse(content="", success=False, error=error_msg, latency=latency)
-                
+
                 response = json.loads(data.decode('utf-8'))
-                
+
                 if 'choices' in response and len(response['choices']) > 0:
                     content = response['choices'][0]['message']['content']
                     return LLMResponse(content=content, success=True, latency=latency)
                 elif 'error' in response:
-                    return LLMResponse(content="", success=False, 
+                    return LLMResponse(content="", success=False,
                                       error=response['error'].get('message', 'Unknown error'),
                                       latency=latency)
                 else:
-                    return LLMResponse(content="", success=False, 
+                    return LLMResponse(content="", success=False,
                                       error="Invalid response format", latency=latency)
-                
             except json.JSONDecodeError as e:
                 last_error = f"JSON 解析错误: {e}"
             except http.client.HTTPException as e:
                 last_error = f"HTTP 错误: {e}"
             except Exception as e:
                 last_error = str(e)
+            finally:
+                if conn:
+                    conn.close()
             
             # 重试
             if attempt < self.max_retries - 1:
@@ -178,15 +180,18 @@ class LLMClient:
     
     def check_connection(self) -> bool:
         """检查 API 连接"""
+        conn = None
         try:
             conn_class = http.client.HTTPSConnection if self.use_https else http.client.HTTPConnection
             conn = conn_class(self.api_host, self.api_port, timeout=5)
             conn.request("GET", "/v1/models")
             res = conn.getresponse()
-            conn.close()
             return res.status in [200, 404]  # 404 也表示服务在运行
         except:
             return False
+        finally:
+            if conn:
+                conn.close()
 
 
 class SafetyAnalyzer:
@@ -340,107 +345,6 @@ class SafetyAnalyzer:
                    'violation_desc': '解析失败', 'confidence': 0.0}
 
 
-class NumberRecognizer:
-    """
-    编号识别器
-    使用大模型进行 OCR 编号识别
-    """
-    
-    NUMBER_PROMPT = """请识别这张图片中工作人员衣服上的编号或工号。
-
-只返回识别到的编号数字,如果没有看到编号则返回 "无"。
-不要返回其他内容。"""
-    
-    def __init__(self, llm_config: Dict[str, Any] = None):
-        """
-        初始化识别器
-        
-        Args:
-            llm_config: LLM 配置
-        """
-        self.llm = LLMClient(llm_config)
-    
-    def recognize(self, image: np.ndarray) -> Dict[str, Any]:
-        """
-        识别图像中的编号
-        
-        Args:
-            image: 输入图像
-            
-        Returns:
-            识别结果 {'number': str, 'confidence': float, 'success': bool}
-        """
-        if image is None:
-            return {'number': None, 'confidence': 0.0, 'success': False}
-        
-        response = self.llm.vision_chat(image, self.NUMBER_PROMPT, temperature=0.1)
-        
-        if not response.success:
-            return {'number': None, 'confidence': 0.0, 'success': False, 
-                   'error': response.error}
-        
-        content = response.content.strip()
-        
-        # 处理结果
-        if content == '无' or '无' in content or not content:
-            return {'number': None, 'confidence': 0.0, 'success': True}
-        
-        # 提取数字/字母数字组合
-        import re
-        matches = re.findall(r'[A-Za-z]*\d+[A-Za-z0-9]*', content)
-        
-        if matches:
-            number = matches[0]
-            return {'number': number, 'confidence': 0.9, 'success': True}
-        
-        # 如果没有匹配到,返回原始内容
-        return {'number': content, 'confidence': 0.5, 'success': True}
-    
-    def recognize_person_number(self, person_image: np.ndarray,
-                                search_chest: bool = True) -> Dict[str, Any]:
-        """
-        识别人员编号(在胸部/背部区域搜索)
-        
-        Args:
-            person_image: 人员图像
-            search_chest: 是否搜索胸部区域
-            
-        Returns:
-            识别结果
-        """
-        if person_image is None:
-            return {'number': None, 'confidence': 0.0, 'success': False}
-        
-        h, w = person_image.shape[:2]
-        
-        # 如果图像较大,先尝试裁剪胸部区域
-        if search_chest and h > 100 and w > 100:
-            # 胸部区域:上半身中间部分
-            y1 = int(h * 0.15)
-            y2 = int(h * 0.55)
-            x1 = int(w * 0.15)
-            x2 = int(w * 0.85)
-            
-            chest_region = person_image[y1:y2, x1:x2]
-            
-            # 先在胸部区域搜索
-            result = self.recognize(chest_region)
-            
-            if result.get('number'):
-                result['location'] = '胸部'
-                return result
-        
-        # 整图识别
-        result = self.recognize(person_image)
-        result['location'] = '全身'
-        return result
-
-
 def create_safety_analyzer(config: Dict[str, Any] = None) -> SafetyAnalyzer:
     """创建安全分析器"""
     return SafetyAnalyzer(config)
-
-
-def create_number_recognizer(config: Dict[str, Any] = None) -> NumberRecognizer:
-    """创建编号识别器"""
-    return NumberRecognizer(config)

+ 82 - 63
dual_camera_system/main.py

@@ -4,7 +4,6 @@
 系统功能:
 1. 全景摄像头实时监控和物体检测
 2. 检测到人体后,球机自动变焦定位
-3. 对人体进行分割并OCR识别衣服上的编号
 """
 
 # 必须在import cv2之前设置,否则FFmpeg多线程解码会导致
@@ -29,14 +28,13 @@ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
 from config import (
     PANORAMA_CAMERA, PTZ_CAMERA, SDK_PATH,
-    DETECTION_CONFIG, PTZ_CONFIG, OCR_CONFIG, COORDINATOR_CONFIG,
+    DETECTION_CONFIG, PTZ_CONFIG, COORDINATOR_CONFIG,
     CALIBRATION_CONFIG, LOG_CONFIG, SYSTEM_CONFIG,
     CAMERA_GROUPS, get_enabled_groups
 )
 from dahua_sdk import DahuaSDK
 from panorama_camera import PanoramaCamera, ObjectDetector, DetectedObject
 from ptz_camera import PTZCamera, PTZController
-from ocr_recognizer import NumberDetector, PersonInfo
 from coordinator import Coordinator, EventDrivenCoordinator, AsyncCoordinator, SequentialCoordinator
 
 
@@ -158,9 +156,7 @@ class DualCameraSystem:
         self.panorama_camera = None
         self.ptz_camera = None
         
-        # 检测器和识别器
-        self.detector = None
-        self.number_detector = None
+        # 检测器
         
         # 联动控制器
         self.coordinator = None
@@ -182,6 +178,11 @@ class DualCameraSystem:
 
         # 检测暂停状态(校准时暂停检测)
         self._detection_was_running = False
+
+        # Captures 目录清理配置
+        self.captures_cleanup_enabled = False  # 已禁用 captures,不再需要定时清理
+        self.captures_dir = '/home/admin/dsh/captures'
+        self.captures_cleanup_hour = 3  # 每天凌晨3点清理
     
     def initialize(self, skip_calibration: bool = False) -> bool:
         """
@@ -210,18 +211,6 @@ class DualCameraSystem:
         except Exception as e:
             logger.warning(f"检测器初始化失败: {e}")
         
-        # 初始化编号检测器 (使用llama-server API)
-        try:
-            ocr_config = {
-                'api_host': self.config.get('ocr_host', OCR_CONFIG['api_host']),
-                'api_port': self.config.get('ocr_port', OCR_CONFIG['api_port']),
-                'model': self.config.get('ocr_model', OCR_CONFIG['model']),
-            }
-            self.number_detector = NumberDetector(use_api=True, ocr_config=ocr_config)
-            logger.info("编号检测器初始化成功 (使用llama-server API)")
-        except Exception as e:
-            logger.warning(f"编号检测器初始化失败: {e}")
-        
         # 初始化SDK(在检测器之后,避免SDK内存映射与PyTorch冲突)
         sdk_path = os.path.join(
             self.config.get('sdk_path', SDK_PATH['lib_path']),
@@ -254,8 +243,7 @@ class DualCameraSystem:
             self.coordinator = SequentialCoordinator(
                 self.panorama_camera,
                 self.ptz_camera,
-                self.detector,
-                self.number_detector
+                self.detector
             )
             # 应用顺序模式配置
             seq_config = COORDINATOR_CONFIG.get('sequential_mode', {})
@@ -272,8 +260,7 @@ class DualCameraSystem:
             self.coordinator = AsyncCoordinator(
                 self.panorama_camera,
                 self.ptz_camera,
-                self.detector,
-                self.number_detector
+                self.detector
             )
         
         # 设置回调
@@ -516,22 +503,12 @@ class DualCameraSystem:
     
     def _setup_callbacks(self):
         """设置回调函数"""
-        
+
         def on_person_detected(person: DetectedObject, frame: np.ndarray):
             """人体检测回调"""
             logger.info(f"检测到人体: 位置={person.center}, 置信度={person.confidence:.2f}")
-        
-        def on_number_recognized(person_info: PersonInfo):
-            """编号识别回调"""
-            logger.info(
-                f"识别到编号: ID={person_info.person_id}, "
-                f"编号={person_info.number_text}, "
-                f"置信度={person_info.number_confidence:.2f}, "
-                f"位置={person_info.number_location}"
-            )
-        
+
         self.coordinator.on_person_detected = on_person_detected
-        self.coordinator.on_number_recognized = on_number_recognized
     
     def start(self) -> bool:
         """
@@ -551,10 +528,13 @@ class DualCameraSystem:
         
         self.running = True
         logger.info("联动系统启动成功")
-        
+
         # 启动定时校准
         self._start_periodic_calibration()
-        
+
+        # 启动 captures 目录定时清理
+        self._start_captures_cleanup()
+
         return True
     
     def stop(self):
@@ -601,6 +581,70 @@ class DualCameraSystem:
             self.calibration_thread.join(timeout=2)
             self.calibration_thread = None
         logger.info("定时校准已停止")
+
+    def _start_captures_cleanup(self):
+        """Start the daily background cleanup of the captures directory.
+
+        Does nothing when ``self.captures_cleanup_enabled`` is false.
+        Otherwise a daemon thread is started that waits until the next
+        ``self.captures_cleanup_hour``:00 (local wall-clock time), calls
+        ``self._cleanup_captures()`` exactly once, and then schedules the
+        following day. The worker returns once ``self.running`` is false.
+        """
+        if not self.captures_cleanup_enabled:
+            return
+
+        def cleanup_worker():
+            from datetime import datetime, timedelta
+
+            while self.running:
+                try:
+                    now = datetime.now()
+
+                    # Next run is today at target_hour:00, or tomorrow when
+                    # that moment has already passed. timedelta arithmetic
+                    # rolls over month/year boundaries, whereas
+                    # replace(day=now.day + 1) raises ValueError on the last
+                    # day of a month.
+                    target_hour = self.captures_cleanup_hour
+                    target_time = now.replace(hour=target_hour, minute=0, second=0, microsecond=0)
+                    if target_time <= now:
+                        target_time += timedelta(days=1)
+
+                    wait_seconds = int((target_time - now).total_seconds())
+                    logger.info(f"[Captures清理] 下次清理时间: {target_time.strftime('%Y-%m-%d %H:%M')} (等待 {wait_seconds // 3600} 小时)")
+
+                    # Sleep in 1-second steps so a stop request is noticed
+                    # quickly; re-reading the wall clock on every step keeps
+                    # the schedule correct if the system time is adjusted.
+                    while datetime.now() < target_time:
+                        if not self.running:
+                            return
+                        time.sleep(1)
+
+                    if not self.running:
+                        return
+
+                    # Runs once per target time: on the next iteration
+                    # target_time <= now, so the schedule moves to tomorrow
+                    # instead of firing again within the same minute.
+                    self._cleanup_captures()
+                except Exception as e:
+                    # Keep the worker alive; back off before retrying.
+                    logger.error(f"[Captures清理] 清理任务异常: {e}")
+                    time.sleep(60)
+
+        cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
+        cleanup_thread.start()
+        logger.info(f"[Captures清理] 定时清理已启动 (每日 {self.captures_cleanup_hour}:00)")
+
+    def _cleanup_captures(self):
+        """Delete the ``capture_*`` entries found in ``self.captures_dir``.
+
+        Returns silently when the directory does not exist. Only directory
+        entries whose name starts with ``capture_`` are removed. A failure to
+        delete one entry is logged as a warning and does not stop the loop;
+        any other error is logged and swallowed, so this method never raises.
+        """
+        try:
+            import os
+            if not os.path.exists(self.captures_dir):
+                return
+
+            # Only entries following the capture_ naming prefix are candidates.
+            files = [f for f in os.listdir(self.captures_dir) if f.startswith('capture_')]
+            if not files:
+                logger.info("[Captures清理] 目录为空,无需清理")
+                return
+
+            # Count successful removals so the summary log reflects reality.
+            deleted = 0
+            for filename in files:
+                filepath = os.path.join(self.captures_dir, filename)
+                try:
+                    os.remove(filepath)
+                    deleted += 1
+                except Exception as e:
+                    # Best effort: report the failure and continue with the rest.
+                    logger.warning(f"[Captures清理] 删除失败: {filepath}, {e}")
+
+            logger.info(f"[Captures清理] 已清理 {deleted} 个文件")
+        except Exception as e:
+            logger.error(f"[Captures清理] 清理失败: {e}")
     
     def _get_seconds_until_target_time(self, target_time_str: str) -> int:
         """
@@ -729,9 +773,7 @@ def run_interactive(system: DualCameraSystem):
             elif cmd == 'r':
                 results = system.get_results()
                 if results:
-                    print(f"获取到 {len(results)} 个识别结果:")
-                    for r in results:
-                        print(f"  ID={r.person_id}, 编号={r.number_text}, 置信度={r.number_confidence:.2f}")
+                    print(f"获取到 {len(results)} 个识别结果")
                 else:
                     print("暂无识别结果")
             
@@ -788,9 +830,6 @@ def main():
                         choices=['n', 's', 'm', 'l', 'x'],
                         help='YOLO11模型尺寸 (n/s/m/l/x)')
     parser.add_argument('--no-gpu', action='store_true', help='不使用GPU')
-    parser.add_argument('--ocr-host', type=str, default='localhost', help='OCR API服务器地址')
-    parser.add_argument('--ocr-port', type=int, default=8111, help='OCR API端口')
-    parser.add_argument('--ocr-model', type=str, default='PaddleOCR-VL-1.5-GGUF.gguf', help='OCR模型名称')
     parser.add_argument('--interactive', action='store_true', help='交互模式')
     parser.add_argument('--demo', action='store_true', help='演示模式(不连接实际摄像头)')
     parser.add_argument('--skip-calibration', action='store_true', help='跳过自动校准')
@@ -840,9 +879,6 @@ def run_multi_group_mode(args):
     config = {
         'model_size': args.model_size,
         'use_gpu': not args.no_gpu,
-        'ocr_host': args.ocr_host,
-        'ocr_port': args.ocr_port,
-        'ocr_model': args.ocr_model,
     }
     
     if args.model:
@@ -912,9 +948,6 @@ def run_single_group_mode(args):
     
     config['model_size'] = args.model_size
     config['use_gpu'] = not args.no_gpu
-    config['ocr_host'] = args.ocr_host
-    config['ocr_port'] = args.ocr_port
-    config['ocr_model'] = args.ocr_model
     
     # 创建系统实例
     system = DualCameraSystem(config)
@@ -942,9 +975,7 @@ def run_single_group_mode(args):
                 time.sleep(1)
                 results = system.get_results()
                 if results:
-                    for r in results:
-                        if r.number_text:
-                            print(f"[识别] ID={r.person_id}, 编号={r.number_text}")
+                    print(f"[识别] 获取到 {len(results)} 个结果")
     
     except KeyboardInterrupt:
         print("\n接收到停止信号")
@@ -976,14 +1007,6 @@ def run_demo():
 │  │ PTZ控制 │ -> │ 精确定位 │ -> │ 变焦放大 │                 │
 │  └─────────┘    └─────────┘    └─────────┘                 │
 └─────────────────────────────────────────────────────────────┘
-                          │
-                          ▼ 变焦后的人体图像
-┌─────────────────────────────────────────────────────────────┐
-│                     识别模块 (OCR)                           │
-│  ┌─────────┐    ┌─────────┐    ┌─────────┐                 │
-│  │人体分割 │ -> │ 区域检测 │ -> │ OCR识别 │ -> 编号结果    │
-│  └─────────┘    └─────────┘    └─────────┘                 │
-└─────────────────────────────────────────────────────────────┘
 
 工作流程:
 1. 全景摄像头实时获取视频流
@@ -991,15 +1014,11 @@ def run_demo():
 3. 计算人体在画面中的相对位置
 4. 控制球机PTZ移动到对应位置
 5. 球机变焦放大人体区域
-6. 对人体进行分割,提取服装区域
-7. 使用OCR识别服装上的编号
-8. 输出识别结果
 
 主要组件:
 - dahua_sdk.py: 大华SDK封装
 - panorama_camera.py: 全景摄像头和人体检测
 - ptz_camera.py: 球机PTZ控制
-- ocr_recognizer.py: 人体分割和OCR识别
 - coordinator.py: 联动控制逻辑
 """)
     

+ 1 - 17
dual_camera_system/multi_group_system.py

@@ -21,7 +21,6 @@ from config import (
 )
 from dahua_sdk import DahuaSDK
 from panorama_camera import ObjectDetector
-from ocr_recognizer import NumberDetector
 from camera_group import CameraGroup
 
 logger = logging.getLogger(__name__)
@@ -49,7 +48,6 @@ class MultiGroupSystem:
         # 共享组件
         self.sdk: Optional[DahuaSDK] = None
         self.detector: Optional[ObjectDetector] = None
-        self.number_detector: Optional[NumberDetector] = None
         
         # 摄像头组列表
         self.groups: List[CameraGroup] = []
@@ -86,20 +84,7 @@ class MultiGroupSystem:
         except Exception as e:
             logger.warning(f"[MultiGroupSystem] 检测器初始化失败: {e}")
         
-        # 2. 初始化编号检测器
-        try:
-            from config import OCR_CONFIG
-            ocr_config = {
-                'api_host': self.config.get('ocr_host', OCR_CONFIG['api_host']),
-                'api_port': self.config.get('ocr_port', OCR_CONFIG['api_port']),
-                'model': self.config.get('ocr_model', OCR_CONFIG['model']),
-            }
-            self.number_detector = NumberDetector(use_api=True, ocr_config=ocr_config)
-            logger.info("[MultiGroupSystem] 编号检测器初始化成功")
-        except Exception as e:
-            logger.warning(f"[MultiGroupSystem] 编号检测器初始化失败: {e}")
-        
-        # 3. 初始化SDK
+        # 2. 初始化SDK
         sdk_path = os.path.join(
             self.config.get('sdk_path', SDK_PATH['lib_path']),
             self.config.get('netsdk', SDK_PATH['netsdk'])
@@ -138,7 +123,6 @@ class MultiGroupSystem:
                 group_config=group_config,
                 sdk=self.sdk,
                 detector=self.detector,
-                number_detector=self.number_detector,
                 shared_config=shared_config
             )
             

+ 0 - 648
dual_camera_system/ocr_recognizer.py

@@ -1,648 +0,0 @@
-"""
-OCR识别模块
-负责人体分割和编号OCR识别
-"""
-
-import cv2
-import numpy as np
-from typing import List, Optional, Tuple, Dict
-from dataclasses import dataclass
-
-from config import OCR_CONFIG, SEGMENTATION_CONFIG
-
-
-@dataclass
-class OCRResult:
-    """OCR识别结果"""
-    text: str              # 识别文本
-    confidence: float      # 置信度
-    bbox: Tuple[int, int, int, int]  # 边界框
-    location: str = ""     # 位置描述 (如: "胸部", "背部")
-
-
-@dataclass
-class PersonInfo:
-    """人员信息"""
-    person_id: int                    # 人员ID
-    person_bbox: Tuple[int, int, int, int]  # 人体边界框
-    number_text: Optional[str] = None  # 编号文本
-    number_confidence: float = 0.0     # 编号置信度
-    number_location: str = ""          # 编号位置
-    ocr_results: List[OCRResult] = None  # 所有OCR结果
-
-
-class PersonSegmenter:
-    """
-    人体分割器 - 使用 RKNN YOLOv8 分割模型
-    将人体从背景中分割出来
-    """
-    
-    def __init__(self, use_gpu: bool = True):
-        """
-        初始化分割器
-        Args:
-            use_gpu: 是否使用GPU (RKNN使用NPU,此参数保留用于兼容)
-        """
-        self.use_gpu = use_gpu
-        self.config = SEGMENTATION_CONFIG
-        self.input_size = self.config.get('input_size', (640, 640))
-        self.conf_threshold = self.config.get('conf_threshold', 0.5)
-        self.rknn = None
-        self._load_model()
-    
-    def _load_model(self):
-        """加载 RKNN 分割模型"""
-        try:
-            from rknnlite.api import RKNNLite
-            
-            model_path = self.config.get('model_path', '/home/admin/dsh/testrk3588/yolov8n-seg.rknn')
-            self.rknn = RKNNLite()
-            
-            ret = self.rknn.load_rknn(model_path)
-            if ret != 0:
-                print(f"[错误] 加载 RKNN 分割模型失败: {model_path}")
-                self.rknn = None
-                return
-            
-            # 初始化运行时,使用所有NPU核心
-            ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
-            if ret != 0:
-                print("[错误] 初始化 RKNN 运行时失败")
-                self.rknn = None
-                return
-            
-            print(f"成功加载 RKNN 人体分割模型: {model_path}")
-        except ImportError:
-            print("未安装 rknnlite,无法使用 RKNN 分割模型")
-            self.rknn = None
-        except Exception as e:
-            print(f"加载分割模型失败: {e}")
-            self.rknn = None
-    
-    def _letterbox(self, image: np.ndarray) -> tuple:
-        """Letterbox 预处理,保持宽高比"""
-        h0, w0 = image.shape[:2]
-        ih, iw = self.input_size
-        scale = min(iw / w0, ih / h0)
-        new_w, new_h = int(w0 * scale), int(h0 * scale)
-        pad_w = (iw - new_w) // 2
-        pad_h = (ih - new_h) // 2
-        resized = cv2.resize(image, (new_w, new_h))
-        canvas = np.full((ih, iw, 3), 114, dtype=np.uint8)
-        canvas[pad_h:pad_h+new_h, pad_w:pad_w+new_w] = resized
-        return canvas, scale, pad_w, pad_h, h0, w0
-    
-    def _postprocess_segmentation(self, outputs, scale, pad_w, pad_h, w0, h0):
-        """
-        处理 YOLOv8 分割模型输出
-        YOLOv8-seg 输出格式: [检测输出, 分割输出]
-        - 检测输出: (1, 116, 8400) - 包含边界框、类别、掩码系数
-        - 分割输出: (1, 32, 160, 160) - 原型掩码
-        """
-        if not outputs or len(outputs) < 2:
-            return None
-        
-        # 解析输出
-        det_output = outputs[0]  # (1, 116, 8400) - 检测输出
-        seg_output = outputs[1]  # (1, 32, 160, 160) - 分割原型
-        
-        # 处理检测输出
-        if len(det_output.shape) == 3:
-            det_output = det_output[0]  # (116, 8400)
-        
-        # YOLOv8-seg: 前 84 维是检测 (4 box + 80 classes),后 32 维是掩码系数
-        num_anchors = det_output.shape[1]
-        
-        best_idx = -1
-        best_conf = 0
-        
-        # 寻找最佳人体检测 (class 0 = person)
-        for i in range(num_anchors):
-            # 类别概率 (索引 4-84 是80个类别)
-            class_probs = det_output[4:84, i]
-            person_conf = float(class_probs[0])  # class 0 = person
-            
-            if person_conf > self.conf_threshold and person_conf > best_conf:
-                best_conf = person_conf
-                best_idx = i
-        
-        if best_idx < 0:
-            return None
-        
-        # 获取掩码系数 (后32维)
-        mask_coeffs = det_output[84:116, best_idx]  # (32,)
-        
-        # 处理分割原型 (1, 32, 160, 160) -> (32, 160, 160)
-        if len(seg_output.shape) == 4:
-            seg_output = seg_output[0]
-        
-        # 计算最终掩码: mask = coeffs @ prototypes
-        # seg_output: (32, 160, 160), mask_coeffs: (32,)
-        mask = np.zeros((160, 160), dtype=np.float32)
-        for i in range(32):
-            mask += mask_coeffs[i] * seg_output[i]
-        
-        # Sigmoid 激活
-        mask = 1 / (1 + np.exp(-mask))
-        
-        # 移除 padding 并缩放到原始尺寸
-        mask = (mask > 0.5).astype(np.uint8) * 255
-        
-        # 裁剪掉 letterbox 添加的 padding
-        mask_h, mask_w = mask.shape
-        pad_h_mask = int(pad_h * mask_h / self.input_size[0])  # 160/640 = 0.25
-        pad_w_mask = int(pad_w * mask_w / self.input_size[1])
-        new_h_mask = int((mask_h - 2 * pad_h_mask))
-        new_w_mask = int((mask_w - 2 * pad_w_mask))
-        
-        if new_h_mask > 0 and new_w_mask > 0:
-            mask = mask[pad_h_mask:pad_h_mask+new_h_mask, pad_w_mask:pad_w_mask+new_w_mask]
-        
-        # 缩放到原始 ROI 尺寸
-        mask = cv2.resize(mask, (w0, h0))
-        
-        return mask
-    
-    def segment_person(self, frame: np.ndarray, 
-                       person_bbox: Tuple[int, int, int, int]) -> Optional[np.ndarray]:
-        """
-        分割人体
-        Args:
-            frame: 输入图像
-            person_bbox: 人体边界框 (x, y, w, h)
-        Returns:
-            人体分割掩码
-        """
-        if self.rknn is None:
-            return None
-        
-        x, y, w, h = person_bbox
-        
-        # 裁剪人体区域
-        person_roi = frame[y:y+h, x:x+w]
-        if person_roi.size == 0:
-            return None
-        
-        try:
-            # 预处理
-            canvas, scale, pad_w, pad_h, h0, w0 = self._letterbox(person_roi)
-            
-            # RKNN 输入: NHWC (1, H, W, C), RGB, float32 normalized 0-1
-            img = canvas[..., ::-1].astype(np.float32) / 255.0
-            blob = img[None, ...]  # (1, 640, 640, 3)
-            
-            # 推理
-            outputs = self.rknn.inference(inputs=[blob])
-            
-            # 后处理
-            mask = self._postprocess_segmentation(outputs, scale, pad_w, pad_h, w0, h0)
-            return mask
-            
-        except Exception as e:
-            print(f"分割错误: {e}")
-        
-        return None
-    
-    def release(self):
-        """释放 RKNN 资源"""
-        if self.rknn is not None:
-            self.rknn.release()
-            self.rknn = None
-    
-    def extract_person_region(self, frame: np.ndarray,
-                               person_bbox: Tuple[int, int, int, int],
-                               padding: float = 0.1) -> Tuple[np.ndarray, Tuple[int, int]]:
-        """
-        提取人体区域
-        Args:
-            frame: 输入图像
-            person_bbox: 人体边界框
-            padding: 边界填充比例
-        Returns:
-            (人体区域图像, 原始位置偏移)
-        """
-        x, y, w, h = person_bbox
-        
-        # 添加填充
-        pad_w = int(w * padding)
-        pad_h = int(h * padding)
-        
-        x1 = max(0, x - pad_w)
-        y1 = max(0, y - pad_h)
-        x2 = min(frame.shape[1], x + w + pad_w)
-        y2 = min(frame.shape[0], y + h + pad_h)
-        
-        person_region = frame[y1:y2, x1:x2]
-        offset = (x1, y1)
-        
-        return person_region, offset
-
-
-class OCRRecognizer:
-    """
-    OCR识别器
-    使用llama-server API接口进行OCR识别
-    """
-    
-    def __init__(self, config: Dict = None):
-        """
-        初始化OCR
-        Args:
-            config: API配置
-        """
-        self.config = config or OCR_CONFIG
-        self.api_host = self.config.get('api_host', 'localhost')
-        self.api_port = self.config.get('api_port', 8111)
-        self.model = self.config.get('model', 'PaddleOCR-VL-1.5-GGUF.gguf')
-        self.prompt = self.config.get('prompt', '请识别图片中的数字编号,只返回数字,不要其他内容')
-        self.temperature = self.config.get('temperature', 0.3)
-        self.timeout = self.config.get('timeout', 30)
-        
-        # 检查API是否可用
-        self._check_api()
-    
-    def _check_api(self):
-        """检查API是否可用"""
-        try:
-            import http.client
-            # localhost通常使用HTTP而非HTTPS
-            use_https = self.api_host not in ['localhost', '127.0.0.1']
-            conn_class = http.client.HTTPSConnection if use_https else http.client.HTTPConnection
-            conn = conn_class(self.api_host, self.api_port, timeout=5)
-            conn.request("GET", "/")
-            res = conn.getresponse()
-            conn.close()
-            print(f"llama-server API已连接: {self.api_host}:{self.api_port}")
-        except Exception as e:
-            print(f"连接llama-server失败: {e}")
-            print(f"请确保llama-server运行在 {self.api_host}:{self.api_port}")
-    
-    def _image_to_base64(self, image: np.ndarray) -> str:
-        """
-        将图像转换为base64编码
-        Args:
-            image: 输入图像
-        Returns:
-            base64编码字符串
-        """
-        import base64
-        _, buffer = cv2.imencode('.jpg', image)
-        base64_str = base64.b64encode(buffer).decode('utf-8')
-        return f"data:image/jpeg;base64,{base64_str}"
-    
-    def recognize(self, image: np.ndarray, 
-                  prompt: str = None,
-                  detect_only_numbers: bool = True,
-                  max_retries: int = 3) -> List[OCRResult]:
-        """
-        使用llama-server API识别图像中的文字
-        Args:
-            image: 输入图像
-            prompt: 自定义提示词
-            detect_only_numbers: 是否只检测数字编号
-            max_retries: 最大重试次数
-        Returns:
-            识别结果列表
-        """
-        if image is None:
-            return []
-        
-        import http.client
-        import json
-        import re
-        
-        results = []
-        last_error = None
-        
-        for attempt in range(max_retries):
-            try:
-                # 准备图像数据
-                image_base64 = self._image_to_base64(image)
-                
-                # 构建请求
-                use_prompt = prompt or self.prompt
-                
-                payload = {
-                    "model": self.model,
-                    "messages": [
-                        {
-                            "role": "user",
-                            "content": [
-                                {
-                                    "type": "text",
-                                    "text": use_prompt
-                                },
-                                {
-                                    "type": "image_url",
-                                    "image_url": {
-                                        "url": image_base64
-                                    }
-                                }
-                            ]
-                        }
-                    ],
-                    "temperature": self.temperature,
-                    "stream": False
-                }
-                
-                headers = {
-                    'Content-Type': 'application/json',
-                    'Accept': 'application/json',
-                }
-                
-                # 发送请求 - localhost使用HTTP
-                use_https = self.api_host not in ['localhost', '127.0.0.1']
-                conn_class = http.client.HTTPSConnection if use_https else http.client.HTTPConnection
-                conn = conn_class(
-                    self.api_host, 
-                    self.api_port,
-                    timeout=self.timeout
-                )
-                
-                conn.request("POST", "/v1/chat/completions", 
-                            json.dumps(payload), headers)
-                
-                res = conn.getresponse()
-                data = res.read()
-                conn.close()
-                
-                # 解析响应
-                response = json.loads(data.decode('utf-8'))
-                
-                if 'choices' in response and len(response['choices']) > 0:
-                    content = response['choices'][0]['message']['content']
-                    
-                    # 从响应中提取数字/编号
-                    text = content.strip()
-                    
-                    # 如果只检测数字,提取数字部分
-                    if detect_only_numbers:
-                        # 匹配数字、字母数字组合
-                        numbers = re.findall(r'[A-Za-z]*\d+[A-Za-z0-9]*', text)
-                        if numbers:
-                            text = numbers[0]
-                    
-                    # 创建结果
-                    if text:
-                        results.append(OCRResult(
-                            text=text,
-                            confidence=1.0,  # API不返回置信度,设为1.0
-                            bbox=(0, 0, image.shape[1], image.shape[0])
-                        ))
-                        return results  # 成功则直接返回
-                        
-            except Exception as e:
-                last_error = e
-                print(f"OCR API识别错误 (尝试 {attempt + 1}/{max_retries}): {e}")
-                if attempt < max_retries - 1:
-                    import time
-                    time.sleep(0.5 * (attempt + 1))  # 指数退避
-        
-        # 所有重试都失败
-        if last_error:
-            print(f"OCR API识别最终失败: {last_error}")
-        
-        return results
-    
-    def recognize_number(self, image: np.ndarray) -> Optional[str]:
-        """
-        识别图像中的编号
-        Args:
-            image: 输入图像
-        Returns:
-            编号文本
-        """
-        results = self.recognize(image, detect_only_numbers=True)
-        if results:
-            return results[0].text
-        return None
-
-
-class OCRRecognizerLocal:
-    """
-    本地OCR识别器 (备用)
-    使用PaddleOCR或EasyOCR进行识别
-    """
-    
-    def __init__(self, use_gpu: bool = True, languages: List[str] = None):
-        """
-        初始化OCR
-        Args:
-            use_gpu: 是否使用GPU
-            languages: 支持的语言列表
-        """
-        self.use_gpu = use_gpu
-        self.languages = languages or ['ch', 'en']
-        self.ocr = None
-        self._load_ocr()
-    
-    def _load_ocr(self):
-        """加载OCR引擎"""
-        try:
-            from paddleocr import PaddleOCR
-            self.ocr = PaddleOCR(
-                use_angle_cls=True,
-                lang='ch' if 'ch' in self.languages else 'en',
-                use_gpu=self.use_gpu,
-                show_log=False
-            )
-            print("成功加载PaddleOCR")
-        except ImportError:
-            print("未安装PaddleOCR")
-            self.ocr = None
-        except Exception as e:
-            print(f"加载OCR失败: {e}")
-    
-    def recognize(self, image: np.ndarray, 
-                  detect_only_numbers: bool = True) -> List[OCRResult]:
-        """识别图像中的文字"""
-        if self.ocr is None or image is None:
-            return []
-        
-        results = []
-        
-        try:
-            ocr_results = self.ocr.ocr(image, cls=True)
-            if ocr_results and len(ocr_results) > 0:
-                for line in ocr_results[0]:
-                    if line is None:
-                        continue
-                    bbox_points, (text, conf) = line
-                    
-                    if conf < 0.5:
-                        continue
-                    
-                    x1 = int(min(p[0] for p in bbox_points))
-                    y1 = int(min(p[1] for p in bbox_points))
-                    x2 = int(max(p[0] for p in bbox_points))
-                    y2 = int(max(p[1] for p in bbox_points))
-                    
-                    results.append(OCRResult(
-                        text=text,
-                        confidence=conf,
-                        bbox=(x1, y1, x2-x1, y2-y1)
-                    ))
-        except Exception as e:
-            print(f"OCR识别错误: {e}")
-        
-        return results
-
-
-class NumberDetector:
-    """
-    编号检测器
-    在人体图像中检测编号
-    使用llama-server API进行OCR识别
-    """
-    
-    def __init__(self, use_api: bool = True, ocr_config: Dict = None):
-        """
-        初始化检测器
-        Args:
-            use_api: 是否使用API进行OCR
-            ocr_config: OCR配置
-        """
-        self.segmenter = PersonSegmenter(use_gpu=False)
-        
-        # 使用API OCR或本地OCR
-        if use_api:
-            self.ocr = OCRRecognizer(ocr_config)
-            print("使用llama-server API进行OCR识别")
-        else:
-            self.ocr = OCRRecognizerLocal()
-            print("使用本地OCR进行识别")
-        
-        # 编号可能出现的区域 (相对于人体边界框的比例)
-        self.search_regions = [
-            {'name': '胸部', 'y_ratio': (0.2, 0.5), 'x_ratio': (0.2, 0.8)},
-            {'name': '腹部', 'y_ratio': (0.5, 0.8), 'x_ratio': (0.2, 0.8)},
-            {'name': '背部上方', 'y_ratio': (0.1, 0.4), 'x_ratio': (0.1, 0.9)},
-        ]
-    
-    def detect_number(self, frame: np.ndarray,
-                      person_bbox: Tuple[int, int, int, int]) -> PersonInfo:
-        """
-        检测人体编号
-        Args:
-            frame: 输入图像
-            person_bbox: 人体边界框
-        Returns:
-            人员信息
-        """
-        x, y, w, h = person_bbox
-        
-        # 提取人体区域
-        person_region, offset = self.segmenter.extract_person_region(
-            frame, person_bbox
-        )
-        
-        person_info = PersonInfo(
-            person_id=-1,
-            person_bbox=person_bbox,
-            ocr_results=[]
-        )
-        
-        # 在不同区域搜索编号
-        best_result = None
-        best_confidence = 0
-        
-        for region in self.search_regions:
-            # 计算搜索区域
-            y1 = int(h * region['y_ratio'][0])
-            y2 = int(h * region['y_ratio'][1])
-            x1 = int(w * region['x_ratio'][0])
-            x2 = int(w * region['x_ratio'][1])
-            
-            # 确保在图像范围内
-            y1 = max(0, min(y1, person_region.shape[0]))
-            y2 = max(0, min(y2, person_region.shape[0]))
-            x1 = max(0, min(x1, person_region.shape[1]))
-            x2 = max(0, min(x2, person_region.shape[1]))
-            
-            if y2 <= y1 or x2 <= x1:
-                continue
-            
-            # 裁剪区域
-            roi = person_region[y1:y2, x1:x2]
-            
-            # OCR识别
-            ocr_results = self.ocr.recognize(roi)
-            
-            for result in ocr_results:
-                # 调整坐标到原始图像坐标系
-                adjusted_bbox = (
-                    result.bbox[0] + x1 + offset[0],
-                    result.bbox[1] + y1 + offset[1],
-                    result.bbox[2],
-                    result.bbox[3]
-                )
-                
-                result.bbox = adjusted_bbox
-                result.location = region['name']
-                
-                person_info.ocr_results.append(result)
-                
-                # 更新最佳结果
-                if result.confidence > best_confidence:
-                    best_confidence = result.confidence
-                    best_result = result
-        
-        # 设置最佳结果作为编号
-        if best_result:
-            person_info.number_text = best_result.text
-            person_info.number_confidence = best_result.confidence
-            person_info.number_location = best_result.location
-        
-        return person_info
-    
-    def detect_numbers_batch(self, frame: np.ndarray,
-                             person_bboxes: List[Tuple[int, int, int, int]]) -> List[PersonInfo]:
-        """
-        批量检测人体编号
-        Args:
-            frame: 输入图像
-            person_bboxes: 人体边界框列表
-        Returns:
-            人员信息列表
-        """
-        results = []
-        for i, bbox in enumerate(person_bboxes):
-            person_info = self.detect_number(frame, bbox)
-            person_info.person_id = i
-            results.append(person_info)
-        return results
-    
-    def release(self):
-        """释放资源"""
-        if hasattr(self.segmenter, 'release'):
-            self.segmenter.release()
-
-
-def preprocess_for_ocr(image: np.ndarray) -> np.ndarray:
-    """
-    OCR预处理
-    Args:
-        image: 输入图像
-    Returns:
-        预处理后的图像
-    """
-    if image is None:
-        return None
-    
-    # 转换为灰度图
-    if len(image.shape) == 3:
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-    else:
-        gray = image
-    
-    # 自适应直方图均衡化
-    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-    enhanced = clahe.apply(gray)
-    
-    # 降噪
-    denoised = cv2.fastNlMeansDenoising(enhanced, None, 10)
-    
-    # 二值化
-    _, binary = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-    
-    return binary

+ 13 - 9
dual_camera_system/oss_uploader.py

@@ -396,29 +396,33 @@ class OSSUploader:
 
 # 全局单例
 _oss_uploader_instance: Optional[OSSUploader] = None
+_oss_uploader_lock = threading.Lock()
 
 
 def get_oss_uploader(config: Dict[str, Any] = None) -> OSSUploader:
     """
-    获取 OSS 上传器实例(单例模式)
-    
+    获取 OSS 上传器实例(单例模式,线程安全)
+
     Args:
         config: OSS 配置
-        
+
     Returns:
         OSSUploader 实例
     """
     global _oss_uploader_instance
-    
+
     if _oss_uploader_instance is None:
-        _oss_uploader_instance = OSSUploader(config)
-    
+        with _oss_uploader_lock:
+            if _oss_uploader_instance is None:
+                _oss_uploader_instance = OSSUploader(config)
+
     return _oss_uploader_instance
 
 
 def reset_oss_uploader():
     """重置 OSS 上传器实例"""
     global _oss_uploader_instance
-    if _oss_uploader_instance is not None:
-        _oss_uploader_instance.stop()
-    _oss_uploader_instance = None
+    with _oss_uploader_lock:
+        if _oss_uploader_instance is not None:
+            _oss_uploader_instance.stop()
+        _oss_uploader_instance = None

+ 77 - 48
dual_camera_system/paired_image_saver.py

@@ -50,7 +50,6 @@ class DetectionBatch:
     completed: bool = False
     device_id: str = ''  # 设备编号
     project_id: str = ''  # 项目编号
-    capture_paths: List[str] = field(default_factory=list)  # captures 快照路径
 
 
 class PairedImageSaver:
@@ -215,10 +214,14 @@ class PairedImageSaver:
         persons = task['persons']
 
         try:
-            # 保存原图
-            original_filename = f"00_panorama_original_n{len(persons)}.png"
+            # 保存原图 - JPEG 压缩 (质量 85%)
+            original_filename = f"00_panorama_original_n{len(persons)}.jpg"
             original_filepath = batch_dir / original_filename
-            cv2.imwrite(str(original_filepath), frame)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+            result, encoded = cv2.imencode('.jpg', frame, encode_param)
+            if result:
+                with open(str(original_filepath), 'wb') as f:
+                    f.write(encoded)
 
             # 保存标记图
             marked_frame = frame.copy()
@@ -232,9 +235,13 @@ class PairedImageSaver:
                 cv2.rectangle(marked_frame, (x1, y1 - label_h - 8), (x1 + label_w, y1), (0, 255, 0), -1)
                 cv2.putText(marked_frame, label, (x1, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2)
 
-            marked_filename = f"00_panorama_marked_n{len(persons)}.png"
+            marked_filename = f"00_panorama_marked_n{len(persons)}.jpg"
             marked_filepath = batch_dir / marked_filename
-            cv2.imwrite(str(marked_filepath), marked_frame)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+            result, encoded = cv2.imencode('.jpg', marked_frame, encode_param)
+            if result:
+                with open(str(marked_filepath), 'wb') as f:
+                    f.write(encoded)
 
             logger.info(f"[配对保存] 全景图已保存: batch={batch_id}")
 
@@ -261,13 +268,17 @@ class PairedImageSaver:
         try:
             pan, tilt, zoom = ptz_position
 
-            # 保存原图(未标记)
-            original_filename = f"01_ptz_person{person_index}_p{int(pan)}_t{int(tilt)}_z{int(zoom)}_original.png"
+            # 保存原图(未标记)- JPEG 压缩
+            original_filename = f"01_ptz_person{person_index}_p{int(pan)}_t{int(tilt)}_z{int(zoom)}_original.jpg"
             original_filepath = batch_dir / original_filename
             if ptz_frame is not None:
-                cv2.imwrite(str(original_filepath), ptz_frame)
+                encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+                result, encoded = cv2.imencode('.jpg', ptz_frame, encode_param)
+                if result:
+                    with open(str(original_filepath), 'wb') as f:
+                        f.write(encoded)
 
-            # 保存标记图
+            # 保存标记图 - JPEG 压缩
             marked_frame = ptz_frame_marked if ptz_frame_marked is not None else None
             if marked_frame is None and ptz_frame is not None:
                 marked_frame = ptz_frame.copy()
@@ -282,10 +293,14 @@ class PairedImageSaver:
                     bbox_text = f"PTZ_BBox: ({x1},{y1},{x2},{y2})"
                     cv2.putText(marked_frame, bbox_text, (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
 
-            marked_filename = f"01_ptz_person{person_index}_p{int(pan)}_t{int(tilt)}_z{int(zoom)}_marked.png"
+            marked_filename = f"01_ptz_person{person_index}_p{int(pan)}_t{int(tilt)}_z{int(zoom)}_marked.jpg"
             marked_filepath = batch_dir / marked_filename
             if marked_frame is not None:
-                cv2.imwrite(str(marked_filepath), marked_frame)
+                encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+                result, encoded = cv2.imencode('.jpg', marked_frame, encode_param)
+                if result:
+                    with open(str(marked_filepath), 'wb') as f:
+                        f.write(encoded)
 
             logger.info(f"[配对保存] 球机图已保存: batch={batch_id}, person={person_index}")
 
@@ -341,12 +356,32 @@ class PairedImageSaver:
             txt_path = batch_dir / "batch_info.txt"
             self._save_batch_info_txt(batch, txt_path)
 
+            # 校验 OSS URL 不能为 null,上报前确保所有图片已上传
+            upload_status = self._get_upload_status(batch.batch_id)
+            missing_urls = []
+
+            # 检查全景图
+            if not upload_status.get('panorama_url') and not batch.panorama_oss_url:
+                missing_urls.append('panorama')
+            if not upload_status.get('panorama_original_url') and not batch.panorama_oss_url_original:
+                missing_urls.append('panorama_original')
+
+            # 检查球机图
+            for idx, person in enumerate(batch.persons):
+                if person.ptz_image_saved:
+                    ptz_marked = upload_status.get('ptz_marked', {}).get(idx) or person.ptz_oss_url
+                    ptz_original = upload_status.get('ptz_original', {}).get(idx) or person.ptz_oss_url_original
+                    if not ptz_marked:
+                        missing_urls.append(f'ptz_{idx}_marked')
+                    if not ptz_original:
+                        missing_urls.append(f'ptz_{idx}_original')
+
+            if missing_urls:
+                logger.warning(f"[配对保存] OSS 上传未完成,跳过上报: batch_id={batch.batch_id}, 缺失: {missing_urls}")
+
             # 上报第三方平台
             self._report_to_third_party(batch_info)
 
-            # 删除 captures 目录下已上传的快照文件
-            self._cleanup_captures(batch)
-
             # 标记上传完成
             if batch.batch_id in self._upload_status:
                 self._upload_status[batch.batch_id]['completed'] = True
@@ -492,10 +527,14 @@ class PairedImageSaver:
             (原图路径, 标记图路径) 或 (None, None)
         """
         try:
-            # 保存原图(未标记)
-            original_filename = f"00_panorama_original_n{len(persons)}.png"
+            # 保存原图(未标记)- JPEG 压缩
+            original_filename = f"00_panorama_original_n{len(persons)}.jpg"
             original_filepath = batch_dir / original_filename
-            cv2.imwrite(str(original_filepath), frame)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+            result, encoded = cv2.imencode('.jpg', frame, encode_param)
+            if result:
+                with open(str(original_filepath), 'wb') as f:
+                    f.write(encoded)
 
             # 复制图像避免修改原图
             marked_frame = frame.copy()
@@ -533,9 +572,13 @@ class PairedImageSaver:
                 )
 
             # 保存标记图
-            marked_filename = f"00_panorama_marked_n{len(persons)}.png"
+            marked_filename = f"00_panorama_marked_n{len(persons)}.jpg"
             marked_filepath = batch_dir / marked_filename
-            cv2.imwrite(str(marked_filepath), marked_frame)
+            encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+            result, encoded = cv2.imencode('.jpg', marked_frame, encode_param)
+            if result:
+                with open(str(marked_filepath), 'wb') as f:
+                    f.write(encoded)
 
             logger.info(f"[配对保存] 全景图已保存: 原图={original_filepath}, 标记图={marked_filepath}, 人员数量 {len(persons)}")
             return str(original_filepath), str(marked_filepath)
@@ -575,12 +618,16 @@ class PairedImageSaver:
             batch_dir = self.base_dir / f"batch_{batch_id}"
 
             try:
-                # 保存原图(未标记)
-                original_filename = f"01_ptz_person{person_index}_p{int(ptz_position[0])}_t{int(ptz_position[1])}_z{int(ptz_position[2])}_original.png"
+                # 保存原图(未标记)- JPEG 压缩
+                original_filename = f"01_ptz_person{person_index}_p{int(ptz_position[0])}_t{int(ptz_position[1])}_z{int(ptz_position[2])}_original.jpg"
                 original_filepath = batch_dir / original_filename
 
                 if ptz_frame is not None:
-                    cv2.imwrite(str(original_filepath), ptz_frame)
+                    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+                    result, encoded = cv2.imencode('.jpg', ptz_frame, encode_param)
+                    if result:
+                        with open(str(original_filepath), 'wb') as f:
+                            f.write(encoded)
 
                 # 优先使用外部传入的标记帧,否则内部生成
                 if ptz_frame_marked is not None:
@@ -612,12 +659,16 @@ class PairedImageSaver:
                         cv2.putText(marked_frame, bbox_text, (10, 90),
                                     cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
 
-                # 保存标记图
-                marked_filename = f"01_ptz_person{person_index}_p{int(ptz_position[0])}_t{int(ptz_position[1])}_z{int(ptz_position[2])}_marked.png"
+                # 保存标记图 - JPEG 压缩
+                marked_filename = f"01_ptz_person{person_index}_p{int(ptz_position[0])}_t{int(ptz_position[1])}_z{int(ptz_position[2])}_marked.jpg"
                 marked_filepath = batch_dir / marked_filename
 
                 if marked_frame is not None:
-                    cv2.imwrite(str(marked_filepath), marked_frame)
+                    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 85]
+                    result, encoded = cv2.imencode('.jpg', marked_frame, encode_param)
+                    if result:
+                        with open(str(marked_filepath), 'wb') as f:
+                            f.write(encoded)
 
                 logger.info(f"[配对保存] 球机图已保存: 原图={original_filepath}, 标记图={marked_filepath}")
 
@@ -826,28 +877,6 @@ class PairedImageSaver:
         
         # 清理旧批次
         self._cleanup_old_batches()
-    
-    def add_capture_path(self, batch_id: str, capture_path: str):
-        """记录 captures 快照路径,批次完成时删除"""
-        with self._batch_lock:
-            if self._current_batch and self._current_batch.batch_id == batch_id:
-                self._current_batch.capture_paths.append(capture_path)
-
-    def _cleanup_captures(self, batch: DetectionBatch):
-        """删除 captures 目录下已上传的快照文件"""
-        if not batch.capture_paths:
-            return
-        deleted = 0
-        for path in batch.capture_paths:
-            try:
-                p = Path(path)
-                if p.exists():
-                    p.unlink()
-                    deleted += 1
-            except Exception as e:
-                logger.warning(f"[配对保存] 删除快照失败: {path}, {e}")
-        if deleted > 0:
-            logger.info(f"[配对保存] 已清理 {deleted} 个 captures 快照")
 
     def _report_to_third_party(self, batch_info: Dict):
         """上报批次信息到第三方平台"""

+ 36 - 25
dual_camera_system/ptz_camera.py

@@ -53,6 +53,7 @@ class PTZCamera:
         
         # 视频流 (用于校准时抓拍球机画面)
         self.rtsp_cap = None
+        self._rtsp_lock = threading.Lock()
         self.current_frame = None
         self.frame_lock = threading.Lock()
         self.stream_thread = None
@@ -101,14 +102,14 @@ class PTZCamera:
         
         try:
             # 先尝试FFmpeg后端
-            self.rtsp_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
-            if not self.rtsp_cap.isOpened():
+            new_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
+            if not new_cap.isOpened():
                 # FFmpeg失败,尝试GStreamer后端(ARM64上更稳定)
                 print(f"[PTZCamera] FFmpeg后端无法打开RTSP流,尝试GStreamer后端...")
                 try:
                     gst_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_GSTREAMER)
                     if gst_cap.isOpened():
-                        self.rtsp_cap = gst_cap
+                        new_cap = gst_cap
                         print(f"[PTZCamera] 使用GStreamer后端打开RTSP流成功")
                     else:
                         print(f"[PTZCamera] 无法打开RTSP流: {rtsp_url}")
@@ -116,8 +117,10 @@ class PTZCamera:
                 except Exception as ge:
                     print(f"[PTZCamera] GStreamer后端也不可用: {ge}")
                     return False
-            
-            self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+
+            new_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+            with self._rtsp_lock:
+                self.rtsp_cap = new_cap
             
             self.running_stream = True
             self.stream_thread = threading.Thread(target=self._stream_worker, daemon=True)
@@ -142,11 +145,13 @@ class PTZCamera:
         
         while self.running_stream:
             try:
-                if self.rtsp_cap is None or not safe_is_opened(self.rtsp_cap, self._camera_id):
+                with self._rtsp_lock:
+                    cap = self.rtsp_cap
+                if cap is None or not safe_is_opened(cap, self._camera_id):
                     time.sleep(0.1)
                     continue
-                
-                ret, frame = safe_read(self.rtsp_cap, self._camera_id)
+
+                ret, frame = safe_read(cap, self._camera_id)
                 if not ret or frame is None:
                     error_count += 1
                     if error_count > max_consecutive_errors:
@@ -175,27 +180,32 @@ class PTZCamera:
     def _reconnect_rtsp(self):
         rtsp_url = self.config.get('rtsp_url') or \
             f"rtsp://{self.config['username']}:{self.config['password']}@{self.config['ip']}:{self.config.get('rtsp_port', 554)}/cam/realmonitor?channel=1&subtype=1"
-        
-        if self.rtsp_cap is not None:
-            try:
-                self.rtsp_cap.release()
-            except Exception:
-                pass
-            self.rtsp_cap = None
-        
+
+        with self._rtsp_lock:
+            if self.rtsp_cap is not None:
+                try:
+                    self.rtsp_cap.release()
+                except Exception:
+                    pass
+                self.rtsp_cap = None
+
         time.sleep(1)
-        
+
         try:
-            self.rtsp_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
-            if safe_is_opened(self.rtsp_cap, self._camera_id):
-                self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+            new_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
+            if safe_is_opened(new_cap, self._camera_id):
+                new_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+                with self._rtsp_lock:
+                    self.rtsp_cap = new_cap
                 print("[PTZCamera] RTSP流重连成功")
             else:
                 print("[PTZCamera] RTSP流重连失败")
-                self.rtsp_cap = None
+                try:
+                    new_cap.release()
+                except Exception:
+                    pass
         except Exception as e:
             print(f"[PTZCamera] RTSP流重连异常: {e}")
-            self.rtsp_cap = None
     
     def get_frame(self) -> Optional[np.ndarray]:
         """获取球机当前帧"""
@@ -208,9 +218,10 @@ class PTZCamera:
         if self.stream_thread:
             self.stream_thread.join(timeout=2)
             self.stream_thread = None
-        if self.rtsp_cap:
-            self.rtsp_cap.release()
-            self.rtsp_cap = None
+        with self._rtsp_lock:
+            if self.rtsp_cap:
+                self.rtsp_cap.release()
+                self.rtsp_cap = None
     
     def ptz_control(self, command: int, param1: int = 0, param2: int = 0, 
                     param3: int = 0, stop: bool = False) -> bool:

+ 47 - 33
dual_camera_system/ptz_person_tracker.py

@@ -216,61 +216,75 @@ class PTZPersonDetector:
         return persons
     
     def _detect_rknn(self, frame: np.ndarray) -> List[DetectedPerson]:
-        """RKNN检测"""
+        """RKNN检测(使用 letterbox 预处理 + BGR→RGB 转换)"""
         persons = []
-        
+
         try:
-            # 预处理
-            img = cv2.resize(frame, (640, 640))
-            img = img.astype(np.float32) / 255.0
-            img = np.expand_dims(img, 0)
-            
+            h, w = frame.shape[:2]
+
+            # Letterbox 预处理:保持宽高比,灰边填充
+            input_size = 640
+            scale = min(input_size / w, input_size / h)
+            new_w = int(w * scale)
+            new_h = int(h * scale)
+            pad_w = (input_size - new_w) / 2
+            pad_h = (input_size - new_h) / 2
+
+            img = cv2.resize(frame, (new_w, new_h))
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+            # 填充灰边 (114,114,114)
+            letterboxed = np.full((input_size, input_size, 3), 114, dtype=np.uint8)
+            letterboxed[int(pad_h):int(pad_h + new_h), int(pad_w):int(pad_w + new_w)] = img
+
+            img_input = letterboxed.astype(np.float32) / 255.0
+            img_input = np.expand_dims(img_input, 0)
+
             # 推理
-            outputs = self.model.inference(inputs=[img])
-            
-            # 后处理 (YOLO格式输出)
-            # outputs shape: [1, 84, 8400] 或类似
+            outputs = self.model.inference(inputs=[img_input])
+
             if outputs is None or len(outputs) == 0:
                 return []
-            
+
             output = outputs[0]
-            
-            # 解析检测结果
-            h, w = frame.shape[:2]
-            
+
             for i in range(output.shape[-1]):
                 data = output[0, :, i]
-                
-                # 获取类别和置信度
+
                 class_scores = data[4:]
                 class_id = np.argmax(class_scores)
                 confidence = class_scores[class_id]
-                
+
                 if confidence < self.confidence_threshold:
                     continue
-                
+
                 if class_id != self.person_class_id:
                     continue
-                
-                # 获取边界框
+
                 cx, cy, bw, bh = data[:4]
-                
-                # 转换为原图坐标
-                x1 = int((cx - bw/2) * w / 640)
-                y1 = int((cy - bh/2) * h / 640)
-                x2 = int((cx + bw/2) * w / 640)
-                y2 = int((cy + bh/2) * h / 640)
-                
+
+                # letterbox 坐标 → 原图坐标
+                x1 = int((cx - bw / 2 - pad_w) / scale)
+                y1 = int((cy - bh / 2 - pad_h) / scale)
+                x2 = int((cx + bw / 2 - pad_w) / scale)
+                y2 = int((cy + bh / 2 - pad_h) / scale)
+
+                # 裁剪到画面范围
+                x1 = max(0, min(x1, w))
+                y1 = max(0, min(y1, h))
+                x2 = max(0, min(x2, w))
+                y2 = max(0, min(y2, h))
+
                 persons.append(DetectedPerson(
                     bbox=(x1, y1, x2, y2),
-                    center=((x1+x2)/2, (y1+y2)/2),
-                    width=x2-x1,
-                    height=y2-y1,
+                    center=((x1 + x2) / 2, (y1 + y2) / 2),
+                    width=x2 - x1,
+                    height=y2 - y1,
                     confidence=float(confidence)
                 ))
         except Exception as e:
             print(f"[PTZPersonDetector] RKNN检测错误: {e}")
-        
+
         return persons
     
     def detect_largest_person(self, frame: np.ndarray) -> Optional[DetectedPerson]:

+ 12 - 73
dual_camera_system/safety_coordinator.py

@@ -1,6 +1,6 @@
 """
 安全联动控制器
-整合安全检测、事件推送和语音播报功能
+整合安全检测、事件推送功能
 """
 
 import time
@@ -14,11 +14,10 @@ import numpy as np
 import cv2
 
 from config import (
-    COORDINATOR_CONFIG, 
+    COORDINATOR_CONFIG,
     SAFETY_DETECTION_CONFIG,
-    EVENT_PUSHER_CONFIG, 
+    EVENT_PUSHER_CONFIG,
     EVENT_LISTENER_CONFIG,
-    VOICE_ANNOUNCER_CONFIG,
     SYSTEM_CONFIG
 )
 from safety_detector import (
@@ -26,7 +25,6 @@ from safety_detector import (
     SafetyViolationType, draw_safety_result
 )
 from event_pusher import EventPusher, EventListener, SafetyEvent, EventType
-from voice_announcer import VoiceAnnouncer, VoicePriority
 
 
 class CoordinatorState(Enum):
@@ -46,11 +44,10 @@ class AlertRecord:
     frame: Optional[np.ndarray]      # 图像
     timestamp: float                 # 时间戳
     pushed: bool = False             # 是否已推送
-    announced: bool = False          # 是否已播报
 
 
 class SafetyCoordinator:
-    """安全联动控制器:协调摄像头、安全检测、事件推送、语音播报、PTZ跟踪"""
+    """安全联动控制器:协调摄像头、安全检测、事件推送、PTZ跟踪"""
     
     def __init__(self, camera, config: Dict = None, ptz_camera=None, calibrator=None):
         self.camera = camera
@@ -60,7 +57,6 @@ class SafetyCoordinator:
         
         self.detector = None
         self.event_pusher = None
-        self.voice_announcer = None
         self.event_listener = None
         
         self.state = CoordinatorState.IDLE
@@ -80,7 +76,6 @@ class SafetyCoordinator:
             'persons_detected': 0,
             'violations_detected': 0,
             'events_pushed': 0,
-            'voice_announced': 0,
             'ptz_commands_sent': 0,
             'start_time': None
         }
@@ -97,8 +92,7 @@ class SafetyCoordinator:
         enable_detection = SYSTEM_CONFIG.get('enable_detection', True)
         enable_safety_detection = SYSTEM_CONFIG.get('enable_safety_detection', True)
         enable_event_push = SYSTEM_CONFIG.get('enable_event_push', True)
-        enable_voice_announce = SYSTEM_CONFIG.get('enable_voice_announce', True)
-        
+
         # 安全检测器
         if enable_detection and enable_safety_detection:
             try:
@@ -113,7 +107,7 @@ class SafetyCoordinator:
                 print(f"安全检测器初始化失败: {e}")
         else:
             print("安全检测功能已禁用")
-        
+
         # 事件推送器
         if enable_event_push:
             try:
@@ -123,41 +117,15 @@ class SafetyCoordinator:
                 print(f"事件推送器初始化失败: {e}")
         else:
             print("事件推送功能已禁用")
-        
-        # 语音播报器
-        if enable_voice_announce:
-            try:
-                self.voice_announcer = VoiceAnnouncer(
-                    tts_config=VOICE_ANNOUNCER_CONFIG.get('tts', {}),
-                    player_config=VOICE_ANNOUNCER_CONFIG.get('player', {})
-                )
-                print("语音播报器初始化成功")
-            except Exception as e:
-                print(f"语音播报器初始化失败: {e}")
-        else:
-            print("语音播报功能已禁用")
-        
+
         # 事件监听器
         if EVENT_LISTENER_CONFIG.get('enabled', True):
             try:
                 self.event_listener = EventListener(EVENT_LISTENER_CONFIG)
-                # 设置语音播放回调
-                self.event_listener.set_voice_callback(self._on_voice_command)
                 print("事件监听器初始化成功")
             except Exception as e:
                 print(f"事件监听器初始化失败: {e}")
     
-    def _on_voice_command(self, cmd: Dict):
-        """处理语音播放指令"""
-        if not self.voice_announcer:
-            return
-        
-        text = cmd.get('text', '')
-        priority = VoicePriority(cmd.get('priority', 2))
-        
-        if text:
-            self.voice_announcer.announce(text, priority=priority)
-    
     def start(self) -> bool:
         """启动控制器"""
         if self.running:
@@ -165,10 +133,7 @@ class SafetyCoordinator:
         
         if self.event_pusher:
             self.event_pusher.start()
-        
-        if self.voice_announcer:
-            self.voice_announcer.start()
-        
+
         if self.event_listener:
             self.event_listener.start()
         
@@ -195,10 +160,7 @@ class SafetyCoordinator:
         
         if self.event_pusher:
             self.event_pusher.stop()
-        
-        if self.voice_announcer:
-            self.voice_announcer.stop()
-        
+
         if self.event_listener:
             self.event_listener.stop()
         
@@ -255,8 +217,6 @@ class SafetyCoordinator:
                     
                     result = self._process_frame_with_logging(frame, detection_run_count, detection_violation_count, last_no_detect_log_time, sc_logger)
                     detection_violation_count = result
-                    
-                    self._cleanup_tracks()
                 
                 time.sleep(0.01)
                 
@@ -375,12 +335,7 @@ class SafetyCoordinator:
                 confidence=status.person_conf
             )
             self._update_stats('events_pushed')
-        
-        # 语音播报
-        if self.voice_announcer:
-            self.voice_announcer.announce_violation(description, urgent=True)
-            self._update_stats('voice_announced')
-        
+
         print(f"[告警] {description}")
     
     # PTZ跟踪已禁用 - _track_violator_ptz 和 _ptz_worker 方法已移除
@@ -412,16 +367,11 @@ class SafetyCoordinator:
                 print(f"检测人员: {self.stats['persons_detected']}次")
                 print(f"违规检测: {self.stats['violations_detected']}次")
                 print(f"事件推送: {self.stats['events_pushed']}次")
-                print(f"语音播报: {self.stats['voice_announced']}次")
-                
+
                 if self.event_pusher:
                     push_stats = self.event_pusher.get_stats()
                     print(f"推送详情: 成功{push_stats['pushed_events']}, 失败{push_stats['failed_events']}")
-                
-                if self.voice_announcer:
-                    voice_stats = self.voice_announcer.get_stats()
-                    print(f"播报详情: 成功{voice_stats['played_commands']}, 失败{voice_stats['failed_commands']}")
-                
+
                 print("===================\n")
     
     def get_stats(self) -> Dict:
@@ -433,17 +383,6 @@ class SafetyCoordinator:
         """获取告警记录"""
         return self.alert_records.copy()
     
-    def announce(self, text: str, priority: VoicePriority = VoicePriority.NORMAL):
-        """
-        手动播报语音
-        
-        Args:
-            text: 播报文本
-            priority: 优先级
-        """
-        if self.voice_announcer:
-            self.voice_announcer.announce(text, priority=priority)
-    
     def force_detect(self, frame: np.ndarray = None) -> Tuple[List[SafetyDetection], List[PersonSafetyStatus]]:
         """
         强制执行一次检测

+ 5 - 13
dual_camera_system/safety_detector.py

@@ -708,9 +708,8 @@ class LLMSafetyDetector:
         
         if use_llm:
             try:
-                from llm_service import SafetyAnalyzer, NumberRecognizer
+                from llm_service import SafetyAnalyzer
                 self.llm_analyzer = SafetyAnalyzer(llm_config)
-                self.number_recognizer = NumberRecognizer(llm_config)
                 print("大模型安全分析器初始化成功")
             except ImportError:
                 print("未找到 llm_service 模块,将使用规则判断")
@@ -803,22 +802,15 @@ class LLMSafetyDetector:
     def recognize_number(self, frame: np.ndarray,
                          person_bbox: Tuple[int, int, int, int]) -> Dict[str, Any]:
         """
-        识别人员编号
-        
+        识别人员编号(已禁用)
+
         Args:
             frame: 输入图像
             person_bbox: 人员边界框
-            
+
         Returns:
             编号识别结果
         """
-        if self.number_recognizer is None:
-            return {'number': None, 'success': False}
-        
-        # 裁剪人员区域
-        x1, y1, x2, y2 = person_bbox
-        person_image = frame[y1:y2, x1:x2]
-        
-        return self.number_recognizer.recognize_person_number(person_image)
+        return {'number': None, 'success': False}
     
     # 轨迹追踪已禁用 - detect_with_tracking 方法已移除

+ 0 - 822
dual_camera_system/safety_main.py

@@ -1,822 +0,0 @@
-"""
-施工现场安全行为智能识别系统 - 主程序
-
-系统功能:
-1. 实时视频监控
-2. 人员、安全帽、反光衣检测
-3. 安全违规识别(未戴安全帽、未穿反光衣)
-4. 事件推送至业务平台
-5. 接收平台指令,TTS 语音播报
-"""
-
-import os
-import sys
-import time
-import glob
-import argparse
-import logging
-import threading
-import signal
-from typing import Optional, List
-
-import cv2
-import numpy as np
-
-# 添加项目路径
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-from config import (
-    LOG_CONFIG, PANORAMA_CAMERA, PTZ_CAMERA, SDK_PATH,
-    SAFETY_DETECTION_CONFIG, EVENT_PUSHER_CONFIG,
-    VOICE_ANNOUNCER_CONFIG, SYSTEM_CONFIG,
-    LLM_CONFIG, LLM_SAFETY_CONFIG
-)
-from safety_detector import (
-    SafetyDetector, SafetyDetection, PersonSafetyStatus,
-    draw_safety_result, SafetyViolationType, LLMSafetyDetector
-)
-from llm_service import SafetyAnalyzer, NumberRecognizer
-from event_pusher import EventPusher, SafetyEvent, EventType
-from voice_announcer import VoiceAnnouncer, VoicePriority
-from safety_coordinator import SafetyCoordinator, SimpleCamera
-
-
-# 配置日志
-def _cleanup_old_logs(log_file: str, retention_days: int):
-    """清理超过保留天数的日志文件"""
-    if not log_file:
-        return
-
-    log_dir = os.path.dirname(log_file) or '.'
-    log_basename = os.path.basename(log_file)
-
-    patterns = [
-        log_basename,
-        f"{log_basename}.*",
-        f"{os.path.splitext(log_basename)[0]}.*",
-    ]
-
-    now = time.time()
-    cutoff = now - (retention_days * 86400)
-
-    for pattern in patterns:
-        full_pattern = os.path.join(log_dir, pattern)
-        for log_path in glob.glob(full_pattern):
-            try:
-                if os.path.isfile(log_path):
-                    mtime = os.path.getmtime(log_path)
-                    if mtime < cutoff:
-                        os.remove(log_path)
-                        print(f"[日志清理] 已删除过期日志: {log_path}")
-            except Exception:
-                pass
-
-
-def _log_cleanup_worker(retention_days: int, interval_hours: int = 6):
-    """日志清理后台线程"""
-    log_file = LOG_CONFIG.get('file')
-    if not log_file:
-        return
-
-    while True:
-        _cleanup_old_logs(log_file, retention_days)
-        time.sleep(interval_hours * 3600)
-
-
-def setup_logging():
-    """设置日志配置"""
-    log_level = getattr(logging, LOG_CONFIG.get('level', 'INFO'), logging.INFO)
-    log_format = LOG_CONFIG.get('format', '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-    log_file = LOG_CONFIG.get('file')
-    retention_days = LOG_CONFIG.get('retention_days', 7)
-
-    handlers = [logging.StreamHandler()]
-
-    if log_file:
-        log_dir = os.path.dirname(log_file)
-        if log_dir:
-            os.makedirs(log_dir, exist_ok=True)
-
-        from logging.handlers import RotatingFileHandler
-        file_handler = RotatingFileHandler(
-            log_file,
-            maxBytes=LOG_CONFIG.get('max_bytes', 10*1024*1024),
-            backupCount=LOG_CONFIG.get('backup_count', 5)
-        )
-        file_handler.setFormatter(logging.Formatter(log_format))
-        handlers.append(file_handler)
-
-        # 启动日志清理后台线程
-        cleanup_thread = threading.Thread(
-            target=_log_cleanup_worker,
-            args=(retention_days, 6),
-            daemon=True
-        )
-        cleanup_thread.start()
-
-    logging.basicConfig(
-        level=log_level,
-        format=log_format,
-        handlers=handlers
-    )
-
-setup_logging()
-logger = logging.getLogger(__name__)
-
-
-class SafetyMonitorSystem:
-    """
-    施工现场安全监控系统
-    """
-    
-    def __init__(self, config: dict = None):
-        """
-        初始化系统
-        
-        Args:
-            config: 配置覆盖
-        """
-        self.config = config or {}
-        
-        # 摄像头
-        self.camera = None
-        self.camera_source = None
-        
-        # PTZ球机(安全模式可选联动)
-        self.ptz_camera = None
-        self.calibrator = None
-        self.sdk = None
-        
-        # 双路流管理器(用于PTZ模式下的双流并行)
-        self.stream_manager = None
-        
-        # 组件
-        self.detector = None          # 安全检测器 (支持 LLM)
-        self.llm_analyzer = None      # 大模型安全分析器
-        self.number_recognizer = None # 编号识别器
-        self.event_pusher = None      # 事件推送器
-        self.voice_announcer = None   # 语音播报器
-        
-        # 功能开关 - 从 SYSTEM_CONFIG 读取
-        self.enable_panorama_camera = SYSTEM_CONFIG.get('enable_panorama_camera', True)
-        self.enable_ptz_camera = SYSTEM_CONFIG.get('enable_ptz_camera', True)
-        self.enable_detection = SYSTEM_CONFIG.get('enable_detection', True)
-        self.enable_safety_detection = SYSTEM_CONFIG.get('enable_safety_detection', True)
-        self.enable_calibration = SYSTEM_CONFIG.get('enable_calibration', True)
-        self.enable_ptz_tracking = SYSTEM_CONFIG.get('enable_ptz_tracking', True)
-        self.enable_ocr = SYSTEM_CONFIG.get('enable_ocr', True)
-        self.enable_llm = SYSTEM_CONFIG.get('enable_llm', True)
-        self.enable_event_push = SYSTEM_CONFIG.get('enable_event_push', True)
-        self.enable_voice_announce = SYSTEM_CONFIG.get('enable_voice_announce', True)
-        
-        # 状态
-        self.running = False
-        self.display = True           # 是否显示画面
-        
-        # 帧处理
-        self.current_frame = None
-        self.frame_lock = threading.Lock()
-        
-        # 统计
-        self.stats = {
-            'frames_processed': 0,
-            'persons_detected': 0,
-            'violations_detected': 0,
-            'events_pushed': 0,
-            'voice_announced': 0,
-            'start_time': None
-        }
-        self.stats_lock = threading.Lock()
-        
-        # 工作线程
-        self.detection_thread = None
-    
-    def initialize(self, camera_source=0) -> bool:
-        """
-        初始化系统组件
-        
-        Args:
-            camera_source: 摄像头源 (索引/RTSP/视频文件)
-            
-        Returns:
-            是否成功
-        """
-        logger.info("=" * 60)
-        logger.info(f"初始化 {SYSTEM_CONFIG['name']} v{SYSTEM_CONFIG['version']}")
-        logger.info("=" * 60)
-        
-        # 初始化摄像头
-        self.camera_source = camera_source
-        if self.enable_panorama_camera:
-            self.camera = SimpleCamera(camera_source)
-            
-            if not self.camera.connect():
-                logger.error("连接摄像头失败")
-                return False
-            
-            logger.info(f"摄像头连接成功: {camera_source}")
-        else:
-            self.camera = None
-            logger.info("摄像头功能已禁用")
-        
-        # 初始化 PTZ 球机(安全模式可选联动)
-        if self.enable_ptz_camera and self.enable_ptz_tracking:
-            try:
-                from dahua_sdk import DahuaSDK
-                sdk_path = os.path.join(SDK_PATH['lib_path'], SDK_PATH['netsdk'])
-                self.sdk = DahuaSDK(sdk_path)
-                if self.sdk.init():
-                    from ptz_camera import PTZCamera
-                    ptz_config = self.config.get('ptz_camera', PTZ_CAMERA)
-                    self.ptz_camera = PTZCamera(self.sdk, ptz_config)
-                    if self.ptz_camera.connect():
-                        logger.info(f"PTZ球机连接成功: {ptz_config['ip']}")
-                        if self.ptz_camera.start_stream_rtsp():
-                            logger.info("PTZ球机RTSP流启动成功")
-                        else:
-                            logger.warning("PTZ球机RTSP流启动失败,PTZ跟踪将无法进行帧验证")
-                    else:
-                        logger.warning(f"PTZ球机连接失败: {ptz_config['ip']}")
-                        self.ptz_camera = None
-                else:
-                    logger.warning("SDK初始化失败,PTZ功能不可用")
-                    self.sdk = None
-            except Exception as e:
-                logger.warning(f"PTZ球机初始化失败: {e},PTZ跟踪将不可用")
-                self.ptz_camera = None
-        
-        # 初始化 LLM 大模型服务
-        if self.enable_llm:
-            try:
-                llm_config = {**LLM_CONFIG}
-                if 'llm_host' in self.config:
-                    llm_config['api_host'] = self.config['llm_host']
-                if 'llm_port' in self.config:
-                    llm_config['api_port'] = self.config['llm_port']
-                
-                self.llm_analyzer = SafetyAnalyzer(llm_config)
-                logger.info(f"大模型分析器初始化成功: {llm_config['api_host']}:{llm_config['api_port']}")
-            except Exception as e:
-                logger.warning(f"大模型分析器初始化失败: {e},将使用规则判断")
-                self.use_llm = False
-        
-        # 初始化安全检测器 (支持 LLM)
-        if self.enable_detection and self.enable_safety_detection:
-            try:
-                llm_config = {**LLM_CONFIG} if self.enable_llm else None
-                
-                self.detector = LLMSafetyDetector(
-                    yolo_model_path=self.config.get('model_path', SAFETY_DETECTION_CONFIG.get('model_path')),
-                    llm_config=llm_config,
-                    use_gpu=self.config.get('use_gpu', SAFETY_DETECTION_CONFIG.get('use_gpu', True)),
-                    use_llm=self.enable_llm,
-                    model_type=self.config.get('model_type', SAFETY_DETECTION_CONFIG.get('model_type', 'auto'))
-                )
-                logger.info("安全检测器初始化成功")
-            except Exception as e:
-                logger.error(f"安全检测器初始化失败: {e}")
-                return False
-        else:
-            self.detector = None
-            logger.info("安全检测功能已禁用")
-        
-        # 初始化编号识别器
-        if self.enable_ocr and self.enable_llm:
-            try:
-                self.number_recognizer = NumberRecognizer(LLM_CONFIG)
-                logger.info("编号识别器初始化成功")
-            except Exception as e:
-                logger.warning(f"编号识别器初始化失败: {e}")
-                self.number_recognizer = None
-        
-        # 初始化事件推送器
-        if self.enable_event_push:
-            try:
-                push_config = {**EVENT_PUSHER_CONFIG}
-                if 'api_host' in self.config:
-                    push_config['api_host'] = self.config['api_host']
-                if 'api_port' in self.config:
-                    push_config['api_port'] = self.config['api_port']
-                
-                self.event_pusher = EventPusher(push_config)
-                logger.info("事件推送器初始化成功")
-            except Exception as e:
-                logger.warning(f"事件推送器初始化失败: {e}")
-        
-        # 初始化语音播报器
-        if self.enable_voice_announce:
-            try:
-                self.voice_announcer = VoiceAnnouncer(
-                    tts_config=VOICE_ANNOUNCER_CONFIG.get('tts', {}),
-                    player_config=VOICE_ANNOUNCER_CONFIG.get('player', {})
-                )
-                logger.info("语音播报器初始化成功")
-            except Exception as e:
-                logger.warning(f"语音播报器初始化失败: {e}")
-        
-        logger.info("系统初始化完成")
-        return True
-    
-    def start(self) -> bool:
-        """启动系统"""
-        if self.running:
-            logger.warning("系统已在运行")
-            return True
-        
-        logger.info("启动安全监控系统...")
-        
-        # 启动事件推送器
-        if self.event_pusher:
-            self.event_pusher.start()
-        
-        # 启动语音播报器
-        if self.voice_announcer:
-            self.voice_announcer.start()
-        
-        # 启动检测线程
-        self.running = True
-        self.detection_thread = threading.Thread(target=self._detection_worker, daemon=True)
-        self.detection_thread.start()
-        
-        with self.stats_lock:
-            self.stats['start_time'] = time.time()
-        
-        logger.info("安全监控系统启动成功")
-        return True
-    
-    def stop(self):
-        """停止系统"""
-        if not self.running:
-            return
-        
-        logger.info("停止安全监控系统...")
-        
-        self.running = False
-        
-        if self.detection_thread:
-            self.detection_thread.join(timeout=3)
-        
-        if self.event_pusher:
-            self.event_pusher.stop()
-        
-        if self.voice_announcer:
-            self.voice_announcer.stop()
-        
-        if self.camera:
-            self.camera.disconnect()
-        
-        if self.ptz_camera:
-            self.ptz_camera.stop_stream()
-            self.ptz_camera.disconnect()
-        
-        if self.stream_manager:
-            self.stream_manager.stop_all()
-        
-        if self.sdk:
-            try:
-                self.sdk.cleanup()
-            except Exception:
-                pass
-        
-        self._print_stats()
-        logger.info("安全监控系统已停止")
-    
-    def _detection_worker(self):
-        """检测工作线程"""
-        # 检查摄像头和检测是否启用
-        if not self.enable_panorama_camera or not self.enable_detection:
-            logger.info("摄像头或检测功能已禁用,检测线程休眠")
-            while self.running:
-                time.sleep(1)
-            return
-        
-        # 优先使用 detection_fps,默认每秒2帧
-        detection_fps = SAFETY_DETECTION_CONFIG.get('detection_fps', 2)
-        detection_interval = 1.0 / detection_fps  # 根据FPS计算间隔
-        last_detection_time = 0
-        
-        # 告警冷却(按违规类型)
-        alert_cooldown = {}
-        cooldown_time = SAFETY_DETECTION_CONFIG.get('alert_cooldown', 3.0)
-        
-        while self.running:
-            try:
-                current_time = time.time()
-                
-                # 获取帧
-                frame = self.camera.get_frame() if self.camera else None
-                if frame is None:
-                    time.sleep(0.01)
-                    continue
-                
-                with self.frame_lock:
-                    self.current_frame = frame.copy()
-                
-                self._update_stats('frames_processed')
-                
-                # 周期性检测
-                if current_time - last_detection_time >= detection_interval:
-                    last_detection_time = current_time
-                    
-                    # 执行检测
-                    detections = self.detector.detect(frame)
-                    status_list = self.detector.check_safety(frame, detections)
-                    
-                    self._update_stats('persons_detected', len(status_list))
-                    
-                    # 轨迹追踪已禁用
-                    
-                    # 处理违规
-                    for status in status_list:
-                        if status.is_violation:
-                            # 检查冷却(按违规类型)
-                            violation_key = status.get_violation_desc()
-                            if violation_key in alert_cooldown:
-                                if current_time - alert_cooldown[violation_key] < cooldown_time:
-                                    continue
-                            
-                            alert_cooldown[violation_key] = current_time
-                            
-                            self._handle_violation(status, frame)
-                
-                # 显示画面
-                if self.display:
-                    self._display_frame(frame, detections, status_list)
-                
-                time.sleep(0.01)
-                
-            except Exception as e:
-                logger.error(f"检测错误: {e}")
-                time.sleep(0.1)
-    
-    def _handle_violation(self, status: PersonSafetyStatus, frame: np.ndarray):
-        """处理违规"""
-        description = status.get_violation_desc()
-        
-        self._update_stats('violations_detected')
-        
-        # 裁剪人体区域
-        x1, y1, x2, y2 = status.person_bbox
-        margin = 20
-        x1 = max(0, x1 - margin)
-        y1 = max(0, y1 - margin)
-        x2 = min(frame.shape[1], x2 + margin)
-        y2 = min(frame.shape[0], y2 + margin)
-        person_image = frame[y1:y2, x1:x2].copy()
-        
-        # 编号识别
-        number_text = None
-        if self.number_recognizer:
-            try:
-                number_result = self.number_recognizer.recognize_person_number(person_image)
-                number_text = number_result.get('number')
-                if number_text:
-                    logger.info(f"识别到编号: {number_text}")
-            except Exception as e:
-                logger.warning(f"编号识别失败: {e}")
-        
-        # 如果识别到编号,添加到描述中
-        if number_text:
-            description = f"{description} (编号: {number_text})"
-        
-        # 推送事件
-        if self.event_pusher:
-            self.event_pusher.push_safety_violation(
-                description=description,
-                image=person_image,
-                track_id=status.track_id,
-                confidence=status.person_conf
-            )
-            self._update_stats('events_pushed')
-        
-        # 语音播报
-        if self.voice_announcer:
-            self.voice_announcer.announce_violation(description, urgent=True)
-            self._update_stats('voice_announced')
-        
-        logger.warning(f"[违规] {description}")
-    
-    def _display_frame(self, frame: np.ndarray, 
-                       detections: List[SafetyDetection],
-                       status_list: List[PersonSafetyStatus]):
-        """显示帧"""
-        # 绘制检测结果
-        result_frame = draw_safety_result(frame, detections, status_list)
-        
-        # 添加统计信息
-        stats_text = f"FPS: {self._get_fps():.1f}"
-        cv2.putText(result_frame, stats_text, (10, 30), 
-                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
-        
-        cv2.imshow('Safety Monitor', result_frame)
-        
-        # 按 'q' 退出
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            self.running = False
-    
-    def _get_fps(self) -> float:
-        """获取帧率"""
-        with self.stats_lock:
-            if self.stats['start_time']:
-                elapsed = time.time() - self.stats['start_time']
-                if elapsed > 0:
-                    return self.stats['frames_processed'] / elapsed
-        return 0.0
-    
-    def _update_stats(self, key: str, value: int = 1):
-        """更新统计"""
-        with self.stats_lock:
-            if key in self.stats:
-                self.stats[key] += value
-    
-    def _print_stats(self):
-        """打印统计"""
-        with self.stats_lock:
-            if self.stats['start_time']:
-                elapsed = time.time() - self.stats['start_time']
-                print("\n" + "=" * 50)
-                print("安全检测统计")
-                print("=" * 50)
-                print(f"运行时长: {elapsed:.1f} 秒")
-                print(f"处理帧数: {self.stats['frames_processed']}")
-                print(f"检测人员: {self.stats['persons_detected']} 次")
-                print(f"违规检测: {self.stats['violations_detected']} 次")
-                print(f"事件推送: {self.stats['events_pushed']} 次")
-                print(f"语音播报: {self.stats['voice_announced']} 次")
-                
-                if self.event_pusher:
-                    push_stats = self.event_pusher.get_stats()
-                    print(f"推送成功: {push_stats['pushed_events']}")
-                    print(f"推送失败: {push_stats['failed_events']}")
-                
-                if self.voice_announcer:
-                    voice_stats = self.voice_announcer.get_stats()
-                    print(f"播报成功: {voice_stats['played_commands']}")
-                    print(f"播报失败: {voice_stats['failed_commands']}")
-                
-                print("=" * 50 + "\n")
-    
-    def get_stats(self) -> dict:
-        """获取统计"""
-        with self.stats_lock:
-            return self.stats.copy()
-    
-    
-    def announce(self, text: str):
-        """
-        手动播报语音
-        
-        Args:
-            text: 播报文本
-        """
-        if self.voice_announcer:
-            self.voice_announcer.announce(text, priority=VoicePriority.NORMAL)
-
-
-def run_interactive(system: SafetyMonitorSystem):
-    """
-    交互模式运行
-    
-    Args:
-        system: 系统实例
-    """
-    print("\n施工现场安全监控系统 - 交互模式")
-    print("=" * 50)
-    print("命令:")
-    print("  s - 开始/停止监控")
-    print("  a - 手动播报 (输入文本)")
-    print("  r - 查看统计信息")
-    print("  q - 退出")
-    print("=" * 50)
-    
-    running = False
-    
-    while True:
-        try:
-            cmd = input("\n> ").strip().lower()
-            
-            if cmd == 'q':
-                break
-            
-            elif cmd == 's':
-                if running:
-                    system.stop()
-                    running = False
-                    print("监控已停止")
-                else:
-                    if system.start():
-                        running = True
-                        print("监控已启动")
-            
-            elif cmd == 'a':
-                text = input("输入播报文本: ").strip()
-                if text:
-                    system.announce(text)
-                    print(f"已播报: {text}")
-            
-            elif cmd == 'r':
-                stats = system.get_stats()
-                print("\n统计信息:")
-                for k, v in stats.items():
-                    if v is not None:
-                        print(f"  {k}: {v}")
-            
-            else:
-                print("未知命令")
-                
-        except KeyboardInterrupt:
-            break
-        except Exception as e:
-            print(f"错误: {e}")
-    
-    print("退出交互模式")
-
-
-def main():
-    """主函数"""
-    parser = argparse.ArgumentParser(
-        description='施工现场安全行为智能识别系统'
-    )
-    
-    # 摄像头参数
-    parser.add_argument('--camera', type=str, default='0',
-                       help='摄像头源 (索引/RTSP地址/视频文件)')
-    parser.add_argument('--no-display', action='store_true',
-                       help='不显示画面')
-    
-    # 检测参数
-    parser.add_argument('--model', type=str,
-                       help='安全检测模型路径')
-    parser.add_argument('--conf', type=float, default=0.5,
-                       help='置信度阈值')
-    parser.add_argument('--person-conf', type=float, default=0.8,
-                       help='人员检测置信度阈值')
-    parser.add_argument('--no-gpu', action='store_true',
-                       help='不使用GPU')
-    
-    # LLM 大模型参数
-    parser.add_argument('--llm-host', type=str,
-                       help='大模型 API 主机')
-    parser.add_argument('--llm-port', type=int,
-                       help='大模型 API 端口')
-    parser.add_argument('--no-llm', action='store_true',
-                       help='禁用大模型判断,使用规则判断')
-    parser.add_argument('--no-ocr', action='store_true',
-                       help='禁用编号识别')
-    
-    # 业务平台参数
-    parser.add_argument('--api-host', type=str,
-                       help='业务平台 API 主机')
-    parser.add_argument('--api-port', type=int,
-                       help='业务平台 API 端口')
-    parser.add_argument('--no-push', action='store_true',
-                       help='禁用事件推送')
-    parser.add_argument('--no-voice', action='store_true',
-                       help='禁用语音播报')
-    
-    # 运行模式
-    parser.add_argument('--interactive', action='store_true',
-                       help='交互模式')
-    parser.add_argument('--demo', action='store_true',
-                       help='演示模式')
-    
-    args = parser.parse_args()
-    
-    # 构建配置
-    config = {}
-    
-    if args.model:
-        config['model_path'] = args.model
-    config['conf_threshold'] = args.conf
-    config['person_threshold'] = args.person_conf
-    config['use_gpu'] = not args.no_gpu
-    
-    # LLM 配置
-    if args.llm_host:
-        config['llm_host'] = args.llm_host
-    if args.llm_port:
-        config['llm_port'] = args.llm_port
-    
-    if args.api_host:
-        config['api_host'] = args.api_host
-    if args.api_port:
-        config['api_port'] = args.api_port
-    
-    # 演示模式
-    if args.demo:
-        print("\n施工现场安全行为智能识别系统")
-        print("=" * 60)
-        print("""
-系统功能:
-  1. 实时视频监控
-  2. YOLO11 检测: 人员、安全帽、反光衣
-  3. 大模型判断: 安全状态分析
-  4. 编号识别: OCR 识别衣服上的工号
-  5. 事件推送到业务平台
-  6. 接收平台指令,TTS 语音播报
-
-系统架构:
-  ┌─────────────────────────────────────────────────────┐
-  │                    摄像头视频流                      │
-  └─────────────────────────────────────────────────────┘
-                          │
-                          ▼
-  ┌─────────────────────────────────────────────────────┐
-  │              YOLO11 安全检测模型                     │
-  │   检测类别: 人员(3)、安全帽(0)、反光衣(4)            │
-  └─────────────────────────────────────────────────────┘
-                          │
-                          ▼
-  ┌─────────────────────────────────────────────────────┐
-  │              大模型安全状态判断                      │
-  │   分析: 是否佩戴安全帽、是否穿反光衣                 │
-  │   识别: 衣服上的工号/编号                            │
-  └─────────────────────────────────────────────────────┘
-                          │
-            ┌─────────────┼─────────────┐
-            ▼             ▼             ▼
-  ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
-  │  事件推送   │ │  语音播报   │ │  编号记录   │
-  │ 业务平台API │ │  TTS服务    │ │  身份关联   │
-  └─────────────┘ └─────────────┘ └─────────────┘
-
-运行命令:
-  python safety_main.py --camera 0                    # 使用默认摄像头
-  python safety_main.py --camera rtsp://...           # RTSP 流
-  python safety_main.py --camera video.mp4            # 视频文件
-  python safety_main.py --interactive                 # 交互模式
-  python safety_main.py --no-display                  # 无界面模式
-  python safety_main.py --no-llm                      # 禁用大模型,使用规则判断
-  python safety_main.py --no-ocr                      # 禁用编号识别
-  python safety_main.py --llm-host localhost --llm-port 8111  # 指定大模型服务
-""")
-        return 0
-    
-    # 创建系统
-    system = SafetyMonitorSystem(config)
-    system.display = not args.no_display
-    
-    # 设置信号处理
-    def signal_handler(sig, frame):
-        print("\n接收到停止信号")
-        system.stop()
-        sys.exit(0)
-    
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGTERM, signal_handler)
-    
-    try:
-        # 解析摄像头源
-        camera_source = args.camera
-        if camera_source.isdigit():
-            camera_source = int(camera_source)
-        
-        # 初始化
-        if not system.initialize(camera_source):
-            print("\n系统初始化失败!")
-            return 1
-        
-        # 禁用功能 (命令行参数覆盖配置)
-        if args.no_llm:
-            system.enable_llm = False
-            print("大模型判断已禁用,使用规则判断")
-        if args.no_ocr:
-            system.enable_ocr = False
-            system.number_recognizer = None
-            print("编号识别已禁用")
-        if args.no_push:
-            system.enable_event_push = False
-            system.event_pusher = None
-            print("事件推送已禁用")
-        if args.no_voice:
-            system.enable_voice_announce = False
-            system.voice_announcer = None
-            print("语音播报已禁用")
-        
-        # 运行
-        if args.interactive:
-            run_interactive(system)
-        else:
-            # 自动模式
-            if not system.start():
-                print("启动失败")
-                return 1
-            
-            print("\n系统运行中,按 Ctrl+C 停止")
-            print("(按 'q' 键退出显示窗口)\n")
-            
-            # 主循环
-            while system.running:
-                time.sleep(0.1)
-    
-    except KeyboardInterrupt:
-        print("\n接收到停止信号")
-    
-    finally:
-        system.stop()
-    
-    return 0
-
-
-if __name__ == '__main__':
-    sys.exit(main() or 0)

+ 238 - 0
dual_camera_system/scripts/cleanup_oss.py

@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+"""
+清理 OSS 存储桶中的旧文件
+"""
+
+import os
+import sys
+
+import boto3
+from botocore.config import Config
+
+# OSS connection settings.
+# SECURITY: the fallback endpoint and credentials below are committed to the
+# repository. Supply real values through the OSS_* environment variables,
+# rotate the committed key, and then remove these defaults.
+OSS_CONFIG = {
+    'endpoint_url': os.environ.get('OSS_ENDPOINT_URL', 'http://58.213.48.57:15900'),
+    'access_key_id': os.environ.get('OSS_ACCESS_KEY_ID', 'wvp'),
+    'secret_access_key': os.environ.get(
+        'OSS_SECRET_ACCESS_KEY', '6MnZFxZxRwbvS01khA9ldiawJuc9mytyiq2kEv3k'),
+    'bucket_name': os.environ.get('OSS_BUCKET_NAME', 'wvp'),
+    # Single-segment prefix; keys are expected to look like <prefix>/<YYYYMMDD>/...
+    'path_prefix': os.environ.get('OSS_PATH_PREFIX', 'device'),
+}
+
+def get_s3_client():
+    """Build a boto3 S3 client for the endpoint described by OSS_CONFIG.
+
+    Path-style addressing and s3v4 signing are requested explicitly.
+    """
+    client_options = Config(
+        s3={'addressing_style': 'path'},
+        signature_version='s3v4',
+    )
+    connection_kwargs = {
+        'endpoint_url': OSS_CONFIG['endpoint_url'],
+        'aws_access_key_id': OSS_CONFIG['access_key_id'],
+        'aws_secret_access_key': OSS_CONFIG['secret_access_key'],
+        'region_name': 'us-east-1',
+        'config': client_options,
+    }
+    return boto3.client('s3', **connection_kwargs)
+
+def list_objects(s3_client, prefix=''):
+    """Return the keys of every object in the configured bucket under *prefix*.
+
+    Pages are fetched with list_objects_v2, following the continuation token
+    until the response is no longer marked as truncated.
+    """
+    keys = []
+    request = {'Bucket': OSS_CONFIG['bucket_name'], 'Prefix': prefix}
+
+    while True:
+        page = s3_client.list_objects_v2(**request)
+
+        if 'Contents' in page:
+            keys.extend(entry['Key'] for entry in page['Contents'])
+
+        if not page.get('IsTruncated'):
+            return keys
+
+        # Carry the token into the next request; drop it if the page had none.
+        next_token = page.get('NextContinuationToken')
+        if next_token:
+            request['ContinuationToken'] = next_token
+        else:
+            request.pop('ContinuationToken', None)
+
+def delete_objects(s3_client, keys):
+    """Delete the given keys from the configured bucket.
+
+    Keys are sent in batches of up to 1000 through delete_objects. If a batch
+    request raises, the rest of the run falls back to one delete_object call
+    per key.
+
+    Args:
+        s3_client: client exposing delete_objects and delete_object.
+        keys: list of object keys to delete.
+
+    Returns:
+        Number of objects actually deleted. Keys that the service reports as
+        failed, or whose individual delete raises, are not counted.
+    """
+    if not keys:
+        print("没有需要删除的对象")
+        return 0
+
+    bucket = OSS_CONFIG['bucket_name']
+    total = len(keys)
+    deleted = 0
+    use_batch = True
+
+    for start in range(0, total, 1000):
+        batch = keys[start:start + 1000]
+
+        if use_batch:
+            try:
+                response = s3_client.delete_objects(
+                    Bucket=bucket,
+                    Delete={'Objects': [{'Key': key} for key in batch], 'Quiet': True}
+                )
+            except Exception as e:
+                # Any failure of the batch call switches the remainder of the
+                # run (including this batch) to per-key deletion.
+                print(f"批量删除不支持,降级为逐个删除: {e}")
+                use_batch = False
+            else:
+                # A batch call that returns normally can still report per-key
+                # failures in 'Errors'; those keys must not be counted.
+                errors = response.get('Errors') or [] if isinstance(response, dict) else []
+                for err in errors:
+                    print(f"  删除 {err.get('Key')} 失败: {err.get('Message') or err.get('Code')}")
+                succeeded = len(batch) - len(errors)
+                deleted += succeeded
+                print(f"已删除 {succeeded} 个文件 (总计 {start + len(batch)}/{total})")
+                continue
+
+        for key in batch:
+            try:
+                s3_client.delete_object(Bucket=bucket, Key=key)
+                deleted += 1
+                if deleted % 100 == 0 or deleted == total:
+                    print(f"  进度: {deleted}/{total}")
+            except Exception as e2:
+                # Best effort: report the key and keep going.
+                print(f"  删除 {key} 失败: {e2}")
+
+    return deleted
+
+def cleanup_old_files(days=21, auto_confirm=False):
+    """Delete objects whose date folder is older than *days* days.
+
+    The date is read from the second '/'-separated segment of each key
+    (e.g. device/20260520/...) and parsed as YYYYMMDD. Keys whose second
+    segment is not such a date are left untouched.
+    NOTE(review): this assumes OSS_CONFIG['path_prefix'] is a single path
+    segment; confirm before configuring a nested prefix.
+
+    Args:
+        days: keep files from the last N days, delete earlier ones.
+        auto_confirm: skip the interactive y/n prompt when True.
+
+    Returns:
+        Number of objects deleted (0 when nothing matched or the user declined).
+    """
+    import time
+    from datetime import datetime
+
+    s3_client = get_s3_client()
+
+    # Anything dated before this instant (now minus `days`) counts as old.
+    cutoff_time = time.time() - (days * 24 * 3600)
+    cutoff_date = datetime.fromtimestamp(cutoff_time).strftime('%Y-%m-%d')
+
+    print(f"将删除 {cutoff_date} 之前的文件...")
+
+    # List everything under the configured prefix.
+    prefix = OSS_CONFIG['path_prefix'] + '/'
+    print(f"正在列出 {OSS_CONFIG['bucket_name']}/{prefix} 下的对象...")
+
+    all_objects = list_objects(s3_client, prefix)
+    print(f"共找到 {len(all_objects)} 个对象")
+
+    if not all_objects:
+        print("没有对象需要删除")
+        return 0
+
+    # Keep only the keys whose date segment falls before the cutoff.
+    old_objects = []
+    for key in all_objects:
+        parts = key.split('/')
+        if len(parts) < 2 or not parts[1]:
+            continue
+        try:
+            file_timestamp = datetime.strptime(parts[1], '%Y%m%d').timestamp()
+        except (ValueError, OverflowError, OSError):
+            # Second segment is not a usable YYYYMMDD date; leave the key alone.
+            continue
+        if file_timestamp < cutoff_time:
+            old_objects.append(key)
+
+    print(f"将删除 {len(old_objects)} 个旧文件")
+
+    if not old_objects:
+        print("没有旧文件需要删除")
+        return 0
+
+    # Preview the first 10 candidates.
+    print("前10个将删除的文件:")
+    for key in old_objects[:10]:
+        print(f"  - {key}")
+    if len(old_objects) > 10:
+        print(f"  ... 还有 {len(old_objects) - 10} 个")
+
+    # Confirm, unless the caller asked to skip the prompt.
+    if not auto_confirm:
+        confirm = input(f"\n确认删除 {len(old_objects)} 个文件? (y/n): ")
+        if confirm.lower() != 'y':
+            print("已取消")
+            return 0
+    else:
+        print(f"\n自动确认删除 {len(old_objects)} 个文件...")
+
+    deleted = delete_objects(s3_client, old_objects)
+    print(f"\n完成! 共删除 {deleted} 个文件")
+    return deleted
+
+def clear_all(auto_confirm=False):
+    """Delete every object under the configured path prefix.
+
+    Args:
+        auto_confirm: skip the interactive y/n prompt when True, so the
+            command can run unattended.
+
+    Returns:
+        Number of objects deleted (0 when the prefix is empty or the user
+        declined).
+    """
+    s3_client = get_s3_client()
+
+    prefix = OSS_CONFIG['path_prefix'] + '/'
+    all_objects = list_objects(s3_client, prefix)
+
+    print(f"存储桶中共有 {len(all_objects)} 个对象")
+
+    if not all_objects:
+        print("没有对象需要删除")
+        return 0
+
+    # Preview the first 10 objects.
+    print("前10个对象:")
+    for key in all_objects[:10]:
+        print(f"  - {key}")
+    if len(all_objects) > 10:
+        print(f"  ... 还有 {len(all_objects) - 10} 个")
+
+    # Confirm, unless the caller asked to skip the prompt.
+    if not auto_confirm:
+        confirm = input(f"\n警告: 将删除 ALL {len(all_objects)} 个文件! 确认? (y/n): ")
+        if confirm.lower() != 'y':
+            print("已取消")
+            return 0
+    else:
+        print(f"\n自动确认删除 {len(all_objects)} 个文件...")
+
+    deleted = delete_objects(s3_client, all_objects)
+    print(f"\n完成! 共删除 {deleted} 个文件")
+    return deleted
+
+def main():
+    """Command-line entry point: dispatch the cleanup / clear / list commands.
+
+    Reads sys.argv directly. A '--yes' argument anywhere on the command line
+    disables the interactive confirmations. Exits with status 1 when no
+    command or an unknown command is given.
+    """
+    if len(sys.argv) < 2:
+        print("用法:")
+        print("  python cleanup_oss.py cleanup [days] [--yes]  - 清理N天前的旧文件")
+        print("  python cleanup_oss.py clear [--yes]           - 清空所有文件")
+        print("  python cleanup_oss.py list                    - 列出所有文件")
+        sys.exit(1)
+
+    command = sys.argv[1]
+    auto_confirm = '--yes' in sys.argv
+
+    if command == 'cleanup':
+        # NOTE(review): the CLI default is 7 days while cleanup_old_files()
+        # itself defaults to 21 - confirm which retention is intended.
+        days = 7
+        for arg in sys.argv[2:]:
+            if arg.isdigit():
+                days = int(arg)
+        cleanup_old_files(days, auto_confirm)
+    elif command == 'clear':
+        if not auto_confirm:
+            # Interactive runs get this explicit 'yes' gate in addition to
+            # the y/n prompt inside clear_all().
+            confirm = input("确定要清空所有文件吗? 此操作不可恢复! (输入 'yes' 确认): ")
+            if confirm != 'yes':
+                print("已取消")
+                return
+        # Forward the flag so that '--yes' really runs without any prompt.
+        clear_all(auto_confirm)
+    elif command == 'list':
+        s3_client = get_s3_client()
+        prefix = OSS_CONFIG['path_prefix'] + '/'
+        objects = list_objects(s3_client, prefix)
+        print(f"共 {len(objects)} 个对象:")
+        # Only the first 50 keys are printed.
+        for obj in objects[:50]:
+            print(f"  - {obj}")
+        if len(objects) > 50:
+            print(f"  ... 还有 {len(objects) - 50} 个")
+    else:
+        print(f"未知命令: {command}")
+        sys.exit(1)
+
+if __name__ == '__main__':
+    main()

+ 38 - 0
dual_camera_system/scripts/dsh.service

@@ -0,0 +1,38 @@
+[Unit]
+Description=双摄像头联动抓拍系统
+After=network.target
+Wants=network.target
+
+[Service]
+Type=simple
+User=admin
+WorkingDirectory=/home/admin/dsh/dual_camera_system
+
+# 环境变量
+Environment="PATH=/home/admin/miniconda3/envs/rknn/bin:/usr/local/bin:/usr/bin:/bin"
+Environment="LD_LIBRARY_PATH=/home/admin/dsh/dh/arm/Bin:/usr/lib:/lib"
+Environment="OPENCV_FFMPEG_CAPTURE_OPTIONS=threads;1"
+
+# 启动命令
+ExecStart=/bin/bash /home/admin/dsh/dual_camera_system/scripts/start.sh
+
+# 日志配置
+StandardOutput=append:/home/admin/dsh/logs/dual-camera.log
+StandardError=append:/home/admin/dsh/logs/dual-camera.log
+
+# 自动重启配置
+Restart=always
+RestartSec=10
+
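+# Startup marker: after launch, append a timestamped "service started" line to the log.
+# This is not a periodic health check; restarts are handled by Restart=always above.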
+ExecStartPost=/bin/bash -c 'echo "$(date): 服务已启动" >> /home/admin/dsh/logs/dual-camera.log'
+
+# 进程安全设置
+KillMode=mixed
+TimeoutStopSec=30
+
+# 资源限制 (可选)
+# MemoryMax=4G
+
+[Install]
+WantedBy=multi-user.target

+ 13 - 12
dual_camera_system/third_party_pusher.py

@@ -363,9 +363,6 @@ class ThirdPartyPusher:
         )
         
         self.report_queue.put(report)
-        
-        with self.stats_lock:
-            self.stats['total_reports'] += 1
     
     def report_batch_sync(self, batch_info: Dict[str, Any], 
                           local_path: Optional[str] = None) -> bool:
@@ -457,29 +454,33 @@ class ThirdPartyPusher:
 
 # 全局单例
 _third_party_pusher_instance: Optional[ThirdPartyPusher] = None
+_third_party_pusher_lock = threading.Lock()
 
 
 def get_third_party_pusher(config: Dict[str, Any] = None) -> ThirdPartyPusher:
     """
-    获取第三方平台推送器实例(单例模式)
-    
+    获取第三方平台推送器实例(单例模式,线程安全)
+
     Args:
         config: 第三方平台配置
-        
+
     Returns:
         ThirdPartyPusher 实例
     """
     global _third_party_pusher_instance
-    
+
     if _third_party_pusher_instance is None:
-        _third_party_pusher_instance = ThirdPartyPusher(config)
-    
+        with _third_party_pusher_lock:
+            if _third_party_pusher_instance is None:
+                _third_party_pusher_instance = ThirdPartyPusher(config)
+
     return _third_party_pusher_instance
 
 
 def reset_third_party_pusher():
     """重置第三方平台推送器实例"""
     global _third_party_pusher_instance
-    if _third_party_pusher_instance is not None:
-        _third_party_pusher_instance.stop()
-    _third_party_pusher_instance = None
+    with _third_party_pusher_lock:
+        if _third_party_pusher_instance is not None:
+            _third_party_pusher_instance.stop()
+        _third_party_pusher_instance = None

+ 0 - 602
dual_camera_system/voice_announcer.py

@@ -1,602 +0,0 @@
-"""
-语音播放模块
-接收业务平台的语音播放指令,调用 TTS 服务生成语音并通过喇叭播放
-"""
-
-import os
-import time
-import json
-import threading
-import queue
-import requests
-import subprocess
-import tempfile
-from typing import Optional, Dict, Any, List
-from dataclasses import dataclass
-from enum import Enum
-
-
-class VoicePriority(Enum):
-    """语音优先级"""
-    LOW = 1       # 低优先级(一般通知)
-    NORMAL = 2    # 正常优先级
-    HIGH = 3      # 高优先级(紧急告警)
-    URGENT = 4    # 最高优先级(立即播放,打断当前)
-
-
-@dataclass
-class VoiceCommand:
-    """语音播放指令"""
-    text: str                              # 要播放的文本
-    priority: VoicePriority = VoicePriority.NORMAL  # 优先级
-    speed: float = 1.0                     # 语速
-    volume: float = 1.0                    # 音量
-    voice_id: str = ""                     # 音色ID
-    repeat: int = 1                        # 重复次数
-    interval: float = 0.5                  # 重复间隔
-    source: str = ""                       # 来源(业务平台等)
-    timestamp: float = 0.0                 # 时间戳
-    
-    def __post_init__(self):
-        if self.timestamp == 0.0:
-            self.timestamp = time.time()
-
-
-class TTSService:
-    """
-    TTS 服务接口
-    支持多种 TTS 后端
-    """
-    
-    def __init__(self, config: Dict[str, Any] = None):
-        """
-        初始化 TTS 服务
-        
-        Args:
-            config: 配置字典
-        """
-        self.config = config or {}
-        
-        # 服务类型: 'api', 'local', 'edge-tts', 'piper'
-        self.service_type = self.config.get('service_type', 'edge-tts')
-        
-        # API 配置
-        self.api_url = self.config.get('api_url', '')
-        self.api_key = self.config.get('api_key', '')
-        
-        # 本地配置
-        self.local_command = self.config.get('local_command', '')
-        
-        # Edge-TTS 配置
-        self.edge_voice = self.config.get('edge_voice', 'zh-CN-XiaoxiaoNeural')
-        
-        # 缓存目录
-        self.cache_dir = self.config.get('cache_dir', '/tmp/tts_cache')
-        os.makedirs(self.cache_dir, exist_ok=True)
-        
-        # 语音缓存
-        self.voice_cache = {}
-        self.cache_enabled = self.config.get('cache_enabled', True)
-    
-    def synthesize(self, text: str, output_path: str = None,
-                   speed: float = 1.0, volume: float = 1.0,
-                   voice_id: str = "") -> Optional[str]:
-        """
-        合成语音
-        
-        Args:
-            text: 要合成的文本
-            output_path: 输出路径,如果为 None 则自动生成
-            speed: 语速
-            volume: 音量
-            voice_id: 音色ID
-            
-        Returns:
-            生成的音频文件路径,失败返回 None
-        """
-        if not text:
-            return None
-        
-        # 检查缓存
-        if self.cache_enabled:
-            cache_key = self._get_cache_key(text, speed, volume, voice_id)
-            if cache_key in self.voice_cache:
-                cached_path = self.voice_cache[cache_key]
-                if os.path.exists(cached_path):
-                    return cached_path
-        
-        # 生成输出路径
-        if output_path is None:
-            output_path = os.path.join(
-                self.cache_dir, 
-                f"tts_{int(time.time() * 1000)}.mp3"
-            )
-        
-        # 根据服务类型调用不同的 TTS
-        success = False
-        
-        if self.service_type == 'api':
-            success = self._synthesize_api(text, output_path, speed, volume, voice_id)
-        elif self.service_type == 'edge-tts':
-            success = self._synthesize_edge_tts(text, output_path, speed, volume, voice_id)
-        elif self.service_type == 'piper':
-            success = self._synthesize_piper(text, output_path, speed, volume, voice_id)
-        elif self.service_type == 'local':
-            success = self._synthesize_local(text, output_path, speed, volume, voice_id)
-        else:
-            print(f"未知的 TTS 服务类型: {self.service_type}")
-            return None
-        
-        if success and os.path.exists(output_path):
-            # 缓存
-            if self.cache_enabled:
-                self.voice_cache[cache_key] = output_path
-            return output_path
-        
-        return None
-    
-    def _get_cache_key(self, text: str, speed: float, volume: float, voice_id: str) -> str:
-        """生成缓存键"""
-        return f"{text}_{speed}_{volume}_{voice_id}"
-    
-    def _synthesize_api(self, text: str, output_path: str,
-                        speed: float, volume: float, voice_id: str) -> bool:
-        """使用 API 合成语音"""
-        try:
-            headers = {'Content-Type': 'application/json'}
-            if self.api_key:
-                headers['Authorization'] = f'Bearer {self.api_key}'
-            
-            data = {
-                'text': text,
-                'speed': speed,
-                'volume': volume,
-                'voice_id': voice_id or self.edge_voice
-            }
-            
-            response = requests.post(
-                self.api_url, 
-                headers=headers, 
-                json=data,
-                timeout=30
-            )
-            
-            if response.status_code == 200:
-                # 假设返回音频数据
-                with open(output_path, 'wb') as f:
-                    f.write(response.content)
-                return True
-            else:
-                print(f"TTS API 错误: {response.status_code}")
-                return False
-                
-        except Exception as e:
-            print(f"TTS API 调用失败: {e}")
-            return False
-    
-    def _synthesize_edge_tts(self, text: str, output_path: str,
-                             speed: float, volume: float, voice_id: str) -> bool:
-        """使用 edge-tts 合成语音"""
-        try:
-            import edge_tts
-            
-            voice = voice_id or self.edge_voice
-            
-            # 语速和音量参数
-            rate = f"+{int((speed - 1) * 100)}%" if speed > 1 else f"{int((speed - 1) * 100)}%"
-            volume_str = f"+{int((volume - 1) * 100)}%" if volume > 1 else f"{int((volume - 1) * 100)}%"
-            
-            communicate = edge_tts.Communicate(
-                text, 
-                voice,
-                rate=rate,
-                volume=volume_str
-            )
-            
-            # 异步保存
-            import asyncio
-            
-            async def save():
-                await communicate.save(output_path)
-            
-            asyncio.run(save())
-            
-            return os.path.exists(output_path)
-            
-        except ImportError:
-            print("未安装 edge-tts,请运行: pip install edge-tts")
-            return False
-        except Exception as e:
-            print(f"edge-tts 合成失败: {e}")
-            return False
-    
-    def _synthesize_piper(self, text: str, output_path: str,
-                          speed: float, volume: float, voice_id: str) -> bool:
-        """使用 piper 合成语音"""
-        try:
-            # piper 命令行调用
-            model = voice_id or self.config.get('piper_model', 'zh_CN-huayan-medium')
-            
-            cmd = [
-                'piper',
-                '--model', model,
-                '--output_file', output_path
-            ]
-            
-            process = subprocess.Popen(
-                cmd,
-                stdin=subprocess.PIPE,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            )
-            
-            stdout, stderr = process.communicate(input=text.encode('utf-8'))
-            
-            if process.returncode == 0:
-                return os.path.exists(output_path)
-            else:
-                print(f"piper 错误: {stderr.decode('utf-8')}")
-                return False
-                
-        except FileNotFoundError:
-            print("未找到 piper 命令")
-            return False
-        except Exception as e:
-            print(f"piper 合成失败: {e}")
-            return False
-    
-    def _synthesize_local(self, text: str, output_path: str,
-                          speed: float, volume: float, voice_id: str) -> bool:
-        """使用本地命令合成语音"""
-        try:
-            cmd = self.local_command.format(
-                text=text,
-                output=output_path,
-                speed=speed,
-                volume=volume
-            )
-            
-            result = subprocess.run(
-                cmd,
-                shell=True,
-                capture_output=True,
-                timeout=30
-            )
-            
-            if result.returncode == 0:
-                return os.path.exists(output_path)
-            else:
-                print(f"本地命令错误: {result.stderr.decode('utf-8')}")
-                return False
-                
-        except Exception as e:
-            print(f"本地命令执行失败: {e}")
-            return False
-
-
-class AudioPlayer:
-    """
-    音频播放器
-    使用系统音频设备播放音频
-    """
-    
-    def __init__(self, config: Dict[str, Any] = None):
-        """
-        初始化播放器
-        
-        Args:
-            config: 配置字典
-        """
-        self.config = config or {}
-        
-        # 播放命令
-        # Linux: 'aplay', 'mpg123', 'ffplay'
-        # macOS: 'afplay'
-        # Windows: 'cmdmp3'
-        self.player_command = self.config.get('player_command', self._detect_player())
-        
-        # 音量控制
-        self.volume = self.config.get('volume', 1.0)
-        
-        # 播放状态
-        self.playing = False
-        self.current_process = None
-    
-    def _detect_player(self) -> str:
-        """检测可用的播放器"""
-        players = ['mpg123', 'aplay', 'ffplay', 'afplay']
-        
-        for player in players:
-            try:
-                subprocess.run(
-                    ['which', player],
-                    capture_output=True,
-                    check=True
-                )
-                return player
-            except:
-                continue
-        
-        return 'mpg123'  # 默认
-    
-    def play(self, audio_path: str, volume: float = None) -> bool:
-        """
-        播放音频文件
-        
-        Args:
-            audio_path: 音频文件路径
-            volume: 音量 (覆盖默认值)
-            
-        Returns:
-            是否成功
-        """
-        if not os.path.exists(audio_path):
-            print(f"音频文件不存在: {audio_path}")
-            return False
-        
-        vol = volume if volume is not None else self.volume
-        
-        try:
-            self.playing = True
-            
-            # 根据播放器选择命令
-            if self.player_command == 'mpg123':
-                cmd = ['mpg123', '-g', str(int(vol * 100)), audio_path]
-            elif self.player_command == 'aplay':
-                # aplay 只支持 WAV,需要转换
-                cmd = ['aplay', audio_path]
-            elif self.player_command == 'ffplay':
-                cmd = ['ffplay', '-nodisp', '-autoexit', '-volume', str(int(vol * 100)), audio_path]
-            elif self.player_command == 'afplay':
-                cmd = ['afplay', '-v', str(vol), audio_path]
-            else:
-                cmd = [self.player_command, audio_path]
-            
-            self.current_process = subprocess.Popen(
-                cmd,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE
-            )
-            
-            # 等待播放完成
-            self.current_process.wait()
-            self.playing = False
-            
-            return self.current_process.returncode == 0
-            
-        except FileNotFoundError:
-            print(f"播放器未找到: {self.player_command}")
-            return False
-        except Exception as e:
-            print(f"播放失败: {e}")
-            self.playing = False
-            return False
-    
-    def stop(self):
-        """停止播放"""
-        if self.current_process:
-            self.current_process.terminate()
-            self.current_process = None
-        self.playing = False
-    
-    def play_async(self, audio_path: str, volume: float = None, 
-                   callback: callable = None) -> threading.Thread:
-        """
-        异步播放
-        
-        Args:
-            audio_path: 音频文件路径
-            volume: 音量
-            callback: 播放完成回调
-            
-        Returns:
-            播放线程
-        """
-        def _play():
-            success = self.play(audio_path, volume)
-            if callback:
-                callback(success)
-        
-        thread = threading.Thread(target=_play, daemon=True)
-        thread.start()
-        return thread
-
-
-class VoiceAnnouncer:
-    """
-    语音播报器
-    整合 TTS 和音频播放,支持队列播放和优先级管理
-    """
-    
-    def __init__(self, tts_config: Dict[str, Any] = None, 
-                 player_config: Dict[str, Any] = None):
-        """
-        初始化语音播报器
-        
-        Args:
-            tts_config: TTS 配置
-            player_config: 播放器配置
-        """
-        self.tts = TTSService(tts_config)
-        self.player = AudioPlayer(player_config)
-        
-        # 播放队列
-        self.queue = queue.PriorityQueue()
-        
-        # 运行状态
-        self.running = False
-        self.worker_thread = None
-        
-        # 统计
-        self.stats = {
-            'total_commands': 0,
-            'played_commands': 0,
-            'failed_commands': 0
-        }
-        self.stats_lock = threading.Lock()
-    
-    def start(self):
-        """启动播报器"""
-        if self.running:
-            return
-        
-        self.running = True
-        self.worker_thread = threading.Thread(target=self._worker, daemon=True)
-        self.worker_thread.start()
-        print("语音播报器已启动")
-    
-    def stop(self):
-        """停止播报器"""
-        self.running = False
-        self.player.stop()
-        if self.worker_thread:
-            self.worker_thread.join(timeout=3)
-        print("语音播报器已停止")
-    
-    def announce(self, text: str, priority: VoicePriority = VoicePriority.NORMAL,
-                 speed: float = 1.0, volume: float = 1.0, repeat: int = 1) -> bool:
-        """
-        播报语音
-        
-        Args:
-            text: 要播报的文本
-            priority: 优先级
-            speed: 语速
-            volume: 音量
-            repeat: 重复次数
-            
-        Returns:
-            是否成功加入队列
-        """
-        if not text:
-            return False
-        
-        # 如果是紧急优先级,立即播放
-        if priority == VoicePriority.URGENT:
-            self._play_immediately(text, speed, volume, repeat)
-            return True
-        
-        # 加入队列
-        command = VoiceCommand(
-            text=text,
-            priority=priority,
-            speed=speed,
-            volume=volume,
-            repeat=repeat
-        )
-        
-        # 优先级队列:数值越小优先级越高
-        self.queue.put((-priority.value, time.time(), command))
-        
-        with self.stats_lock:
-            self.stats['total_commands'] += 1
-        
-        return True
-    
-    def announce_violation(self, description: str, urgent: bool = False):
-        """
-        播报安全违规
-        
-        Args:
-            description: 违规描述
-            urgent: 是否紧急
-        """
-        text = f"警告:{description},请立即整改"
-        priority = VoicePriority.URGENT if urgent else VoicePriority.HIGH
-        self.announce(text, priority=priority, repeat=3)
-    
-    def announce_safe(self):
-        """播报安全提示"""
-        text = "安全装备齐全,请继续保持"
-        self.announce(text, priority=VoicePriority.LOW)
-    
-    def _worker(self):
-        """工作线程"""
-        while self.running:
-            try:
-                # 获取命令
-                try:
-                    _, _, command = self.queue.get(timeout=1.0)
-                except queue.Empty:
-                    continue
-                
-                # 播放
-                success = self._play_command(command)
-                
-                with self.stats_lock:
-                    if success:
-                        self.stats['played_commands'] += 1
-                    else:
-                        self.stats['failed_commands'] += 1
-                
-            except Exception as e:
-                print(f"播报错误: {e}")
-    
-    def _play_immediately(self, text: str, speed: float, volume: float, repeat: int):
-        """立即播放(紧急)"""
-        # 停止当前播放
-        self.player.stop()
-        
-        # 合成并播放
-        audio_path = self.tts.synthesize(text, speed=speed, volume=volume)
-        
-        if audio_path:
-            for _ in range(repeat):
-                self.player.play(audio_path, volume)
-                time.sleep(0.5)
-    
-    def _play_command(self, command: VoiceCommand) -> bool:
-        """播放命令"""
-        audio_path = self.tts.synthesize(
-            command.text,
-            speed=command.speed,
-            volume=command.volume,
-            voice_id=command.voice_id
-        )
-        
-        if not audio_path:
-            return False
-        
-        for i in range(command.repeat):
-            if not self.running:
-                break
-            
-            success = self.player.play(audio_path, command.volume)
-            if not success:
-                return False
-            
-            if i < command.repeat - 1:
-                time.sleep(command.interval)
-        
-        return True
-    
-    def get_stats(self) -> Dict[str, int]:
-        """获取统计信息"""
-        with self.stats_lock:
-            return self.stats.copy()
-    
-    def clear_queue(self):
-        """清空队列"""
-        while not self.queue.empty():
-            try:
-                self.queue.get_nowait()
-            except queue.Empty:
-                break
-
-
-def create_voice_announcer(config: Dict[str, Any] = None) -> VoiceAnnouncer:
-    """
-    创建语音播报器实例
-    
-    Args:
-        config: 配置字典
-        
-    Returns:
-        VoiceAnnouncer 实例
-    """
-    config = config or {}
-    
-    tts_config = config.get('tts', {})
-    player_config = config.get('player', {})
-    
-    return VoiceAnnouncer(tts_config, player_config)

+ 102 - 0
test_oss_upload.py

@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+"""
+OSS 上传测试脚本
+测试本地图片上传到 MinIO
+"""
+
+import os
+import sys
+import cv2
+import tempfile
+import logging
+import numpy as np
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# 添加项目路径
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from dual_camera_system.oss_uploader import OSSUploader
+
+
+def create_test_image(width=640, height=480, text="Test Image"):
+    """创建测试图片"""
+    # 创建白色背景
+    img = np.ones((height, width, 3), dtype=np.uint8) * 255
+
+    # 画几个矩形
+    cv2.rectangle(img, (50, 50), (width-50, height-50), (200, 200, 200), 2)
+    cv2.rectangle(img, (100, 100), (width-100, height-100), (150, 150, 150), -1)
+
+    # 添加文字
+    cv2.putText(img, text, (width//4, height//2),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+    return img
+
+
+def main():
+    # 创建临时测试图片
+    with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as tmp:
+        temp_path = tmp.name
+
+    try:
+        # 生成测试图片
+        img = create_test_image(640, 480, "OSS Test")
+        cv2.imwrite(temp_path, img)
+        logger.info(f"测试图片已创建: {temp_path}")
+
+        # 初始化 OSS 上传器
+        logger.info("初始化 OSS 上传器...")
+        uploader = OSSUploader()
+
+        if not uploader.enabled:
+            logger.error("OSS 上传器未启用,请检查配置")
+            return 1
+
+        # 启动上传器
+        logger.info("启动上传器...")
+        uploader.start()
+
+        # 测试上传
+        test_key = "test/test_image.jpg"
+        batch_id = "test_batch_001"
+
+        logger.info(f"开始上传测试图片: {temp_path}")
+        logger.info(f"OSS Key: {test_key}")
+
+        # 同步上传测试
+        result = uploader.upload_image_sync(
+            local_path=temp_path,
+            oss_key=test_key,
+            batch_id=batch_id,
+            image_type='test'
+        )
+
+        logger.info(f"上传结果: success={result.success}")
+        logger.info(f"OSS URL: {result.oss_url}")
+
+        if result.error:
+            logger.error(f"错误信息: {result.error}")
+
+        if result.success:
+            logger.info("✅ OSS 上传测试成功!")
+        else:
+            logger.error("❌ OSS 上传测试失败!")
+            return 1
+
+    except Exception as e:
+        logger.exception(f"测试异常: {e}")
+        return 1
+    finally:
+        # 清理临时文件
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+            logger.info(f"临时文件已清理: {temp_path}")
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())