
feat: add RK3588 construction-site safety detection system

- Implement YOLO11 safety detection models on RKNN and ONNX
- Support helmet, reflective-vest, and person detection
- Add RTSP video-stream processing and RTMP push streaming
- Implement safety-violation detection and event reporting
- Optimize detection performance with multi-core NPU acceleration
wenhongquan 4 days ago
parent
commit
5ae178a0c7

+ 134 - 156
AGENTS.md

@@ -6,47 +6,74 @@
 
 ## Project Overview
 
-**Dual-Camera Linked Capture System** - a Python-based system linking a panoramic camera with a PTZ dome camera.
+**Construction-Site Safety Behavior Recognition System v2.0.0** - a Python-based dual-camera linkage system.
 
 Core features:
-- Real-time monitoring on the panoramic camera, with YOLO11 person detection
-- On detecting a person, the dome camera automatically zooms in on the target
-- Person segmentation, then OCR number recognition via the llama-server API
-- Automatic calibration of the panorama-to-dome coordinate mapping
+- Real-time panoramic monitoring; YOLO11 detects persons / helmets / reflective vests
+- Linked dome PTZ tracking with zoom onto targets
+- OCR number recognition (llama-server API)
+- Safety-violation detection (no helmet, no reflective vest)
+- Event push to the business platform + voice announcements
 
 ---
 
 ## Directory Structure
 
 ```
-/home/wen/dsh/
-├── AGENTS.md                    # This document
-├── dual_camera_system/          # Main project directory
-│   ├── config.py                # Configuration (camera, detection, OCR, etc.)
-│   ├── dahua_sdk.py             # Dahua SDK Python ctypes wrapper
-│   ├── panorama_camera.py       # Panoramic camera module (video stream, YOLO11 person detection)
-│   ├── ptz_camera.py            # Dome control module (PTZ control, 3D precise positioning)
-│   ├── calibration.py           # Visual calibration module (motion detection + feature matching)
-│   ├── ocr_recognizer.py        # OCR recognition module (llama-server API)
-│   ├── coordinator.py           # Linkage controller (event-driven, coordinate transforms)
-│   ├── main.py                  # Main entry point
-│   └── README.md                # Project documentation
-└── dh/                          # Dahua SDK directory
-    ├── Bin/                     # SDK shared libraries
-    │   ├── libdhnetsdk.so       # Network SDK
-    │   ├── libavnetsdk.so       # AV network SDK
-    │   └── ...
-    ├── Demo/                    # C++ sample code (Qt projects)
-    │   ├── 00.DevInit/          # Device initialization
-    │   ├── 01.RealPlay/         # Live preview
-    │   ├── 02.PTZControl/       # PTZ control
-    │   └── ...
-    ├── Doc/                     # SDK documentation
-    │   ├── NetSDK编程指导手册.pdf
-    │   └── ...
-    └── Include/Common/          # SDK header files
-        ├── dhnetsdk.h           # Network SDK interface definitions
-        └── ...
+dual_camera_system/
+├── config/                      # Modular configuration (refactored)
+│   ├── __init__.py              # Aggregated config exports
+│   ├── camera.py                # Camera + logging config
+│   ├── detection.py             # Person / safety detection config
+│   ├── ptz.py                   # PTZ control parameters
+│   ├── ocr.py                   # OCR config
+│   ├── coordinator.py           # Linkage + calibration config
+│   ├── event.py                 # Event push config
+│   ├── voice.py                 # Voice announcement config
+│   ├── llm.py                   # LLM config
+│   └── system.py                # System switches + operating modes
+├── main.py                      # OCR-mode entry point (number recognition)
+├── safety_main.py               # Safety-mode entry point (safety detection)
+├── dahua_sdk.py                 # Dahua SDK ctypes wrapper
+├── panorama_camera.py           # Panoramic camera + person detection
+├── ptz_camera.py                # Dome PTZ control
+├── calibration.py               # Visual calibration (motion detection + feature matching)
+├── ocr_recognizer.py            # OCR number recognition
+├── safety_detector.py           # Safety detection (helmet / reflective vest)
+├── safety_coordinator.py        # Safety linkage controller
+├── llm_service.py               # LLM service wrapper
+├── event_pusher.py              # Event push to the business platform
+├── voice_announcer.py           # TTS voice announcements
+└── README.md                    # Project documentation
+
+dh/                              # Dahua SDK (reference only)
+├── Bin/                         # Linux .so libraries (unusable on macOS)
+├── Demo/                        # C++ Qt samples
+└── Include/Common/              # SDK header files
+```
+
+---
+
+## Run Commands
+
+### OCR mode (number recognition)
+```bash
+python main.py --panorama-ip 192.168.1.100 --ptz-ip 192.168.1.101
+python main.py --interactive      # interactive mode
+python main.py --demo             # demo mode
+python main.py --skip-calibration # skip calibration
+```
+
+### Safety mode (safety detection)
+```bash
+python safety_main.py --panorama-ip 192.168.1.100 --ptz-ip 192.168.1.101
+```
+
+### Common arguments
+```bash
+--model-size {n,s,m,l,x}    # YOLO11 model size
+--no-gpu                    # disable GPU
+--ocr-host localhost --ocr-port 8111  # OCR API address
 ```
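The common flags above can be wired into an entry point with a minimal argparse sketch. This is a hypothetical reconstruction; the real parsers in `main.py` / `safety_main.py` may differ.

```python
import argparse

# Hypothetical parser for the shared CLI flags listed above (illustrative only)
parser = argparse.ArgumentParser(description="dual camera system")
parser.add_argument("--panorama-ip")
parser.add_argument("--ptz-ip")
parser.add_argument("--model-size", choices=["n", "s", "m", "l", "x"], default="m")
parser.add_argument("--no-gpu", action="store_true", help="disable GPU")
parser.add_argument("--ocr-host", default="localhost")
parser.add_argument("--ocr-port", type=int, default=8111)

args = parser.parse_args(["--panorama-ip", "192.168.1.100", "--model-size", "s", "--no-gpu"])
print(args.model_size, args.no_gpu, args.ocr_port)  # s True 8111
```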
 
 ---
@@ -56,156 +83,107 @@
 | Component | Technology |
 |------|------|
 | Person detection | YOLO11 (ultralytics) |
-| OCR recognition | llama-server API (PaddleOCR-VL) |
-| Camera SDK | Dahua NetSDK (ctypes wrapper) |
+| Safety detection | YOLO11 safety-specific model |
+| OCR recognition | llama-server API (Qwen2.5-VL-7B-Instruct) |
+| Safety judgment | Rule + LLM hybrid |
+| Camera SDK | Dahua NetSDK (ctypes) |
 | Image processing | OpenCV |
 | Feature matching | SIFT / ORB |
-| PTZ control | DH_EXTPTZ_EXACTGOTO (3D precise positioning) |
+| PTZ control | DH_EXTPTZ_EXACTGOTO |
+| Voice announcement | Edge-TTS (zh-CN-XiaoxiaoNeural) |
+| Event push | HTTP API → jtjai.device.wenhq.top:8583 |
 
 ---
 
-## Run Commands
+## Configuration
 
-### Install dependencies
-```bash
-pip install opencv-python opencv-contrib-python ultralytics
-```
+### config/system.py - Feature switches
+- `mode`: `'safety'` or `'ocr'`
+- `enable_panorama_camera`, `enable_ptz_camera`
+- `enable_detection`, `enable_safety_detection`
+- `enable_calibration`, `enable_ptz_tracking`
+- `enable_ocr`, `enable_llm`
+- `enable_event_push`, `enable_voice_announce`
+- `safety_strategy`: `'llm'` / `'rule'` / `'hybrid'`
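A plausible shape for these switches, as a hedged sketch: the real `config/system.py` defines the names listed above, but the values and helper below are illustrative assumptions.

```python
# Hypothetical sketch of config/system.py; exact values are illustrative only
SYSTEM_CONFIG = {
    "mode": "safety",                 # 'safety' or 'ocr'
    "enable_panorama_camera": True,
    "enable_ptz_camera": True,
    "enable_detection": True,
    "enable_safety_detection": True,
    "enable_calibration": True,
    "enable_ptz_tracking": True,
    "enable_ocr": False,
    "enable_llm": True,
    "enable_event_push": True,
    "enable_voice_announce": True,
    "safety_strategy": "hybrid",      # 'llm' / 'rule' / 'hybrid'
}

def entry_script(cfg):
    # main.py serves OCR mode, safety_main.py serves safety mode (hypothetical helper)
    return "safety_main.py" if cfg["mode"] == "safety" else "main.py"
```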
 
-### Start the OCR service
-```bash
-llama-server -m PaddleOCR-VL-1.5-GGUF.gguf --port 8111
-```
-
-### Run the system
-```bash
-# Basic run
-python main.py --panorama-ip 192.168.1.100 --ptz-ip 192.168.1.101
-
-# Specify model size
-python main.py --model-size s --ocr-host localhost --ocr-port 8111
+### config/camera.py - Camera configuration
+- `PANORAMA_CAMERA`: panoramic camera IP / port / credentials
+- `PTZ_CAMERA`: dome camera IP / port / credentials
+- `SDK_PATH`: `/home/wen/dsh/dh/Bin` (Linux path)
 
-# Interactive mode
-python main.py --interactive
+### config/detection.py - Detection configuration
+- `DETECTION_CONFIG`: person detection (target classes, confidence, detection interval)
+- `SAFETY_DETECTION_CONFIG`: safety model path `/home/wen/dsh/yolo/yolo11m_safety.pt`
+  - Class mapping: `0=helmet`, `3=person`, `4=reflective vest`
+  - `alert_cooldown`: 3.0 s alert cooldown per target
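The per-target cooldown described above can be sketched as a small class. This is an assumed behavior, not the project's actual implementation: a target that alerted within the last `cooldown` seconds is suppressed.

```python
import time

class AlertCooldown:
    """Sketch of a per-target alert cooldown (3.0 s by default, as above)."""
    def __init__(self, cooldown=3.0):
        self.cooldown = cooldown
        self._last = {}  # target id -> timestamp of the last alert

    def should_alert(self, target_id, now=None):
        now = time.monotonic() if now is None else now
        last = self._last.get(target_id)
        if last is not None and now - last < self.cooldown:
            return False  # still cooling down
        self._last[target_id] = now
        return True
```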
 
-# Demo mode (no real cameras connected)
-python main.py --demo
+### config/llm.py - LLM configuration
+- `LLM_CONFIG`: Qwen2.5-VL-7B-Instruct, localhost:8111
+- `LLM_SAFETY_CONFIG`: `use_llm_for_safety`, `verify_with_llm`
 
-# Skip calibration
-python main.py --skip-calibration
-```
+### config/event.py - Event push
+- `EVENT_PUSHER_CONFIG`: pushes to `jtjai.device.wenhq.top:8583`
+- `/api/resource/oss/upload` - image upload
+- `/api/system/event` - event creation
 
----
+### config/voice.py - Voice announcements
+- `TTS_CONFIG`: Edge-TTS, zh-CN-XiaoxiaoNeural
+- `VOICE_ANNOUNCER_CONFIG`: violation announcements repeat 3 times
 
-## Core Modules
-
-### config.py
-Centralized configuration file, containing:
-- `LOG_CONFIG`: logging configuration
-- `PANORAMA_CAMERA`: panoramic camera connection parameters
-- `PTZ_CAMERA`: dome connection parameters
-- `SDK_PATH`: Dahua SDK library path
-- `DETECTION_CONFIG`: YOLO detection configuration
-- `PTZ_CONFIG`: PTZ control parameters
-- `OCR_CONFIG`: llama-server API configuration
-- `CALIBRATION_CONFIG`: calibration configuration (24-hour interval)
-
-### dahua_sdk.py
-Dahua SDK Python wrapper:
-- Loads libdhnetsdk.so via ctypes
-- Implements login, live preview, PTZ control, and other interfaces
-- SDK callbacks hold a `_disconnect_callback` reference to prevent garbage collection
-
-### panorama_camera.py
-Panoramic camera module:
-- `ObjectDetector`: YOLO11 person detector
-- `PersonTracker`: person tracker
-- `PanoramaCamera`: video-stream capture and frame processing
-
-### ptz_camera.py
-Dome control module:
-- `PTZController`: PTZ control wrapper
-- `PTZCamera`: dome connection and zoom control
-- Supports 3D precise positioning (DH_EXTPTZ_EXACTGOTO)
-
-### calibration.py
-Visual calibration module:
-- `VisualCalibrationDetector`: visual calibration detector
-- Motion detection: frame differencing locates the region the dome moved across
-- Feature matching: SIFT/ORB keypoint matching
-- Weighted fusion of both methods' results
-- Fallback: angle estimation when vision fails
-
-### ocr_recognizer.py
-OCR recognition module:
-- `NumberDetector`: number recognizer
-- Uses the llama-server HTTP API
-- Sends base64-encoded images
-- Retry mechanism (up to 3 attempts, exponential backoff)
-
-### coordinator.py
-Linkage controller:
-- `Coordinator`: basic linkage logic
-- `EventDrivenCoordinator`: event-driven controller
-- Coordinate transform: panorama (x_ratio, y_ratio) → PTZ (pan, tilt)
-- Performance stats: frame rate, detection count, OCR success rate
-
-### main.py
-Main entry point:
-- `DualCameraSystem`: system main class
-- Command-line argument parsing
-- Scheduled calibration thread
-- Interactive command handling (s/r/t/c/q)
+### config/coordinator.py - Calibration configuration
+- `CALIBRATION_CONFIG.interval`: 24 hours (not 5 minutes)
 
 ---
 
-## Workflow
+## Important Notes
 
-```
-1. System startup → auto-calibration of the panorama-to-dome coordinate mapping
-2. Panoramic camera captures the video stream
-3. YOLO11 detects persons in the frame
-4. Compute the person's relative position in frame (x_ratio, y_ratio)
-5. Calibration transform → PTZ angles (pan, tilt)
-6. Dome zooms and moves to the target position
-7. Person segmentation + OCR number recognition
-8. Output the recognition result
-9. Automatic recalibration every 24 hours
-```
+1. **SDK path**: hardcoded in config as `/home/wen/dsh/dh/Bin` (a Linux path); beware when developing on macOS
+2. **Calibration interval**: actually 24 hours, not the 5 minutes stated in the README
+3. **Model path**: the safety-detection model lives at `/home/wen/dsh/yolo/yolo11m_safety.pt`
+4. **YOLO11 auto-download**: pretrained weights are downloaded automatically on first run
+5. **OCR service**: llama-server must be started first (default localhost:8111)
+6. **Operating modes**: `main.py` is OCR mode; `safety_main.py` is safety-detection mode
 
 ---
 
-## Performance Optimization Notes
+## Interactive Commands (OCR mode, main.py)
 
-1. **OCR rate limiting**: `self.ocr_interval = 1.0` avoids overly frequent API calls
-2. **PTZ position threshold**: `self.ptz_position_threshold = 0.02` avoids resending identical position commands
-3. **Frame buffering**: multi-frame processing needed for motion detection
-4. **Retry mechanism**: exponential backoff on OCR failures
+- `s` - start/stop linkage
+- `r` - get recognition results
+- `t` - manual tracking (enter coordinates)
+- `c` - capture snapshot
+- `q` - quit
 
 ---
 
-## Dahua SDK Reference
+## Calibration Mechanism
 
-SDK header location: `/home/wen/dsh/dh/Include/Common/dhnetsdk.h`
+**Methods**:
+1. Motion detection: frame differencing locates the region the dome moved across
+2. Feature matching: SIFT/ORB matches the dome image against the panorama
+3. Weighted fusion + fallback (angle estimation)
 
-Key interfaces:
-- `CLIENT_Init`: initialize the SDK
-- `CLIENT_LoginEx2`: log in to a device
-- `CLIENT_RealPlayEx`: start live preview
-- `CLIENT_DHPTZControlEx`: PTZ control
-- `CLIENT_Logout`: log out of a device
-- `CLIENT_Cleanup`: clean up the SDK
+**Flow**:
+```
+panorama frame before move ──┐
+                             ├──> motion detection ──> motion-region center
+panorama frame after move ───┘
 
-PTZ commands:
-- `DH_EXTPTZ_EXACTGOTO`: 3D precise positioning (pan, tilt, zoom)
+dome snapshot ──────────────┐
+                            ├──> feature matching ──> matched-point center
+panorama frame ─────────────┘
+
+motion region + matched points ──> weighted fusion ──> coordinate mapping
+```
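The weighted-fusion step in the diagram amounts to a weighted average of the two center estimates. A minimal sketch, where the 0.4/0.6 weights are assumptions rather than the project's actual values:

```python
# Weighted fusion of the motion-detection center and the feature-matching center;
# the weights below are illustrative assumptions, not the project's real values.
def fuse_centers(motion_center, match_center, w_motion=0.4, w_match=0.6):
    if motion_center is None and match_center is None:
        return None  # both methods failed -> caller falls back to angle estimation
    if motion_center is None:
        return match_center
    if match_center is None:
        return motion_center
    mx, my = motion_center
    fx, fy = match_center
    s = w_motion + w_match
    return ((w_motion * mx + w_match * fx) / s, (w_motion * my + w_match * fy) / s)
```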
 
 ---
 
-## Notes
+## SDK Reference
 
-1. The Dahua SDK library path is set in `config.py`, default `/home/wen/dsh/dh/Bin`
-2. YOLO11 downloads pretrained weights automatically on first run
-3. OCR depends on the llama-server service; start it first
-4. The calibration interval defaults to 24 hours; change it in `CALIBRATION_CONFIG`
-5. The dome camera must support PTZ control
+Header file: `dh/Include/Common/dhnetsdk.h`
+
+Key interfaces:
+- `CLIENT_Init` / `CLIENT_Cleanup`
+- `CLIENT_LoginEx2` / `CLIENT_Logout`
+- `CLIENT_RealPlayEx`
+- `CLIENT_DHPTZControlEx` (DH_EXTPTZ_EXACTGOTO)

BIN
testrk3588/__pycache__/rtsp_person_detection.cpython-313.pyc


BIN
testrk3588/a.jpeg


BIN
testrk3588/b.jpg


BIN
testrk3588/c.jpg


+ 1 - 0
testrk3588/dataset.txt

@@ -0,0 +1 @@
+b.jpg

BIN
testrk3588/debug_rtsp_frame.jpg


+ 44 - 0
testrk3588/onnx2rknn.py

@@ -0,0 +1,44 @@
+from rknn.api import RKNN
+
+# ONNX model uses NCHW format with float32 input normalized to 0-1
+# mean_values=0, std_values=1 means the RKNN will NOT apply any normalization
+# during inference — the user must provide pre-normalized (0-1) float32 NHWC input
+#
+# IMPORTANT: RKNN inference always expects NHWC input (1,H,W,C) regardless of
+# the ONNX model's NCHW layout. The toolkit handles the transpose internally.
+
+rknn = RKNN(verbose=True)
+
+rknn.config(
+    target_platform='rk3588',
+    mean_values=[[0, 0, 0]],
+    std_values=[[1, 1, 1]],
+    quantized_dtype='w8a8',
+    optimization_level=3
+)
+
+print('Loading ONNX model...')
+ret = rknn.load_onnx(model='yolo11m_safety.onnx', input_size_list=[[3, 640, 640]])
+if ret != 0: 
+    print("load_onnx failed")
+    exit(1)
+
+print('Building RKNN model (do_quantization=False, float32)...')
+ret = rknn.build(dataset='dataset.txt', do_quantization=False)
+if ret != 0:
+    print("build failed")
+    exit(1)
+
+print('Exporting RKNN model...')
+ret = rknn.export_rknn('./yolo11m_safety.rknn')
+if ret != 0:
+    print("export_rknn failed")
+    exit(1)
+
+# Optional: int8 quantization (uncomment to enable; quantized_dtype is set in config())
+# This can improve NPU throughput at some cost in output precision
+# ret = rknn.build(dataset='dataset.txt', do_quantization=True)
+# if ret != 0:
+#     print("build with quantization failed")
+#     exit(1)
+# rknn.export_rknn('./yolo11m_safety_quantized.rknn')
+
+rknn.release()
+print("Done: yolo11m_safety.rknn")
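The NCHW/NHWC note in the comments above boils down to a single transpose. A tiny numpy check using a small dummy tensor in place of a real 640×640 frame:

```python
import numpy as np

# Dummy HWC image standing in for a 640x640 RGB frame
h = w = 4
hwc = np.arange(h * w * 3, dtype=np.float32).reshape(h, w, 3) / 255.0

nhwc = hwc[None, ...]                 # (1, H, W, C): what RKNNLite is fed
nchw = hwc.transpose(2, 0, 1)[None]   # (1, C, H, W): what the ONNX graph declares

# Same data, two layouts: transposing NCHW back yields the NHWC tensor exactly
assert nchw.shape == (1, 3, h, w) and nhwc.shape == (1, h, w, 3)
assert np.array_equal(nchw.transpose(0, 2, 3, 1), nhwc)
```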

+ 4 - 0
testrk3588/requirements.txt

@@ -0,0 +1,4 @@
+# RTSP Person Detection Dependencies
+opencv-python>=4.8.0
+numpy>=1.24.0
+onnxruntime>=1.16.0

+ 522 - 0
testrk3588/rtsp_person_detection.py

@@ -0,0 +1,522 @@
+#!/usr/bin/env python3
+import cv2
+import numpy as np
+import argparse
+import requests
+import json
+import time
+import os
+from PIL import Image, ImageDraw, ImageFont
+from rknnlite.api import RKNNLite
+import onnxruntime as ort
+from dataclasses import dataclass
+from typing import List, Tuple, Optional
+import subprocess
+
+# Pin this process to the big cores (mask 0xff0 → CPUs 4-11; on RK3588 the
+# Cortex-A76 cores are CPUs 4-7, and nonexistent mask bits are ignored)
+os.system("taskset -p 0xff0 %d" % os.getpid())
+
+
+@dataclass
+class Detection:
+    class_id: int
+    class_name: str
+    confidence: float
+    bbox: Tuple[int, int, int, int]
+
+
+def nms(dets, iou_threshold=0.45):
+    if len(dets) == 0:
+        return []
+    
+    boxes = np.array([[d.bbox[0], d.bbox[1], d.bbox[2], d.bbox[3], d.confidence] for d in dets])
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+    scores = boxes[:, 4]
+    
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+    
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+        
+        inds = np.where(ovr <= iou_threshold)[0]
+        order = order[inds + 1]
+    
+    return [dets[i] for i in keep]
+
+
+class BaseDetector:
+    LABEL_MAP = {0: '安全帽', 4: '安全衣', 3: '人'}
+    
+    def __init__(self):
+        self.input_size = (640, 640)
+        self.num_classes = 5
+    
+    def letterbox(self, image):
+        h0, w0 = image.shape[:2]
+        ih, iw = self.input_size
+        scale = min(iw / w0, ih / h0)
+        new_w, new_h = int(w0 * scale), int(h0 * scale)
+        pad_w = (iw - new_w) // 2
+        pad_h = (ih - new_h) // 2
+        resized = cv2.resize(image, (new_w, new_h))
+        canvas = np.full((ih, iw, 3), 114, dtype=np.uint8)
+        canvas[pad_h:pad_h+new_h, pad_w:pad_w+new_w] = resized
+        return canvas, scale, pad_w, pad_h, h0, w0
+    
+    def postprocess(self, outputs, scale, pad_w, pad_h, h0, w0, conf_threshold_map):
+        dets = []
+        
+        if not outputs:
+            return dets
+        
+        output = outputs[0]
+        
+        if len(output.shape) == 3:
+            output = output[0]
+        
+        # Output layout: (4 + num_classes, num_anchors), e.g. (9, 8400).
+        # Rows 0-3: x_center, y_center, width, height in letterboxed pixel space (0-640);
+        # rows 4+: class scores (already sigmoid-applied; YOLOv8/11 has no objectness row).
+        num_boxes = output.shape[1]
+        
+        for i in range(num_boxes):
+            x_center = float(output[0, i])
+            y_center = float(output[1, i])
+            width = float(output[2, i])
+            height = float(output[3, i])
+            
+            class_probs = output[4:4+self.num_classes, i]
+            best_class = int(np.argmax(class_probs))
+            confidence = float(class_probs[best_class])
+            
+            if best_class not in self.LABEL_MAP:
+                continue
+            
+            conf_threshold = conf_threshold_map.get(best_class, 0.5)
+            
+            if confidence < conf_threshold:
+                continue
+            
+            # Remove padding and scale to original image
+            x1 = int(((x_center - width / 2) - pad_w) / scale)
+            y1 = int(((y_center - height / 2) - pad_h) / scale)
+            x2 = int(((x_center + width / 2) - pad_w) / scale)
+            y2 = int(((y_center + height / 2) - pad_h) / scale)
+            
+            x1 = max(0, min(w0, x1))
+            y1 = max(0, min(h0, y1))
+            x2 = max(0, min(w0, x2))
+            y2 = max(0, min(h0, y2))
+            
+            det = Detection(
+                class_id=best_class,
+                class_name=self.LABEL_MAP[best_class],
+                confidence=confidence,
+                bbox=(x1, y1, x2, y2)
+            )
+            dets.append(det)
+        
+        dets = nms(dets, iou_threshold=0.45)
+        return dets
+    
+    def detect(self, image, conf_threshold_map):
+        raise NotImplementedError
+    
+    def release(self):
+        pass
+
+
+class RKNNDetector(BaseDetector):
+    """RKNN detector - uses NHWC input format (1, H, W, C)"""
+    def __init__(self, model_path: str):
+        super().__init__()
+        self.rknn = RKNNLite()
+        
+        ret = self.rknn.load_rknn(model_path)
+        if ret != 0:
+            print("[ERROR] load_rknn failed")
+            exit(-1)
+        
+        ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
+        if ret != 0:
+            print("[ERROR] init_runtime failed")
+            exit(-1)
+    
+    def detect(self, image, conf_threshold_map):
+        canvas, scale, pad_w, pad_h, h0, w0 = self.letterbox(image)
+        # RKNN expects NHWC (1, H, W, C), RGB, normalized 0-1
+        img = canvas[..., ::-1].astype(np.float32) / 255.0
+        blob = img[None, ...]  # (1, 640, 640, 3)
+        outs = self.rknn.inference(inputs=[blob])
+        return self.postprocess(outs, scale, pad_w, pad_h, h0, w0, conf_threshold_map)
+    
+    def release(self):
+        self.rknn.release()
+
+
+class ONNXDetector(BaseDetector):
+    """ONNX detector - uses NCHW input format (1, C, H, W)"""
+    def __init__(self, model_path: str):
+        super().__init__()
+        self.session = ort.InferenceSession(model_path)
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_name = self.session.get_outputs()[0].name
+    
+    def detect(self, image, conf_threshold_map):
+        canvas, scale, pad_w, pad_h, h0, w0 = self.letterbox(image)
+        # ONNX expects NCHW (1, C, H, W), RGB, normalized 0-1
+        img = canvas[..., ::-1].astype(np.float32) / 255.0
+        img = img.transpose(2, 0, 1)
+        blob = img[None, ...]  # (1, 3, 640, 640)
+        outs = self.session.run([self.output_name], {self.input_name: blob})
+        return self.postprocess(outs, scale, pad_w, pad_h, h0, w0, conf_threshold_map)
+
+
+def create_detector(model_path: str):
+    ext = os.path.splitext(model_path)[1].lower()
+    if ext == '.rknn':
+        print("使用 RKNN 模型")
+        return RKNNDetector(model_path)
+    elif ext == '.onnx':
+        print("使用 ONNX 模型")
+        return ONNXDetector(model_path)
+    else:
+        print("不支持的模型格式")
+        exit(-1)
+
+
+def put_text_chinese(img, text, position, font_size=20, color=(255, 0, 0)):
+    img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+    draw = ImageDraw.Draw(img_pil)
+    font_path = "Alibaba_PuHuiTi_2.0_35_Thin_35_Thin.ttf"
+    try:
+        font = ImageFont.truetype(font_path, font_size)
+    except OSError:
+        try:
+            font = ImageFont.truetype("MiSans-Thin.ttf", font_size)
+        except OSError:
+            font = ImageFont.load_default()
+    
+    color_rgb = (color[2], color[1], color[0])
+    draw.text(position, text, font=font, fill=color_rgb)
+    
+    img_cv2 = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
+    
+    x, y = position
+    text_width = draw.textlength(text, font=font)
+    text_height = font_size
+    img[y:y+text_height, x:x+int(text_width)] = img_cv2[y:y+text_height, x:x+int(text_width)]
+
+
+def upload_image(image_path):
+    try:
+        import http.client
+        import mimetypes
+        from codecs import encode
+        
+        filename = os.path.basename(image_path)
+        
+        # Skip certificate verification, matching the verify=False used for the
+        # event API (a self-signed certificate is assumed)
+        import ssl
+        conn = http.client.HTTPSConnection("jtjai.device.wenhq.top", 8583,
+                                           context=ssl._create_unverified_context())
+        
+        boundary = 'wL36Yn8afVp8Ag7AmP8qZ0SA4n1v9T'
+        dataList = []
+        dataList.append(encode('--' + boundary))
+        dataList.append(encode('Content-Disposition: form-data; name="file"; filename="{0}"'.format(filename)))
+        
+        fileType = mimetypes.guess_type(image_path)[0] or 'application/octet-stream'
+        dataList.append(encode('Content-Type: {}'.format(fileType)))
+        dataList.append(encode(''))
+        
+        with open(image_path, 'rb') as f:
+            dataList.append(f.read())
+        
+        dataList.append(encode('--'+boundary+'--'))
+        dataList.append(encode(''))
+        
+        body = b'\r\n'.join(dataList)
+        
+        headers = {
+            'User-Agent': 'Apifox/1.0.0 (https://apifox.com)',
+            'Accept': '*/*',
+            'Host': 'jtjai.device.wenhq.top:8583',
+            'Connection': 'keep-alive',
+            'Content-Type': 'multipart/form-data; boundary={}'.format(boundary)
+        }
+        
+        conn.request("POST", "/api/resource/oss/upload", body, headers)
+        res = conn.getresponse()
+        data = res.read()
+        
+        if res.status == 200:
+            result = json.loads(data.decode("utf-8"))
+            if result.get('code') == 200:
+                return result.get('data', {}).get('purl')
+        print(f"上传图片失败: {data.decode('utf-8')}")
+    except Exception as e:
+        print(f"上传图片异常: {e}")
+    return None
+
+
+def create_event(addr, purl):
+    try:
+        url = "https://jtjai.device.wenhq.top:8583/api/system/event"
+        create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        data = {
+            "createTime": create_time,
+            "addr": addr,
+            "ext1": json.dumps([purl]),
+            "ext2": json.dumps({"lx":"工地安全"})
+        }
+        response = requests.post(url, json=data, verify=False)
+        if response.status_code == 200:
+            result = response.json()
+            if result.get('code') == 200:
+                print(f"事件创建成功: {addr}")
+                return True
+        print(f"创建事件失败: {response.text}")
+    except Exception as e:
+        print(f"创建事件异常: {e}")
+    return False
+
+
+def check_safety_equipment(detections):
+    person_detections = []
+    helmet_detections = []
+    safety_clothes_detections = []
+    
+    for det in detections:
+        x1, y1, x2, y2 = det.bbox
+        if det.class_id == 3:
+            person_detections.append((x1, y1, x2, y2, det.confidence))
+        elif det.class_id == 0:
+            helmet_detections.append((x1, y1, x2, y2, det.confidence))
+        elif det.class_id == 4:
+            safety_clothes_detections.append((x1, y1, x2, y2, det.confidence))
+    
+    need_alert = False
+    alert_addr = None
+    
+    for person_x1, person_y1, person_x2, person_y2, person_conf in person_detections:
+        has_helmet = False
+        for helmet_x1, helmet_y1, helmet_x2, helmet_y2, helmet_conf in helmet_detections:
+            helmet_center_x = (helmet_x1 + helmet_x2) / 2
+            helmet_center_y = (helmet_y1 + helmet_y2) / 2
+            if (helmet_center_x >= person_x1 and helmet_center_x <= person_x2 and
+                helmet_center_y >= person_y1 and helmet_center_y <= person_y2):
+                has_helmet = True
+                break
+        
+        has_safety_clothes = False
+        for clothes_x1, clothes_y1, clothes_x2, clothes_y2, clothes_conf in safety_clothes_detections:
+            overlap_x1 = max(person_x1, clothes_x1)
+            overlap_y1 = max(person_y1, clothes_y1)
+            overlap_x2 = min(person_x2, clothes_x2)
+            overlap_y2 = min(person_y2, clothes_y2)
+            
+            if overlap_x1 < overlap_x2 and overlap_y1 < overlap_y2:
+                has_safety_clothes = True
+                break
+        
+        if not has_helmet or not has_safety_clothes:
+            need_alert = True
+            if not has_helmet and not has_safety_clothes:
+                alert_addr = "反光衣和安全帽都没戴"
+            elif not has_helmet:
+                alert_addr = "未戴安全帽"
+            else:
+                alert_addr = "未穿反光衣"
+            
+            print(f"警告: {alert_addr},置信度: {person_conf:.2f}")
+    
+    return need_alert, alert_addr, person_detections
+
+
+class RTSPCapture:
+    def __init__(self, rtsp_url, model_path, rtmp_url, fps=2):
+        self.rtsp_url = rtsp_url
+        self.rtmp_url = rtmp_url
+        self.det = create_detector(model_path)
+        self.cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
+        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+        self.rtmp_pipe = None
+        self.process_fps = fps
+        self.conf_threshold_map = {3: 0.8, 0: 0.5, 4: 0.5}
+        self.last_upload_time = 0
+        self.upload_interval = 2
+
+    def start_rtmp(self):
+        w = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        h = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = self.cap.get(cv2.CAP_PROP_FPS) or 25
+
+        command = [
+            'ffmpeg',
+            '-y',
+            '-f', 'rawvideo',
+            '-pix_fmt', 'bgr24',
+            '-s', f'{w}x{h}',
+            '-r', str(fps),
+            '-i', '-',
+            '-c:v', 'libx264',
+            '-preset', 'ultrafast',
+            '-tune', 'zerolatency',
+            '-f', 'flv',
+            self.rtmp_url
+        ]
+
+        self.rtmp_pipe = subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
+        )
+
+    def run(self):
+        self.start_rtmp()
+        frame_count = 0
+        
+        fps = self.cap.get(cv2.CAP_PROP_FPS) or 25
+        # Guard against a zero interval when process_fps exceeds the stream fps
+        frame_interval = max(1, int(round(fps / self.process_fps))) if fps > 0 else 1
+        print(f"帧间隔: {frame_interval} 帧")
+        
+        last_dets = []
+        last_need_alert = False
+        last_alert_addr = None
+        last_person_detections = []
+        
+        while True:
+            ret, frame = self.cap.read()
+            if not ret:
+                break
+
+            frame_count += 1
+            
+            if frame_count % frame_interval == 0:
+                try:
+                    last_dets = self.det.detect(frame, self.conf_threshold_map)
+                    
+                    last_need_alert, last_alert_addr, last_person_detections = check_safety_equipment(last_dets)
+                    
+                    if last_dets:
+                        print(f"[Frame {frame_count}] 检测到 {len(last_dets)} 个目标")
+                        for d in last_dets:
+                            print(f"  {d.class_name}: conf={d.confidence:.2f}, box={d.bbox}")
+                except Exception as e:
+                    print(f"检测过程中出错: {e}")
+
+            for d in last_dets:
+                x1, y1, x2, y2 = d.bbox
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)
+                text = f"{d.class_name}: {d.confidence:.2f}"
+                text_y = max(15, y1 - 20)
+                put_text_chinese(frame, text, (x1, text_y), font_size=20, color=(255, 0, 0))
+            
+            if last_person_detections and last_need_alert and last_alert_addr:
+                current_time = time.time()
+                if current_time - self.last_upload_time >= self.upload_interval:
+                    print(f"检测到人,触发告警上传")
+                    temp_image_path = f"alert_frame_{frame_count}.jpg"
+                    cv2.imwrite(temp_image_path, frame)
+                    
+                    purl = upload_image(temp_image_path)
+                    if purl:
+                        create_event(last_alert_addr, purl)
+                        self.last_upload_time = current_time
+                    
+                    if os.path.exists(temp_image_path):
+                        os.remove(temp_image_path)
+
+            if self.rtmp_pipe:
+                try:
+                    self.rtmp_pipe.stdin.write(frame.tobytes())
+                except (BrokenPipeError, OSError):
+                    # ffmpeg exited; keep the local display loop running
+                    pass
+
+            cv2.imshow("RK3588 工地安全检测", frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+                
+        self.cap.release()
+        self.det.release()
+        cv2.destroyAllWindows()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--rtsp", required=True)
+    parser.add_argument("--model", default="yolo11m_safety.rknn")
+    parser.add_argument("--rtmp", required=True)
+    parser.add_argument("--fps", type=int, default=2, help="每秒处理的帧数")
+    args = parser.parse_args()
+
+    cap = RTSPCapture(args.rtsp, args.model, args.rtmp, args.fps)
+    cap.run()
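The letterbox forward/inverse mapping that `postprocess()` relies on can be sanity-checked standalone, with no model or camera. This sketch recomputes the parameters the same way `BaseDetector.letterbox` does and verifies that the inverse used in postprocessing recovers the original pixel coordinates:

```python
# Recompute letterbox parameters as BaseDetector.letterbox does, then check
# that the inverse mapping in postprocess() recovers original-image pixels.
def letterbox_params(w0, h0, size=640):
    scale = min(size / w0, size / h0)
    new_w, new_h = int(w0 * scale), int(h0 * scale)
    pad_w = (size - new_w) // 2
    pad_h = (size - new_h) // 2
    return scale, pad_w, pad_h

w0, h0 = 1920, 1080
scale, pad_w, pad_h = letterbox_params(w0, h0)

# forward: original -> canvas; inverse (as in postprocess): canvas -> original
x_orig = w0 / 2
x_canvas = x_orig * scale + pad_w
assert abs((x_canvas - pad_w) / scale - x_orig) < 1e-6

y_orig = h0 / 2
y_canvas = y_orig * scale + pad_h
assert abs((y_canvas - pad_h) / scale - y_orig) < 1e-6
```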

+ 24 - 0
testrk3588/test_detection.py

@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+import cv2
+import numpy as np
+from rtsp_person_detection import create_detector
+
+# Test the detection pipeline
+model_path = 'yolo11m_safety.onnx'  # switch to 'yolo11m_safety.rknn' to test the RKNN path
+det = create_detector(model_path)
+
+# Load the test image
+img = cv2.imread('b.jpg')
+if img is None:
+    print('无法读取测试图片')
+    exit(1)
+
+print(f'测试图片形状: {img.shape}')
+
+# Run detection
+conf_threshold_map = {3: 0.8, 0: 0.5, 4: 0.5}
+detections = det.detect(img, conf_threshold_map)
+
+print(f'检测结果: {len(detections)} 个目标')
+# Use a separate loop variable so the detector `det` is not shadowed
+for d in detections:
+    print(f'  {d.class_name}: conf={d.confidence:.2f}, box={d.bbox}')

+ 145 - 0
testrk3588/test_model.py

@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+import cv2
+import numpy as np
+from rknnlite.api import RKNNLite
+from dataclasses import dataclass
+from typing import List, Tuple, Optional
+
+@dataclass
+class Detection:
+    class_id: int
+    class_name: str
+    confidence: float
+    bbox: Tuple[int, int, int, int]
+
+LABEL_MAP = {0: '安全帽', 4: '安全衣', 3: '人'}
+INPUT_SIZE = (640, 640)
+
+def nms(dets, iou_threshold=0.45):
+    if len(dets) == 0:
+        return []
+    
+    boxes = np.array([[d.bbox[0], d.bbox[1], d.bbox[2], d.bbox[3], d.confidence] for d in dets])
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+    scores = boxes[:, 4]
+    
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+    
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+        
+        inds = np.where(ovr <= iou_threshold)[0]
+        order = order[inds + 1]
+    
+    return [dets[i] for i in keep]
+
+def letterbox(image, input_size=(640, 640)):
+    h0, w0 = image.shape[:2]
+    ih, iw = input_size
+    scale = min(iw / w0, ih / h0)
+    new_w, new_h = int(w0 * scale), int(h0 * scale)
+    pad_w = (iw - new_w) // 2
+    pad_h = (ih - new_h) // 2
+    resized = cv2.resize(image, (new_w, new_h))
+    canvas = np.full((ih, iw, 3), 114, dtype=np.uint8)
+    canvas[pad_h:pad_h+new_h, pad_w:pad_w+new_w] = resized
+    return canvas, scale, pad_w, pad_h, h0, w0
+
+def test_model():
+    model_path = "yolo11m_safety.rknn"
+    conf_threshold_map = {3: 0.8, 0: 0.5, 4: 0.5}
+    
+    rknn = RKNNLite()
+    
+    ret = rknn.load_rknn(model_path)
+    if ret != 0:
+        print("[ERROR] load_rknn failed")
+        return
+    
+    ret = rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
+    if ret != 0:
+        print("[ERROR] init_runtime failed")
+        return
+    
+    image = cv2.imread("b.jpg")
+    if image is None:
+        print("无法读取测试图片")
+        return
+    
+    canvas, scale, pad_w, pad_h, h0, w0 = letterbox(image)
+    
+    # RKNNLite 输入为 NHWC: (1, H, W, C), 已做 BGR->RGB
+    # 注意: 是否在这里做 /255 归一化取决于模型转换时的 mean/std 配置;
+    # 若转换时已配置归一化, 应直接传入 uint8, 避免重复归一化
+    img = canvas[..., ::-1].astype(np.float32) / 255.0
+    blob = img[None, ...]  # (1, 640, 640, 3)
+    
+    outputs = rknn.inference(inputs=[blob])
+    
+    if outputs:
+        output = outputs[0]
+        if len(output.shape) == 3:
+            output = output[0]
+        
+        num_classes = 5
+        dets = []
+        for i in range(output.shape[1]):
+            x_center = float(output[0, i])
+            y_center = float(output[1, i])
+            width = float(output[2, i])
+            height = float(output[3, i])
+            
+            class_probs = output[4:4+num_classes, i]
+            best_class = int(np.argmax(class_probs))
+            confidence = float(class_probs[best_class])
+            
+            if best_class not in LABEL_MAP:
+                continue
+            
+            conf_threshold = conf_threshold_map.get(best_class, 0.5)
+            if confidence < conf_threshold:
+                continue
+            
+            # Remove padding and scale to original image
+            x1 = int(((x_center - width / 2) - pad_w) / scale)
+            y1 = int(((y_center - height / 2) - pad_h) / scale)
+            x2 = int(((x_center + width / 2) - pad_w) / scale)
+            y2 = int(((y_center + height / 2) - pad_h) / scale)
+            
+            x1 = max(0, min(w0, x1))
+            y1 = max(0, min(h0, y1))
+            x2 = max(0, min(w0, x2))
+            y2 = max(0, min(h0, y2))
+            
+            det = Detection(
+                class_id=best_class,
+                class_name=LABEL_MAP[best_class],
+                confidence=confidence,
+                bbox=(x1, y1, x2, y2)
+            )
+            dets.append(det)
+        
+        dets = nms(dets, iou_threshold=0.45)
+        
+        print(f"检测结果: {len(dets)} 个目标")
+        for d in dets:
+            print(f"  {d.class_name}: conf={d.confidence:.3f}, box={d.bbox}")
+    
+    rknn.release()
+
+if __name__ == "__main__":
+    test_model()
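上面 test_model.py 中的 nms 是标准的贪心 NMS。下面用一个不依赖 NumPy 和 RKNN 的纯 Python 小例子演示同样的抑制逻辑(框坐标与分数均为假设值, IoU 面积公式与上面一致, 含 +1 像素修正):

```python
def iou(a, b):
    # 两个 (x1, y1, x2, y2) 框的交并比, 与 test_model.py 中 nms 的面积公式一致(含 +1)
    xx1, yy1 = max(a[0], b[0]), max(a[1], b[1])
    xx2, yy2 = min(a[2], b[2]), min(a[3], b[3])
    w, h = max(0.0, xx2 - xx1 + 1), max(0.0, yy2 - yy1 + 1)
    inter = w * h
    area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1)
    area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
    return inter / (area_a + area_b - inter)

boxes = [(0, 0, 100, 100), (10, 10, 110, 110), (200, 200, 300, 300)]
scores = [0.9, 0.8, 0.7]

# 贪心 NMS: 按分数降序取框, 抑制与已保留框 IoU 超过阈值的低分框
order = sorted(range(len(boxes)), key=lambda i: scores[i], reverse=True)
keep = []
while order:
    i = order.pop(0)
    keep.append(i)
    order = [j for j in order if iou(boxes[i], boxes[j]) <= 0.45]

print(keep)  # [0, 2]: 框 1 与框 0 高度重叠被抑制, 框 2 独立保留
```

注意 test_model.py 的实现是类别无关的 NMS; 若需要按类别分别抑制, 常见做法是给不同类别的框坐标叠加一个大偏移量后再统一做 NMS。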

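test_model.py 中 letterbox 前处理与检测框逆映射的坐标换算, 可以用一个不依赖 cv2 与 RKNN 的纯 Python 小例子自检(1920x1080 是假设的原图尺寸, 仅复用其中的缩放/填充公式):

```python
def letterbox_params(w0, h0, iw=640, ih=640):
    # 与 test_model.py 的 letterbox 相同的缩放与填充计算(不实际缩放图像)
    scale = min(iw / w0, ih / h0)
    new_w, new_h = int(w0 * scale), int(h0 * scale)
    pad_w = (iw - new_w) // 2
    pad_h = (ih - new_h) // 2
    return scale, pad_w, pad_h

def unletterbox_box(cx, cy, w, h, scale, pad_w, pad_h, w0, h0):
    # 640x640 画布坐标 -> 原图坐标, 与 test_model.py 的后处理公式一致
    x1 = int(((cx - w / 2) - pad_w) / scale)
    y1 = int(((cy - h / 2) - pad_h) / scale)
    x2 = int(((cx + w / 2) - pad_w) / scale)
    y2 = int(((cy + h / 2) - pad_h) / scale)
    return (max(0, min(w0, x1)), max(0, min(h0, y1)),
            max(0, min(w0, x2)), max(0, min(h0, y2)))

# 假设原图为 1920x1080: 等比缩放到 640x360, 上下各填充 140 像素
scale, pad_w, pad_h = letterbox_params(1920, 1080)
print(scale, pad_w, pad_h)

# 画布正中心的 60x60 框应映射回原图中心 (960, 540) 附近
box = unletterbox_box(320, 320, 60, 60, scale, pad_w, pad_h, 1920, 1080)
print(box)  # (870, 450, 1050, 630)
```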
+ 120 - 0
testrk3588/test_onnx.py

@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+LABEL_MAP = {0: '安全帽', 4: '安全衣', 3: '人'}
+
+def sigmoid(x):
+    return 1 / (1 + np.exp(-x))
+
+def test_onnx():
+    model_path = "yolo11m_safety.onnx"
+    input_size = (640, 640)
+    
+    image = cv2.imread("b.jpg")
+    if image is None:
+        print("无法读取测试图片")
+        return
+    
+    h0, w0 = image.shape[:2]
+    
+    print(f"=== 预处理 ===")
+    img = cv2.resize(image, input_size)
+    img = img[..., ::-1].astype(np.float32) / 255.0
+    img = img.transpose(2, 0, 1)
+    blob = img[None, ...]
+    print(f"Input blob shape: {blob.shape}")
+    
+    print(f"\n=== 加载 ONNX 模型 ===")
+    session = ort.InferenceSession(model_path)
+    input_name = session.get_inputs()[0].name
+    output_name = session.get_outputs()[0].name
+    print(f"Input name: {input_name}")
+    print(f"Output name: {output_name}")
+    
+    print(f"\n=== 推理 ===")
+    outputs = session.run([output_name], {input_name: blob})
+    output = outputs[0]
+    print(f"Output shape: {output.shape}")
+    
+    print(f"\n=== 原始输出分析 ===")
+    output_a = output[0]
+    print(f"After squeeze shape: {output_a.shape}")
+    
+    print(f"\n=== 查找高置信度框 (score > 0.1) ===")
+    # YOLO11 输出布局为 (4+nc, N): 前 4 行是 cx, cy, w, h, 其后是各类别分数,
+    # 没有独立的 objectness 通道, 且类别分数已经过 sigmoid, 无需再次激活
+    high_obj_indices = []
+    for i in range(output_a.shape[1]):
+        score = float(output_a[4:9, i].max())
+        if score > 0.1:
+            high_obj_indices.append((i, score))
+    
+    high_obj_indices.sort(key=lambda x: x[1], reverse=True)
+    print(f"找到 {len(high_obj_indices)} 个高置信度框")
+    
+    print(f"\n前 20 个高置信度框:")
+    for idx, score in high_obj_indices[:20]:
+        x_center = float(output_a[0, idx])
+        y_center = float(output_a[1, idx])
+        width = float(output_a[2, idx])
+        height = float(output_a[3, idx])
+        class_probs = output_a[4:9, idx]
+        class_id = int(np.argmax(class_probs))
+        confidence = float(class_probs[class_id])
+        
+        print(f"\nBox {idx}:")
+        print(f"  坐标: x={x_center:.3f}, y={y_center:.3f}, w={width:.3f}, h={height:.3f}")
+        print(f"  置信度: conf={confidence:.3f}")
+        print(f"  类别: class_id={class_id}, name={LABEL_MAP.get(class_id, 'unknown')}")
+    
+    print(f"\n=== 检测结果 ===")
+    dets = []
+    for idx, _ in high_obj_indices:
+        x_center = float(output_a[0, idx])
+        y_center = float(output_a[1, idx])
+        width = float(output_a[2, idx])
+        height = float(output_a[3, idx])
+        class_probs = output_a[4:9, idx]  # 5 个类别分数, 无 objectness 通道
+        class_id = int(np.argmax(class_probs))
+        confidence = float(class_probs[class_id])
+        
+        if class_id not in LABEL_MAP:
+            continue
+        
+        conf_threshold = 0.01
+        if confidence < conf_threshold:
+            continue
+        
+        x1 = int(x_center - width/2)
+        y1 = int(y_center - height/2)
+        x2 = int(x_center + width/2)
+        y2 = int(y_center + height/2)
+        
+        x1 = int(x1 * (w0/640))
+        y1 = int(y1 * (h0/640))
+        x2 = int(x2 * (w0/640))
+        y2 = int(y2 * (h0/640))
+        
+        x1 = max(0, x1)
+        y1 = max(0, y1)
+        x2 = min(w0, x2)
+        y2 = min(h0, y2)
+        
+        dets.append({
+            'class_id': class_id,
+            'class_name': LABEL_MAP[class_id],
+            'confidence': confidence,
+            'bbox': (x1, y1, x2, y2)
+        })
+    
+    print(f"\n检测到 {len(dets)} 个框 (阈值 0.01)")
+    for d in dets[:20]:
+        print(f"  {d['class_name']}: conf={d['confidence']:.3f}, box={d['bbox']}")
+
+if __name__ == "__main__":
+    test_onnx()
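顺带说明: Ultralytics 导出的 YOLO11 检测头输出通常为 (1, 4+nc, N), 类别分数已含 sigmoid 且没有独立的 objectness 通道。下面用一个与具体模型无关的玩具张量演示这种布局的解码(所有数值均为假设):

```python
import numpy as np

# 玩具输出: YOLO11 布局 (4 + nc, N), 这里 nc=5、N=3
# 前 4 行为 cx, cy, w, h, 后 5 行为各类别分数(假设已含 sigmoid)
out = np.zeros((9, 3), dtype=np.float32)
out[:4, 0] = [100, 100, 40, 40]; out[4 + 3, 0] = 0.90  # 类别 3(人)
out[:4, 1] = [200, 200, 20, 20]; out[4 + 0, 1] = 0.60  # 类别 0(安全帽)
out[:4, 2] = [300, 300, 10, 10]; out[4 + 2, 2] = 0.05  # 低分, 应被阈值过滤

dets = []
for i in range(out.shape[1]):
    scores = out[4:9, i]              # 直接取类别分数, 没有 objectness 通道
    cls = int(np.argmax(scores))
    conf = float(scores[cls])
    if conf < 0.1:
        continue
    cx, cy, w, h = (float(v) for v in out[:4, i])
    dets.append((cls, conf, (cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2)))

for cls, conf, box in dets:
    print(cls, round(conf, 2), box)
```

预期保留两个框(类别 3 与类别 0), 第三个候选因分数低于阈值被丢弃; 这与 test_model.py 中 RKNN 输出的解码方式是同一套公式。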

BIN
testrk3588/yolo11m_safety.pt


BIN
testrk3588/yolo11n.onnx


BIN
testrk3588/yolo11n_rk3588.rknn