# panorama_camera.py
  1. """
  2. 全景摄像头模块
  3. 负责获取视频流和物体检测
  4. """
  5. import os
  6. # 必须在导入cv2之前设置,防止FFmpeg多线程解码崩溃
  7. # pthread_frame.c:167 async_lock assertion
  8. os.environ['OPENCV_FFMPEG_CAPTURE_OPTIONS'] = 'rtsp_transport;tcp|threads;1'
  9. import cv2
  10. import numpy as np
  11. import threading
  12. import queue
  13. import time
  14. import logging
  15. from datetime import datetime
  16. from typing import Optional, List, Tuple, Dict, Any
  17. from dataclasses import dataclass
  18. from pathlib import Path
  19. from config import DETECTION_CONFIG
  20. from config.camera import parse_resolution
  21. from video_lock import safe_read, safe_is_opened
  22. from inference_backend import nms
  23. logger = logging.getLogger(__name__)
  24. @dataclass
  25. class DetectedObject:
  26. """检测到的物体"""
  27. class_name: str # 类别名称
  28. confidence: float # 置信度
  29. bbox: Tuple[int, int, int, int] # 边界框 (x, y, width, height)
  30. center: Tuple[int, int] # 中心点坐标
  31. track_id: Optional[int] = None # 跟踪ID
  32. class PanoramaCamera:
  33. """全景摄像头类"""
  34. def __init__(self, camera_config: Dict = None):
  35. """
  36. 初始化全景摄像头
  37. Args:
  38. camera_config: 摄像头配置
  39. """
  40. self.config = camera_config or {}
  41. # 解析期望分辨率
  42. self.frame_width, self.frame_height = parse_resolution(self.config.get('resolution'))
  43. # 摄像头品牌 / SDK 使用策略
  44. # brand: 'dahua' | 'hikvision' | 'uniview' | 'auto'
  45. # use_sdk: True 时使用大华 SDK 登录;False 时仅使用 RTSP 取流
  46. self.brand = self.config.get('brand', 'auto').lower()
  47. self.use_sdk = self.config.get('use_sdk', self.brand != 'hikvision')
  48. if self.brand == 'hikvision':
  49. self.use_sdk = False
  50. self.login_handle = None
  51. self.play_handle = None
  52. self.connected = False
  53. # 视频流
  54. self.frame_queue = queue.Queue(maxsize=10)
  55. self.current_frame = None
  56. self.frame_lock = threading.Lock()
  57. self.rtsp_cap = None # RTSP视频捕获
  58. self._camera_id = 'panorama' # 用于per-camera锁
  59. # 检测器
  60. self.detector = None
  61. # 控制标志
  62. self.running = False
  63. self.stream_thread = None
  64. # 断线重连
  65. self.auto_reconnect = True
  66. self.reconnect_interval = 5.0 # 重连间隔(秒)
  67. self.max_reconnect_attempts = 3 # 最大重连次数
  68. def connect(self) -> bool:
  69. """
  70. 连接摄像头
  71. Returns:
  72. 是否成功
  73. """
  74. if not self.use_sdk:
  75. print(f"[PanoramaCamera] {self.config.get('ip')} 配置为 RTSP-only 模式,跳过 SDK 登录")
  76. self.connected = True
  77. return True
  78. login_handle, error = self.sdk.login(
  79. self.config['ip'],
  80. self.config['port'],
  81. self.config['username'],
  82. self.config['password']
  83. )
  84. if login_handle is None:
  85. print(f"连接全景摄像头失败: IP={self.config['ip']}, 错误码={error}")
  86. return False
  87. self.login_handle = login_handle
  88. self.connected = True
  89. print(f"成功连接全景摄像头: {self.config['ip']}")
  90. return True
  91. def disconnect(self):
  92. """断开连接"""
  93. self.stop_stream()
  94. if self.use_sdk and self.login_handle:
  95. self.sdk.logout(self.login_handle)
  96. self.login_handle = None
  97. self.connected = False
  98. def is_connected(self) -> bool:
  99. """是否已连接"""
  100. return self.connected
  101. def start_stream(self) -> bool:
  102. """
  103. 开始视频流 (SDK 模式,仅 Dahua 等品牌支持)
  104. Returns:
  105. 是否成功
  106. """
  107. if not self.connected:
  108. return False
  109. if not self.use_sdk:
  110. print("[PanoramaCamera] 当前为 RTSP-only 模式,跳过 SDK 视频流")
  111. return False
  112. self.play_handle = self.sdk.real_play(
  113. self.login_handle,
  114. self.config['channel']
  115. )
  116. if self.play_handle is None:
  117. print("启动视频流失败")
  118. return False
  119. self.running = True
  120. self.stream_thread = threading.Thread(target=self._stream_worker, daemon=True)
  121. self.stream_thread.start()
  122. print("视频流已启动")
  123. return True
  124. def start_stream_rtsp(self, rtsp_url: str = None) -> bool:
  125. if rtsp_url is None:
  126. rtsp_url = self.config.get('rtsp_url') or f"rtsp://{self.config['username']}:{self.config['password']}@{self.config['ip']}:{self.config.get('rtsp_port', 554)}/h264/ch{self.config['channel']}/main/av_stream"
  127. try:
  128. # 先尝试FFmpeg后端
  129. self.rtsp_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
  130. if not self.rtsp_cap.isOpened():
  131. # FFmpeg失败,尝试GStreamer后端
  132. print(f"FFmpeg后端无法打开RTSP流,尝试GStreamer后端...")
  133. try:
  134. gst_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_GSTREAMER)
  135. if gst_cap.isOpened():
  136. self.rtsp_cap = gst_cap
  137. print(f"使用GStreamer后端打开RTSP流成功")
  138. else:
  139. print(f"无法打开RTSP流: {rtsp_url}")
  140. return False
  141. except Exception as ge:
  142. print(f"GStreamer后端也不可用: {ge}")
  143. return False
  144. self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
  145. self.running = True
  146. self.stream_thread = threading.Thread(target=self._rtsp_stream_worker, daemon=True)
  147. self.stream_thread.start()
  148. print(f"RTSP视频流已启动: {rtsp_url} (期望分辨率 {self.frame_width}x{self.frame_height})")
  149. return True
  150. except Exception as e:
  151. print(f"RTSP流启动失败: {e}")
  152. return False
  153. def _stream_worker(self):
  154. """视频流工作线程 (SDK模式)"""
  155. retry_count = 0
  156. max_retries = 10
  157. while self.running:
  158. try:
  159. # 尝试从 SDK 帧缓冲区获取帧 (如果可用)
  160. frame_buffer = self.sdk.get_video_frame_buffer(self.config['channel'])
  161. if frame_buffer:
  162. frame_info = frame_buffer.get(timeout=0.1)
  163. if frame_info and frame_info.get('data'):
  164. # 解码帧数据 (如果需要)
  165. # 注意: SDK回调返回的是编码数据,需要解码
  166. # 这里暂时跳过,因为解码需要额外处理
  167. pass
  168. # RTSP 模式获取帧 (推荐方式)
  169. if self.rtsp_cap is not None and safe_is_opened(self.rtsp_cap, self._camera_id):
  170. ret, frame = safe_read(self.rtsp_cap, self._camera_id)
  171. if ret and frame is not None:
  172. with self.frame_lock:
  173. self.current_frame = frame.copy()
  174. try:
  175. self.frame_queue.put(frame.copy(), block=False)
  176. except queue.Full:
  177. pass
  178. retry_count = 0 # 重置重试计数
  179. time.sleep(0.001) # 减少CPU占用
  180. continue
  181. # 如果 RTSP 不可用,尝试自动连接
  182. if retry_count < max_retries:
  183. rtsp_url = self._build_rtsp_url()
  184. try:
  185. if self.rtsp_cap is None:
  186. self.rtsp_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
  187. self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) # 减少缓冲延迟
  188. if safe_is_opened(self.rtsp_cap, self._camera_id):
  189. retry_count = 0
  190. continue
  191. except Exception as e:
  192. pass
  193. retry_count += 1
  194. time.sleep(1.0) # 重试间隔
  195. else:
  196. # 超过最大重试次数,使用与配置分辨率一致的模拟帧
  197. frame = np.zeros((self.frame_height, self.frame_width, 3), dtype=np.uint8)
  198. with self.frame_lock:
  199. self.current_frame = frame
  200. try:
  201. self.frame_queue.put(frame, block=False)
  202. except queue.Full:
  203. pass
  204. time.sleep(0.1)
  205. except Exception as e:
  206. err_str = str(e)
  207. if 'async_lock' in err_str or 'Assertion' in err_str:
  208. print(f"视频流FFmpeg内部错误,重建连接: {e}")
  209. self._reconnect_rtsp()
  210. else:
  211. print(f"视频流错误: {e}")
  212. time.sleep(0.5)
  213. def _build_rtsp_url(self) -> str:
  214. return self.config.get('rtsp_url') or f"rtsp://{self.config['username']}:{self.config['password']}@{self.config['ip']}:{self.config.get('rtsp_port', 554)}/h264/ch{self.config['channel']}/main/av_stream"
  215. def _rtsp_stream_worker(self):
  216. """RTSP视频流工作线程"""
  217. import signal
  218. # 屏蔽SIGINT在此线程,由主线程处理
  219. if hasattr(signal, 'pthread_sigmask'):
  220. try:
  221. signal.pthread_sigmask(signal.SIG_BLOCK, {signal.SIGINT})
  222. except (AttributeError, OSError):
  223. pass
  224. max_consecutive_errors = 50
  225. error_count = 0
  226. while self.running:
  227. try:
  228. if self.rtsp_cap is None or not safe_is_opened(self.rtsp_cap, self._camera_id):
  229. time.sleep(0.1)
  230. continue
  231. ret, frame = safe_read(self.rtsp_cap, self._camera_id)
  232. if not ret or frame is None:
  233. error_count += 1
  234. if error_count > max_consecutive_errors:
  235. print(f"全景RTSP流连续{max_consecutive_errors}次读取失败,尝试重连...")
  236. self._reconnect_rtsp()
  237. error_count = 0
  238. time.sleep(0.01)
  239. continue
  240. error_count = 0
  241. # 记录实际分辨率,仅做校验与提示(不做拉伸缩放,避免丢精度)
  242. actual_h, actual_w = frame.shape[:2]
  243. if not getattr(self, '_resolution_logged', False):
  244. print(f"全景摄像头实际分辨率: {actual_w}x{actual_h},期望分辨率: "
  245. f"{self.frame_width}x{self.frame_height}")
  246. self._resolution_logged = True
  247. if (actual_w, actual_h) != (self.frame_width, self.frame_height):
  248. if not getattr(self, '_resolution_warned', False):
  249. logger.warning(
  250. f"全景摄像头分辨率 {actual_w}x{actual_h} 与期望分辨率 "
  251. f"{self.frame_width}x{self.frame_height} 不一致,"
  252. f"模型推理时将使用 letterbox 灰度填充保持比例"
  253. )
  254. self._resolution_warned = True
  255. with self.frame_lock:
  256. self.current_frame = frame.copy()
  257. try:
  258. self.frame_queue.put(frame, block=False)
  259. except queue.Full:
  260. pass
  261. except Exception as e:
  262. err_str = str(e)
  263. if 'async_lock' in err_str or 'Assertion' in err_str:
  264. print(f"全景RTSP流FFmpeg内部错误,3秒后重建连接: {e}")
  265. time.sleep(3)
  266. self._reconnect_rtsp()
  267. else:
  268. print(f"全景RTSP视频流错误: {e}")
  269. time.sleep(0.5)
  270. def _reconnect_rtsp(self):
  271. """重建RTSP连接"""
  272. rtsp_url = self._build_rtsp_url()
  273. if self.rtsp_cap is not None:
  274. try:
  275. self.rtsp_cap.release()
  276. except Exception:
  277. pass
  278. self.rtsp_cap = None
  279. time.sleep(1)
  280. try:
  281. self.rtsp_cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
  282. if safe_is_opened(self.rtsp_cap, self._camera_id):
  283. self.rtsp_cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
  284. print("全景RTSP流重连成功")
  285. else:
  286. print("全景RTSP流重连失败")
  287. self.rtsp_cap = None
  288. except Exception as e:
  289. print(f"全景RTSP流重连异常: {e}")
  290. self.rtsp_cap = None
  291. def stop_stream(self):
  292. """停止视频流"""
  293. self.running = False
  294. if self.stream_thread:
  295. self.stream_thread.join(timeout=2)
  296. if self.play_handle:
  297. self.sdk.stop_real_play(self.play_handle)
  298. self.play_handle = None
  299. if self.rtsp_cap:
  300. self.rtsp_cap.release()
  301. self.rtsp_cap = None
  302. def get_frame(self) -> Optional[np.ndarray]:
  303. """
  304. 获取当前帧
  305. Returns:
  306. 当前帧图像
  307. """
  308. with self.frame_lock:
  309. return self.current_frame.copy() if self.current_frame is not None else None
  310. def get_frame_from_queue(self, timeout: float = 0.1) -> Optional[np.ndarray]:
  311. """
  312. 从帧队列获取帧 (用于批量处理)
  313. Args:
  314. timeout: 等待超时时间
  315. Returns:
  316. 帧图像或None
  317. """
  318. try:
  319. return self.frame_queue.get(timeout=timeout)
  320. except:
  321. return None
  322. def get_frame_buffer(self, count: int = 5) -> List[np.ndarray]:
  323. """
  324. 获取帧缓冲 (用于运动检测等需要多帧的场景)
  325. Args:
  326. count: 获取帧数
  327. Returns:
  328. 帧列表
  329. """
  330. frames = []
  331. while len(frames) < count:
  332. frame = self.get_frame_from_queue(timeout=0.05)
  333. if frame is not None:
  334. frames.append(frame)
  335. else:
  336. break
  337. return frames
  338. def set_detector(self, detector):
  339. """设置物体检测器"""
  340. self.detector = detector
  341. def detect_objects(self, frame: np.ndarray = None) -> List[DetectedObject]:
  342. """
  343. 检测物体
  344. Args:
  345. frame: 输入帧,如果为None则使用当前帧
  346. Returns:
  347. 检测到的物体列表
  348. """
  349. if frame is None:
  350. frame = self.get_frame()
  351. if frame is None or self.detector is None:
  352. return []
  353. return self.detector.detect(frame)
  354. def get_detection_position(self, obj: DetectedObject,
  355. frame_size: Tuple[int, int]) -> Tuple[float, float]:
  356. """
  357. 获取检测物体在画面中的相对位置
  358. Args:
  359. obj: 检测到的物体
  360. frame_size: 画面尺寸 (width, height)
  361. Returns:
  362. 相对位置 (x_ratio, y_ratio) 范围0-1
  363. """
  364. width, height = frame_size
  365. x_ratio = obj.center[0] / width
  366. y_ratio = obj.center[1] / height
  367. return (x_ratio, y_ratio)
  368. class ObjectDetector:
  369. """
  370. 物体检测器
  371. 使用YOLO11模型进行人体检测
  372. 支持 YOLO (.pt), RKNN (.rknn), ONNX (.onnx) 模型
  373. """
  374. def __init__(self, model_path: str = None, use_gpu: bool = True, model_size: str = 'n',
  375. model_type: str = 'auto'):
  376. """
  377. 初始化检测器
  378. Args:
  379. model_path: 模型路径 (支持 .pt, .rknn, .onnx)
  380. use_gpu: 是否使用GPU
  381. model_size: 模型尺寸 ('n', 's', 'm', 'l', 'x') - 仅 YOLO 模型有效
  382. model_type: 模型类型 ('auto', 'yolo', 'rknn', 'onnx')
  383. """
  384. self.model = None
  385. self.rknn_detector = None
  386. self.model_path = model_path
  387. self.use_gpu = use_gpu
  388. self.model_size = model_size
  389. self.model_type = model_type
  390. self.is_end2end = False
  391. self.config = DETECTION_CONFIG
  392. self.device = 'cuda:0' if use_gpu else 'cpu'
  393. # 检测图片保存配置
  394. self._save_image_enabled = self.config.get('save_detection_image', False)
  395. self._image_save_dir = Path(self.config.get('detection_image_dir', './detection_images'))
  396. self._image_max_count = self.config.get('detection_image_max_count', 1000)
  397. self._last_save_time = 0
  398. # 保存间隔:优先使用配置值,否则基于检测帧率计算(检测间隔的1.5倍)
  399. detection_fps = self.config.get('detection_fps', 2)
  400. self._save_interval = self.config.get('save_interval', 1.5 / detection_fps)
  401. # 创建保存目录
  402. if self._save_image_enabled:
  403. self._ensure_save_dir()
  404. # 根据扩展名自动判断模型类型
  405. if model_path:
  406. ext = os.path.splitext(model_path)[1].lower()
  407. if ext == '.rknn':
  408. self.model_type = 'rknn'
  409. elif ext == '.onnx':
  410. self.model_type = 'onnx'
  411. elif ext == '.pt':
  412. self.model_type = 'yolo'
  413. # end2end 模型(内置NMS),输出格式 (N, 6) = (x1,y1,x2,y2,conf,cls)
  414. if 'end2end' in os.path.basename(model_path).lower():
  415. self.is_end2end = True
  416. self._load_model()
  417. def _load_model(self):
  418. """加载检测模型"""
  419. if self.model_type == 'rknn':
  420. self._load_rknn_model()
  421. elif self.model_type == 'onnx':
  422. self._load_onnx_model()
  423. else:
  424. self._load_yolo_model()
  425. def _load_rknn_model(self):
  426. """加载 RKNN 模型"""
  427. if not self.model_path:
  428. raise ValueError("RKNN 模型需要指定 model_path")
  429. try:
  430. from rknnlite.api import RKNNLite
  431. self.rknn = RKNNLite()
  432. ret = self.rknn.load_rknn(self.model_path)
  433. if ret != 0:
  434. raise RuntimeError(f"加载 RKNN 模型失败: {self.model_path}")
  435. ret = self.rknn.init_runtime(core_mask=RKNNLite.NPU_CORE_0_1_2)
  436. if ret != 0:
  437. raise RuntimeError(f"初始化 RKNN 运行时失败")
  438. print(f"RKNN 模型加载成功: {self.model_path}")
  439. except ImportError:
  440. raise ImportError("未安装 rknnlite,请运行: pip install rknnlite2")
  441. def _load_onnx_model(self):
  442. """加载 ONNX 模型(优先 ONNX Runtime,回退 Ultralytics)"""
  443. if not self.model_path:
  444. raise ValueError("ONNX 模型需要指定 model_path")
  445. # 尝试直接使用 ONNX Runtime(end2end 格式)
  446. try:
  447. import onnxruntime as ort
  448. available = ort.get_available_providers()
  449. providers = [p for p in ['CPUExecutionProvider'] if p in available]
  450. session = ort.InferenceSession(self.model_path, providers=providers)
  451. self.session = session
  452. self.input_name = session.get_inputs()[0].name
  453. self.is_end2end = True # 导出的 ONNX 为 end2end 格式 (1, 300, 6)
  454. print(f"ONNX 模型加载成功: {self.model_path} (via ONNX Runtime, providers={providers})")
  455. return
  456. except Exception as e:
  457. print(f"ONNX Runtime 加载失败,尝试 Ultralytics: {e}")
  458. try:
  459. from ultralytics import YOLO
  460. self.model = YOLO(self.model_path, task='detect')
  461. self.device = 'cpu'
  462. print(f"ONNX 模型加载成功: {self.model_path} (via Ultralytics, device=cpu)")
  463. except ImportError:
  464. raise ImportError("未安装 ultralytics")
  465. def _load_yolo_model(self):
  466. """加载YOLO11检测模型"""
  467. try:
  468. from ultralytics import YOLO
  469. if self.model_path:
  470. self.model = YOLO(self.model_path)
  471. else:
  472. model_name = f'yolo11{self.model_size}.pt'
  473. self.model = YOLO(model_name)
  474. dummy = np.zeros((640, 640, 3), dtype=np.uint8)
  475. self.model(dummy, device=self.device, verbose=False)
  476. print(f"成功加载YOLO11检测模型 (device={self.device})")
  477. except ImportError:
  478. print("未安装ultralytics,请运行: pip install ultralytics")
  479. self._load_opencv_model()
  480. except Exception as e:
  481. print(f"加载YOLO11模型失败: {e}")
  482. self._load_opencv_model()
  483. def _load_opencv_model(self):
  484. """使用OpenCV加载模型"""
  485. pass
  486. def _ensure_save_dir(self):
  487. """确保保存目录存在"""
  488. try:
  489. self._image_save_dir.mkdir(parents=True, exist_ok=True)
  490. logger.info(f"检测图片保存目录: {self._image_save_dir}")
  491. except Exception as e:
  492. logger.error(f"创建检测图片目录失败: {e}")
  493. self._save_image_enabled = False
  494. def _cleanup_old_images(self):
  495. """清理旧图片,保持目录下图片数量不超过上限"""
  496. try:
  497. image_files = list(self._image_save_dir.glob("*.jpg"))
  498. if len(image_files) > self._image_max_count:
  499. # 按修改时间排序,删除最旧的
  500. image_files.sort(key=lambda x: x.stat().st_mtime)
  501. to_delete = image_files[:len(image_files) - self._image_max_count]
  502. for f in to_delete:
  503. f.unlink()
  504. logger.info(f"已清理 {len(to_delete)} 张旧检测图片")
  505. except Exception as e:
  506. logger.error(f"清理旧图片失败: {e}")
  507. def _save_detection_image(self, frame: np.ndarray, detections: List[DetectedObject]):
  508. """
  509. 保存带有检测标记的图片(只标记达到置信度阈值的人)
  510. Args:
  511. frame: 原始图像
  512. detections: 检测结果列表
  513. """
  514. if not self._save_image_enabled or not detections:
  515. return
  516. # 检查保存间隔
  517. current_time = time.time()
  518. if current_time - self._last_save_time < self._save_interval:
  519. return
  520. try:
  521. # 复制图像避免修改原图
  522. marked_frame = frame.copy()
  523. # 置信度阈值(人员检测用更高阈值)
  524. person_threshold = self.config.get('person_threshold', 0.8)
  525. # 只标记达到阈值的人
  526. person_count = 0
  527. for det in detections:
  528. # 只处理人且达到阈值
  529. is_person = det.class_name in ['person']
  530. if not is_person:
  531. continue
  532. # 未达阈值的不标记
  533. if det.confidence < person_threshold:
  534. continue
  535. x, y, w, h = det.bbox
  536. # 绘制边界框(绿色)
  537. cv2.rectangle(marked_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
  538. # 绘制序号标签
  539. label = f"person_{person_count}"
  540. person_count += 1
  541. (label_w, label_h), baseline = cv2.getTextSize(
  542. label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2
  543. )
  544. cv2.rectangle(
  545. marked_frame,
  546. (x, y - label_h - 8),
  547. (x + label_w, y),
  548. (0, 255, 0),
  549. -1
  550. )
  551. # 绘制标签文字(黑色)
  552. cv2.putText(
  553. marked_frame, label,
  554. (x, y - 4),
  555. cv2.FONT_HERSHEY_SIMPLEX, 0.8,
  556. (0, 0, 0), 2
  557. )
  558. # 无有效目标则不保存
  559. if person_count == 0:
  560. return
  561. # 生成文件名(时间戳+有效人数)
  562. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
  563. filename = f"panorama_{timestamp}_n{person_count}.png"
  564. filepath = self._image_save_dir / filename
  565. # 保存图片(PNG无损格式,不压缩)
  566. cv2.imwrite(str(filepath), marked_frame)
  567. self._last_save_time = current_time
  568. logger.info(f"[全景] 已保存检测图片: {filepath},有效人数 {person_count} (阈值={person_threshold})")
  569. # 定期清理旧图片
  570. self._cleanup_old_images()
  571. except Exception as e:
  572. logger.error(f"[全景] 保存检测图片失败: {e}")
  573. def _letterbox(self, image, size=(640, 640)):
  574. """Letterbox 预处理"""
  575. h0, w0 = image.shape[:2]
  576. ih, iw = size
  577. scale = min(iw / w0, ih / h0)
  578. new_w, new_h = int(w0 * scale), int(h0 * scale)
  579. pad_w = (iw - new_w) // 2
  580. pad_h = (ih - new_h) // 2
  581. resized = cv2.resize(image, (new_w, new_h))
  582. canvas = np.full((ih, iw, 3), 114, dtype=np.uint8)
  583. canvas[pad_h:pad_h+new_h, pad_w:pad_w+new_w] = resized
  584. return canvas, scale, pad_w, pad_h, h0, w0
  585. @staticmethod
  586. def _make_grid(h: int, w: int):
  587. """生成特征图网格坐标"""
  588. yv, xv = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
  589. return np.stack([xv, yv], axis=-1).reshape(-1, 2)
  590. @staticmethod
  591. def _pseudo_person_confidence(bboxes: np.ndarray, orig_h: int, orig_w: int) -> np.ndarray:
  592. """
  593. 当 RKNN 模型 cls 输出恒定时,根据 bbox 形状生成伪置信度。
  594. bboxes: (N, 4) [x1, y1, x2, y2] 原始图像坐标
  595. """
  596. x1, y1, x2, y2 = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
  597. w = np.maximum(1.0, x2 - x1)
  598. h = np.maximum(1.0, y2 - y1)
  599. aspect = h / w
  600. # 人体宽高比通常在 1.5 ~ 4.0 之间,以 2.5 为最佳
  601. aspect_score = np.exp(-0.5 * ((aspect - 2.5) / 1.0) ** 2)
  602. # 面积占画面比例,过大/过小都降权
  603. area = w * h
  604. img_area = orig_w * orig_h
  605. area_ratio = area / img_area
  606. size_score = np.clip(area_ratio * 80, 0.0, 1.0) # 占画面 1.25% 以上得满分
  607. # 综合伪置信度,范围 0.55 ~ 0.95
  608. conf = 0.55 + 0.30 * aspect_score + 0.10 * size_score
  609. return conf
  610. def _decode_yolo11_outputs(self, outputs: list, canvas_size: tuple,
  611. scale: float, pad_w: int, pad_h: int,
  612. orig_h: int, orig_w: int,
  613. conf_threshold: float = 0.5,
  614. iou_threshold: float = 0.45) -> list:
  615. """
  616. 解码 YOLO11 RKNN 多输出格式 (DFL bbox + cls + mask,每尺度 3 个输出)
  617. outputs: [bbox_80x80, cls_80x80, mask_80x80, bbox_40x40, cls_40x40, ...]
  618. 返回: [[x1, y1, x2, y2, score, class_id], ...] (原始图像坐标)
  619. """
  620. strides = [8, 16, 32]
  621. reg_max = 16
  622. dets = []
  623. # 记录 cls 输出是否异常(常量),用于后续 fallback
  624. cls_constant = True
  625. for scale_idx, stride in enumerate(strides):
  626. bbox_out = outputs[scale_idx * 3]
  627. cls_out = outputs[scale_idx * 3 + 1]
  628. # 检测 cls 是否为常量(量化/导出异常导致)
  629. if np.ptp(cls_out) > 1e-4:
  630. cls_constant = False
  631. _, _, h, w = bbox_out.shape
  632. # (64, H, W) -> (H*W, 4, 16)
  633. bbox = bbox_out[0].transpose(1, 2, 0).reshape(h * w, 4, reg_max)
  634. # (80, H, W) -> (H*W, 80)
  635. cls = cls_out[0].transpose(1, 2, 0).reshape(h * w, 80)
  636. # DFL 解码
  637. prob = np.exp(bbox - np.max(bbox, axis=-1, keepdims=True))
  638. prob = prob / np.sum(prob, axis=-1, keepdims=True)
  639. bins = np.arange(reg_max).reshape(1, 1, reg_max)
  640. decoded = np.sum(prob * bins, axis=-1) # (H*W, 4)
  641. # 网格中心
  642. grid = self._make_grid(h, w) + 0.5 # (H*W, 2)
  643. l, t, r, b = decoded[:, 0], decoded[:, 1], decoded[:, 2], decoded[:, 3]
  644. x1 = (grid[:, 0] - l) * stride
  645. y1 = (grid[:, 1] - t) * stride
  646. x2 = (grid[:, 0] + r) * stride
  647. y2 = (grid[:, 1] + b) * stride
  648. # cls sigmoid
  649. cls = 1.0 / (1.0 + np.exp(-cls))
  650. scores = np.max(cls, axis=1)
  651. labels = np.argmax(cls, axis=1)
  652. for i in range(len(scores)):
  653. if scores[i] < conf_threshold:
  654. continue
  655. dets.append([x1[i], y1[i], x2[i], y2[i], scores[i], labels[i]])
  656. if not dets:
  657. return []
  658. dets = np.array(dets)
  659. # 从 canvas(640x640) 坐标映射回原始图像坐标:去 padding -> 除以 scale
  660. dets[:, [0, 2]] = (dets[:, [0, 2]] - pad_w) / scale
  661. dets[:, [1, 3]] = (dets[:, [1, 3]] - pad_h) / scale
  662. # clip
  663. dets[:, [0, 2]] = np.clip(dets[:, [0, 2]], 0, orig_w)
  664. dets[:, [1, 3]] = np.clip(dets[:, [1, 3]], 0, orig_h)
  665. # 若 cls 输出异常常量,用伪置信度替代,帮助 NMS 和后续过滤
  666. if cls_constant:
  667. pseudo_scores = self._pseudo_person_confidence(dets[:, :4], orig_h, orig_w)
  668. dets[:, 4] = pseudo_scores
  669. dets[:, 5] = 0 # 强制为 person 类别
  670. # NMS
  671. x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
  672. scores = dets[:, 4]
  673. areas = (x2 - x1) * (y2 - y1)
  674. order = scores.argsort()[::-1]
  675. keep = []
  676. while order.size > 0:
  677. i = order[0]
  678. keep.append(i)
  679. xx1 = np.maximum(x1[i], x1[order[1:]])
  680. yy1 = np.maximum(y1[i], y1[order[1:]])
  681. xx2 = np.minimum(x2[i], x2[order[1:]])
  682. yy2 = np.minimum(y2[i], y2[order[1:]])
  683. w = np.maximum(0.0, xx2 - xx1)
  684. h = np.maximum(0.0, yy2 - yy1)
  685. inter = w * h
  686. iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-6)
  687. inds = np.where(iou <= iou_threshold)[0]
  688. order = order[inds + 1]
  689. return dets[keep].tolist()
  690. def _detect_rknn(self, frame: np.ndarray) -> List[DetectedObject]:
  691. """使用 RKNN/ONNX 模型检测"""
  692. results = []
  693. h0, w0 = frame.shape[:2]
  694. # 宽幅图(宽高比>1.5)使用分区域检测,避免letterbox后人太小
  695. if w0 / h0 > 1.5:
  696. return self._detect_rknn_tiled(frame)
  697. try:
  698. conf_threshold = self.config['confidence_threshold']
  699. class_map = self.config.get('class_map', {0: 'person'})
  700. # -------------------------------------------------------
  701. # end2end 模型(内置NMS):letterbox + NHWC 预处理
  702. # 输出格式 (N, max_det, 6) = (x1,y1,x2,y2,conf,cls) 在 letterbox 空间
  703. # -------------------------------------------------------
  704. if self.is_end2end:
  705. canvas, scale, pad_w, pad_h, h0, w0 = self._letterbox(frame)
  706. img = canvas[..., ::-1].astype(np.float32) / 255.0
  707. blob = img[None, ...] # NHWC
  708. if hasattr(self, 'rknn'):
  709. outputs = self.rknn.inference(inputs=[blob])
  710. else:
  711. # ONNX 通常期望 NCHW,需转置
  712. nchw = blob.transpose(0, 3, 1, 2)
  713. outputs = self.session.run(None, {self.input_name: nchw})
  714. output = outputs[0]
  715. if len(output.shape) == 3:
  716. output = output[0]
  717. for i in range(output.shape[0]):
  718. x1_lb, y1_lb, x2_lb, y2_lb, conf, cls_id = output[i]
  719. if not np.isfinite(conf) or conf < conf_threshold:
  720. continue
  721. if not (np.isfinite(x1_lb) and np.isfinite(y1_lb) and np.isfinite(x2_lb) and np.isfinite(y2_lb)):
  722. continue
  723. if x1_lb >= x2_lb or y1_lb >= y2_lb:
  724. continue
  725. cls_name = class_map.get(int(cls_id), str(int(cls_id)))
  726. if cls_name not in self.config['target_classes']:
  727. continue
  728. # 从 letterbox 空间映射回原图
  729. x1 = int((x1_lb - pad_w) / scale)
  730. y1 = int((y1_lb - pad_h) / scale)
  731. x2 = int((x2_lb - pad_w) / scale)
  732. y2 = int((y2_lb - pad_h) / scale)
  733. x1 = max(0, min(w0, x1))
  734. y1 = max(0, min(h0, y1))
  735. x2 = max(0, min(w0, x2))
  736. y2 = max(0, min(h0, y2))
  737. if x2 - x1 < 10 or y2 - y1 < 10:
  738. continue
  739. results.append(DetectedObject(
  740. class_name=cls_name,
  741. confidence=float(conf),
  742. bbox=(x1, y1, x2 - x1, y2 - y1),
  743. center=((x1 + x2) // 2, (y1 + y2) // 2)
  744. ))
  745. return results
  746. # -------------------------------------------------------
  747. # 非 end2end 模型:letterbox + NHWC 预处理
  748. # -------------------------------------------------------
  749. canvas, scale, pad_w, pad_h, h0, w0 = self._letterbox(frame)
  750. if hasattr(self, 'rknn'):
  751. # RKNN
  752. img = canvas[..., ::-1].astype(np.float32) / 255.0
  753. blob = img[None, ...]
  754. outputs = self.rknn.inference(inputs=[blob])
  755. else:
  756. # ONNX
  757. img = canvas[..., ::-1].astype(np.float32) / 255.0
  758. img = img.transpose(2, 0, 1)
  759. blob = img[None, ...]
  760. outputs = self.session.run(None, {self.input_name: blob})
  761. # 根据输出数量判断格式:YOLO11 DFL 为 9 个输出(3 scales x 3 branches)
  762. if len(outputs) == 9:
  763. dets = self._decode_yolo11_outputs(
  764. outputs, (640, 640), scale, pad_w, pad_h, h0, w0,
  765. conf_threshold=conf_threshold
  766. )
  767. for x1, y1, x2, y2, score, cls_id in dets:
  768. cls_name = class_map.get(int(cls_id), str(int(cls_id)))
  769. if cls_name not in self.config['target_classes']:
  770. continue
  771. if x2 - x1 < 10 or y2 - y1 < 10:
  772. continue
  773. obj = DetectedObject(
  774. class_name=cls_name,
  775. confidence=float(score),
  776. bbox=(int(x1), int(y1), int(x2 - x1), int(y2 - y1)),
  777. center=(int((x1 + x2) / 2), int((y1 + y2) / 2))
  778. )
  779. results.append(obj)
  780. else:
  781. output = outputs[0]
  782. if len(output.shape) == 3:
  783. output = output[0]
  784. num_boxes = output.shape[1]
  785. candidates = []
  786. # YOLO11 单输出格式 (9, 8400): bbox(4) + obj(1) + cls(4)
  787. if output.shape[0] == 9:
  788. for i in range(num_boxes):
  789. obj = float(output[4, i])
  790. cls_logits = output[5:9, i]
  791. best_class = int(np.argmax(cls_logits))
  792. confidence = 1.0 / (1.0 + np.exp(-float(cls_logits[best_class])))
  793. if confidence < conf_threshold:
  794. continue
  795. x_center = float(output[0, i])
  796. y_center = float(output[1, i])
  797. w = float(output[2, i])
  798. h = float(output[3, i])
  799. x1 = int(((x_center - w / 2) - pad_w) / scale)
  800. y1 = int(((y_center - h / 2) - pad_h) / scale)
  801. x2 = int(((x_center + w / 2) - pad_w) / scale)
  802. y2 = int(((y_center + h / 2) - pad_h) / scale)
  803. x1 = max(0, min(w0, x1))
  804. y1 = max(0, min(h0, y1))
  805. x2 = max(0, min(w0, x2))
  806. y2 = max(0, min(h0, y2))
  807. if x2 - x1 < 10 or y2 - y1 < 10:
  808. continue
  809. cls_name = class_map.get(best_class, str(best_class))
  810. if cls_name not in self.config['target_classes']:
  811. continue
  812. candidates.append(DetectedObject(
  813. class_name=cls_name,
  814. confidence=confidence,
  815. bbox=(x1, y1, x2 - x1, y2 - y1),
  816. center=((x1 + x2) // 2, (y1 + y2) // 2)
  817. ))
  818. else:
  819. # 标准 (84, 8400) 格式(ONNX 或新 RKNN)
  820. for i in range(num_boxes):
  821. x_center = float(output[0, i])
  822. y_center = float(output[1, i])
  823. width = float(output[2, i])
  824. height = float(output[3, i])
  825. class_probs = output[4:, i]
  826. best_class = int(np.argmax(class_probs))
  827. confidence = 1.0 / (1.0 + np.exp(-float(class_probs[best_class])))
  828. if confidence < conf_threshold:
  829. continue
  830. x1 = int(((x_center - width / 2) - pad_w) / scale)
  831. y1 = int(((y_center - height / 2) - pad_h) / scale)
  832. x2 = int(((x_center + width / 2) - pad_w) / scale)
  833. y2 = int(((y_center + height / 2) - pad_h) / scale)
  834. x1 = max(0, min(w0, x1))
  835. y1 = max(0, min(h0, y1))
  836. x2 = max(0, min(w0, x2))
  837. y2 = max(0, min(h0, y2))
  838. if x2 - x1 < 10 or y2 - y1 < 10:
  839. continue
  840. cls_name = class_map.get(best_class, str(best_class))
  841. if cls_name not in self.config['target_classes']:
  842. continue
  843. candidates.append(DetectedObject(
  844. class_name=cls_name,
  845. confidence=confidence,
  846. bbox=(x1, y1, x2 - x1, y2 - y1),
  847. center=((x1 + x2) // 2, (y1 + y2) // 2)
  848. ))
  849. results = nms(candidates, iou_threshold=0.45)
  850. except Exception as e:
  851. logger.error(f"RKNN/ONNX 检测错误: {e}")
  852. import traceback
  853. logger.error(traceback.format_exc())
  854. return results
  855. def _detect_rknn_tiled(self, frame: np.ndarray) -> List[DetectedObject]:
  856. """分区域检测 - 将宽幅全景图分成多个重叠区域分别检测,提高远处目标识别率"""
  857. results = []
  858. h0, w0 = frame.shape[:2]
  859. need_nms = not self.is_end2end
  860. conf_threshold = self.config['confidence_threshold']
  861. class_map = self.config.get('class_map', {0: 'person'})
  862. # 分3个重叠区域
  863. overlap = int(h0 * 0.2)
  864. regions = [
  865. (0, w0 // 3 + overlap),
  866. (w0 // 3 - overlap // 2, 2 * w0 // 3 + overlap // 2),
  867. (2 * w0 // 3 - overlap, w0),
  868. ]
  869. seen_centers = []
  870. for x_start, x_end in regions:
  871. crop = frame[:, x_start:x_end]
  872. ch, cw = crop.shape[:2]
  873. # end2end 模型:letterbox + NCHW
  874. if self.is_end2end:
  875. canvas, scale, pad_w, pad_h, ch, cw = self._letterbox(crop)
  876. img = canvas[..., ::-1].astype(np.float32) / 255.0
  877. if hasattr(self, 'rknn'):
  878. outputs = self.rknn.inference(inputs=[img[None, ...]])
  879. else:
  880. outputs = self.session.run(None, {self.input_name: img.transpose(2, 0, 1)[None, ...]})
  881. output = outputs[0]
  882. if len(output.shape) == 3:
  883. output = output[0]
  884. dets = []
  885. for i in range(output.shape[0]):
  886. x1_lb, y1_lb, x2_lb, y2_lb, conf, cls_id = output[i]
  887. if conf < conf_threshold:
  888. continue
  889. cls_name = class_map.get(int(cls_id), str(int(cls_id)))
  890. if cls_name not in self.config['target_classes']:
  891. continue
  892. # 从 letterbox 空间映射回 crop 坐标
  893. _x1 = int((x1_lb - pad_w) / scale)
  894. _y1 = int((y1_lb - pad_h) / scale)
  895. _x2 = int((x2_lb - pad_w) / scale)
  896. _y2 = int((y2_lb - pad_h) / scale)
  897. dets.append([_x1, _y1, _x2, _y2, float(conf), int(cls_id)])
  898. else:
  899. canvas, scale, pad_w, pad_h, ch, cw = self._letterbox(crop)
  900. if hasattr(self, 'rknn'):
  901. img = canvas[..., ::-1].astype(np.float32) / 255.0
  902. outputs = self.rknn.inference(inputs=[img[None, ...]])
  903. else:
  904. img = canvas[..., ::-1].astype(np.float32) / 255.0
  905. img = img.transpose(2, 0, 1)
  906. outputs = self.session.run(None, {self.input_name: img[None, ...]})
  907. if len(outputs) == 9:
  908. dets = self._decode_yolo11_outputs(
  909. outputs, (640, 640), scale, pad_w, pad_h, ch, cw,
  910. conf_threshold=conf_threshold
  911. )
  912. else:
  913. output = outputs[0]
  914. if len(output.shape) == 3:
  915. output = output[0]
  916. dets = []
  917. # YOLO11 单输出格式 (9, 8400): bbox(4) + obj(1) + cls(4)
  918. if output.shape[0] == 9:
  919. for i in range(output.shape[1]):
  920. obj = float(output[4, i])
  921. cls_logits = output[5:9, i]
  922. best_class = int(np.argmax(cls_logits))
  923. confidence = 1.0 / (1.0 + np.exp(-float(cls_logits[best_class])))
  924. if confidence < conf_threshold:
  925. continue
  926. x_center = float(output[0, i])
  927. y_center = float(output[1, i])
  928. w = float(output[2, i])
  929. h = float(output[3, i])
  930. x1 = x_center - w / 2
  931. y1 = y_center - h / 2
  932. x2 = x_center + w / 2
  933. y2 = y_center + h / 2
  934. dets.append([x1, y1, x2, y2, confidence, best_class])
  935. else:
  936. # 标准 (84, 8400) 格式
  937. for i in range(output.shape[1]):
  938. class_probs = output[4:, i]
  939. best_class = int(np.argmax(class_probs))
  940. confidence = 1.0 / (1.0 + np.exp(-float(class_probs[best_class])))
  941. if confidence < conf_threshold:
  942. continue
  943. x1 = output[0, i] - output[2, i] / 2
  944. y1 = output[1, i] - output[3, i] / 2
  945. x2 = output[0, i] + output[2, i] / 2
  946. y2 = output[1, i] + output[3, i] / 2
  947. dets.append([x1, y1, x2, y2, confidence, best_class])
  948. # 从 canvas 坐标映射回 crop 坐标
  949. dets = np.array(dets) if dets else np.zeros((0, 6))
  950. dets[:, [0, 2]] = (dets[:, [0, 2]] - pad_w) / scale
  951. dets[:, [1, 3]] = (dets[:, [1, 3]] - pad_h) / scale
  952. dets = dets.tolist()
  953. for x1, y1, x2, y2, confidence, best_class in dets:
  954. cls_name = class_map.get(int(best_class), str(int(best_class)))
  955. if cls_name not in self.config['target_classes']:
  956. continue
  957. # 转换到原图坐标(加上区域偏移)
  958. x1 = int(x1) + x_start
  959. y1 = int(y1)
  960. x2 = int(x2) + x_start
  961. y2 = int(y2)
  962. x1 = max(0, min(w0, x1))
  963. y1 = max(0, min(h0, y1))
  964. x2 = max(0, min(w0, x2))
  965. y2 = max(0, min(h0, y2))
  966. if x2 - x1 < 10 or y2 - y1 < 10:
  967. continue
  968. # 去重:同一目标可能被相邻区域重复检测
  969. cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
  970. if any(abs(cx - sx) < 50 and abs(cy - sy) < 50 for sx, sy in seen_centers):
  971. continue
  972. seen_centers.append((cx, cy))
  973. obj = DetectedObject(
  974. class_name=cls_name,
  975. confidence=float(confidence),
  976. bbox=(x1, y1, x2 - x1, y2 - y1),
  977. center=(cx, cy)
  978. )
  979. results.append(obj)
  980. if need_nms and results:
  981. results = nms(results, iou_threshold=0.45)
  982. return results
  983. def detect(self, frame: np.ndarray) -> List[DetectedObject]:
  984. """检测物体返回所有类别结果"""
  985. if frame is None:
  986. return []
  987. if hasattr(self, 'rknn') and self.rknn is not None:
  988. results = self._detect_rknn(frame)
  989. if results:
  990. self._log_detections("RKNN", results, frame)
  991. self._save_detection_image(frame, results)
  992. return results
  993. elif hasattr(self, 'session') and self.session is not None:
  994. results = self._detect_rknn(frame)
  995. if results:
  996. self._log_detections("ONNX", results, frame)
  997. self._save_detection_image(frame, results)
  998. return results
  999. elif self.model is not None:
  1000. results = self._detect_yolo(frame)
  1001. if results:
  1002. self._log_detections("YOLO", results, frame)
  1003. self._save_detection_image(frame, results)
  1004. return results
  1005. else:
  1006. logger.error("[YOLO] 没有可用的检测模型")
  1007. return []
  1008. def _log_detections(self, model_type: str, results: List[DetectedObject], frame: np.ndarray):
  1009. if not results:
  1010. return
  1011. class_counts = {}
  1012. for r in results:
  1013. class_counts[r.class_name] = class_counts.get(r.class_name, 0) + 1
  1014. h, w = frame.shape[:2]
  1015. logger.info(f"[YOLO] {model_type}: {len(results)}个目标 {class_counts} (帧尺寸={w}x{h})")
  1016. def _detect_yolo(self, frame: np.ndarray) -> List[DetectedObject]:
  1017. """使用 YOLO 模型检测"""
  1018. results = []
  1019. try:
  1020. detections = self.model(
  1021. frame,
  1022. device=self.device,
  1023. verbose=False,
  1024. conf=self.config['confidence_threshold']
  1025. )
  1026. for det in detections:
  1027. boxes = det.boxes
  1028. if boxes is None:
  1029. continue
  1030. for i in range(len(boxes)):
  1031. cls_id = int(boxes.cls[i])
  1032. cls_name = det.names[cls_id]
  1033. if cls_name not in self.config['target_classes']:
  1034. continue
  1035. conf = float(boxes.conf[i])
  1036. xyxy = boxes.xyxy[i].cpu().numpy()
  1037. x1, y1, x2, y2 = map(int, xyxy)
  1038. width = x2 - x1
  1039. height = y2 - y1
  1040. if width < 10 or height < 10:
  1041. continue
  1042. center_x = x1 + width // 2
  1043. center_y = y1 + height // 2
  1044. obj = DetectedObject(
  1045. class_name=cls_name,
  1046. confidence=conf,
  1047. bbox=(x1, y1, width, height),
  1048. center=(center_x, center_y)
  1049. )
  1050. results.append(obj)
  1051. except Exception as e:
  1052. logger.error(f"YOLO11检测错误: {e}")
  1053. return results
  1054. def detect_with_keypoints(self, frame: np.ndarray) -> List[DetectedObject]:
  1055. """
  1056. 使用YOLO11-pose检测人体并返回关键点
  1057. Args:
  1058. frame: 输入图像
  1059. Returns:
  1060. 带关键点的检测结果列表
  1061. """
  1062. return self.detect(frame)
  1063. def detect_persons(self, frame: np.ndarray) -> List[DetectedObject]:
  1064. """检测人体(支持中英文类别名)"""
  1065. all_detections = self.detect(frame)
  1066. person_classes = {'person', '人'}
  1067. return [obj for obj in all_detections if obj.class_name in person_classes]
  1068. def release(self):
  1069. """释放模型资源"""
  1070. if hasattr(self, 'rknn') and self.rknn:
  1071. self.rknn.release()
  1072. self.rknn = None
  1073. self.model = None
  1074. self.session = None