Kaynağa Gözat

fix(third_party_pusher,object_detector): 修复并优化PTZ位置与目标检测坐标映射

1. 完善third_party_pusher的PTZ位置默认值处理,缺失时使用0/0/1
2. 修复object_detector的end2end模型预处理和坐标映射逻辑,替换resize为letterbox
wenhongquan 1 hafta önce
ebeveyn
işleme
c212cadd56

+ 10 - 9
dual_camera_system/panorama_camera.py

@@ -1037,12 +1037,12 @@ class ObjectDetector:
             crop = frame[:, x_start:x_end]
             ch, cw = crop.shape[:2]
 
-            # end2end 模型:resize + NCHW
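+            # end2end model: letterbox preprocessing; the ONNX session is fed NCHW, the RKNN runtime is fed the untransposed (channel-last) array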
             if self.is_end2end:
-                img = cv2.resize(crop, (640, 640))
-                img = img.astype(np.float32) / 255.0
+                canvas, scale, pad_w, pad_h, ch, cw = self._letterbox(crop)
+                img = canvas[..., ::-1].astype(np.float32) / 255.0
                 if hasattr(self, 'rknn'):
-                    outputs = self.rknn.inference(inputs=[img.transpose(2, 0, 1)[None, ...]])
+                    outputs = self.rknn.inference(inputs=[img[None, ...]])
                 else:
                     outputs = self.session.run(None, {self.input_name: img.transpose(2, 0, 1)[None, ...]})
                 output = outputs[0]
@@ -1050,16 +1050,17 @@ class ObjectDetector:
                     output = output[0]
                 dets = []
                 for i in range(output.shape[0]):
-                    x1, y1, x2, y2, conf, cls_id = output[i]
+                    x1_lb, y1_lb, x2_lb, y2_lb, conf, cls_id = output[i]
                     if conf < conf_threshold:
                         continue
                     cls_name = class_map.get(int(cls_id), str(int(cls_id)))
                     if cls_name not in self.config['target_classes']:
                         continue
-                    _x1 = int(x1 * cw / 640)
-                    _y1 = int(y1 * ch / 640)
-                    _x2 = int(x2 * cw / 640)
-                    _y2 = int(y2 * ch / 640)
+                    # 从 letterbox 空间映射回 crop 坐标
+                    _x1 = int((x1_lb - pad_w) / scale)
+                    _y1 = int((y1_lb - pad_h) / scale)
+                    _x2 = int((x2_lb - pad_w) / scale)
+                    _y2 = int((y2_lb - pad_h) / scale)
                     dets.append([_x1, _y1, _x2, _y2, float(conf), int(cls_id)])
             else:
                 canvas, scale, pad_w, pad_h, ch, cw = self._letterbox(crop)

+ 7 - 4
dual_camera_system/third_party_pusher.py

@@ -89,11 +89,14 @@ def _convert_to_legacy_batch_info(new_info: Dict[str, Any]) -> Dict[str, Any]:
         # PTZ 流检测时,把同一张 PTZ 图作为每个人的特写图复用
         # 始终包含 ptz_position 字段,第三方平台要求必须有 pan/tilt/zoom 数值
         if is_ptz:
-            # 使用实际检测时的 PTZ 位置
+            # 使用实际检测时的 PTZ 位置(若无实际位置,用默认值 0/0/1)
+            ptz_pan = ptz_position.get("pan") if isinstance(ptz_position, dict) else 0
+            ptz_tilt = ptz_position.get("tilt") if isinstance(ptz_position, dict) else 0
+            ptz_zoom = ptz_position.get("zoom") if isinstance(ptz_position, dict) else 1
             person["ptz_position"] = {
-                "pan": ptz_position.get("pan") if isinstance(ptz_position, dict) else ptz_position,
-                "tilt": ptz_position.get("tilt") if isinstance(ptz_position, dict) else None,
-                "zoom": ptz_position.get("zoom") if isinstance(ptz_position, dict) else None,
+                "pan": ptz_pan if ptz_pan is not None else 0,
+                "tilt": ptz_tilt if ptz_tilt is not None else 0,
+                "zoom": ptz_zoom if ptz_zoom is not None else 1,
             }
             person["ptz_bbox"] = {"x1": x1, "y1": y1, "x2": x2, "y2": y2}
             person["ptz_image_saved"] = bool(marked_path and os.path.exists(marked_path))