我正在使用 Vision 来检测物体,然后得到[VNRecognizedObjectObservation]
我在显示标准化矩形之前对其进行转换:
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -CGFloat(height))
VNImageRectForNormalizedRect(normalizedRect, width, height) // Displayed with SwiftUI, that's why I'm applying transform
.applying(transform)
宽度和高度来自 SwiftUI GeometryReader:
Image(...)
.resizable()
.scaledToFit()
.overlay {
GeometryReader { geometry in // ZStack and ForEach([VNRecognizedObjectObservation], id: \.uuid), then:
let calculatedRect = calculateRect(boundingBox, geometry)
Rectangle()
.frame(width: calculatedRect.width, height: calculatedRect.height)
.offset(x: calculatedRect.origin.x, y: calculatedRect.origin.y)
}
}
但问题是,即使在方形图像上,许多框的位置也不正确(尽管有些是准确的)。
这与模型无关,因为当我在 Xcode 模型预览部分尝试相同的图像(使用相同的 MLModel)时,它们具有非常准确的 BB。
我的应用程序中的示例图像:
Xcode 预览中的示例图像:
更新(最小可重现示例):
里面有这段代码ContentView.swift
as a macOS SwiftUI
项目同时拥有YOLOv3Tiny.ml模型在项目捆绑包中将产生相同的结果。
import SwiftUI
import Vision
import CoreML
class Detection: ObservableObject {
let imgURL = URL(string: "https://i.imgur.com/EqsxxTc.jpg")! // Xcode preview generates this: https://i.imgur.com/6IPNQ8b.png
@Published var objects: [VNRecognizedObjectObservation] = []
func getModel() -> VNCoreMLModel? {
if let modelURL = Bundle.main.url(forResource: "YOLOv3Tiny", withExtension: "mlmodelc") {
if let mlModel = try? MLModel(contentsOf: modelURL, configuration: MLModelConfiguration()) {
return try? VNCoreMLModel(for: mlModel)
}
}
return nil
}
func detect() async {
guard let model = getModel(), let tiff = NSImage(contentsOf: imgURL)?.tiffRepresentation else {
fatalError("Either YOLOv3Tiny.mlmodel is not in project bundle, or image failed to load.")
// YOLOv3Tiny: https://ml-assets.apple.com/coreml/models/Image/ObjectDetection/YOLOv3Tiny/YOLOv3Tiny.mlmodel
}
let request = VNCoreMLRequest(model: model) { (request, error) in
DispatchQueue.main.async {
self.objects = (request.results as? [VNRecognizedObjectObservation]) ?? []
}
}
try? VNImageRequestHandler(data: tiff).perform([request])
}
func deNormalize(_ rect: CGRect, _ geometry: GeometryProxy) -> CGRect {
let transform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -CGFloat(geometry.size.height))
return VNImageRectForNormalizedRect(rect, Int(geometry.size.width), Int(geometry.size.height)).applying(transform)
}
}
struct ContentView: View {
@StateObject var detection = Detection()
var body: some View {
AsyncImage(url: detection.imgURL) { img in
img.resizable().scaledToFit().overlay {
GeometryReader { geometry in
ZStack {
ForEach(detection.objects, id: \.uuid) { object in
let rect = detection.deNormalize(object.boundingBox, geometry)
Rectangle()
.stroke(lineWidth: 2)
.foregroundColor(.red)
.frame(width: rect.width, height: rect.height)
.offset(x: rect.origin.x, y: rect.origin.y)
}
}
}
}
} placeholder: {
ProgressView()
}
.onAppear {
Task { await self.detection.detect() }
}
}
}
Edit:进一步的测试表明 VN 返回正确的位置,而我的deNormalize()
函数还返回正确的位置和大小,因此它必须与 SwiftUI 相关。