Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adjust scale before running recognition model #91

Merged
merged 1 commit into from
Oct 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CardScan/Classes/FindFourOcr.swift
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ struct FindFourOcr {
numCols: kDetectionModelCols)

var lines = postDetectionAlgorithm.horizontalNumbers()
var (number, numberBoxes, detectedCard) = recognizeNumbers.number(lines: lines)
var (number, numberBoxes, detectedCard) = recognizeNumbers.number(lines: lines, useScale: true)
var didDetectCard = detectedCard
if number == nil {
let verticalLines = postDetectionAlgorithm.verticalNumbers()
Expand Down
45 changes: 41 additions & 4 deletions CardScan/Classes/RecognizeNumbers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,26 @@ struct RecognizeNumbers {
self.recognizedDigits = Array(repeating: Array(repeating: nil, count: numCols), count: numRows)
}

func calculateScale(line: [DetectedBox]) -> Double? {
if line.count != 4 {
return nil
}

let numberMinX = line.map({ $0.rect.minX }).min() ?? 0.0
let numberMaxX = line.map({ $0.rect.maxX }).max() ?? 0.0
let numberWidth = numberMaxX - numberMinX
let boxWidth = line.first?.rect.width ?? 1.0
let scale = Double(numberWidth * 1.2 / (boxWidth * 4.0))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please explain this line? Why 1.2?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, does it apply to Amex cards?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I evaluated this empirically. On our validation set from TestOcr the frame prediction recall went from 54% to 64% and on our test set it went from 44% to 65%. The larger increase in testing is likely due to the fact that our testing set has more embossed cards, which appear to benefit more from this optimization.

So in other words, I pulled it out of thin air, but it does seem to work :)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It only applies to horizontal 16-digit cards. Check out the changes to FindFourOcr.swift: if you expand the hidden parts, you'll see that when it calls the number method for vertical and Amex cards, useScale takes its default value (i.e., false).


if (scale <= 0.0) {
return nil
}

return scale
}

@available(iOS 11.2, *)
mutating func number(lines: [[DetectedBox]]) -> (String?, [CGRect]?, Bool) {
mutating func number(lines: [[DetectedBox]], useScale: Bool = false) -> (String?, [CGRect]?, Bool) {
let maxRow = lines.map { $0.map { $0.row }}.flatMap { $0 }.max() ?? 0
let maxCol = lines.map { $0.map { $0.col }}.flatMap { $0 }.max() ?? 0

Expand All @@ -40,8 +58,10 @@ struct RecognizeNumbers {
var candidateNumber = ""
var detectedDigitsCount = 0

let scale: Double? = useScale ? calculateScale(line: line) : nil

for word in line {
guard let recognized = self.cachedDigits(box: word) else {
guard let recognized = self.cachedDigits(box: word, scale: scale) else {
return (nil, nil, false)
}

Expand All @@ -66,10 +86,15 @@ struct RecognizeNumbers {
}

@available(iOS 11.2, *)
mutating func cachedDigits(box: DetectedBox) -> RecognizedDigits? {
mutating func cachedDigits(box: DetectedBox, scale: Double? = nil) -> RecognizedDigits? {
var recognizedDigits: RecognizedDigits? = nil
if self.recognizedDigits[box.row][box.col] == nil {
recognizedDigits = RecognizedDigits.from(image: self.image, within: box.rect)

if let scale = scale {
recognizedDigits = RecognizedDigits.from(image: self.image, within: box.rect.scale(scale))
} else {
recognizedDigits = RecognizedDigits.from(image: self.image, within: box.rect)
}
self.recognizedDigits[box.row][box.col] = recognizedDigits
} else {
recognizedDigits = self.recognizedDigits[box.row][box.col]
Expand Down Expand Up @@ -143,3 +168,15 @@ struct RecognizeNumbers {
return (nil, nil)
}
}

extension CGRect {
    /// Returns a rectangle whose width and height are multiplied by `scale`
    /// while its center stays at the center of this rectangle.
    ///
    /// - Parameter scale: Multiplier applied to both the width and the height.
    /// - Returns: A new rectangle with the scaled size, positioned so that its
    ///   center coincides with the center of the receiver.
    func scale(_ scale: Double) -> CGRect {
        // Center of the receiver, computed before any resizing.
        let centerX = Double(minX + width * 0.5)
        let centerY = Double(minY + height * 0.5)

        // Dimensions of the resized rectangle.
        let scaledWidth = Double(width) * scale
        let scaledHeight = Double(height) * scale

        // Shift the origin back by half of the new size to keep the center fixed.
        return CGRect(
            x: centerX - scaledWidth * 0.5,
            y: centerY - scaledHeight * 0.5,
            width: scaledWidth,
            height: scaledHeight
        )
    }
}
6 changes: 3 additions & 3 deletions CardScan/Classes/VideoFeed.swift
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ class VideoFeed {
}
session.addOutput(videoDeviceOutput)

if session.canSetSessionPreset(.iFrame960x540) {
session.sessionPreset = .iFrame960x540
}
if session.canSetSessionPreset(.high) {
session.sessionPreset = .high
}

let connection = videoDeviceOutput.connection(with: .video)
if connection?.isVideoOrientationSupported ?? false {
Expand Down
Loading