Bill cropper
Automatic tool for cropping bills from a scan
What is it?
We have this project at work that is intended for small and medium-sized companies. It covers much more than just simple accounting. It has many cool features that I'm not going to write about, except one: automatic processing of bills and invoices. You can do it via a mobile client (Android, iOS) or a web application.
What to improve?
We ran into a problem: people scanned several bills together in a single image instead of following the "one image - one document" rule. So I decided to create a small script to recognize these multi-bill images and crop them into separate images.
Algorithm
To crop the bills out of the images I used a few simple image preprocessing methods. First, I remove the noise. After that I convert the image to binary and separate it into white areas. Finally, I find the white areas that are large enough and compute a bounding box for each of them. I think the code below is self-explanatory.
- 1. RESIZE
- 2. IMOPEN - MORPHOLOGICAL OPERATION
- 3. IMG TO BINARY
- 4. BLOB DETECTION
- 5. FIND BIGGEST BLOB
- 6. GET BOUNDING BOX
- 7. PERCENTAGE LIMIT
- 8. CROP WITH RESIZE
Requirements
- GNU Octave, version 3.6.4
- Octave with image package
#! /bin/octave -qf
# a sample Octave program
% @Author Miroslav Bodis
% created: 2014-10-04
% updated: 2015-10-04
% -- -- CUT BILL/BILLS FROM PICTURE -- --
% 0. VALIDATE INPUT
% 1. RESIZE
% 2. IMOPEN - MORPHOLOGICAL OPERATION
% 3. IMG TO BINARY
% 4. BLOB DETECTS
% 5. FIND BIGGEST BLOB
% 6. GET BOUNDING BOX
% 7. PERCENTAGE LIMIT
% 8. CROP WITH RESIZE
% Threshold passed to im2bw when the grayscale image is converted to binary.
IMG_TO_BINARY_TRASH = 0.3;
% Minimum number of pixels a connected component must have to be considered
% a bill candidate.
IMG_LIMIT_BILL_DETECT = 1000;
% Minimum share (in percent) of the resized image area that a candidate's
% bounding box must cover before it is cropped and saved.
IMG_LIMIT_BILL_PERCENT = 15;
% Maximum length in pixels of the longer image side used for detection;
% larger inputs are scaled down to this size first.
IMG_SIZE_LIMIT = 550;
% When true, intermediate results are written to debug_* image files.
DEBUG = false;
% DEBUG = true;
% 0 ---- VALIDATE INPUT
%------------------------------
% The script expects exactly one command-line argument: the path of the
% scanned image to process. On any validation failure a message is printed
% and the script stops before loading the image package.
arg_list = argv();
arg_count = numel(arg_list);

% no argument at all
if (arg_count == 0)
  printf("\nrequired 1 arg, input file \n");
  return;
endif

% more than one argument
if (arg_count != 1)
  printf("\nERROR invalid input, allowing only one image as input_file! \n\n");
  return;
endif

% The input must be an existing regular file. exist() without a type
% argument also reports variables, functions and directories (the argument
% "DEBUG" would match the variable defined above), so restrict the lookup to
% files and require the "regular file" result code 2.
if (exist(arg_list{1}, "file") != 2)
  fprintf("\n image %s not exists\n", arg_list{1});
  return;
endif

pkg load image;
% 1 ---- RESIZE
%------------------------------
% Read the scan and build a reduced working copy whose longer side is at
% most IMG_SIZE_LIMIT pixels. `resize` keeps the applied scale factor
% (1 when the image is already small enough) so that coordinates found on
% the working copy can be mapped back to the full-resolution image later.
img_rgb_full = imread(arg_list{1});

longest_side = max(size(img_rgb_full, 1), size(img_rgb_full, 2));
if (longest_side > IMG_SIZE_LIMIT)
  resize = IMG_SIZE_LIMIT / longest_side;
else
  resize = 1;
end

img_rgb = imresize(img_rgb_full, resize, 'nearest');

if (DEBUG)
  imwrite(img_rgb, 'debug_0_resize.jpg', 'jpg', 'Quality', 100);
end
% 2 ---- IMOPEN - MORPHOLOGICAL OPERATION
%------------------------------
% Convert the working copy to grayscale and apply a morphological opening
% (erosion followed by dilation with the same structuring element) to
% remove small bright details before thresholding.

% rgb2gray only accepts three-channel images; a scan that is already
% single-channel is used as it is instead of aborting the script.
if (ndims(img_rgb) == 3 && size(img_rgb, 3) == 3)
  gray = rgb2gray(img_rgb);
else
  gray = img_rgb(:, :, 1);
endif

% Smaller 7x7 structuring element, kept for experiments:
% disc = uint8 ([0 0 0 1 0 0 0
% 0 1 1 1 1 1 0
% 0 1 1 1 1 1 0
% 1 1 1 1 1 1 1
% 0 1 1 1 1 1 0
% 0 1 1 1 1 1 0
% 0 0 0 1 0 0 0]);

% 11x11 disc-shaped structuring element used for the opening.
disc = uint8 ([0 0 0 0 0 1 0 0 0 0 0
               0 0 0 1 1 1 1 1 0 0 0
               0 0 1 1 1 1 1 1 1 0 0
               0 1 1 1 1 1 1 1 1 1 0
               0 1 1 1 1 1 1 1 1 1 0
               1 1 1 1 1 1 1 1 1 1 1
               0 1 1 1 1 1 1 1 1 1 0
               0 1 1 1 1 1 1 1 1 1 0
               0 0 1 1 1 1 1 1 1 0 0
               0 0 0 1 1 1 1 1 0 0 0
               0 0 0 0 0 1 0 0 0 0 0]);

gray = imdilate (imerode(gray, disc), disc);

if (DEBUG)
  imwrite(gray, 'debug_1_opening.jpg', 'jpg', 'Quality', 100);
endif;
% 3 ---- IMG TO BINARY
%------------------------------
% Threshold the opened grayscale image into a black/white mask.
bw = im2bw(gray, IMG_TO_BINARY_TRASH);
if DEBUG
  imwrite(bw, 'debug_2_im2bw.jpg', 'jpg', 'Quality', 100);
end

% 4 ---- BLOB DETECTS
%------------------------------
% Label the connected white regions of the mask using 4-connectivity.
cc = bwconncomp(bw, 4);

% Handy snippets when inspecting the result by hand:
%   cc.NumObjects   % number of blobs
%   cc.ImageSize    % image size
%   -- write a single blob (here number 225) to a file
%   grain = false(size(bw));
%   grain(cc.PixelIdxList{225}) = true;
%   imwrite(grain, 'output.jpg', 'jpg', 'Quality', 100);
% 5 ---- FIND BIGGEST BLOB
%------------------------------
% Every connected component with more than IMG_LIMIT_BILL_DETECT pixels is
% treated as a bill candidate. For each candidate the bounding box is
% computed on the resized image, checked against IMG_LIMIT_BILL_PERCENT and,
% if large enough, cropped from the full-resolution image and saved as
% bill-1, bill-2, ...
% Throughout this section "x" is the row axis and "y" is the column axis.
%
% -- -- single biggest blob only (kept for reference)
% numPixels = cellfun(@numel,cc.PixelIdxList);
% [biggest,idx] = max(numPixels);

object = 0;
small_rows = size(img_rgb, 1);
small_cols = size(img_rgb, 2);
full_rows = size(img_rgb_full, 1);
full_cols = size(img_rgb_full, 2);
extra = 1 / resize;   % maps resized coordinates back to the full image

for j = 1:cc.NumObjects
  pixel_idx = cc.PixelIdxList{j};
  if (numel(pixel_idx) <= IMG_LIMIT_BILL_DETECT)
    continue;
  endif
  object += 1;

  % 6 ---- GET BOUNDING BOX
  %------------------------------
  % Take the extent of the component directly from its pixel list. Using
  % separate bwboundaries entries for the minimum and maximum is not
  % reliable: a second boundary only exists when the blob has a hole, and
  % then it describes that hole rather than the outline.
  [blob_rows, blob_cols] = ind2sub(size(bw), pixel_idx);
  fromx = min(blob_rows);
  tox   = max(blob_rows);
  fromy = min(blob_cols);
  toy   = max(blob_cols);

  if (DEBUG)
    % mask containing only the selected component
    blob_mask = false(size(bw));
    blob_mask(pixel_idx) = true;
    imwrite(blob_mask, 'debug_34_biggest_bloc.jpg', 'jpg', 'Quality', 100);

    % draw all boundaries of this component into one plot
    boundaries = bwboundaries(blob_mask);
    clf;
    hold on;
    for k = 1:numel(boundaries)
      thisBoundary = boundaries{k};
      plot(thisBoundary(:,2), thisBoundary(:,1), 'g', 'LineWidth', 2);
    endfor
    hold off;
    print("debug_5_boundbox_plot.png", "-dpng");
  endif

  % 7 ---- PERCENTAGE LIMIT
  %------------------------------
  l = tox - fromx;
  w = toy - fromy;
  blob_percent_area = (l * w) / (small_rows * small_cols) * 100;
  if (DEBUG)
    printf("blob %d covers %.2f%% of the image\n", object, blob_percent_area);
  endif

  if (blob_percent_area > IMG_LIMIT_BILL_PERCENT)
    % 8 ---- CROP WITH RESIZE
    %------------------------------
    % Scale the box back to full resolution and clamp it to the image,
    % since rounding after up-scaling can overshoot the last row/column.
    x1 = max(1, round(fromx * extra));
    x2 = min(full_rows, round(x1 + l * extra));
    y1 = max(1, round(fromy * extra));
    y2 = min(full_cols, round(y1 + w * extra));

    % the trailing ':' keeps all colour channels of the original image
    crop_img = img_rgb_full(x1:x2, y1:y2, :);

    name = sprintf("bill-%d", object); % bill-1, bill-2 ...
    imwrite(crop_img, name, 'jpg', 'Quality', 100);
  endif
endfor
Comments
Post a Comment