source: liacs/MIR2010/SourceCode/main.cpp@ 97

Last change on this file since 97 was 97, checked in by Rick van der Zwet, 15 years ago

New classifier block detection. Not yet optimized for blocks

File size: 17.5 KB
Line 
1// main.cpp : Defines the entry point for the console application.
2//
3
4#include "config.h"
5#include <commdlg.h>
6#include <conio.h>
7#include <iostream>
8
9#include "cximage/ximage.h"
10#include "xbrowseforfolder.h"
11// function prototypes
12bool CalculateDescriptors(const char *basedir);
13bool CategorizeDescriptors(const char *basedir);
14bool DetermineWinterSportSelect();
15bool DetermineWinterSportBatch(const char *basedir);
16bool LoadAverages(const char *basedir);
17
18#define TRAINING_DIR_DEBUG "C:\\Documents and Settings\\rvdzwet\\Desktop\\liacs\\MIR2010\\trainingSet"
19#define TESTSET_DIR_DEBUG "C:\\Documents and Settings\\rvdzwet\\Desktop\\liacs\\MIR2010\\testSet"
20#define DEBUG 0
// Number of bins to use for each color channel.
// Note: BIN_COUNT must divide 256 evenly. With 256 bins per channel a single
// histogram would take 64MB (256*256*256 floats), so we choose a more
// sensible number.
25#define BIN_COUNT 32
26
27// names of categories
28#define CATEGORY_SIZE 8
29#define CATEGORY_1 "cat1.crowd"
30#define CATEGORY_2 "cat2.skijump"
31#define CATEGORY_3 "cat3.snowboarding"
32#define CATEGORY_4 "opt1.bobsleigh"
33#define CATEGORY_5 "opt2.icehockey"
34#define CATEGORY_6 "opt3.speedskating"
35#define CATEGORY_7 "opt4.downhillski"
36#define CATEGORY_8 "opt5.curling"
37
38const char *categories[CATEGORY_SIZE] = { CATEGORY_1, CATEGORY_2, CATEGORY_3,
39 CATEGORY_4, CATEGORY_5, CATEGORY_6, CATEGORY_7, CATEGORY_8};
40float average[CATEGORY_SIZE][BIN_COUNT*BIN_COUNT*BIN_COUNT];
41
42
// Find runs of similarly coloured pixels as an image characteristic.
// Walk over the image in steps of SPREAD pixels; if a pixel matches the previous
// one (within TOLERANCE), make the current block bigger.
46#define TOLERANCE 20
47#define SPREAD 20
48#define MAX_SIZE 500
49float average_block[CATEGORY_SIZE][MAX_SIZE/SPREAD];
50
51
// Small helper macros for error reporting and menu output.
// Each one is wrapped in do { ... } while (0) so that it expands to exactly one
// statement and stays correct inside an unbraced if/else; the caller supplies
// the trailing semicolon.

// Print an error message to stdout.
#define p_err(err_msg) do { printf("ERROR: %s\n", err_msg); } while (0)
// Print a debug message, wait for a character on stdin, then return retval
// from the enclosing function.
#define _return(err_msg, retval) do { printf("DEBUG: %s\n", err_msg); cin.get(); return (retval); } while (0)
// Print msg followed by " off" when flag is set and " on" otherwise, i.e. the
// state the option would be switched to.
#define cout_status(msg, flag) do { cout << msg; (flag) ? cout << " off" : cout << " on"; cout << endl; } while (0)
56
57
58bool opt_verbose = false;
59bool opt_histogram = true;
60bool opt_block = true;
61
// Report whether the file at filename can be opened for reading.
// Returns true if fopen succeeds (the handle is closed again), false otherwise.
bool file_exists(const char * filename)
{
    FILE *probe = fopen(filename, "r");
    if (probe == NULL)
        return false;

    fclose(probe);
    return true;
}
71
72
73int main(int argc, char **argv)
74{
75 char dbdir[MAX_PATH];
76 char testdir[MAX_PATH];
77
78#if defined(TRAINING_DIR_DEBUG)
79 if (strcpy_s(dbdir, TRAINING_DIR_DEBUG) != 0) {
80 _return("Cannot copy TRAINING_DIR_DEBUG",1);
81 }
82 if (strcpy_s(testdir, TESTSET_DIR_DEBUG) != 0) {
83 _return("Cannot copy TESTSET_DIR_DEBUG",2);
84 }
85#else
86 // ask the user for the image database
87 // in the image directory, the images are stored in categorized
88 // folders. store the descriptors we calculate in the same
89 // folder as the image
90 if (XBrowseForFolder(NULL, "Please select image database folder", NULL, dbdir, sizeof(dbdir)) == FALSE)
91 return 0;
92 if (strlen(dbdir) == 0)
93 return 0;
94 if (XBrowseForFolder(NULL, "Please select image testset folder", NULL, testdir, sizeof(testdir)) == FALSE)
95 return 0;
96 if (strlen(testdir) == 0)
97 return 0;
98
99#endif
100
101#if DEBUG
102 if (!LoadAverages(dbdir)){
103 _return("Unable to load averages",1);
104 }
105
106 if (!DetermineWinterSportBatch(testdir)) {
107 _return("could not run winter sport batch",1);
108 }
109 return(0);
110#endif
111
112 // ask the user which option to use
113 while (true)
114 {
115 //system("cls");
116 cout << "Using database directory: " << dbdir << endl;
117 cout << "Using test directory: " << testdir << endl;
118 cout << "Using categories: " << endl;
119 for (int i = 0; i < CATEGORY_SIZE; i++)
120 {
121 cout << " - " << categories[i] << endl;
122 }
123 cout << "***************************" << endl;
124 cout << "* Winter Olympic Imagery *" << endl;
125 cout << "***************************" << endl;
126 cout << endl;
127 cout << "1. calculate descriptors" << endl;
128 cout << "2. categorize descriptors (aka averages histograms)" << endl;
129 cout << "3. determine winter sport" << endl;
130 cout << "4. batch test winter sport" << endl;
131 cout_status("v. Turn verbose mode", opt_verbose);
132 cout_status("h. Turn histogram classifier", opt_histogram);
133 cout_status("b. Turn block classifier", opt_block);
134
135 cout << endl;
136 cout << "Please select option, or type 'q' to quit: ";
137 char c = _getch();
138 cout << c << endl;
139 fflush(stdin);
140 // start the chosen option
141 switch (c)
142 {
143 case 'q':
144 return 0;
145 case '1':
146 if (!CalculateDescriptors(dbdir)) {
147 _return("could not calculate descriptors",1);
148 }
149 break;
150 case '2':
151 if (!CategorizeDescriptors(dbdir)){
152 _return("could not categorize descriptors",1);
153 }
154 break;
155 case '3':
156 if (!LoadAverages(dbdir)){
157 _return("Unable to load averages",1);
158 }
159
160 if (!DetermineWinterSportSelect()){
161 _return("could not determine winter sport",1);
162 }
163 break;
164 case '4':
165 if (!LoadAverages(dbdir)){
166 _return("Unable to load averages",1);
167 }
168
169 if (!DetermineWinterSportBatch(testdir)) {
170 _return("could not run winter sport batch",1);
171 }
172 break;
173 case 'v':
174 opt_verbose = (!opt_verbose);
175 break;
176 case 'b':
177 opt_block = (!opt_block);
178 break;
179 case 'h':
180 opt_histogram = (!opt_histogram);
181 break;
182 default:
183 continue;
184 }
185 }
186 return 0;
187}
188
189// histogram should be a preallocated array of size BIN_COUNT*BIN_COUNT*BIN_COUNT elements and will
190// be filled with the color histogram of the image where path points at
191bool CalculateDescriptor(const char *path, float *histogram)
192{
193 // load the image
194 CxImage image(path, CXIMAGE_FORMAT_JPG);
195 if (!image.IsValid())
196 return false;
197 // clear histogram
198 memset(histogram, 0, BIN_COUNT*BIN_COUNT*BIN_COUNT * sizeof(float));
199 // walk through the pixels to fill the histogram
200 int width = (int)image.GetWidth();
201 int height = (int)image.GetHeight();
202 int bin_r, bin_g, bin_b;
203 for (int y = 0; y < height; y++)
204 {
205 for (int x = 0; x < width; x++)
206 {
207 // Note: CxImage library starts counting at lower-left corner of the image,
208 // which is seen as the top of the image. however, usually images
209 // start counting from the top-left corner of the image. thus if you
210 // want to get pixel(2, 2) from the top-left you would have to ask
211 // for pixel (2, height - 2 - 1) from CxImage. although in this
212 // situation we don't care which pixel is where, we only care about
213 // its color.
214 RGBQUAD rgb = image.BlindGetPixelColor(x, y, false);
215 // determine the bin this color falls in
216 bin_r = rgb.rgbRed / (256 / BIN_COUNT);
217 bin_g = rgb.rgbGreen / (256 / BIN_COUNT);
218 bin_b = rgb.rgbBlue / (256 / BIN_COUNT);
219 histogram[bin_r*BIN_COUNT*BIN_COUNT + bin_g*BIN_COUNT + bin_b]++;
220 }
221 }
222 // normalize the histogram so that all together the values will add up
223 // to one. since there are width * height pixels, we divide each value
224 // by this amount
225 for (int i = 0; i < BIN_COUNT*BIN_COUNT*BIN_COUNT; i++)
226 histogram[i] /= width * height;
227 return true;
228}
229
230// histogram should be a preallocated array of size BIN_COUNT*BIN_COUNT*BIN_COUNT elements and will
231// be filled with the color histogram of the image where path points at
232bool CalculateBlock(const char *path, float *block)
233{
234 // load the image
235 CxImage image(path, CXIMAGE_FORMAT_JPG);
236 if (!image.IsValid())
237 return false;
238 // clear histogram
239 memset(block, 0, MAX_SIZE/SPREAD * sizeof(float));
240 // walk through the pixels to fill the histogram
241 const int width = (int)image.GetWidth();
242 const int height = (int)image.GetHeight();
243
244 int rgb_value = 0;
245 int rgb_prev = 0;
246
247 int block_size = 0;
248
249 for (int y = 0; y < height; y += SPREAD)
250 {
251 for (int x = 0; x < width; x += SPREAD)
252 {
253 // Note: CxImage library starts counting at lower-left corner of the image,
254 // which is seen as the top of the image.
255 RGBQUAD rgb = image.BlindGetPixelColor(x, y, false);
256 rgb_value = (rgb.rgbRed + rgb.rgbBlue + rgb.rgbGreen);
257
258 if (abs(rgb_value - rgb_prev) > TOLERANCE) {
259 block[block_size]++;
260 block_size = 1;
261 }
262 rgb_prev = rgb_value;
263 }
264 }
265 return true;
266}
267
268bool CalculateDescriptors(const char *basedir)
269{
270 // the histogram that we reuse for each image
271 float *histogram = new float[BIN_COUNT*BIN_COUNT*BIN_COUNT];
272 float *block = new float[MAX_SIZE/SPREAD];
273 // walk through all images
274 // Note: each of the three categories has 50 images
275 char path[MAX_PATH];
276 char catdir[MAX_PATH];
277 const char *catname;
278 FILE *file = NULL;
279 for (int c = 0; c < CATEGORY_SIZE; c++)
280 {
281 catname = categories[c];
282 sprintf(catdir, "%s\\%s\\", basedir, catname);
283 cout << "[" << catname << "] Using directory " << catdir << endl;
284
285 // process the images in the directory
286 for (int i = 1; i <= 50; i++)
287 {
288 SAFE_SPRINTF(path, sizeof(path), "%s%i.jpg", catdir, i);
289 if (!file_exists(path)) {
290 continue;
291 }
292 cout << "[" << catname << "] processing image " << i << endl;
293 // calculate the histogram descriptor
294 if (!CalculateDescriptor(path, histogram))
295 goto failure;
296
297 if (!CalculateBlock(path, block))
298 goto failure;
299
300 // save the descriptor,block to disk
301 SAFE_SPRINTF(path, sizeof(path), "%s%i.dat", catdir, i);
302 if ((file = fopen(path, "wb")) == NULL)
303 goto failure;
304 if (fwrite(histogram, sizeof(float), BIN_COUNT*BIN_COUNT*BIN_COUNT, file) != BIN_COUNT*BIN_COUNT*BIN_COUNT)
305 goto failure;
306 if (fwrite(block, sizeof(float), MAX_SIZE/SPREAD, file) != MAX_SIZE/SPREAD)
307 goto failure;
308 SAFE_CLOSEFILE(file);
309
310
311 }
312 }
313 // release resources
314 SAFE_DELETE_ARRAY(histogram);
315 SAFE_DELETE_ARRAY(block);
316 return true;
317
318failure:
319 SAFE_CLOSEFILE(file);
320 SAFE_DELETE_ARRAY(histogram);
321 SAFE_DELETE_ARRAY(block);
322 return false;
323}
324
325bool CategorizeDescriptors(const char *basedir)
326{
327 // analyze the descriptors per category to determine the
328 // characteristics of that category
329 float *histogram = new float[BIN_COUNT*BIN_COUNT*BIN_COUNT];
330 float *block = new float[MAX_SIZE/SPREAD];
331 float *average_block = new float[MAX_SIZE/SPREAD];
332 float *average = new float[BIN_COUNT*BIN_COUNT*BIN_COUNT];
333 // walk through all descriptors
334 char path[MAX_PATH];
335 char catdir[MAX_PATH];
336 const char *catname;
337 FILE *file = NULL;
338 int c_size = 0;
339 for (int c = 0; c < CATEGORY_SIZE; c++)
340 {
341 c_size = 0;
342 catname = categories[c];
343 sprintf(catdir,"%s\\%s\\", basedir, catname);
344
345 // average all descriptors
346 memset(average, 0, BIN_COUNT*BIN_COUNT*BIN_COUNT * sizeof(float));
347 for (int i = 1; i <= 50; i++)
348 {
349 SAFE_SPRINTF(path, sizeof(path), "%s%i.dat", catdir, i);
350 if (!file_exists(path)) {
351 p_err("File does not exists");
352 continue;
353 }
354 cout << "[" << catname << "] processing image " << i << endl;
355 // load the histogram descriptor
356 if ((file = fopen(path, "rb")) == NULL) {
357 p_err("Cannot open average datafile");
358 goto failure;
359 }
360 if (fread(histogram, sizeof(float), BIN_COUNT*BIN_COUNT*BIN_COUNT, file) != BIN_COUNT*BIN_COUNT*BIN_COUNT) {
361 p_err("Cannot read histogram");
362 goto failure;
363 }
364 if (fread(block, sizeof(float), MAX_SIZE/SPREAD, file) != MAX_SIZE/SPREAD) {
365 p_err("Cannot read block");
366 goto failure;
367 }
368
369 SAFE_CLOSEFILE(file);
370 // add the value of each bin to the average
371 for (int b = 0; b < BIN_COUNT*BIN_COUNT*BIN_COUNT; b++)
372 average[b] += histogram[b];
373
374 for (int b = 0; b < MAX_SIZE/SPREAD; b++)
375 average_block[b] += block[b];
376
377 c_size++;
378 }
379
380 for (int b = 0; b < BIN_COUNT*BIN_COUNT*BIN_COUNT; b++)
381 average[b] /= c_size;
382
383 for (int b = 0; b < MAX_SIZE/SPREAD; b++)
384 average_block[b] /= c_size;
385
386 // save the average to disk
387 SAFE_SPRINTF(path, sizeof(path), "%s%s.dat", catdir, "average");
388 if ((file = fopen(path, "wb")) == NULL)
389 goto failure;
390 if (fwrite(average, sizeof(float), BIN_COUNT*BIN_COUNT*BIN_COUNT, file) != BIN_COUNT*BIN_COUNT*BIN_COUNT)
391 goto failure;
392 if (fwrite(average_block, sizeof(float), MAX_SIZE/SPREAD, file) != MAX_SIZE/SPREAD)
393 goto failure;
394
395 SAFE_CLOSEFILE(file);
396 }
397 // release resources
398 SAFE_DELETE_ARRAY(histogram);
399 SAFE_DELETE_ARRAY(average);
400 SAFE_DELETE_ARRAY(block);
401 return true;
402
403failure:
404 SAFE_CLOSEFILE(file);
405 SAFE_DELETE_ARRAY(histogram);
406 SAFE_DELETE_ARRAY(block);
407 return false;
408}
409
410bool LoadAverages(const char *basedir) {
411 /* determine the distance to each category */
412 const char *catname;
413 char catdir[MAX_PATH];
414 char path[MAX_PATH];
415 FILE *file = NULL;
416
417 for (int c = 0; c < CATEGORY_SIZE; c++)
418 {
419 catname = categories[c];
420 sprintf(catdir, "%s\\%s\\", basedir, catname);
421
422 // load the average from disk
423 SAFE_SPRINTF(path, sizeof(path), "%s%s.dat", catdir, "average");
424 if ((file = fopen(path, "rb")) == NULL) {
425 cout << "Cannot open " << path << endl;
426 return false;
427 }
428 if (fread(average[c], sizeof(float), BIN_COUNT*BIN_COUNT*BIN_COUNT, file) != BIN_COUNT*BIN_COUNT*BIN_COUNT)
429 return false;
430 if (fread(average_block[c], sizeof(float), MAX_SIZE/SPREAD, file) != MAX_SIZE/SPREAD)
431 return false;
432
433 SAFE_CLOSEFILE(file);
434 }
435 return true;
436}
437
438int DetermineCategory(const char *path, const int guess=-1, const bool verbose=false) {
439 float *histogram = new float[BIN_COUNT*BIN_COUNT*BIN_COUNT];
440 float *block = new float[MAX_SIZE/SPREAD];
441 float cat2dist[CATEGORY_SIZE];
442 float cat2block[CATEGORY_SIZE];
443
444 /* First category default best canidate */
445 int cat_histogram = 0;
446 int cat_block = 0;
447
448 /* calculate the histogram of the image */
449 if (!CalculateDescriptor(path, histogram))
450 return -1;
451
452 if (!CalculateBlock(path, block))
453 return -1;
454
455 /* determine the distance to each category */
456 for (int c = 0; c < CATEGORY_SIZE; c++)
457 {
458 // determine distance
459 cat2dist[c] = 0.0f;
460 cat2block[c] = 0.0f;
461 for (int b = 0; b < BIN_COUNT*BIN_COUNT*BIN_COUNT; b++)
462 cat2dist[c] += fabs(histogram[b] - average[c][b]);
463
464 for (int b = 0; b < MAX_SIZE/SPREAD; b++)
465 cat2block[c] += fabs(block[b] - average_block[c][b]);
466 }
467
468 /* determine the winning category */
469 for (int i = 1; i < CATEGORY_SIZE; i++) {
470 if (cat2dist[i] < cat2dist[cat_histogram])
471 cat_histogram = i;
472 if (cat2block[i] < cat2block[cat_block])
473 cat_block = i;
474 }
475
476 if (verbose) {
477 /* Dirty hack to show some more details in case of failure */
478 if (opt_histogram && guess != -1 && guess != cat_histogram) {
479 for (int i = 0; i < CATEGORY_SIZE; i++) {
480 printf("%s [histogram] distance to %-20s: %f %s\n", (cat_histogram == i) ? "*" : " ",
481 categories[i], cat2dist[i],(cat_histogram == i) ? "*" : "");
482 }
483 }
484 if (opt_block && guess != -1 && guess != cat_block) {
485 for (int i = 0; i < CATEGORY_SIZE; i++) {
486 printf("%s [block] distance to %-20s: %f %s\n", (cat_block == i) ? "*" : " ",
487 categories[i], cat2block[i],(cat_block == i) ? "*" : "");
488 }
489 }
490
491 }
492
493 /* return result */
494 if (opt_histogram) {
495 return cat_histogram;
496 }
497 else if (opt_block) {
498 return cat_block;
499 } else {
500 return -1;
501 }
502}
503
504/* ask for an input image and determine the most likely
505* category it belongs to
506*/
507bool DetermineWinterSportSelect()
508{
509 float *histogram = new float[BIN_COUNT*BIN_COUNT*BIN_COUNT];
510 char path[MAX_PATH] = {0};
511 char catdir[MAX_PATH] = {0};
512 const char *catname = NULL;
513 FILE *file = NULL;
514 int c = NULL;
515 float cat2dist[CATEGORY_SIZE] = {0};
516
517 OPENFILENAME ofn = {0};
518 //ofn.lpstrFilter = "Image files\0*.jpg;*.png;*.bmp\0\0";
519 ofn.lpstrFilter = "Image files\0*.jpg\0\0";
520 ofn.lpstrFile = path;
521 ofn.nMaxFile = MAX_PATH;
522 ofn.lpstrTitle = "Choose image file";
523 ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST;
524 ofn.lStructSize = sizeof(OPENFILENAME);
525
526 if (GetOpenFileName(&ofn) == FALSE)
527 goto failure;
528
529 if ((c = DetermineCategory(path,-2)) == -1)
530 return false;
531
532 cout << "The category this image belongs is category: " << categories[c] << " [" << c << "]" << endl;
533 cout << "Press any key to continue... ";
534 _getch();
535 fflush(stdin);
536 // release resources
537 SAFE_DELETE_ARRAY(histogram);
538 return true;
539
540failure:
541 SAFE_CLOSEFILE(file);
542 SAFE_DELETE_ARRAY(histogram);
543 return false;
544}
545
546bool DetermineWinterSportBatch(const char *basedir)
547{
548 const char *catname;
549 char catdir[MAX_PATH];
550 char path[MAX_PATH];
551
552 int all_total = 0;
553 int all_succes = 0;
554 int c_total[CATEGORY_SIZE] = {0};
555 int c_succes[CATEGORY_SIZE] = {0};
556
557 /* determine the distance to each category */
558 for (int c = 0; c < CATEGORY_SIZE; c++)
559 {
560 catname = categories[c];
561 sprintf(catdir, "%s\\%s\\", basedir, catname);
562
563 /* process the images in the directory */
564 for (int i = 1; i <= 50; i++)
565 {
566 SAFE_SPRINTF(path, sizeof(path), "%s%i.jpg", catdir, i);
567 if (!file_exists(path)) {
568 continue;
569 }
570
571 c_total[c]++;
572 /* Check if file matches category */
573 if (DetermineCategory(path,c) == c) {
574 cout << "[" << catname << "] testing image " << i << " : OK" << endl;
575 c_succes[c]++;
576 } else {
577 cout << "[" << catname << "] testing image " << i << " : FAIL" << endl;
578 DetermineCategory(path,c,opt_verbose);
579 }
580 }
581 cout << "[" << catname << "] results " << c_succes[c] << "/" << c_total[c] << endl;
582 }
583
584 /* Display grand total */
585 cout << "=== Totals ===" << endl;
586 cout << "Clasifier used: ";
587 if (opt_histogram)
588 cout << "histogram";
589 else if (opt_block)
590 cout << "block";
591 cout << endl;
592
593 for (int c = 0; c < CATEGORY_SIZE; c++)
594 {
595 catname = categories[c];
596 printf ("[%-20s] %i/%i\n",catname,c_succes[c],c_total[c]);
597 all_total += c_total[c];
598 all_succes += c_succes[c];
599 }
600 printf ("[%-20s] %i/%i\n","total",all_succes,all_succes);
601 cout << "Press any key to continue..."; cin.get();
602
603
604 return true;
605}
Note: See TracBrowser for help on using the repository browser.