1 Important Params:
  2 def term: init = [0.01    0     0.01   0]
  3                   dx^2    dx    dy^2  dy
  4
  5                   regmult                   learnmult            lower bounds                  init
  6         filter :     1                        1                     -100                        0
  7 filter  bias   :     0                        0(20)                 -100     (root)             0
  8 filter  bias   :     0                        0(20)                 -100     (part)             0
  9 AND(*)  bias   :     0                       20                     -100                        0
 10         root df:     0                        0               [0.01 -100 0.01 -100]        [1000    0     1000   0]
 11         part df:     10                      0.1              [0.01 -100 0.01 -100]        [0.01    0     0.01   0]
 12
 13 *: in Leo's paper, no bias term, i.e. bias = 0
 14
 15
 16 Model initialization:
 17
 18 m.class       = cls;        % object class/category
 19 m.year        = VOCyear;    % dataset year (PASCAL specific)
m.note        = note;       % description of the model
 21 m.filters     = [];         % filters (terminals) array of structs:
 22   w: filter weights
 23   blocklabel: block label
              used to map to m.(blocksize,regmult,learnmult,lowerbounds)
  symmetric: 'M' vertically mirrored partner 'N' none
 26   size: 2d size
 27   flip: is this filter vertically flipped
 28   symbol: symbol index in m.symbols
 29   
 30   
 31 m.rules       = {};         % rules indexed by lhs
 32   m.rules{lhs} array of struct:
 33   type: 'S' structural rules 'D' deformation rules
 34   lhs: lhs
 35   rhs: rhs
 36   detwindows: det window size 1 x 2 (hxw)
 37   i: current index in m.rules{lhs}
 38   offset:
 39     offset.w: rule off-set weight
 40     offset.blocklabel: offsetbl index in model
 41   % only in 'S' rule
 42   anchor:
 43   % only in 'D' rule
 44   def.w: param [ 1000,0,1000,0] ? dx dx^2 dy dy^2
 45   def.blocklabel: blockindex corresponds to defblock
 46   flip: whether this rule is flipped filter [?]
  symmetric: 'M' or 'N' def rule with symmetric partner
 48   
 49 m.symbols     = [];         % grammar symbol table array of structs:
 50   type: 'N' nonterminal 'T' terminal
 51   i: index in m.symbols [?]
 52   filter: filter index in m.filters
 53  
 54
 55 m.numfilters  = 0;          % length(model.filters)
 56 m.numblocks   = 0;          % length(model.blocks)
 57 m.numsymbols  = 0;          % length(model.symbols)
 58 m.blocksizes  = [];         % length of each block of model parameters
 59 m.start       = [];         % grammar start symbol
 60 m.maxsize     = -[inf inf]; % size of the largest detection window
 61 m.minsize     = [inf inf];  % size of the smallest detection window
 62 m.interval    = 10;         % # levels in each feature pyramid octave
 63 m.sbin        = 8;          % pixel size of the HOG cells
 64 m.thresh      = 0;          % detection threshold
 65 m.regmult     = [];         % per block regularization multiplier
 66 m.learnmult   = [];         % per block learning rate multiplier
 67 m.lowerbounds = {};         % per parameter lower bound
 68
 69 model.scoretpt{i}: zeros of max response image size at each level i
 70 model.interval: number of pyramid levels to double size of image
 71
 72 pyramid.pady pyramid.padx = m.maxsize [ double check ]
 73
 74 model add filter:
 75 1. create a filter with index = m.numfilters + 1
 76 2. if blocklabel is not provided: add block with index = m.numblocks + 1
 77    and follow settings:
 78    regmult = 1, learnmult = 1, lowerbounds=-100 for each dimension
3. add new symbol for the terminal associated with filter with
 80    index = m.numsymbols + 1
 81    m.symbols(i).i = i;( i = index )[?]
 82
 83 model add rule:
 84 1. create a rule m.rules{lhs}(i)
 85 2. set type,lhs,rhs,detwindows,i
 86 3. if( offset block is empty )
 87      'S' rule: create an offsetbl with size 1, regmult = 0, learmult = 20
 88      'D' rule: create an offsetbl with size 1, regmult = learnmult = 0
 89 4. set offset.w offset.blocklabel
 90 5. if( 'S' ) rule: set m.rules{lhs}(i).anchor
 91 6. if( 'D' ) rule:
 92       if defbl is empty:
 93         add a block with (numel(params)=4,10,0.1,[0.01 -100 0.01 -100])
 94       else
 95         set this rule to mirroring
 96       set def.w def.blocklabel def.flip def.symmetric
 97       
 98
 99 train.m
100  neg examples: keep cache at least half full but keep all negative sv
101
102 -----------------------------------------------------------
103              build featpyramid for a model
104 -----------------------------------------------------------
105 pyra.feat{i} is the i-th level of the feature pyramid.
106 pyra.scales{i} is the scaling factor used for the i-th level.
107 pyra.feat{i+interval} is computed at exactly half the resolution of feat{i}.
108
109                              |<-      max_scale           ->|
110 pyramid: 1   2    ...     n  n+1 n+2 ...  2n        ...     m
111          ^   ^            ^   ^            ^                ^
112          |   |< interval->|   |<-interval->|                |
113          |                |                |                |
114      size:2x            size:1x        size:0.5x     min(imsize) = 5*sbin
115
116 (padx,pady) <- model max detection window size
117
118 foreach( feat, pyra.feat{i} )
   // each feature is padded pady,padx around feature
120    // add 1 to padding because feature generation deletes a 1-cell
121    feat = padarray( feat, [pady+1,padx+1,0] )
122    set the 32^{th}-dim border of feat to 1, others to 0
123 end
124
125 -----------------------------------------------------------
126   notes of file communicate between train.m and learn.cc
127 -----------------------------------------------------------
128 hdrfile: header file (writeheader)
129  [num labelsize model.numblocks model.blocksizes]: int32
130  model.regmult: float vector
131  model.learnmult: float vector,
132  where num: number of data instance, including both pos and neg
133  reset header file: num = 0
134
135 datfile:
136  refer to poswarp/negrandom function
137
138 modfile: model file (writemodel)
139  write model.blocks information into file
140  reset model  file: zeros(sum(model.blocksizes),1) double
141  weight vector corresponding to each block
142
143 inffile:
144  [labels, scores, unique] = textscan( fid, '%d%f%d' )
145
146 lobfile: write lower bound data file
147  foreach of blocks in model, write model.lowerbounds{i} double
148
149 cmpfile: writecomponentinfo
150  write the block labels used by each component
151  format: #components {#blocks blk1 ... blk#blocks}^#components
152
153 objfile
154  [nl pl rt] = textread( objfile, '%f%f%f' )
155  nl: negative loss
156  pl: positive loss
157  rt: ?
158  
159
160 labelsize = 5; %[label id level x y]
161
162 --------------------------------------------------
163       Write pos/neg instances to data file
164 --------------------------------------------------
165 poswarp: write pos to datafile(append)
166 1. get warped pos based on bbox
167 2. write to datafile according to following format:
168    for i = 1 : numpos
169       write to data file:
170       [ 1 i 0 0 0 2 dim] int32, where dim = numel(feat) + 3
171       [ obl 1 ] float (obl: offset block index )
172       fbl       float (fbl: filter block index )
173       feat      float (feat: warped pos HoG feature)
174    end
175
negrandom: write neg to datafile(append)
177 1. numneg = number of neg images
178 2. rndneg = floor(maxnum/numneg): number of neg per image
179 3. foreach image of index i, generate rndneg of index j
180      uniform random x,y and extract feat
181      negindex = (i-1)*rndneg + j
182      write to data file:
183      [ -1 negindex 0 0 0 2 dim] int32 where dim = numel(feat) + 3
184      [ obl 1 ] float (obl: offset block index )
185      fbl       float (fbl: filter block index )
186      feat      float (feat: neg HoG feature )
187    end
188
189 poslatent: write latent pos to data file(append)
190 set model.interval = 5(pyramid height to 5)
191 foreach bbox with area > minsize filter of model
192   1. get image patch from image with double size of bbox
193   2. build feature pyramid
194      a. get feature with [pady padx] = model.maxsize
195      b. pad image with pady,padx on boundary of image with 0 and set
196         32^{th} dim of HoG feature to 1 to indicate it is padding feature
197   3. compute response (bs) getdetect
198   4. record pos score
199   5. write bs to data file(gdetectwrite)
200   6. inc fusage
201 end
202
203 neghard: write hard negative to data file(append), this function
204          takes a position variable(negpos) as input. After current buffer
205          is full and svm solver process current buffer. The next
206          round of buffer filling will be start from current position
207   %a kind of limit negative examples per image
208   1. set model.interval = 4 (pyramid height to 4 );
209   % resume from previous neg image position
  2. inds = circshift( 1 : numneg, [ 0, -negpos ] );
  3. all bbox with gdetect response > threshold = -1.002 % neg margin = -1
212   4. add them to negative buffer
213   5. update fusage
214   6. break if negbuffer is full
215   notes: neghard only input images without given labels, thus never need to figure out whether
216          a given detection position is +1 or -1
217
218
219 buffer filling: 214-270 train.m
220   P = all pos
221   pos_sv = numel( pos_values < 1 )
222   model.threshold = below 95% of pos response
223   neg_sv = numel( neg_value > -1 )
224   #neg = max( neg_sv, buffersize/2 ); % keep 1/2 buffer full at least
225   N = top #neg of neg_values
226   buffer in next hard mining iteration = [P N];
227   P will be removed if we want to update P, new neg instance is going to
228   add until buffer full
229   
cache = [nl pl rt nl+pl+rt]:  neg loss, pos loss, regularization loss, total loss
231
232 latent pos iteration break condition(outer loop):
233  pos_loss_pre: before recompute pos latent
234  pos_loss    : after recompute pos latent loss
235  break if: pos_loss/pos_loss_pre > 0.999
236
237 hard neg iteration break condition(inner loop):
238  1. have a full pass over neg image set
239  2. prev_pl + prev_rt + nl / (prev_pl+prev_rt+prev_nl ) < 1.05
240
241
242 --------------------------------------------------
243   inference with dynamic programming
244 --------------------------------------------------
245 gdetect:
246   1. compute filter response, cache in model.symbols(i).score
247   2. topological sort of non-terminal symbols in model's grammar
248   3. apply each rule attach a score of each pyramid response to each rule
249   4. get score for all symbols
250   5. find score > threshold:  
251        X: x index Y: y index I: 1d index L: level S: score
  6. sort scores in descending order; if pos latent, take only the top score
253   7. call getdetections.cc
  8. if latent pos:
255       clip detections into image boundary, if less than overlap, report
256       overlap requirment failed
257      end
258   
259 validatelevels:
260    if ~latent, return all pyramid levels
261    if latent,
262      if levels with >overlap than bbox based on fixed size filter &&
263         levels > interval (1-interval = 2x image -> 1ximage, root filter
264                            only one 1ximage )
265         return levels
266
267 symbol_score: (summarize/take pointwise max over scores for each rule and
268 store it in model.symbols)
269    if( latent pos && s = model.start )
270       for each rule i related with model.start
271           % 1-model.interval = half size, root can only sit
272           % on less than full size
273           for level = model.interval + 1 : length(i.score )
274              if does not overlap with det enough
275                score{level}(inds) = -inf
276              end
277       end
278    end
279    take pointwise max over scores for each rule
280
281 apply_structural_rule(sum over part at 2x zoom):
282   1. set score{i} to r.offset.w, (score{i} is 2D array of largest reponse over all filters)
283   2. foreach j in rhl rule in r
284         1. (r.anchor{j}(1:3)) -> (ax,ay,ds)
285         2. step = 2^ds; // step size for down sampling
286         foreach i in level of score
287            level <- i - model.interval * ds (part level with root level = i)
288            sp <- subsample of s{level}  according to step (score at level i )
289            sz <- subsample size of sp
290            score{i} += sp
291         end
292      end
293   3. attach score to model.rules{r.lhs}(r.i).score
294 Notes: parts could out of image, root filter could not go out of image
295
296 apply_deformation_rule: ( attach each rule a pyramid of score )
297 dt.cc    
298 input:   score, // filter response: 2-dim array
         def(1),// ax <-- quadratic term
         def(2),// bx <-- linear term
         def(3),// ay
         def(4) // by
// quadratic term > 0 because the score should decrease as the part moves farther away from its desired location
304 // input: score(x,y): the score of part sit on location (x,y)
305
306 output:  // the same size as input score
307          score  //
308          Ix, Iy //
309 // output: score(x,y): the best score at location (x,y) no matter where the part from,
310 //                     i.e. the best score of desired part location at (x,y)
311
In other words,

out_score(x,y) = \max_{dx,dy} in_score(x+dx,y+dy) - ax * dx^2 - bx * dx - ay * dy^2 - by * dy
where ax,ay>0.
Ix(x,y) = x+dx
Iy(x,y) = y+dy
where (dx,dy) is the maximizer; Ix,Iy record where the actual part comes from for the desired part location (x,y).
319
320         
key concept to understand dt_helper( why divide and conquer works ):
322
323 Define function: f(p,d) =  A[p] - a(d-p)^2 - b(d-p) with implicit params A,a,b and
324 Given (A: input score, B: output score ):
325 s = \argmax_p A[p] - a(d-p)^2 - b(d-p) for p\in [s1,s2].
326 B[p] = A[s] - a(d-s)^2 - b(d-s)
327
328 For any d^\prime = d -x,where (x>0), s^\prime can only be in range [s1,s]
329 and d^\prime = d + x, where x>0, s^\prime can only be in range [s,s2].
330
I will show the first statement is true; the proof for the second is similar.
332 q = \argmax_p f(p, d^\prime )
333 B[d^\prime] = f( q, d^\prime )
334
335 f(p, d^\prime ) = A[p] - a( (d-x) - p )^2 - b( (d-x) - p )
336                 = A[p] - a(d-p)^2 -b(d-p)  - ax^2 + bx + 2axd-2axp
337                   |---- f(p,d) ---------|    |-----const---|   \- depends on p
for p > s, we know f(s,d) >= f(p,d) and -2axs > -2axp. Thus, we only need to check the range
[s1,s].
340
341
342 getdetections.cc (backtrack where does dynamic result score from )
343 input:  model,     // detector model
344         pyra.padx, // pyramid padding x
345         pyra.pady, // pyramid padding y
346         pyra.scles,// array of scales at each pyramid level
347         X,         // array of X position
348         Y,         // array of Y position
349         L,         // array of level
350         S          // array of score, S(i) from X(i),Y(i) of level L(i)
351         // X,Y,L,S are sorted in descending order according to score
352
353 output:
354         [dets] is a matrix with 6 columns and one row per detection.  Columns 1-4
355         give the pixel coordinates (x1,y1,x2,y2) of each detection bounding box.  
356         Column 5 specifies the model component used for each detection and column
357         6 gives the score of each detection.  [ numdetection * (4+1+1)]
358
359         [boxes] is a matrix with one row per detection and each sequential group
360         of 4 columns specifies the pixel coordinates of each model filter bounding
361         box (i.e., where the parts were placed).  The index in the sequence is
362         the same as the index in model.filters. [ numdetection * (4*numfilters+2)]
363         The last 2 dim is: 1) rule index produce such detection 2) bbox score
364
365         [info] contains detailed information about each detection required for
366         extracted feature vectors during learning.  [DET_SZ * numsymbols * numdetection]
367
368 info matrix: 9 x #symbols x #detection matrix where each colum
369 represents detection
370 results for each symbol. Each element of of a fixed column has the following
371 meanings:
372
373 DET_USE = 0;    % current symbol is used
374 DET_IND = 1;    % rule index
375 DET_X   = 2;    % x coord (filter and deformation)
376 DET_Y   = 3;    % y coord (filter and deformation)
377 DET_L   = 4;    % level (filter)
378 DET_DS  = 5;    % # of 2x scalings relative to the start symbol location
379                 % (0=>root level 1=> first part level ... )
380 DET_PX  = 6;    % x coord of "probe" (deformation)
381 DET_PY  = 7;    % y coord of "probe" (deformation)
382 DET_VAL = 8;    % score of current symbol
383 DET_SZ  = 9;    number of elements above
384
385 Q: (stack) node[numsymbols], where node is defined as follows:
386 struct node {
387   int symbol;   // grammar symbol
388   int x;        // x location for symbol
389   int y;        // y location for symbol
390   int l;        // scale level for symbol
391   int ds;       // # of 2x scalings relative to the start symbol location
392   double val;   // score for symbol
393 };
394
395 // trace detections and write output into out
396 for i = 0 to numdetection
397    trace( padx, pady,                       // [in]  input padx,pady of pyramid
398           scales,                           // [in]  pyramid scales array descending order
399           X[i] - 1, Y[i] -1, L[i] -1, S[i], // [in]  X,Y,level, and score of detection i
400           out,                              // [out] info 3D matrix
401           dets, detsdim,                    // [out] dets matrix of size detsdim[0] x detsdims[1]
402           boxes, boxesdim                   // [out] bbox matrix of size boxesdim[0] x boxesdim[1]
403          )
404     // inc ptr
405     out += DET_SZ * numsymbols
406     boxes++;
407     dets++;
408 end
409
410 // trace a single detection
411 trace( int padx, int pady,                 // padx,pady of pyramid
412        double* scales,                     // scales array
413        int sx,int sy, int sl, double sval, // current detection (sx,sy) of
414                                            // level sl with score sval
415        double* out,                        // info ptr, column major
416        double* dets, mwSize *detsdim,      // dets matrix(2D) ptr and size, column major
417        double* boxes, mwSize *boxesdim     // boxes matrix(2D) ptr and size, column major
418 ){
419   // init stack
420   Q[0].(symbol,x,y,l,ds,valu) <-  (start_symbol, sx, sy, sl, 0, sval )
421   // pop stack
422   while( stack Q not empty ) {
423      node n = Q.pop();
424      // terminal symbol
425      if( n.symbol is type 'T' ) {
426          // get size
427          fsz <- size of n.symbol.filter
428          // convert (x,y) in feature space at level n.l to image space
429          scale = model.sbin/scale[n.l]; // hog -> image pixel scale
430          // update x1,y1,x2,y2
431          x1 = (n.x - padx * pow2(n.ds) ) * scale
432                 ^     ^          ^          ^
433                 |     |          |          |
434             feature  padx    n.ds = 1(part) \- map to image
435                x             n.ds = 0(root)
436                          \--|--/
437                        correct padding
438          x2 = x1 + fsz[1] * scale - 1;
439          set bbox (x1,y1,x2,y2) of corresponding filter
440          continue;
441      }
442      // non-terminal symbol
443      find ( Rule r, rules with lhs = n.symbol ) with
444         models.rules{r}.score{n.l}[probex,proby] == n.val
445                                      |      |
446                                     n.x,n.y minus virtual padding
447      info[DET_INDEX] = r+1 <- r mapped to matlab index
448      // start_symbol
449      if( n.symbol == start_symbol ) {
450          map( n.x,n.y,n.l ) to (x1,y1,x2,y2)
451          set a) dets matrix b)last two dim of boxes matrix c) info matrix
452      }
453      // process Rule r
454      if( r is structure rule ) { // sum of parts rule
455         for j = 0 : numpart-1 {
456            (ax,ay,ds) <- r.anchor{j}
457            px = n.x * 2^ds + ax   // part x
458            py = n.y * 2^ds + ay   // part y
           pl = n.l - interval * ds // part level
460            remove padding from (px,py)
461            push part{j} to stack
462         }
463      }
464      else { // deformation rule, only 1 rhs symbol
465          get part location (px,py) from Ix,Iy and n.x,n.y with correct padding
         push rhs('T' rule) to stack
467      }
468   }
469 }
470
471 --------------------------------------------------
      Read/Write communicate with learn.cc
473 --------------------------------------------------
474 writemodel
475 1. blocks{bl} = filter if it corresponds to non-flip filter
476 2. blocks{bl} = offset.w if it corresponds to offsets
477 3. blocks{bl} = def.w if it corresponds to non-flip 'D' rule
478 4. write blocks to model file
479      number of entries should = sum(model.blocksizes)
480
481 readmodel
482 for i = 1 : model.numblocks
483  blocks{i} = read model.blocksizes(i) double from file
484 end
485
486 parsemodel (update model parameters from weight vector representation)
487 parsemodel( model, blocks, i ):
488   model.symbols(i).type == 'T' terminal
489      1. fi = model.symbols(i).filter
490      2. f = reshape  blocks{model.filters{fi}.blocklable} to proper size
491      3. flip filter if model.filters{fi}.symmetric == 'M'
492      4. model.filters(fi).w = f;
493
494   model.symbols(i).type == 'N' non-terminal
495   for all rules r with lhs = i
496      1. set model.rules{r.lhs}(r.i).offset.w = blocks{r.offset.blocklabel}
497      2. if r.type == 'D':
498         model.rules{r.lhs}(r.i).def.w from blocks{r.def.blocklabel}
499         flip linear term if r.def.symmetric == 'M' and r.def.flip
500      3. for s = r.rhs
501           parsemodel( model, blocks, s );
502         end
503   end
504
505 writecomponentinfo: write the block labels used by each component
506 ( summary of rules related with block )
507 1. n = number of root rules
508 2. foreach of root rules( skip symmetric one ) i = 1 : 2 : n
509      comp{i}(end+1) = i^{th} rule offset blocklabel
510      foreach rhs of root rules
511        if 'T': comp{i}(end+) = the blockindex corresponds to filter
512        else %rules
513           comp{i}(end+1) = rules{j}.def.blocklabel
514           comp{i}(end+1) = rules{j}.offset.blocklabel
515           comp{i}(end+1) = filter block lable of rules{j}.rhs(1)
516        end
517      end
518    end
519 3. write n: int32
520 4. for i = 1 : n
521      write k = length( comp{i} ) :int32
522      write comp{i}: int32
523    end
524
525 readinfo
526  [labels,scores,unique] = textscan( fid, '%d%f%d' )
527
rewritedat: shrink cache
529 update datfile, inffile based on index I and hdrfile
530
531 gdetectwrite:
532   foreach detection result(index by i)
533      ex.header = [ datalabel dataid l x y 0 0]
534                                     ^ ^ ^
535                                     | | \- ylocation
536                                 level xlocation
537      allocate memory: ex.blocks(model.numblocks).w
538      foreach symbol in model.numsymbols(index by j)
539         if info(DET_USE, j i ) == 0
540           continue; %skip unused symbol
541         if symbol j is of type 'T'
542           addfilterfeat to ex, flip feat if filter is flipped
543         else
544           ruleid = info( DET_IND,j,i) %get rule id
545           if ruleid is 'D' rule
546               bl = def.blocklabel
547               % compute dx,dy
548               dx = info( DET_PX, j, i ) - info( DET_X, j, i );
549               dy = info( DET_PY, j, i ) - info( DET_Y, j, i );
550               def = [ -dx^2 -dx -dy^2 -dy ]
551               if model.rules{j}.def.flip
552                  def(2) = -def(2) %flip rule
553               end
554               add def to to ex.blocks(bl)
555           end
556           bl = model.rules{j}(ruleid).offset.blocklabel; %get blocklabel
557           ex.blocks(bl).w = 1; %set current block to 1
558         end
559      end
560      exwrite(ex)
561   end
562
563 exwrite: helper function of gdetectwrite
564   1. cat all none empty ex.blocks(i).w of [ blockInde ex.blocks(i).w ] to buf
565   2. numBlocks = #none empty w blocks
566   3. set header from [ datalabel dataid 1 x y 0 0 ]
567                   to [ datalabel dataid 1 x y numblocks length(buf) ]
568   4. write ex.header: int32
569   5. write buf float
570
571 --------------------------------------------------
572   learn.cc Optimize LSVM via gradient descent
573 --------------------------------------------------
574
575 how weight vector is organized:
576
577 structurerule   : [w]  // weight of dim 1, the is bias term for this rule
578 deformation rule: [        // weight of dim 4
                    def(1),// ax <-- quadratic term
580                     def(2),// bx <-- linear term
581                     def(3),// ay
582                     def(4) // by
583                   ]
584 'T'erminal rule : filter weights dimension
585
586 example:
   poswarp writes:
588          [ 1 i 0 0 0 2 dim] int32, where dim = numel(feat) + 3
589    negrandom write:
590         [ -1 negindex 0 0 0 2 dim] int32 where dim = numel(feat) + 3
591
592
593 Internal Binary Format:
594       len:                   int        byte length of EXAMPLE
595       EXAMPLE:
596 ptr2 --> |- long label       ints       [label id level x y]
597 ptr0 --> |- blocks           int        number of blocks in DATA{blocks}
598          |- dim              int        dimension of data in DATA
599 ptr1 --> \--DATA{blocks}:    DATA
600              |-- block label float      label attatch to this block
601                  block data  floats     block weights vector
602       unique flag            float      byte
603
604 reference position(ref): ex
605 NUM_NONZERO(ex): ptr0
606 EX_DATA(ex)    : ptr1
607 LABEL(ex)      : ptr2
608 BLOCK_IDX(data): convert ptr1 index to 0 based [?]
609
default regularization: max-component L2 regularization
611
612 Data structure:
613 struct collapsed {
614   char **seq;           //  x[i].seq --
615   int num
616 };
617
618          x0.num=2           x1.num=1  x2.num= ?
619          x0.seq               x1.seq  x2.seq        
620             |                   |      |
621             |                   |      |
622             v                   v      v
623 examples:   0          1        2      3 .... ( sorted unique )  char**
624             ^          ^
625             |          |
626         x[0].seq[0] x[0].seq[1]
627
thus x[i].seq[m] points to the m-th example in collapsed group x[i].
x[i].seq[0],x[i].seq[1], ..., x[i].seq[ x[i].num-1 ] correspond to x[i].num
latent placements and only one of them should be active
631
632
633 // set of collapsed examples
634 struct data {
635   // set at step 4-5
636   collapsed *x;          // x[i] for i = 0,1, ..., num -1
637   int num;                                
638   // ---- Read from component file ----
639   int numcomponents;     // number of non-symmetric rule from model.start(num of mixture model)
640   int *componentsizes;   // componentsizes[i]: number of block in component i,
641                          // where i\in[0,numcomponents)
642   int **componentblocks; // componentblcoks[i] of length componentsizes[i]. list block index
643                          // used by i^{th} component, 0-based index
644   // ---- Read from header file --------
645   int numblocks;    
  int *blocksizes;        // length numblocks, stores the length of the weight vector of each block
  float *regmult;         // length numblocks, regularization param for each block
648   float *learnmult;       // length numblocks, learning param for each block
649 };
650
651
652 main:
653 // ------------------------------------------
654 //        Set X of type data
655 // ------------------------------------------
656 1. read from header file: ( header file = [num labelsize model.numblocks model.blocksizes]: int32 )
657    num( number of examples in data file),
658    labelsize( labelsize = 5; %[label id level x y] )
659    X.numblocks, X.blocksizes, X.regmult, X.learnmult
660
661 2. read from compnent file:
662    X.numcomponents
663    X.componentblocks,
664    X.componentsizes,
   X.componentblocks[i]: vector of length X.componentsizes[i]
666
667 3. read examples from datafile: // each example is an detection result
668    repeat step 3.1-3.4 for num(num example from header file) times
669     1. read va into buf[labelsize+2]
670     2. define len(of type char) of current data example segment:
671        len = sizeof(int) * len(va) + sizeof(float) * dim of va
672     3. examples[i] of size: sizeof(int) + len +  1 : char
673                                  ^         ^     ^
674                                  |         |     |
675                          front pad = len  data  back pad= 0 <- whether it is unique
676     4. copy data to examples[i]
677   
678    data file example(from poswarp):
679                                   number of blocks( filter block + bias block from structure rule )
680                                     |
681                                     v
682 va:   [ 1            i        0 0 0 2 dim] int32, where dim = numel(feat) + 3
683         ^            ^        ^ ^ ^
684         |            |        | | |
685        label  example_index   x y level  <---  identify as labels, step 5 merge rule compare this
686       [ obl 1 ] float (obl: offset block index )
687       fbl       float (fbl: filter block index )
688       feat      float (feat: warped pos HoG feature)
689
690 4. get sorted array from example array contain unique examples of size num_unique
691    1. sort examples array and copy to sorted array according to:
692        1. compare label of example[i]
693        2. compare len of example[i]
694        3. compare bits :(
695    2. get unique array
696
697 5. collapse examples( [in,out] X.x and X.num, [in] sorted, [in] num_unique )
698     X.x of size num_unique
699
700     merge examplex with the identical labels( labelsize*sizeof(int), i.e. merge detection result at the
701     same (x,y,l) for the same example and the same label. ). Multiple detection result happens
702     if numcomponent >1
703
704     X.x.seq = &examples[i]
705     X.x.num = num of examples with identical labels
706     X.num number of examples after merge
707   
708 // ------------------------------------------
709 //               init model
710 // ------------------------------------------
711 6. init model weight vector and lower bounds
712    1. read weight vector w (of size \sum_{i=1}^{X.numblocks} X.blocksizes[i] ) from
713       model file of type double
714    2. read lower bound vector lb from lobfile
715
716 // ------------------------------------------
717 //        train model: gradient descent
718 // ------------------------------------------
719 * We use an adaptive cache mechanism.  After a negative example
720 * scores beyond the margin multiple times it is removed from the
721 * training set for a fixed number of iterations.
722 7. gd( C, // neg mult
723        J, // pos mult = C*J
724        X,
725        w,
726        lb,// lower bounds
727        logdir,  // log file output directory
728        logtag   // log file name
729       )
730    ITER = 10e6 and MIN_ITER = 5e6
731    // go through data with random permuation
732    // learning rate
733    double T = min(ITER/2.0, t + 10000.0);
734    double rateX = cnum * C / T; //cnum = number of example in cache
735   
736    // compute loss every 1e5 iterations and check stop condition
737    out = compute_loss(C,J,X,W) // max L-2 component regularization
738                                // out[0]: loss on neg
739                                // out[1]: loss on pos
740                                // out[2]: loss on regularization
741     delta = 1 - | prev_loss-loss|/loss
742     stop if: 1) delta > DELTA_STOP = 0.9995 => |prev_loss-loss|/loss < 5e-4
743                 happens consecutive for STOP_COUNT = 6 iteration
744           && 2) t >= MIN_ITER
745
746     compute the loss for an example x = X.x[i] as follows:
747       max over latent placements: \max_m  score(x.seq[m])
748     
749     if(  score(x.seq[m]) * label < 1 ){ //  sv
750        update gradient w.r.t. x.seq[m] with learning rate:
751           mult = X.learnmult[b] * rateX * (label>0?J:-1), where b is blockIndex of x.seq[m]
752     }
753     else {  // not sv, no update; update cache (may skip this example later)
754         if( x is in cache and has not been updated for INCACHE iterations ) //INCACHE=25
755              skip x for the next MINWAIT iterations //(MINWAIT = 50)
756     }
757     
758     // TODO: compare with Leon Bottou's code: http://leon.bottou.org/projects/sgd
759     // periodically regularize the model, i.e. shrink w ( period = REGFREQ(20) )
760     // lower bound for df term [ quadratic term lower bound = 0.01, linear term lower bound = -100]
761     // this forces the quadratic term to stay useful
762     w[j][k] = max( w[j][k],lb[j][k]) where, j-> block index k-> weight index in block j
763     regularize all w[j][k] as follows in FULL_L2, otherwise regularize w[j][k] in the max component only
764     w[j][k] = mult * w[j][k] where mult = pow( 1- (regmult[b]*learnmult[b])/T, REGFREQ )
765
766
767     model rule{3} root, rule{17} part(block 15)
768     regmult: only filter weights block with regmult = 1,
769     learnmult: filter weights =1, bias = 20, [dx^2 dx dy^2 dy] = 0.1
770               regmult                 learnmult
771     filter:     1                        1
772     bias  :     0                       20
773     df    :     10                      0.1
774     
775 // ------------------------------------------
776 //       write model back and clean up
777 // ------------------------------------------
778 8. write w to modelfile
779 9. compute score for each examples and write to inffile
780    1. score s[i] = score(examples[i],w )
781    2. print:  label[i]  s[i]   unique_flag[i] from Step 4
782 10. write negloss, posloss, regloss to objfile
783                           
784
785
786 --------------------------------------------------
787                model operations
788 --------------------------------------------------
789
790 initmodel:
791 1. compute filter size
792 2. add filter
793 3. create a non-terminal and set it as start
794 4. add rule from start ('N') to filter ('T'): an 'S' rule with offset = 0
795    anchor = {[ 0 0 0]}
796                ^ ^ ^
797                | | |
798              ax ay ds
799 5. set detwindow
800
801 lrmodel:
802 1. add a new 'N' symbol
803 2. add new 'D' rule from new 'N' -> rootfilter symbol with
804    [ 1000 0 1000 0 ] param (rigid deformation rule)
805 3. set model.rules{model.start} to new added rule
806 4. add a mirrored filter and set root_filter.symmetric = 'M'
807 5. add a new 'N'on-terminal
808 6. add a 'D' rule from new added symbol: new 'N' -> mirrored filter with
809    [ 1000 0 1000 0] param (rigid deformation rule) and
810    shared offsetbl and defbl with rule in 2, but set def.flip = true
811 7. add a new rule to model.rules{model.start}: model.start-> new 'N'
812    (copy offset and bl from old rule with unflipped filter)
813
814 mergemodels: merge a set of models into a single mixture model
815 1. merge filters, symbols, rules, starting rules
816 5. sync rhs of starting rules
817 6. clear model starting rules
818 7. sync maxsize,minsize,numblocks,numfilters,numsymbols
819   
820 model_addparts:
821 pfilter = mkpartfilters( filter, psize,num )
822 for i = 1 : numparts
823   add filter pfilters(i).w
824   add 'N' symbol corresponding to this filter
825   // quadratic term = 0.1 linear term = 0
826   add deformation rule with defparam = pfilters(i).alpha * [0.1 0 0.1 0 ];
827   if( mirrored deformation exists )
828      x = pfilter(i).anchor(1) + 1;
829      y = pfilter(i).anchor(2) + 1; // NOTE(review): original note had anchor(1) here; y should index anchor(2)
830   end
831 end
832
833
834
835 mkpartfilters( filter, // input filter h x w x d
836                 psize, // part size ph x pw
837                 num    // number of parts
838              ):
839
840 // only positive energy is used here
841 energy = sum( max( filter2x,0 ).^2 ,3 );// where filter2x is the filter upsampled 2x via bicubic interpolation
842 for k = 1 : num
843   [x y] <- placepart( filter2x )
844    // f = max-energy region, rescaled from its L2 norm (a typical value is 1.8) to alpha = 0.1
845   f = mkfilter( filter2x,x,y )
846   pfilter(k).anchor = [x-1 y-1 1];
847                         ^   ^  ^
848                         |   /  |
849             0-based index,/     \ 0: root filter, 1: part filter
850             (x,y) starts at 1
851
852   pfilter(k).w = f          // filter
853   pfilter(k).alpha = alpha  // filter norm
854 end
855 // sample part placements and pick the best energy covering
856 ...
857
858 --------------------------------------------------
859               train model
860 --------------------------------------------------
861 spos = all pos bbox + flipped pos bbox
862 Phase 1: discriminative training
863 1. init submodel based on split of pos bbox
864 2. lrsplit model and define pos = left-facing pos bbox (output of clustering)
865 3. train each sub model of mixture model with corresponding
866    warped positives and random negatives
867
868 Phase 2:
869 1. produce a model with left/right symmetric root filters
870 2. train each sub model of mixture model with latent positive position
871    and hard negative position( recompute pos 4 iteration each
872    run 3 itr of mining hard neg )
873
874 Phase 3:
875 1. mergemodels
876 2. train as a whole model with latent pos instance(1 iteration)
877    and hard neg instance( 5 iteration )
878
879 Phase 4:
880 1. add parts
881 2. init train pos, neg(1:maxneg), positr = 8, negitr = 10
882 3. train pos,neg,positr= 1, negitr = 5
883
884 -------------------------
885 Data:
886 svm param: C = 0.002 J = 1; (pos weight = C*J)
887 learn.cc Optimize LSVM via gradient descent