Bullet Collision Detection & Physics Library
btDantzigLCP.cpp
Go to the documentation of this file.
1/*************************************************************************
2* *
3* Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4* All rights reserved. Email: russ@q12.org Web: www.q12.org *
5* *
6* This library is free software; you can redistribute it and/or *
7* modify it under the terms of EITHER: *
8* (1) The GNU Lesser General Public License as published by the Free *
9* Software Foundation; either version 2.1 of the License, or (at *
10* your option) any later version. The text of the GNU Lesser *
11* General Public License is included with this library in the *
12* file LICENSE.TXT. *
13* (2) The BSD-style license that is included with this library in *
14* the file LICENSE-BSD.TXT. *
15* *
16* This library is distributed in the hope that it will be useful, *
17* but WITHOUT ANY WARRANTY; without even the implied warranty of *
18* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19* LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20* *
21*************************************************************************/
22
23/*
24
25
26THE ALGORITHM
27-------------
28
29solve A*x = b+w, with x and w subject to certain LCP conditions.
30each x(i),w(i) must lie on one of the three line segments in the following
31diagram. each line segment corresponds to one index set :
32
33 w(i)
34 /|\ | :
35 | | :
36 | |i in N :
37 w>0 | |state[i]=0 :
38 | | :
39 | | : i in C
40 w=0 + +-----------------------+
41 | : |
42 | : |
43 w<0 | : |i in N
44 | : |state[i]=1
45 | : |
46 | : |
47 +-------|-----------|-----------|----------> x(i)
48 lo 0 hi
49
50the Dantzig algorithm proceeds as follows:
51 for i=1:n
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
57 it hits.
58
59we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60simpler, because the starting point for x(i),w(i) is always on the dotted
61line x=0 and x will only ever increase in one direction, so it can only hit
62two out of the three line segments.
63
64
65NOTES
66-----
67
68this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69the implementation is split into an LCP problem object (btLCP) and an LCP
70driver function. most optimization occurs in the btLCP object.
71
72a naive implementation of the algorithm requires either a lot of data motion
73or a lot of permutation-array lookup, because we are constantly re-ordering
74rows and columns. to avoid this and make a more optimized algorithm, a
75non-trivial data structure is used to represent the matrix A (this is
76implemented in the fast version of the btLCP object).
77
78during execution of this algorithm, some indexes in A are clamped (set C),
79some are non-clamped (set N), and some are "don't care" (where x=0).
80A,x,b,w (and other problem vectors) are permuted such that the clamped
81indexes are first, the unclamped indexes are next, and the don't-care
82indexes are last. this permutation is recorded in the array `p'.
83initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84the corresponding elements of p are swapped.
85
86because the C and N elements are grouped together in the rows of A, we can do
87lots of work with a fast dot product function. if A,x,etc were not permuted
88and we only had a permutation array, then those dot products would be much
89slower as we would have a permutation array lookup in some inner loops.
90
91A is accessed through an array of row pointers, so that element (i,j) of the
92permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93we still have to actually move the data.
94
95during execution of this algorithm we maintain an L*D*L' factorization of
96the clamped submatrix of A (call it `AC') which is the top left nC*nC
97submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99(1) AC is always permuted such that L*D*L' = AC. this causes a problem
100when a row/column is removed from C, because then all the rows/columns of A
101between the deleted index and the end of C need to be rotated downward.
102this results in a lot of data motion and slows things down.
103(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104itself a permutation of the underlying A). this is what we do - the
105permutation is recorded in the vector C. call this permutation A[C,C].
106when a row/column is removed from C, all we have to do is swap two
107rows/columns and manipulate C.
108
109*/
110
111
112#include "btDantzigLCP.h"
113
114#include <string.h>//memcpy
115
116bool s_error = false;
117
118//***************************************************************************
119// code generation parameters
120
121
122#define btLCP_FAST // use fast btLCP object
123
124// option 1 : matrix row pointers (less data copying)
125#define BTROWPTRS
126#define BTATYPE btScalar **
127#define BTAROW(i) (m_A[i])
128
129// option 2 : no matrix row pointers (slightly faster inner loops)
130//#define NOROWPTRS
131//#define BTATYPE btScalar *
132//#define BTAROW(i) (m_A+(i)*m_nskip)
133
134#define BTNUB_OPTIMIZATIONS
135
136
137
138/* solve L*X=B, with B containing 1 right hand sides.
139 * L is an n*n lower triangular matrix with ones on the diagonal.
140 * L is stored by rows and its leading dimension is lskip.
141 * B is an n*1 matrix that contains the right hand sides.
142 * B is stored by columns and its leading dimension is also lskip.
143 * B is overwritten with X.
144 * this processes blocks of 2*2.
145 * if this is in the factorizer source file, n must be a multiple of 2.
146 */
147
148static void btSolveL1_1 (const btScalar *L, btScalar *B, int n, int lskip1)
149{
150 /* declare variables - Z matrix, p and q vectors, etc */
151 btScalar Z11,m11,Z21,m21,p1,q1,p2,*ex;
152 const btScalar *ell;
153 int i,j;
154 /* compute all 2 x 1 blocks of X */
155 for (i=0; i < n; i+=2) {
156 /* compute all 2 x 1 block of X, from rows i..i+2-1 */
157 /* set the Z matrix to 0 */
158 Z11=0;
159 Z21=0;
160 ell = L + i*lskip1;
161 ex = B;
162 /* the inner loop that computes outer products and adds them to Z */
163 for (j=i-2; j >= 0; j -= 2) {
164 /* compute outer product and add it to the Z matrix */
165 p1=ell[0];
166 q1=ex[0];
167 m11 = p1 * q1;
168 p2=ell[lskip1];
169 m21 = p2 * q1;
170 Z11 += m11;
171 Z21 += m21;
172 /* compute outer product and add it to the Z matrix */
173 p1=ell[1];
174 q1=ex[1];
175 m11 = p1 * q1;
176 p2=ell[1+lskip1];
177 m21 = p2 * q1;
178 /* advance pointers */
179 ell += 2;
180 ex += 2;
181 Z11 += m11;
182 Z21 += m21;
183 /* end of inner loop */
184 }
185 /* compute left-over iterations */
186 j += 2;
187 for (; j > 0; j--) {
188 /* compute outer product and add it to the Z matrix */
189 p1=ell[0];
190 q1=ex[0];
191 m11 = p1 * q1;
192 p2=ell[lskip1];
193 m21 = p2 * q1;
194 /* advance pointers */
195 ell += 1;
196 ex += 1;
197 Z11 += m11;
198 Z21 += m21;
199 }
200 /* finish computing the X(i) block */
201 Z11 = ex[0] - Z11;
202 ex[0] = Z11;
203 p1 = ell[lskip1];
204 Z21 = ex[1] - Z21 - p1*Z11;
205 ex[1] = Z21;
206 /* end of outer loop */
207 }
208}
209
210/* solve L*X=B, with B containing 2 right hand sides.
211 * L is an n*n lower triangular matrix with ones on the diagonal.
212 * L is stored by rows and its leading dimension is lskip.
213 * B is an n*2 matrix that contains the right hand sides.
214 * B is stored by columns and its leading dimension is also lskip.
215 * B is overwritten with X.
216 * this processes blocks of 2*2.
217 * if this is in the factorizer source file, n must be a multiple of 2.
218 */
219
220static void btSolveL1_2 (const btScalar *L, btScalar *B, int n, int lskip1)
221{
222 /* declare variables - Z matrix, p and q vectors, etc */
223 btScalar Z11,m11,Z12,m12,Z21,m21,Z22,m22,p1,q1,p2,q2,*ex;
224 const btScalar *ell;
225 int i,j;
226 /* compute all 2 x 2 blocks of X */
227 for (i=0; i < n; i+=2) {
228 /* compute all 2 x 2 block of X, from rows i..i+2-1 */
229 /* set the Z matrix to 0 */
230 Z11=0;
231 Z12=0;
232 Z21=0;
233 Z22=0;
234 ell = L + i*lskip1;
235 ex = B;
236 /* the inner loop that computes outer products and adds them to Z */
237 for (j=i-2; j >= 0; j -= 2) {
238 /* compute outer product and add it to the Z matrix */
239 p1=ell[0];
240 q1=ex[0];
241 m11 = p1 * q1;
242 q2=ex[lskip1];
243 m12 = p1 * q2;
244 p2=ell[lskip1];
245 m21 = p2 * q1;
246 m22 = p2 * q2;
247 Z11 += m11;
248 Z12 += m12;
249 Z21 += m21;
250 Z22 += m22;
251 /* compute outer product and add it to the Z matrix */
252 p1=ell[1];
253 q1=ex[1];
254 m11 = p1 * q1;
255 q2=ex[1+lskip1];
256 m12 = p1 * q2;
257 p2=ell[1+lskip1];
258 m21 = p2 * q1;
259 m22 = p2 * q2;
260 /* advance pointers */
261 ell += 2;
262 ex += 2;
263 Z11 += m11;
264 Z12 += m12;
265 Z21 += m21;
266 Z22 += m22;
267 /* end of inner loop */
268 }
269 /* compute left-over iterations */
270 j += 2;
271 for (; j > 0; j--) {
272 /* compute outer product and add it to the Z matrix */
273 p1=ell[0];
274 q1=ex[0];
275 m11 = p1 * q1;
276 q2=ex[lskip1];
277 m12 = p1 * q2;
278 p2=ell[lskip1];
279 m21 = p2 * q1;
280 m22 = p2 * q2;
281 /* advance pointers */
282 ell += 1;
283 ex += 1;
284 Z11 += m11;
285 Z12 += m12;
286 Z21 += m21;
287 Z22 += m22;
288 }
289 /* finish computing the X(i) block */
290 Z11 = ex[0] - Z11;
291 ex[0] = Z11;
292 Z12 = ex[lskip1] - Z12;
293 ex[lskip1] = Z12;
294 p1 = ell[lskip1];
295 Z21 = ex[1] - Z21 - p1*Z11;
296 ex[1] = Z21;
297 Z22 = ex[1+lskip1] - Z22 - p1*Z12;
298 ex[1+lskip1] = Z22;
299 /* end of outer loop */
300 }
301}
302
303
304void btFactorLDLT (btScalar *A, btScalar *d, int n, int nskip1)
305{
306 int i,j;
307 btScalar sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22;
308 if (n < 1) return;
309
310 for (i=0; i<=n-2; i += 2) {
311 /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */
312 btSolveL1_2 (A,A+i*nskip1,i,nskip1);
313 /* scale the elements in a 2 x i block at A(i,0), and also */
314 /* compute Z = the outer product matrix that we'll need. */
315 Z11 = 0;
316 Z21 = 0;
317 Z22 = 0;
318 ell = A+i*nskip1;
319 dee = d;
320 for (j=i-6; j >= 0; j -= 6) {
321 p1 = ell[0];
322 p2 = ell[nskip1];
323 dd = dee[0];
324 q1 = p1*dd;
325 q2 = p2*dd;
326 ell[0] = q1;
327 ell[nskip1] = q2;
328 m11 = p1*q1;
329 m21 = p2*q1;
330 m22 = p2*q2;
331 Z11 += m11;
332 Z21 += m21;
333 Z22 += m22;
334 p1 = ell[1];
335 p2 = ell[1+nskip1];
336 dd = dee[1];
337 q1 = p1*dd;
338 q2 = p2*dd;
339 ell[1] = q1;
340 ell[1+nskip1] = q2;
341 m11 = p1*q1;
342 m21 = p2*q1;
343 m22 = p2*q2;
344 Z11 += m11;
345 Z21 += m21;
346 Z22 += m22;
347 p1 = ell[2];
348 p2 = ell[2+nskip1];
349 dd = dee[2];
350 q1 = p1*dd;
351 q2 = p2*dd;
352 ell[2] = q1;
353 ell[2+nskip1] = q2;
354 m11 = p1*q1;
355 m21 = p2*q1;
356 m22 = p2*q2;
357 Z11 += m11;
358 Z21 += m21;
359 Z22 += m22;
360 p1 = ell[3];
361 p2 = ell[3+nskip1];
362 dd = dee[3];
363 q1 = p1*dd;
364 q2 = p2*dd;
365 ell[3] = q1;
366 ell[3+nskip1] = q2;
367 m11 = p1*q1;
368 m21 = p2*q1;
369 m22 = p2*q2;
370 Z11 += m11;
371 Z21 += m21;
372 Z22 += m22;
373 p1 = ell[4];
374 p2 = ell[4+nskip1];
375 dd = dee[4];
376 q1 = p1*dd;
377 q2 = p2*dd;
378 ell[4] = q1;
379 ell[4+nskip1] = q2;
380 m11 = p1*q1;
381 m21 = p2*q1;
382 m22 = p2*q2;
383 Z11 += m11;
384 Z21 += m21;
385 Z22 += m22;
386 p1 = ell[5];
387 p2 = ell[5+nskip1];
388 dd = dee[5];
389 q1 = p1*dd;
390 q2 = p2*dd;
391 ell[5] = q1;
392 ell[5+nskip1] = q2;
393 m11 = p1*q1;
394 m21 = p2*q1;
395 m22 = p2*q2;
396 Z11 += m11;
397 Z21 += m21;
398 Z22 += m22;
399 ell += 6;
400 dee += 6;
401 }
402 /* compute left-over iterations */
403 j += 6;
404 for (; j > 0; j--) {
405 p1 = ell[0];
406 p2 = ell[nskip1];
407 dd = dee[0];
408 q1 = p1*dd;
409 q2 = p2*dd;
410 ell[0] = q1;
411 ell[nskip1] = q2;
412 m11 = p1*q1;
413 m21 = p2*q1;
414 m22 = p2*q2;
415 Z11 += m11;
416 Z21 += m21;
417 Z22 += m22;
418 ell++;
419 dee++;
420 }
421 /* solve for diagonal 2 x 2 block at A(i,i) */
422 Z11 = ell[0] - Z11;
423 Z21 = ell[nskip1] - Z21;
424 Z22 = ell[1+nskip1] - Z22;
425 dee = d + i;
426 /* factorize 2 x 2 block Z,dee */
427 /* factorize row 1 */
428 dee[0] = btRecip(Z11);
429 /* factorize row 2 */
430 sum = 0;
431 q1 = Z21;
432 q2 = q1 * dee[0];
433 Z21 = q2;
434 sum += q1*q2;
435 dee[1] = btRecip(Z22 - sum);
436 /* done factorizing 2 x 2 block */
437 ell[nskip1] = Z21;
438 }
439 /* compute the (less than 2) rows at the bottom */
440 switch (n-i) {
441 case 0:
442 break;
443
444 case 1:
445 btSolveL1_1 (A,A+i*nskip1,i,nskip1);
446 /* scale the elements in a 1 x i block at A(i,0), and also */
447 /* compute Z = the outer product matrix that we'll need. */
448 Z11 = 0;
449 ell = A+i*nskip1;
450 dee = d;
451 for (j=i-6; j >= 0; j -= 6) {
452 p1 = ell[0];
453 dd = dee[0];
454 q1 = p1*dd;
455 ell[0] = q1;
456 m11 = p1*q1;
457 Z11 += m11;
458 p1 = ell[1];
459 dd = dee[1];
460 q1 = p1*dd;
461 ell[1] = q1;
462 m11 = p1*q1;
463 Z11 += m11;
464 p1 = ell[2];
465 dd = dee[2];
466 q1 = p1*dd;
467 ell[2] = q1;
468 m11 = p1*q1;
469 Z11 += m11;
470 p1 = ell[3];
471 dd = dee[3];
472 q1 = p1*dd;
473 ell[3] = q1;
474 m11 = p1*q1;
475 Z11 += m11;
476 p1 = ell[4];
477 dd = dee[4];
478 q1 = p1*dd;
479 ell[4] = q1;
480 m11 = p1*q1;
481 Z11 += m11;
482 p1 = ell[5];
483 dd = dee[5];
484 q1 = p1*dd;
485 ell[5] = q1;
486 m11 = p1*q1;
487 Z11 += m11;
488 ell += 6;
489 dee += 6;
490 }
491 /* compute left-over iterations */
492 j += 6;
493 for (; j > 0; j--) {
494 p1 = ell[0];
495 dd = dee[0];
496 q1 = p1*dd;
497 ell[0] = q1;
498 m11 = p1*q1;
499 Z11 += m11;
500 ell++;
501 dee++;
502 }
503 /* solve for diagonal 1 x 1 block at A(i,i) */
504 Z11 = ell[0] - Z11;
505 dee = d + i;
506 /* factorize 1 x 1 block Z,dee */
507 /* factorize row 1 */
508 dee[0] = btRecip(Z11);
509 /* done factorizing 1 x 1 block */
510 break;
511
512 //default: *((char*)0)=0; /* this should never happen! */
513 }
514}
515
516/* solve L*X=B, with B containing 1 right hand sides.
517 * L is an n*n lower triangular matrix with ones on the diagonal.
518 * L is stored by rows and its leading dimension is lskip.
519 * B is an n*1 matrix that contains the right hand sides.
520 * B is stored by columns and its leading dimension is also lskip.
521 * B is overwritten with X.
522 * this processes blocks of 4*4.
523 * if this is in the factorizer source file, n must be a multiple of 4.
524 */
525
526void btSolveL1 (const btScalar *L, btScalar *B, int n, int lskip1)
527{
528 /* declare variables - Z matrix, p and q vectors, etc */
529 btScalar Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex;
530 const btScalar *ell;
531 int lskip2,lskip3,i,j;
532 /* compute lskip values */
533 lskip2 = 2*lskip1;
534 lskip3 = 3*lskip1;
535 /* compute all 4 x 1 blocks of X */
536 for (i=0; i <= n-4; i+=4) {
537 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
538 /* set the Z matrix to 0 */
539 Z11=0;
540 Z21=0;
541 Z31=0;
542 Z41=0;
543 ell = L + i*lskip1;
544 ex = B;
545 /* the inner loop that computes outer products and adds them to Z */
546 for (j=i-12; j >= 0; j -= 12) {
547 /* load p and q values */
548 p1=ell[0];
549 q1=ex[0];
550 p2=ell[lskip1];
551 p3=ell[lskip2];
552 p4=ell[lskip3];
553 /* compute outer product and add it to the Z matrix */
554 Z11 += p1 * q1;
555 Z21 += p2 * q1;
556 Z31 += p3 * q1;
557 Z41 += p4 * q1;
558 /* load p and q values */
559 p1=ell[1];
560 q1=ex[1];
561 p2=ell[1+lskip1];
562 p3=ell[1+lskip2];
563 p4=ell[1+lskip3];
564 /* compute outer product and add it to the Z matrix */
565 Z11 += p1 * q1;
566 Z21 += p2 * q1;
567 Z31 += p3 * q1;
568 Z41 += p4 * q1;
569 /* load p and q values */
570 p1=ell[2];
571 q1=ex[2];
572 p2=ell[2+lskip1];
573 p3=ell[2+lskip2];
574 p4=ell[2+lskip3];
575 /* compute outer product and add it to the Z matrix */
576 Z11 += p1 * q1;
577 Z21 += p2 * q1;
578 Z31 += p3 * q1;
579 Z41 += p4 * q1;
580 /* load p and q values */
581 p1=ell[3];
582 q1=ex[3];
583 p2=ell[3+lskip1];
584 p3=ell[3+lskip2];
585 p4=ell[3+lskip3];
586 /* compute outer product and add it to the Z matrix */
587 Z11 += p1 * q1;
588 Z21 += p2 * q1;
589 Z31 += p3 * q1;
590 Z41 += p4 * q1;
591 /* load p and q values */
592 p1=ell[4];
593 q1=ex[4];
594 p2=ell[4+lskip1];
595 p3=ell[4+lskip2];
596 p4=ell[4+lskip3];
597 /* compute outer product and add it to the Z matrix */
598 Z11 += p1 * q1;
599 Z21 += p2 * q1;
600 Z31 += p3 * q1;
601 Z41 += p4 * q1;
602 /* load p and q values */
603 p1=ell[5];
604 q1=ex[5];
605 p2=ell[5+lskip1];
606 p3=ell[5+lskip2];
607 p4=ell[5+lskip3];
608 /* compute outer product and add it to the Z matrix */
609 Z11 += p1 * q1;
610 Z21 += p2 * q1;
611 Z31 += p3 * q1;
612 Z41 += p4 * q1;
613 /* load p and q values */
614 p1=ell[6];
615 q1=ex[6];
616 p2=ell[6+lskip1];
617 p3=ell[6+lskip2];
618 p4=ell[6+lskip3];
619 /* compute outer product and add it to the Z matrix */
620 Z11 += p1 * q1;
621 Z21 += p2 * q1;
622 Z31 += p3 * q1;
623 Z41 += p4 * q1;
624 /* load p and q values */
625 p1=ell[7];
626 q1=ex[7];
627 p2=ell[7+lskip1];
628 p3=ell[7+lskip2];
629 p4=ell[7+lskip3];
630 /* compute outer product and add it to the Z matrix */
631 Z11 += p1 * q1;
632 Z21 += p2 * q1;
633 Z31 += p3 * q1;
634 Z41 += p4 * q1;
635 /* load p and q values */
636 p1=ell[8];
637 q1=ex[8];
638 p2=ell[8+lskip1];
639 p3=ell[8+lskip2];
640 p4=ell[8+lskip3];
641 /* compute outer product and add it to the Z matrix */
642 Z11 += p1 * q1;
643 Z21 += p2 * q1;
644 Z31 += p3 * q1;
645 Z41 += p4 * q1;
646 /* load p and q values */
647 p1=ell[9];
648 q1=ex[9];
649 p2=ell[9+lskip1];
650 p3=ell[9+lskip2];
651 p4=ell[9+lskip3];
652 /* compute outer product and add it to the Z matrix */
653 Z11 += p1 * q1;
654 Z21 += p2 * q1;
655 Z31 += p3 * q1;
656 Z41 += p4 * q1;
657 /* load p and q values */
658 p1=ell[10];
659 q1=ex[10];
660 p2=ell[10+lskip1];
661 p3=ell[10+lskip2];
662 p4=ell[10+lskip3];
663 /* compute outer product and add it to the Z matrix */
664 Z11 += p1 * q1;
665 Z21 += p2 * q1;
666 Z31 += p3 * q1;
667 Z41 += p4 * q1;
668 /* load p and q values */
669 p1=ell[11];
670 q1=ex[11];
671 p2=ell[11+lskip1];
672 p3=ell[11+lskip2];
673 p4=ell[11+lskip3];
674 /* compute outer product and add it to the Z matrix */
675 Z11 += p1 * q1;
676 Z21 += p2 * q1;
677 Z31 += p3 * q1;
678 Z41 += p4 * q1;
679 /* advance pointers */
680 ell += 12;
681 ex += 12;
682 /* end of inner loop */
683 }
684 /* compute left-over iterations */
685 j += 12;
686 for (; j > 0; j--) {
687 /* load p and q values */
688 p1=ell[0];
689 q1=ex[0];
690 p2=ell[lskip1];
691 p3=ell[lskip2];
692 p4=ell[lskip3];
693 /* compute outer product and add it to the Z matrix */
694 Z11 += p1 * q1;
695 Z21 += p2 * q1;
696 Z31 += p3 * q1;
697 Z41 += p4 * q1;
698 /* advance pointers */
699 ell += 1;
700 ex += 1;
701 }
702 /* finish computing the X(i) block */
703 Z11 = ex[0] - Z11;
704 ex[0] = Z11;
705 p1 = ell[lskip1];
706 Z21 = ex[1] - Z21 - p1*Z11;
707 ex[1] = Z21;
708 p1 = ell[lskip2];
709 p2 = ell[1+lskip2];
710 Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21;
711 ex[2] = Z31;
712 p1 = ell[lskip3];
713 p2 = ell[1+lskip3];
714 p3 = ell[2+lskip3];
715 Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
716 ex[3] = Z41;
717 /* end of outer loop */
718 }
719 /* compute rows at end that are not a multiple of block size */
720 for (; i < n; i++) {
721 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
722 /* set the Z matrix to 0 */
723 Z11=0;
724 ell = L + i*lskip1;
725 ex = B;
726 /* the inner loop that computes outer products and adds them to Z */
727 for (j=i-12; j >= 0; j -= 12) {
728 /* load p and q values */
729 p1=ell[0];
730 q1=ex[0];
731 /* compute outer product and add it to the Z matrix */
732 Z11 += p1 * q1;
733 /* load p and q values */
734 p1=ell[1];
735 q1=ex[1];
736 /* compute outer product and add it to the Z matrix */
737 Z11 += p1 * q1;
738 /* load p and q values */
739 p1=ell[2];
740 q1=ex[2];
741 /* compute outer product and add it to the Z matrix */
742 Z11 += p1 * q1;
743 /* load p and q values */
744 p1=ell[3];
745 q1=ex[3];
746 /* compute outer product and add it to the Z matrix */
747 Z11 += p1 * q1;
748 /* load p and q values */
749 p1=ell[4];
750 q1=ex[4];
751 /* compute outer product and add it to the Z matrix */
752 Z11 += p1 * q1;
753 /* load p and q values */
754 p1=ell[5];
755 q1=ex[5];
756 /* compute outer product and add it to the Z matrix */
757 Z11 += p1 * q1;
758 /* load p and q values */
759 p1=ell[6];
760 q1=ex[6];
761 /* compute outer product and add it to the Z matrix */
762 Z11 += p1 * q1;
763 /* load p and q values */
764 p1=ell[7];
765 q1=ex[7];
766 /* compute outer product and add it to the Z matrix */
767 Z11 += p1 * q1;
768 /* load p and q values */
769 p1=ell[8];
770 q1=ex[8];
771 /* compute outer product and add it to the Z matrix */
772 Z11 += p1 * q1;
773 /* load p and q values */
774 p1=ell[9];
775 q1=ex[9];
776 /* compute outer product and add it to the Z matrix */
777 Z11 += p1 * q1;
778 /* load p and q values */
779 p1=ell[10];
780 q1=ex[10];
781 /* compute outer product and add it to the Z matrix */
782 Z11 += p1 * q1;
783 /* load p and q values */
784 p1=ell[11];
785 q1=ex[11];
786 /* compute outer product and add it to the Z matrix */
787 Z11 += p1 * q1;
788 /* advance pointers */
789 ell += 12;
790 ex += 12;
791 /* end of inner loop */
792 }
793 /* compute left-over iterations */
794 j += 12;
795 for (; j > 0; j--) {
796 /* load p and q values */
797 p1=ell[0];
798 q1=ex[0];
799 /* compute outer product and add it to the Z matrix */
800 Z11 += p1 * q1;
801 /* advance pointers */
802 ell += 1;
803 ex += 1;
804 }
805 /* finish computing the X(i) block */
806 Z11 = ex[0] - Z11;
807 ex[0] = Z11;
808 }
809}
810
811/* solve L^T * x=b, with b containing 1 right hand side.
812 * L is an n*n lower triangular matrix with ones on the diagonal.
813 * L is stored by rows and its leading dimension is lskip.
814 * b is an n*1 matrix that contains the right hand side.
815 * b is overwritten with x.
816 * this processes blocks of 4.
817 */
818
819void btSolveL1T (const btScalar *L, btScalar *B, int n, int lskip1)
820{
821 /* declare variables - Z matrix, p and q vectors, etc */
822 btScalar Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
823 const btScalar *ell;
824 int lskip2,i,j;
825// int lskip3;
826 /* special handling for L and B because we're solving L1 *transpose* */
827 L = L + (n-1)*(lskip1+1);
828 B = B + n-1;
829 lskip1 = -lskip1;
830 /* compute lskip values */
831 lskip2 = 2*lskip1;
832 //lskip3 = 3*lskip1;
833 /* compute all 4 x 1 blocks of X */
834 for (i=0; i <= n-4; i+=4) {
835 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
836 /* set the Z matrix to 0 */
837 Z11=0;
838 Z21=0;
839 Z31=0;
840 Z41=0;
841 ell = L - i;
842 ex = B;
843 /* the inner loop that computes outer products and adds them to Z */
844 for (j=i-4; j >= 0; j -= 4) {
845 /* load p and q values */
846 p1=ell[0];
847 q1=ex[0];
848 p2=ell[-1];
849 p3=ell[-2];
850 p4=ell[-3];
851 /* compute outer product and add it to the Z matrix */
852 m11 = p1 * q1;
853 m21 = p2 * q1;
854 m31 = p3 * q1;
855 m41 = p4 * q1;
856 ell += lskip1;
857 Z11 += m11;
858 Z21 += m21;
859 Z31 += m31;
860 Z41 += m41;
861 /* load p and q values */
862 p1=ell[0];
863 q1=ex[-1];
864 p2=ell[-1];
865 p3=ell[-2];
866 p4=ell[-3];
867 /* compute outer product and add it to the Z matrix */
868 m11 = p1 * q1;
869 m21 = p2 * q1;
870 m31 = p3 * q1;
871 m41 = p4 * q1;
872 ell += lskip1;
873 Z11 += m11;
874 Z21 += m21;
875 Z31 += m31;
876 Z41 += m41;
877 /* load p and q values */
878 p1=ell[0];
879 q1=ex[-2];
880 p2=ell[-1];
881 p3=ell[-2];
882 p4=ell[-3];
883 /* compute outer product and add it to the Z matrix */
884 m11 = p1 * q1;
885 m21 = p2 * q1;
886 m31 = p3 * q1;
887 m41 = p4 * q1;
888 ell += lskip1;
889 Z11 += m11;
890 Z21 += m21;
891 Z31 += m31;
892 Z41 += m41;
893 /* load p and q values */
894 p1=ell[0];
895 q1=ex[-3];
896 p2=ell[-1];
897 p3=ell[-2];
898 p4=ell[-3];
899 /* compute outer product and add it to the Z matrix */
900 m11 = p1 * q1;
901 m21 = p2 * q1;
902 m31 = p3 * q1;
903 m41 = p4 * q1;
904 ell += lskip1;
905 ex -= 4;
906 Z11 += m11;
907 Z21 += m21;
908 Z31 += m31;
909 Z41 += m41;
910 /* end of inner loop */
911 }
912 /* compute left-over iterations */
913 j += 4;
914 for (; j > 0; j--) {
915 /* load p and q values */
916 p1=ell[0];
917 q1=ex[0];
918 p2=ell[-1];
919 p3=ell[-2];
920 p4=ell[-3];
921 /* compute outer product and add it to the Z matrix */
922 m11 = p1 * q1;
923 m21 = p2 * q1;
924 m31 = p3 * q1;
925 m41 = p4 * q1;
926 ell += lskip1;
927 ex -= 1;
928 Z11 += m11;
929 Z21 += m21;
930 Z31 += m31;
931 Z41 += m41;
932 }
933 /* finish computing the X(i) block */
934 Z11 = ex[0] - Z11;
935 ex[0] = Z11;
936 p1 = ell[-1];
937 Z21 = ex[-1] - Z21 - p1*Z11;
938 ex[-1] = Z21;
939 p1 = ell[-2];
940 p2 = ell[-2+lskip1];
941 Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
942 ex[-2] = Z31;
943 p1 = ell[-3];
944 p2 = ell[-3+lskip1];
945 p3 = ell[-3+lskip2];
946 Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
947 ex[-3] = Z41;
948 /* end of outer loop */
949 }
950 /* compute rows at end that are not a multiple of block size */
951 for (; i < n; i++) {
952 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
953 /* set the Z matrix to 0 */
954 Z11=0;
955 ell = L - i;
956 ex = B;
957 /* the inner loop that computes outer products and adds them to Z */
958 for (j=i-4; j >= 0; j -= 4) {
959 /* load p and q values */
960 p1=ell[0];
961 q1=ex[0];
962 /* compute outer product and add it to the Z matrix */
963 m11 = p1 * q1;
964 ell += lskip1;
965 Z11 += m11;
966 /* load p and q values */
967 p1=ell[0];
968 q1=ex[-1];
969 /* compute outer product and add it to the Z matrix */
970 m11 = p1 * q1;
971 ell += lskip1;
972 Z11 += m11;
973 /* load p and q values */
974 p1=ell[0];
975 q1=ex[-2];
976 /* compute outer product and add it to the Z matrix */
977 m11 = p1 * q1;
978 ell += lskip1;
979 Z11 += m11;
980 /* load p and q values */
981 p1=ell[0];
982 q1=ex[-3];
983 /* compute outer product and add it to the Z matrix */
984 m11 = p1 * q1;
985 ell += lskip1;
986 ex -= 4;
987 Z11 += m11;
988 /* end of inner loop */
989 }
990 /* compute left-over iterations */
991 j += 4;
992 for (; j > 0; j--) {
993 /* load p and q values */
994 p1=ell[0];
995 q1=ex[0];
996 /* compute outer product and add it to the Z matrix */
997 m11 = p1 * q1;
998 ell += lskip1;
999 ex -= 1;
1000 Z11 += m11;
1001 }
1002 /* finish computing the X(i) block */
1003 Z11 = ex[0] - Z11;
1004 ex[0] = Z11;
1005 }
1006}
1007
1008
1009
1010void btVectorScale (btScalar *a, const btScalar *d, int n)
1011{
1012 btAssert (a && d && n >= 0);
1013 for (int i=0; i<n; i++) {
1014 a[i] *= d[i];
1015 }
1016}
1017
1018void btSolveLDLT (const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
1019{
1020 btAssert (L && d && b && n > 0 && nskip >= n);
1021 btSolveL1 (L,b,n,nskip);
1022 btVectorScale (b,d,n);
1023 btSolveL1T (L,b,n,nskip);
1024}
1025
1026
1027
1028//***************************************************************************
1029
1030// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
1031// A is nskip. this only references and swaps the lower triangle.
1032// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
1033// rows will be swapped by exchanging row pointers. otherwise the data will
1034// be copied.
1035
1036static void btSwapRowsAndCols (BTATYPE A, int n, int i1, int i2, int nskip,
1037 int do_fast_row_swaps)
1038{
1039 btAssert (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
1040 nskip >= n && i1 < i2);
1041
1042# ifdef BTROWPTRS
1043 btScalar *A_i1 = A[i1];
1044 btScalar *A_i2 = A[i2];
1045 for (int i=i1+1; i<i2; ++i) {
1046 btScalar *A_i_i1 = A[i] + i1;
1047 A_i1[i] = *A_i_i1;
1048 *A_i_i1 = A_i2[i];
1049 }
1050 A_i1[i2] = A_i1[i1];
1051 A_i1[i1] = A_i2[i1];
1052 A_i2[i1] = A_i2[i2];
1053 // swap rows, by swapping row pointers
1054 if (do_fast_row_swaps) {
1055 A[i1] = A_i2;
1056 A[i2] = A_i1;
1057 }
1058 else {
1059 // Only swap till i2 column to match A plain storage variant.
1060 for (int k = 0; k <= i2; ++k) {
1061 btScalar tmp = A_i1[k];
1062 A_i1[k] = A_i2[k];
1063 A_i2[k] = tmp;
1064 }
1065 }
1066 // swap columns the hard way
1067 for (int j=i2+1; j<n; ++j) {
1068 btScalar *A_j = A[j];
1069 btScalar tmp = A_j[i1];
1070 A_j[i1] = A_j[i2];
1071 A_j[i2] = tmp;
1072 }
1073# else
1074 btScalar *A_i1 = A+i1*nskip;
1075 btScalar *A_i2 = A+i2*nskip;
1076 for (int k = 0; k < i1; ++k) {
1077 btScalar tmp = A_i1[k];
1078 A_i1[k] = A_i2[k];
1079 A_i2[k] = tmp;
1080 }
1081 btScalar *A_i = A_i1 + nskip;
1082 for (int i=i1+1; i<i2; A_i+=nskip, ++i) {
1083 btScalar tmp = A_i2[i];
1084 A_i2[i] = A_i[i1];
1085 A_i[i1] = tmp;
1086 }
1087 {
1088 btScalar tmp = A_i1[i1];
1089 A_i1[i1] = A_i2[i2];
1090 A_i2[i2] = tmp;
1091 }
1092 btScalar *A_j = A_i2 + nskip;
1093 for (int j=i2+1; j<n; A_j+=nskip, ++j) {
1094 btScalar tmp = A_j[i1];
1095 A_j[i1] = A_j[i2];
1096 A_j[i2] = tmp;
1097 }
1098# endif
1099}
1100
1101
1102// swap two indexes in the n*n LCP problem. i1 must be <= i2.
1103
1104static void btSwapProblem (BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo,
1105 btScalar *hi, int *p, bool *state, int *findex,
1106 int n, int i1, int i2, int nskip,
1107 int do_fast_row_swaps)
1108{
1109 btScalar tmpr;
1110 int tmpi;
1111 bool tmpb;
1112 btAssert (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
1113 if (i1==i2) return;
1114
1115 btSwapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
1116
1117 tmpr = x[i1];
1118 x[i1] = x[i2];
1119 x[i2] = tmpr;
1120
1121 tmpr = b[i1];
1122 b[i1] = b[i2];
1123 b[i2] = tmpr;
1124
1125 tmpr = w[i1];
1126 w[i1] = w[i2];
1127 w[i2] = tmpr;
1128
1129 tmpr = lo[i1];
1130 lo[i1] = lo[i2];
1131 lo[i2] = tmpr;
1132
1133 tmpr = hi[i1];
1134 hi[i1] = hi[i2];
1135 hi[i2] = tmpr;
1136
1137 tmpi = p[i1];
1138 p[i1] = p[i2];
1139 p[i2] = tmpi;
1140
1141 tmpb = state[i1];
1142 state[i1] = state[i2];
1143 state[i2] = tmpb;
1144
1145 if (findex) {
1146 tmpi = findex[i1];
1147 findex[i1] = findex[i2];
1148 findex[i2] = tmpi;
1149 }
1150}
1151
1152
1153
1154
1155//***************************************************************************
1156// btLCP manipulator object. this represents an n*n LCP problem.
1157//
1158// two index sets C and N are kept. each set holds a subset of
1159// the variable indexes 0..n-1. an index can only be in one set.
1160// initially both sets are empty.
1161//
1162// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
1163
1164//***************************************************************************
1165// fast implementation of btLCP. see the above definition of btLCP for
1166// interface comments.
1167//
1168// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
1169// permuted as the other vectors/matrices are permuted.
1170//
1171// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
1172// contiguous indexes. the don't-care indexes follow N.
1173//
1174// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
1175// added or removed from the set C the factorization is updated.
1176// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
1177// the leading dimension of the matrix L is always `nskip'.
1178//
1179// at the start there may be other indexes that are unbounded but are not
1180// included in `nub'. btLCP will permute the matrix so that absolutely all
1181// unbounded vectors are at the start. thus there may be some initial
1182// permutation.
1183//
1184// the algorithms here assume certain patterns, particularly with respect to
1185// index transfer.
1186
1187#ifdef btLCP_FAST
1188
1189struct btLCP
1190{
1191 const int m_n;
1192 const int m_nskip;
1194 int m_nC, m_nN; // size of each index set
1195 BTATYPE const m_A; // A rows
1196 btScalar *const m_x, * const m_b, *const m_w, *const m_lo,* const m_hi; // permuted LCP problem data
1197 btScalar *const m_L, *const m_d; // L*D*L' factorization of set C
1198 btScalar *const m_Dell, *const m_ell, *const m_tmp;
1199 bool *const m_state;
1200 int *const m_findex, *const m_p, *const m_C;
1201
1202 btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1203 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1204 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1205 bool *_state, int *_findex, int *p, int *c, btScalar **Arows);
1206 int getNub() const { return m_nub; }
1207 void transfer_i_to_C (int i);
1208 void transfer_i_to_N (int i) { m_nN++; } // because we can assume C and N span 1:i-1
1209 void transfer_i_from_N_to_C (int i);
1211 int numC() const { return m_nC; }
1212 int numN() const { return m_nN; }
1213 int indexC (int i) const { return i; }
1214 int indexN (int i) const { return i+m_nC; }
1215 btScalar Aii (int i) const { return BTAROW(i)[i]; }
1216 btScalar AiC_times_qC (int i, btScalar *q) const { return btLargeDot (BTAROW(i), q, m_nC); }
1217 btScalar AiN_times_qN (int i, btScalar *q) const { return btLargeDot (BTAROW(i)+m_nC, q+m_nC, m_nN); }
1219 void pN_plusequals_ANi (btScalar *p, int i, int sign=1);
1222 void solve1 (btScalar *a, int i, int dir=1, int only_transfer=0);
1223 void unpermute();
1224};
1225
1226
1227btLCP::btLCP (int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w,
1228 btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d,
1229 btScalar *_Dell, btScalar *_ell, btScalar *_tmp,
1230 bool *_state, int *_findex, int *p, int *c, btScalar **Arows):
1231 m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
1232# ifdef BTROWPTRS
1233 m_A(Arows),
1234#else
1235 m_A(_Adata),
1236#endif
1237 m_x(_x), m_b(_b), m_w(_w), m_lo(_lo), m_hi(_hi),
1238 m_L(l), m_d(_d), m_Dell(_Dell), m_ell(_ell), m_tmp(_tmp),
1239 m_state(_state), m_findex(_findex), m_p(p), m_C(c)
1240{
1241 {
1242 btSetZero (m_x,m_n);
1243 }
1244
1245 {
1246# ifdef BTROWPTRS
1247 // make matrix row pointers
1248 btScalar *aptr = _Adata;
1249 BTATYPE A = m_A;
1250 const int n = m_n, nskip = m_nskip;
1251 for (int k=0; k<n; aptr+=nskip, ++k) A[k] = aptr;
1252# endif
1253 }
1254
1255 {
1256 int *p = m_p;
1257 const int n = m_n;
1258 for (int k=0; k<n; ++k) p[k]=k; // initially unpermuted
1259 }
1260
1261 /*
1262 // for testing, we can do some random swaps in the area i > nub
1263 {
1264 const int n = m_n;
1265 const int nub = m_nub;
1266 if (nub < n) {
1267 for (int k=0; k<100; k++) {
1268 int i1,i2;
1269 do {
1270 i1 = dRandInt(n-nub)+nub;
1271 i2 = dRandInt(n-nub)+nub;
1272 }
1273 while (i1 > i2);
1274 //printf ("--> %d %d\n",i1,i2);
1275 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,m_findex,n,i1,i2,m_nskip,0);
1276 }
1277 }
1278 */
1279
1280 // permute the problem so that *all* the unbounded variables are at the
1281 // start, i.e. look for unbounded variables not included in `nub'. we can
1282 // potentially push up `nub' this way and get a bigger initial factorization.
1283 // note that when we swap rows/cols here we must not just swap row pointers,
1284 // as the initial factorization relies on the data being all in one chunk.
1285 // variables that have findex >= 0 are *not* considered to be unbounded even
1286 // if lo=-inf and hi=inf - this is because these limits may change during the
1287 // solution process.
1288
1289 {
1290 int *findex = m_findex;
1291 btScalar *lo = m_lo, *hi = m_hi;
1292 const int n = m_n;
1293 for (int k = m_nub; k<n; ++k) {
1294 if (findex && findex[k] >= 0) continue;
1295 if (lo[k]==-BT_INFINITY && hi[k]==BT_INFINITY) {
1296 btSwapProblem (m_A,m_x,m_b,m_w,lo,hi,m_p,m_state,findex,n,m_nub,k,m_nskip,0);
1297 m_nub++;
1298 }
1299 }
1300 }
1301
1302 // if there are unbounded variables at the start, factorize A up to that
1303 // point and solve for x. this puts all indexes 0..nub-1 into C.
1304 if (m_nub > 0) {
1305 const int nub = m_nub;
1306 {
1307 btScalar *Lrow = m_L;
1308 const int nskip = m_nskip;
1309 for (int j=0; j<nub; Lrow+=nskip, ++j) memcpy(Lrow,BTAROW(j),(j+1)*sizeof(btScalar));
1310 }
1312 memcpy (m_x,m_b,nub*sizeof(btScalar));
1314 btSetZero (m_w,nub);
1315 {
1316 int *C = m_C;
1317 for (int k=0; k<nub; ++k) C[k] = k;
1318 }
1319 m_nC = nub;
1320 }
1321
1322 // permute the indexes > nub such that all findex variables are at the end
1323 if (m_findex) {
1324 const int nub = m_nub;
1325 int *findex = m_findex;
1326 int num_at_end = 0;
1327 for (int k=m_n-1; k >= nub; k--) {
1328 if (findex[k] >= 0) {
1329 btSwapProblem (m_A,m_x,m_b,m_w,m_lo,m_hi,m_p,m_state,findex,m_n,k,m_n-1-num_at_end,m_nskip,1);
1330 num_at_end++;
1331 }
1332 }
1333 }
1334
1335 // print info about indexes
1336 /*
1337 {
1338 const int n = m_n;
1339 const int nub = m_nub;
1340 for (int k=0; k<n; k++) {
1341 if (k<nub) printf ("C");
1342 else if (m_lo[k]==-BT_INFINITY && m_hi[k]==BT_INFINITY) printf ("c");
1343 else printf (".");
1344 }
1345 printf ("\n");
1346 }
1347 */
1348}
1349
1350
1352{
1353 {
1354 if (m_nC > 0) {
1355 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
1356 {
1357 const int nC = m_nC;
1358 btScalar *const Ltgt = m_L + nC*m_nskip, *ell = m_ell;
1359 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j];
1360 }
1361 const int nC = m_nC;
1362 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
1363 }
1364 else {
1365 m_d[0] = btRecip (BTAROW(i)[i]);
1366 }
1367
1369
1370 const int nC = m_nC;
1371 m_C[nC] = nC;
1372 m_nC = nC + 1; // nC value is outdated after this line
1373 }
1374
1375}
1376
1377
1379{
1380 {
1381 if (m_nC > 0) {
1382 {
1383 btScalar *const aptr = BTAROW(i);
1384 btScalar *Dell = m_Dell;
1385 const int *C = m_C;
1386# ifdef BTNUB_OPTIMIZATIONS
1387 // if nub>0, initial part of aptr unpermuted
1388 const int nub = m_nub;
1389 int j=0;
1390 for ( ; j<nub; ++j) Dell[j] = aptr[j];
1391 const int nC = m_nC;
1392 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
1393# else
1394 const int nC = m_nC;
1395 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
1396# endif
1397 }
1399 {
1400 const int nC = m_nC;
1401 btScalar *const Ltgt = m_L + nC*m_nskip;
1402 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1403 for (int j=0; j<nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
1404 }
1405 const int nC = m_nC;
1406 m_d[nC] = btRecip (BTAROW(i)[i] - btLargeDot(m_ell,m_Dell,nC));
1407 }
1408 else {
1409 m_d[0] = btRecip (BTAROW(i)[i]);
1410 }
1411
1413
1414 const int nC = m_nC;
1415 m_C[nC] = nC;
1416 m_nN--;
1417 m_nC = nC + 1; // nC value is outdated after this line
1418 }
1419
1420 // @@@ TO DO LATER
1421 // if we just finish here then we'll go back and re-solve for
1422 // delta_x. but actually we can be more efficient and incrementally
1423 // update delta_x here. but if we do this, we wont have ell and Dell
1424 // to use in updating the factorization later.
1425
1426}
1427
1428void btRemoveRowCol (btScalar *A, int n, int nskip, int r)
1429{
1430 btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
1431 if (r >= n-1) return;
1432 if (r > 0) {
1433 {
1434 const size_t move_size = (n-r-1)*sizeof(btScalar);
1435 btScalar *Adst = A + r;
1436 for (int i=0; i<r; Adst+=nskip,++i) {
1437 btScalar *Asrc = Adst + 1;
1438 memmove (Adst,Asrc,move_size);
1439 }
1440 }
1441 {
1442 const size_t cpy_size = r*sizeof(btScalar);
1443 btScalar *Adst = A + r * nskip;
1444 for (int i=r; i<(n-1); ++i) {
1445 btScalar *Asrc = Adst + nskip;
1446 memcpy (Adst,Asrc,cpy_size);
1447 Adst = Asrc;
1448 }
1449 }
1450 }
1451 {
1452 const size_t cpy_size = (n-r-1)*sizeof(btScalar);
1453 btScalar *Adst = A + r * (nskip + 1);
1454 for (int i=r; i<(n-1); ++i) {
1455 btScalar *Asrc = Adst + (nskip + 1);
1456 memcpy (Adst,Asrc,cpy_size);
1457 Adst = Asrc - 1;
1458 }
1459 }
1460}
1461
1462
1463
1464
1465void btLDLTAddTL (btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray<btScalar>& scratch)
1466{
1467 btAssert (L && d && a && n > 0 && nskip >= n);
1468
1469 if (n < 2) return;
1470 scratch.resize(2*nskip);
1471 btScalar *W1 = &scratch[0];
1472
1473 btScalar *W2 = W1 + nskip;
1474
1475 W1[0] = btScalar(0.0);
1476 W2[0] = btScalar(0.0);
1477 for (int j=1; j<n; ++j) {
1478 W1[j] = W2[j] = (btScalar) (a[j] * SIMDSQRT12);
1479 }
1480 btScalar W11 = (btScalar) ((btScalar(0.5)*a[0]+1)*SIMDSQRT12);
1481 btScalar W21 = (btScalar) ((btScalar(0.5)*a[0]-1)*SIMDSQRT12);
1482
1483 btScalar alpha1 = btScalar(1.0);
1484 btScalar alpha2 = btScalar(1.0);
1485
1486 {
1487 btScalar dee = d[0];
1488 btScalar alphanew = alpha1 + (W11*W11)*dee;
1489 btAssert(alphanew != btScalar(0.0));
1490 dee /= alphanew;
1491 btScalar gamma1 = W11 * dee;
1492 dee *= alpha1;
1493 alpha1 = alphanew;
1494 alphanew = alpha2 - (W21*W21)*dee;
1495 dee /= alphanew;
1496 //btScalar gamma2 = W21 * dee;
1497 alpha2 = alphanew;
1498 btScalar k1 = btScalar(1.0) - W21*gamma1;
1499 btScalar k2 = W21*gamma1*W11 - W21;
1500 btScalar *ll = L + nskip;
1501 for (int p=1; p<n; ll+=nskip, ++p) {
1502 btScalar Wp = W1[p];
1503 btScalar ell = *ll;
1504 W1[p] = Wp - W11*ell;
1505 W2[p] = k1*Wp + k2*ell;
1506 }
1507 }
1508
1509 btScalar *ll = L + (nskip + 1);
1510 for (int j=1; j<n; ll+=nskip+1, ++j) {
1511 btScalar k1 = W1[j];
1512 btScalar k2 = W2[j];
1513
1514 btScalar dee = d[j];
1515 btScalar alphanew = alpha1 + (k1*k1)*dee;
1516 btAssert(alphanew != btScalar(0.0));
1517 dee /= alphanew;
1518 btScalar gamma1 = k1 * dee;
1519 dee *= alpha1;
1520 alpha1 = alphanew;
1521 alphanew = alpha2 - (k2*k2)*dee;
1522 dee /= alphanew;
1523 btScalar gamma2 = k2 * dee;
1524 dee *= alpha2;
1525 d[j] = dee;
1526 alpha2 = alphanew;
1527
1528 btScalar *l = ll + nskip;
1529 for (int p=j+1; p<n; l+=nskip, ++p) {
1530 btScalar ell = *l;
1531 btScalar Wp = W1[p] - k1 * ell;
1532 ell += gamma1 * Wp;
1533 W1[p] = Wp;
1534 Wp = W2[p] - k2 * ell;
1535 ell -= gamma2 * Wp;
1536 W2[p] = Wp;
1537 *l = ell;
1538 }
1539 }
1540}
1541
1542
1543#define _BTGETA(i,j) (A[i][j])
1544//#define _GETA(i,j) (A[(i)*nskip+(j)])
1545#define BTGETA(i,j) ((i > j) ? _BTGETA(i,j) : _BTGETA(j,i))
1546
1547inline size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
1548{
1549 return nskip * 2 * sizeof(btScalar);
1550}
1551
1552
1553void btLDLTRemove (btScalar **A, const int *p, btScalar *L, btScalar *d,
1554 int n1, int n2, int r, int nskip, btAlignedObjectArray<btScalar>& scratch)
1555{
1556 btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
1557 n1 >= n2 && nskip >= n1);
1558 #ifdef BT_DEBUG
1559 for (int i=0; i<n2; ++i)
1560 btAssert(p[i] >= 0 && p[i] < n1);
1561 #endif
1562
1563 if (r==n2-1) {
1564 return; // deleting last row/col is easy
1565 }
1566 else {
1567 size_t LDLTAddTL_size = btEstimateLDLTAddTLTmpbufSize(nskip);
1568 btAssert(LDLTAddTL_size % sizeof(btScalar) == 0);
1569 scratch.resize(nskip * 2+n2);
1570 btScalar *tmp = &scratch[0];
1571 if (r==0) {
1572 btScalar *a = (btScalar *)((char *)tmp + LDLTAddTL_size);
1573 const int p_0 = p[0];
1574 for (int i=0; i<n2; ++i) {
1575 a[i] = -BTGETA(p[i],p_0);
1576 }
1577 a[0] += btScalar(1.0);
1578 btLDLTAddTL (L,d,a,n2,nskip,scratch);
1579 }
1580 else {
1581 btScalar *t = (btScalar *)((char *)tmp + LDLTAddTL_size);
1582 {
1583 btScalar *Lcurr = L + r*nskip;
1584 for (int i=0; i<r; ++Lcurr, ++i) {
1585 btAssert(d[i] != btScalar(0.0));
1586 t[i] = *Lcurr / d[i];
1587 }
1588 }
1589 btScalar *a = t + r;
1590 {
1591 btScalar *Lcurr = L + r*nskip;
1592 const int *pp_r = p + r, p_r = *pp_r;
1593 const int n2_minus_r = n2-r;
1594 for (int i=0; i<n2_minus_r; Lcurr+=nskip,++i) {
1595 a[i] = btLargeDot(Lcurr,t,r) - BTGETA(pp_r[i],p_r);
1596 }
1597 }
1598 a[0] += btScalar(1.0);
1599 btLDLTAddTL (L + r*nskip+r, d+r, a, n2-r, nskip, scratch);
1600 }
1601 }
1602
1603 // snip out row/column r from L and d
1604 btRemoveRowCol (L,n2,nskip,r);
1605 if (r < (n2-1)) memmove (d+r,d+r+1,(n2-r-1)*sizeof(btScalar));
1606}
1607
1608
1610{
1611 {
1612 int *C = m_C;
1613 // remove a row/column from the factorization, and adjust the
1614 // indexes (black magic!)
1615 int last_idx = -1;
1616 const int nC = m_nC;
1617 int j = 0;
1618 for ( ; j<nC; ++j) {
1619 if (C[j]==nC-1) {
1620 last_idx = j;
1621 }
1622 if (C[j]==i) {
1623 btLDLTRemove (m_A,C,m_L,m_d,m_n,nC,j,m_nskip,scratch);
1624 int k;
1625 if (last_idx == -1) {
1626 for (k=j+1 ; k<nC; ++k) {
1627 if (C[k]==nC-1) {
1628 break;
1629 }
1630 }
1631 btAssert (k < nC);
1632 }
1633 else {
1634 k = last_idx;
1635 }
1636 C[k] = C[j];
1637 if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
1638 break;
1639 }
1640 }
1641 btAssert (j < nC);
1642
1644
1645 m_nN++;
1646 m_nC = nC - 1; // nC value is outdated after this line
1647 }
1648
1649}
1650
1651
1653{
1654 // we could try to make this matrix-vector multiplication faster using
1655 // outer product matrix tricks, e.g. with the dMultidotX() functions.
1656 // but i tried it and it actually made things slower on random 100x100
1657 // problems because of the overhead involved. so we'll stick with the
1658 // simple method for now.
1659 const int nC = m_nC;
1660 btScalar *ptgt = p + nC;
1661 const int nN = m_nN;
1662 for (int i=0; i<nN; ++i) {
1663 ptgt[i] = btLargeDot (BTAROW(i+nC),q,nC);
1664 }
1665}
1666
1667
1668void btLCP::pN_plusequals_ANi (btScalar *p, int i, int sign)
1669{
1670 const int nC = m_nC;
1671 btScalar *aptr = BTAROW(i) + nC;
1672 btScalar *ptgt = p + nC;
1673 if (sign > 0) {
1674 const int nN = m_nN;
1675 for (int j=0; j<nN; ++j) ptgt[j] += aptr[j];
1676 }
1677 else {
1678 const int nN = m_nN;
1679 for (int j=0; j<nN; ++j) ptgt[j] -= aptr[j];
1680 }
1681}
1682
1684{
1685 const int nC = m_nC;
1686 for (int i=0; i<nC; ++i) {
1687 p[i] += s*q[i];
1688 }
1689}
1690
1692{
1693 const int nC = m_nC;
1694 btScalar *ptgt = p + nC, *qsrc = q + nC;
1695 const int nN = m_nN;
1696 for (int i=0; i<nN; ++i) {
1697 ptgt[i] += s*qsrc[i];
1698 }
1699}
1700
1701void btLCP::solve1 (btScalar *a, int i, int dir, int only_transfer)
1702{
1703 // the `Dell' and `ell' that are computed here are saved. if index i is
1704 // later added to the factorization then they can be reused.
1705 //
1706 // @@@ question: do we need to solve for entire delta_x??? yes, but
1707 // only if an x goes below 0 during the step.
1708
1709 if (m_nC > 0) {
1710 {
1711 btScalar *Dell = m_Dell;
1712 int *C = m_C;
1713 btScalar *aptr = BTAROW(i);
1714# ifdef BTNUB_OPTIMIZATIONS
1715 // if nub>0, initial part of aptr[] is guaranteed unpermuted
1716 const int nub = m_nub;
1717 int j=0;
1718 for ( ; j<nub; ++j) Dell[j] = aptr[j];
1719 const int nC = m_nC;
1720 for ( ; j<nC; ++j) Dell[j] = aptr[C[j]];
1721# else
1722 const int nC = m_nC;
1723 for (int j=0; j<nC; ++j) Dell[j] = aptr[C[j]];
1724# endif
1725 }
1727 {
1728 btScalar *ell = m_ell, *Dell = m_Dell, *d = m_d;
1729 const int nC = m_nC;
1730 for (int j=0; j<nC; ++j) ell[j] = Dell[j] * d[j];
1731 }
1732
1733 if (!only_transfer) {
1734 btScalar *tmp = m_tmp, *ell = m_ell;
1735 {
1736 const int nC = m_nC;
1737 for (int j=0; j<nC; ++j) tmp[j] = ell[j];
1738 }
1739 btSolveL1T (m_L,tmp,m_nC,m_nskip);
1740 if (dir > 0) {
1741 int *C = m_C;
1742 btScalar *tmp = m_tmp;
1743 const int nC = m_nC;
1744 for (int j=0; j<nC; ++j) a[C[j]] = -tmp[j];
1745 } else {
1746 int *C = m_C;
1747 btScalar *tmp = m_tmp;
1748 const int nC = m_nC;
1749 for (int j=0; j<nC; ++j) a[C[j]] = tmp[j];
1750 }
1751 }
1752 }
1753}
1754
1755
1757{
1758 // now we have to un-permute x and w
1759 {
1760 memcpy (m_tmp,m_x,m_n*sizeof(btScalar));
1761 btScalar *x = m_x, *tmp = m_tmp;
1762 const int *p = m_p;
1763 const int n = m_n;
1764 for (int j=0; j<n; ++j) x[p[j]] = tmp[j];
1765 }
1766 {
1767 memcpy (m_tmp,m_w,m_n*sizeof(btScalar));
1768 btScalar *w = m_w, *tmp = m_tmp;
1769 const int *p = m_p;
1770 const int n = m_n;
1771 for (int j=0; j<n; ++j) w[p[j]] = tmp[j];
1772 }
1773}
1774
1775#endif // btLCP_FAST
1776
1777
1778//***************************************************************************
1779// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1780
1782 btScalar* outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory& scratchMem)
1783{
1784 s_error = false;
1785
1786// printf("btSolveDantzigLCP n=%d\n",n);
1787 btAssert (n>0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
1788 btAssert(outer_w);
1789
1790#ifdef BT_DEBUG
1791 {
1792 // check restrictions on lo and hi
1793 for (int k=0; k<n; ++k)
1794 btAssert (lo[k] <= 0 && hi[k] >= 0);
1795 }
1796# endif
1797
1798
1799 // if all the variables are unbounded then we can just factor, solve,
1800 // and return
1801 if (nub >= n)
1802 {
1803
1804
1805 int nskip = (n);
1806 btFactorLDLT (A, outer_w, n, nskip);
1807 btSolveLDLT (A, outer_w, b, n, nskip);
1808 memcpy (x, b, n*sizeof(btScalar));
1809
1810 return !s_error;
1811 }
1812
1813 const int nskip = (n);
1814 scratchMem.L.resize(n*nskip);
1815
1816 scratchMem.d.resize(n);
1817
1818 btScalar *w = outer_w;
1819 scratchMem.delta_w.resize(n);
1820 scratchMem.delta_x.resize(n);
1821 scratchMem.Dell.resize(n);
1822 scratchMem.ell.resize(n);
1823 scratchMem.Arows.resize(n);
1824 scratchMem.p.resize(n);
1825 scratchMem.C.resize(n);
1826
1827 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1828 scratchMem.state.resize(n);
1829
1830
1831 // create LCP object. note that tmp is set to delta_w to save space, this
1832 // optimization relies on knowledge of how tmp is used, so be careful!
1833 btLCP lcp(n,nskip,nub,A,x,b,w,lo,hi,&scratchMem.L[0],&scratchMem.d[0],&scratchMem.Dell[0],&scratchMem.ell[0],&scratchMem.delta_w[0],&scratchMem.state[0],findex,&scratchMem.p[0],&scratchMem.C[0],&scratchMem.Arows[0]);
1834 int adj_nub = lcp.getNub();
1835
1836 // loop over all indexes adj_nub..n-1. for index i, if x(i),w(i) satisfy the
1837 // LCP conditions then i is added to the appropriate index set. otherwise
1838 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1839 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1840 // while driving x(i) we maintain the LCP conditions on the other variables
1841 // 0..i-1. we do this by watching out for other x(i),w(i) values going
1842 // outside the valid region, and then switching them between index sets
1843 // when that happens.
1844
1845 bool hit_first_friction_index = false;
1846 for (int i=adj_nub; i<n; ++i)
1847 {
1848 s_error = false;
1849 // the index i is the driving index and indexes i+1..n-1 are "dont care",
1850 // i.e. when we make changes to the system those x's will be zero and we
1851 // don't care what happens to those w's. in other words, we only consider
1852 // an (i+1)*(i+1) sub-problem of A*x=b+w.
1853
1854 // if we've hit the first friction index, we have to compute the lo and
1855 // hi values based on the values of x already computed. we have been
1856 // permuting the indexes, so the values stored in the findex vector are
1857 // no longer valid. thus we have to temporarily unpermute the x vector.
1858 // for the purposes of this computation, 0*infinity = 0 ... so if the
1859 // contact constraint's normal force is 0, there should be no tangential
1860 // force applied.
1861
1862 if (!hit_first_friction_index && findex && findex[i] >= 0) {
1863 // un-permute x into delta_w, which is not being used at the moment
1864 for (int j=0; j<n; ++j) scratchMem.delta_w[scratchMem.p[j]] = x[j];
1865
1866 // set lo and hi values
1867 for (int k=i; k<n; ++k) {
1868 btScalar wfk = scratchMem.delta_w[findex[k]];
1869 if (wfk == 0) {
1870 hi[k] = 0;
1871 lo[k] = 0;
1872 }
1873 else {
1874 hi[k] = btFabs (hi[k] * wfk);
1875 lo[k] = -hi[k];
1876 }
1877 }
1878 hit_first_friction_index = true;
1879 }
1880
1881 // thus far we have not even been computing the w values for indexes
1882 // greater than i, so compute w[i] now.
1883 w[i] = lcp.AiC_times_qC (i,x) + lcp.AiN_times_qN (i,x) - b[i];
1884
1885 // if lo=hi=0 (which can happen for tangential friction when normals are
1886 // 0) then the index will be assigned to set N with some state. however,
1887 // set C's line has zero size, so the index will always remain in set N.
1888 // with the "normal" switching logic, if w changed sign then the index
1889 // would have to switch to set C and then back to set N with an inverted
1890 // state. this is pointless, and also computationally expensive. to
1891 // prevent this from happening, we use the rule that indexes with lo=hi=0
1892 // will never be checked for set changes. this means that the state for
1893 // these indexes may be incorrect, but that doesn't matter.
1894
1895 // see if x(i),w(i) is in a valid region
1896 if (lo[i]==0 && w[i] >= 0) {
1897 lcp.transfer_i_to_N (i);
1898 scratchMem.state[i] = false;
1899 }
1900 else if (hi[i]==0 && w[i] <= 0) {
1901 lcp.transfer_i_to_N (i);
1902 scratchMem.state[i] = true;
1903 }
1904 else if (w[i]==0) {
1905 // this is a degenerate case. by the time we get to this test we know
1906 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1907 // and similarly that hi > 0. this means that the line segment
1908 // corresponding to set C is at least finite in extent, and we are on it.
1909 // NOTE: we must call lcp.solve1() before lcp.transfer_i_to_C()
1910 lcp.solve1 (&scratchMem.delta_x[0],i,0,1);
1911
1912 lcp.transfer_i_to_C (i);
1913 }
1914 else {
1915 // we must push x(i) and w(i)
1916 for (;;) {
1917 int dir;
1918 btScalar dirf;
1919 // find direction to push on x(i)
1920 if (w[i] <= 0) {
1921 dir = 1;
1922 dirf = btScalar(1.0);
1923 }
1924 else {
1925 dir = -1;
1926 dirf = btScalar(-1.0);
1927 }
1928
1929 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1930 lcp.solve1 (&scratchMem.delta_x[0],i,dir);
1931
1932 // note that delta_x[i] = dirf, but we wont bother to set it
1933
1934 // compute: delta_w = A*delta_x ... note we only care about
1935 // delta_w(N) and delta_w(i), the rest is ignored
1936 lcp.pN_equals_ANC_times_qC (&scratchMem.delta_w[0],&scratchMem.delta_x[0]);
1937 lcp.pN_plusequals_ANi (&scratchMem.delta_w[0],i,dir);
1938 scratchMem.delta_w[i] = lcp.AiC_times_qC (i,&scratchMem.delta_x[0]) + lcp.Aii(i)*dirf;
1939
1940 // find largest step we can take (size=s), either to drive x(i),w(i)
1941 // to the valid LCP region or to drive an already-valid variable
1942 // outside the valid region.
1943
1944 int cmd = 1; // index switching command
1945 int si = 0; // si = index to switch if cmd>3
1946 btScalar s = -w[i]/scratchMem.delta_w[i];
1947 if (dir > 0) {
1948 if (hi[i] < BT_INFINITY) {
1949 btScalar s2 = (hi[i]-x[i])*dirf; // was (hi[i]-x[i])/dirf // step to x(i)=hi(i)
1950 if (s2 < s) {
1951 s = s2;
1952 cmd = 3;
1953 }
1954 }
1955 }
1956 else {
1957 if (lo[i] > -BT_INFINITY) {
1958 btScalar s2 = (lo[i]-x[i])*dirf; // was (lo[i]-x[i])/dirf // step to x(i)=lo(i)
1959 if (s2 < s) {
1960 s = s2;
1961 cmd = 2;
1962 }
1963 }
1964 }
1965
1966 {
1967 const int numN = lcp.numN();
1968 for (int k=0; k < numN; ++k) {
1969 const int indexN_k = lcp.indexN(k);
1970 if (!scratchMem.state[indexN_k] ? scratchMem.delta_w[indexN_k] < 0 : scratchMem.delta_w[indexN_k] > 0) {
1971 // don't bother checking if lo=hi=0
1972 if (lo[indexN_k] == 0 && hi[indexN_k] == 0) continue;
1973 btScalar s2 = -w[indexN_k] / scratchMem.delta_w[indexN_k];
1974 if (s2 < s) {
1975 s = s2;
1976 cmd = 4;
1977 si = indexN_k;
1978 }
1979 }
1980 }
1981 }
1982
1983 {
1984 const int numC = lcp.numC();
1985 for (int k=adj_nub; k < numC; ++k) {
1986 const int indexC_k = lcp.indexC(k);
1987 if (scratchMem.delta_x[indexC_k] < 0 && lo[indexC_k] > -BT_INFINITY) {
1988 btScalar s2 = (lo[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
1989 if (s2 < s) {
1990 s = s2;
1991 cmd = 5;
1992 si = indexC_k;
1993 }
1994 }
1995 if (scratchMem.delta_x[indexC_k] > 0 && hi[indexC_k] < BT_INFINITY) {
1996 btScalar s2 = (hi[indexC_k]-x[indexC_k]) / scratchMem.delta_x[indexC_k];
1997 if (s2 < s) {
1998 s = s2;
1999 cmd = 6;
2000 si = indexC_k;
2001 }
2002 }
2003 }
2004 }
2005
2006 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
2007 // "C->NL","C->NH"};
2008 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
2009
2010 // if s <= 0 then we've got a problem. if we just keep going then
2011 // we're going to get stuck in an infinite loop. instead, just cross
2012 // our fingers and exit with the current solution.
2013 if (s <= btScalar(0.0))
2014 {
2015// printf("LCP internal error, s <= 0 (s=%.4e)",(double)s);
2016 if (i < n) {
2017 btSetZero (x+i,n-i);
2018 btSetZero (w+i,n-i);
2019 }
2020 s_error = true;
2021 break;
2022 }
2023
2024 // apply x = x + s * delta_x
2025 lcp.pC_plusequals_s_times_qC (x, s, &scratchMem.delta_x[0]);
2026 x[i] += s * dirf;
2027
2028 // apply w = w + s * delta_w
2029 lcp.pN_plusequals_s_times_qN (w, s, &scratchMem.delta_w[0]);
2030 w[i] += s * scratchMem.delta_w[i];
2031
2032// void *tmpbuf;
2033 // switch indexes between sets if necessary
2034 switch (cmd) {
2035 case 1: // done
2036 w[i] = 0;
2037 lcp.transfer_i_to_C (i);
2038 break;
2039 case 2: // done
2040 x[i] = lo[i];
2041 scratchMem.state[i] = false;
2042 lcp.transfer_i_to_N (i);
2043 break;
2044 case 3: // done
2045 x[i] = hi[i];
2046 scratchMem.state[i] = true;
2047 lcp.transfer_i_to_N (i);
2048 break;
2049 case 4: // keep going
2050 w[si] = 0;
2051 lcp.transfer_i_from_N_to_C (si);
2052 break;
2053 case 5: // keep going
2054 x[si] = lo[si];
2055 scratchMem.state[si] = false;
2056 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
2057 break;
2058 case 6: // keep going
2059 x[si] = hi[si];
2060 scratchMem.state[si] = true;
2061 lcp.transfer_i_from_C_to_N (si, scratchMem.m_scratch);
2062 break;
2063 }
2064
2065 if (cmd <= 3) break;
2066 } // for (;;)
2067 } // else
2068
2069 if (s_error)
2070 {
2071 break;
2072 }
2073 } // for (int i=adj_nub; i<n; ++i)
2074
2075 lcp.unpermute();
2076
2077
2078 return !s_error;
2079}
2080
void btLDLTAddTL(btScalar *L, btScalar *d, const btScalar *a, int n, int nskip, btAlignedObjectArray< btScalar > &scratch)
static void btSolveL1_1(const btScalar *L, btScalar *B, int n, int lskip1)
void btLDLTRemove(btScalar **A, const int *p, btScalar *L, btScalar *d, int n1, int n2, int r, int nskip, btAlignedObjectArray< btScalar > &scratch)
#define BTAROW(i)
void btSolveL1(const btScalar *L, btScalar *B, int n, int lskip1)
void btSolveL1T(const btScalar *L, btScalar *B, int n, int lskip1)
static void btSwapRowsAndCols(BTATYPE A, int n, int i1, int i2, int nskip, int do_fast_row_swaps)
static void btSwapProblem(BTATYPE A, btScalar *x, btScalar *b, btScalar *w, btScalar *lo, btScalar *hi, int *p, bool *state, int *findex, int n, int i1, int i2, int nskip, int do_fast_row_swaps)
bool btSolveDantzigLCP(int n, btScalar *A, btScalar *x, btScalar *b, btScalar *outer_w, int nub, btScalar *lo, btScalar *hi, int *findex, btDantzigScratchMemory &scratchMem)
#define BTATYPE
void btSolveLDLT(const btScalar *L, const btScalar *d, btScalar *b, int n, int nskip)
bool s_error
size_t btEstimateLDLTAddTLTmpbufSize(int nskip)
void btRemoveRowCol(btScalar *A, int n, int nskip, int r)
void btFactorLDLT(btScalar *A, btScalar *d, int n, int nskip1)
void btVectorScale(btScalar *a, const btScalar *d, int n)
#define BTGETA(i, j)
#define BTROWPTRS
static void btSolveL1_2(const btScalar *L, btScalar *B, int n, int lskip1)
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:292
void btSetZero(T *a, int n)
Definition: btScalar.h:717
#define SIMDSQRT12
Definition: btScalar.h:509
btScalar btFabs(btScalar x)
Definition: btScalar.h:475
#define BT_INFINITY
Definition: btScalar.h:383
#define btRecip(x)
Definition: btScalar.h:511
btScalar btLargeDot(const btScalar *a, const btScalar *b, int n)
Definition: btScalar.h:728
#define btAssert(x)
Definition: btScalar.h:131
static T sum(const btAlignedObjectArray< T > &items)
void resize(int newsize, const T &fillData=T())
btAlignedObjectArray< int > C
Definition: btDantzigLCP.h:67
btAlignedObjectArray< btScalar > delta_w
Definition: btDantzigLCP.h:61
btAlignedObjectArray< btScalar > delta_x
Definition: btDantzigLCP.h:62
btAlignedObjectArray< btScalar > ell
Definition: btDantzigLCP.h:64
btAlignedObjectArray< btScalar * > Arows
Definition: btDantzigLCP.h:65
btAlignedObjectArray< btScalar > Dell
Definition: btDantzigLCP.h:63
btAlignedObjectArray< btScalar > d
Definition: btDantzigLCP.h:60
btAlignedObjectArray< btScalar > L
Definition: btDantzigLCP.h:59
btAlignedObjectArray< bool > state
Definition: btDantzigLCP.h:68
btAlignedObjectArray< btScalar > m_scratch
Definition: btDantzigLCP.h:58
btAlignedObjectArray< int > p
Definition: btDantzigLCP.h:66
btScalar Aii(int i) const
int indexN(int i) const
bool *const m_state
int indexC(int i) const
void pN_equals_ANC_times_qC(btScalar *p, btScalar *q)
BTATYPE const m_A
btScalar *const *const *const *const m_lo
btScalar *const m_L
btScalar AiN_times_qN(int i, btScalar *q) const
int getNub() const
btScalar *const *const m_ell
int *const m_findex
void transfer_i_to_N(int i)
void unpermute()
int *const *const *const m_C
btScalar *const m_Dell
void pN_plusequals_s_times_qN(btScalar *p, btScalar s, btScalar *q)
btScalar *const *const m_d
void transfer_i_from_N_to_C(int i)
btScalar *const *const *const *const *const m_hi
int *const *const m_p
void solve1(btScalar *a, int i, int dir=1, int only_transfer=0)
int numN() const
btScalar *const *const *const m_tmp
int numC() const
void pC_plusequals_s_times_qC(btScalar *p, btScalar s, btScalar *q)
void pN_plusequals_ANi(btScalar *p, int i, int sign=1)
void transfer_i_to_C(int i)
const int m_n
btScalar *const *const m_b
btScalar AiC_times_qC(int i, btScalar *q) const
btLCP(int _n, int _nskip, int _nub, btScalar *_Adata, btScalar *_x, btScalar *_b, btScalar *_w, btScalar *_lo, btScalar *_hi, btScalar *l, btScalar *_d, btScalar *_Dell, btScalar *_ell, btScalar *_tmp, bool *_state, int *_findex, int *p, int *c, btScalar **Arows)
const int m_nskip
btScalar *const m_x
btScalar *const *const *const m_w
void transfer_i_from_C_to_N(int i, btAlignedObjectArray< btScalar > &scratch)