source: trunk/Packages/bgrabitmap/bgrasse.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 12.9 KB
Line 
1unit BGRASSE;
2
3{$mode objfpc}{$H+}
4
5{$i bgrasse.inc}
6
7interface
8
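{ Notes for the hand-encoded i386 x87 instructions (the "db" byte sequences
  used in the implementation section).
  Parameter registers: eax, edx, ecx (float: eax ecx edx).
  First opcode byte, then the value added to the operand byte that follows it:
    flds   $d9
    fadds  $d8
    fstps  $d9 +$18
    fmuls  $d8 +$08
    fsubrs $d8 +$28
  Operand with an 8-bit offset: +$40, followed by the offset byte ($..). }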
16uses
17 BGRABitmapTypes {$ifdef CPUI386}, cpu, mmx{$endif};
18
// Compile-time switch: it is AND-ed into UseSSE in the initialization section,
// so setting it to false keeps the SSE code paths from being selected.
19const FLAG_ENABLED_SSE = true;
20
// Run-time capability flags, assigned once in the initialization section of this unit.
21var UseSSE, UseSSE2, UseSSE3 : boolean;
22
// Inline assembler in this unit is written in Intel syntax on both x86 targets.
23{$ifdef CPUI386}
24 {$asmmode intel}
25{$ENDIF}
26{$ifdef cpux86_64}
27 {$asmmode intel}
28{$ENDIF}
29
30{$ifdef BGRASSE_AVAILABLE}
31 //SSE rotate singles: immediates for shufps, two bits per destination lane
32 const Shift231 = 1 + 8; //selects source lanes 1,2,0,0 : (x,y,z,t) -> (y,z,x,x)
33 Shift312 = 2 + 16; //selects source lanes 2,0,1,0 : (x,y,z,t) -> (z,x,y,x)
34{$endif}
35
36type
// Four packed singles (16 bytes). x, y, z are the 3D coordinates; t is a fourth
// component that the arithmetic operators declared below reset to 0.
37 TPoint3D_128 = packed record x,y,z,t: single; end;
38 PPoint3D_128 = ^TPoint3D_128;
39
// Conversions: Point3D copies x, y, z; the Point3D_128 overloads set the
// components that are not supplied to 0.
40 function Point3D(const point3D_128: TPoint3D_128): TPoint3D; inline; overload;
41 function Point3D_128(const point3D: TPoint3D): TPoint3D_128; inline; overload;
42 function Point3D_128(const pointF: TPointF): TPoint3D_128; inline; overload;
43 function Point3D_128(x,y,z: single): TPoint3D_128; inline; overload;
44 function Point3D_128(x,y,z,t: single): TPoint3D_128; inline; overload;
// Normalizes v in place and returns in SqLen the squared length computed before scaling.
45 procedure Normalize3D_128_SqLen(var v: TPoint3D_128; out SqLen: single);
// Operators work on x, y, z; the results of *, + and - have t = 0.
// The = operator compares x, y and z only (t is ignored).
46 operator * (const v1: TPoint3D_128; const factor: single): TPoint3D_128;
47 operator + (constref v1,v2: TPoint3D_128): TPoint3D_128;
48 operator - (const v1,v2: TPoint3D_128): TPoint3D_128;
49 operator - (const v: TPoint3D_128): TPoint3D_128; inline;
50 operator = (const v1,v2: TPoint3D_128): boolean; inline;
// Sets v to zero.
51 procedure ClearPoint3D_128(out v: TPoint3D_128);
52 {$IFDEF BGRASSE_AVAILABLE}
// Same, using an aligned SSE store (movaps): v must be 16-byte aligned.
53 procedure ClearPoint3D_128_AlignedSSE(out v: TPoint3D_128);
54 {$ENDIF}
// True when x, y and z are all zero (t is not examined).
55 function IsPoint3D_128_Zero(const v: TPoint3D_128): boolean; inline;
56
// Procedural variables bound in the initialization section to either a
// scalar (_NoSSE) or an SSE implementation, depending on the detected CPU.
57var
// dest := dest + src. The SSE implementation reads and writes dest with movaps,
// so dest must then be 16-byte aligned.
58 Add3D_Aligned : procedure (var dest: TPoint3D_128; constref src: TPoint3D_128);
// Scales v in place to unit length; a zero vector is left unchanged.
59 Normalize3D_128 : procedure (var v: TPoint3D_128);
// w := cross product of u and v.
60 VectProduct3D_128 : procedure (const u,v: TPoint3D_128; out w: TPoint3D_128);
// Returns the dot product of v1 and v2.
61 DotProduct3D_128 : function (constref v1,v2: TPoint3D_128): single;
62
63const
// All-zero vector constant.
64 Point3D_128_Zero : TPoint3D_128 = (x:0; y:0; z:0; t:0);
65
66type
67
68 { TMemoryBlockAlign128 }
69
// Owns a heap block and exposes a pointer into it through Data. When
// BGRASSE_AVAILABLE is defined, the constructor rounds Data up to a multiple
// of 16 bytes; otherwise Data is the start of the block.
70 TMemoryBlockAlign128 = class
71 private
72 FContainer: Pointer; //pointer returned by getmem, released in Destroy
73 FData: Pointer; //pointer handed out to callers (inside FContainer)
74 public
75 constructor Create(size: integer); //size: number of usable bytes at Data
76 destructor Destroy; override;
77 property Data: pointer read FData;
78 end;
79
80 PBasicLightingContext = ^TBasicLightingContext;
// Packed 128-byte record; the number in braces before each field is its byte offset.
// The six TPoint3D_128 fields occupy offsets 0..95, so each one starts on a
// multiple of 16 relative to the start of the record.
81 TBasicLightingContext = packed record
82 {0} Position, {16} Normal: TPoint3D_128;
83 {32} PositionInvZ, {48} NormalInvZ: TPoint3D_128;
84 {64} PositionStepInvZ, {80} NormalStepInvZ: TPoint3D_128;
// NOTE(review): the dummy fields below are not used in this unit; presumably they
// reserve space up to ExtendedLightingContextSize - confirm against the users of this record.
85 {96} dummy4: single;
86 {100} dummy3: LongBool;
87 {104} dummy1: longword;
88 {108} dummy2: longword;
89 {112} dummy: packed array[0..15]of byte;
90 end; {128}
91
// Equal to the total size given by the offsets above (128 bytes).
92const ExtendedLightingContextSize = 128;
93
94implementation
95
{ Converts a TPoint3D_128 to a TPoint3D by copying x, y and z.
  The t component is not carried over. }
function Point3D(const point3D_128: TPoint3D_128): TPoint3D; inline; overload;
begin
  with point3D_128 do
  begin
    Result.x := x;
    Result.y := y;
    Result.z := z;
  end;
end;
102
{ Builds a TPoint3D_128 from a TPoint3D: x, y and z are copied, t is set to 0. }
function Point3D_128(const point3D: TPoint3D): TPoint3D_128; inline; overload;
begin
  with Result do
  begin
    x := point3D.x;
    y := point3D.y;
    z := point3D.z;
    t := 0;
  end;
end;
110
{ Builds a TPoint3D_128 from a 2D point: x and y are copied, z and t are set to 0. }
function Point3D_128(const pointF: TPointF): TPoint3D_128;
begin
  with Result do
  begin
    x := pointF.x;
    y := pointF.y;
    z := 0;
    t := 0;
  end;
end;
118
{ Builds a TPoint3D_128 from three coordinates; the fourth component t is set to 0. }
function Point3D_128(x,y,z: single): TPoint3D_128; inline; overload;
begin
  Result.x := x;  Result.y := y;  Result.z := z;
  Result.t := 0;
end;
126
{ Builds a TPoint3D_128 from four explicit components. }
function Point3D_128(x,y,z,t: single): TPoint3D_128; inline; overload;
begin
  Result.x := x;  Result.y := y;
  Result.z := z;  Result.t := t;
end;
134
// Component-wise sum of v1 and v2 on x, y and z; the t component of the result is set to 0.
// On i386 the x87 instructions are emitted as raw opcode bytes. Per the inline
// comments, eax and edx hold the addresses of v1 and v2, and ecx the address of the result.
135operator + (constref v1,v2: TPoint3D_128): TPoint3D_128;
136{$ifdef CPUI386} assembler;
137asm
138 db $d9, $00 //flds [eax]     st0 := v1.x
139 db $d8, $02 //fadds [edx]    st0 := st0 + v2.x
140 db $d9, $19 //fstps [ecx]    result.x := st0, pop
141
142 db $d9, $40, $04 //flds [eax+4]     same for y (byte offset 4)
143 db $d8, $42, $04 //fadds [edx+4]
144 db $d9, $59, $04 //fstps [ecx+4]
145
146 db $d9, $40, $08 //flds [eax+8]     same for z (byte offset 8)
147 db $d8, $42, $08 //fadds [edx+8]
148 db $d9, $59, $08 //fstps [ecx+8]
149
150 xor eax,eax
151 mov [ecx+12],eax //result.t := 0 (fourth single, byte offset 12)
152end;
153{$else}
// Portable version for every other target.
154begin
155 result.x := v1.x+v2.x;
156 result.y := v1.y+v2.y;
157 result.z := v1.z+v2.z;
158 result.t := 0;
159end;
160{$endif}
161
162{$ifdef BGRASSE_AVAILABLE}
// dest := dest + src on all four singles (x, y, z and t) with one packed SSE addition.
// dest is read and written with movaps, which requires a 16-byte aligned address;
// src is read with movups and may be unaligned.
163procedure Add3D_AlignedSSE(var dest: TPoint3D_128; constref src: TPoint3D_128); assembler;
164asm
165 movaps xmm0, [dest] //aligned load of dest
166 movups xmm1, [src] //unaligned load of src
167 addps xmm0, xmm1 //four single additions at once
168 movaps [dest], xmm0 //aligned store back into dest
169end;
170{$endif}
171
// dest := dest + src on x, y and z only; dest.t is left untouched.
// On i386 the x87 instructions are emitted as raw opcode bytes. Per the inline
// comments, eax holds the address of dest and edx the address of src.
172procedure Add3D_NoSSE(var dest: TPoint3D_128; constref src: TPoint3D_128);
173{$ifdef CPUI386} assembler;
174asm
175 db $d9, $00 //flds [eax]     st0 := dest.x
176 db $d8, $02 //fadds [edx]    st0 := st0 + src.x
177 db $d9, $18 //fstps [eax]    dest.x := st0, pop
178
179 db $d9, $40, $04 //flds [eax+4]     same for y (byte offset 4)
180 db $d8, $42, $04 //fadds [edx+4]
181 db $d9, $58, $04 //fstps [eax+4]
182
183 db $d9, $40, $08 //flds [eax+8]     same for z (byte offset 8)
184 db $d8, $42, $08 //fadds [edx+8]
185 db $d9, $58, $08 //fstps [eax+8]
186end;
187{$else}
// Portable version for every other target.
188begin
189 dest.x += src.x;
190 dest.y += src.y;
191 dest.z += src.z;
192end;
193{$endif}
194
// Component-wise difference v1 - v2 on x, y and z; the t component of the result is set to 0.
// On i386 the x87 instructions are emitted as raw opcode bytes. Per the inline
// comments, eax and edx hold the addresses of v1 and v2, and ecx the address of the result.
195operator - (const v1,v2: TPoint3D_128): TPoint3D_128;
196{$ifdef CPUI386} assembler;
197asm
198 db $d9, $02 //flds [edx]     st0 := v2.x
199 db $d8, $28 //fsubrs [eax]   reversed subtract: st0 := v1.x - st0
200 db $d9, $19 //fstps [ecx]    result.x := st0, pop
201
202 db $d9, $42, $04 //flds [edx+4]     same for y (byte offset 4)
203 db $d8, $68, $04 //fsubrs [eax+4]
204 db $d9, $59, $04 //fstps [ecx+4]
205
206 db $d9, $42, $08 //flds [edx+8]     same for z (byte offset 8)
207 db $d8, $68, $08 //fsubrs [eax+8]
208 db $d9, $59, $08 //fstps [ecx+8]
209
210 xor eax,eax
211 mov [ecx+12],eax //result.t := 0 (fourth single, byte offset 12)
212end;
213{$else}
// Portable version for every other target.
214begin
215 result.x := v1.x-v2.x;
216 result.y := v1.y-v2.y;
217 result.z := v1.z-v2.z;
218 result.t := 0;
219end;
220{$endif}
221
{ Unary minus: negates x, y and z; the t component of the result is set to 0. }
operator-(const v: TPoint3D_128): TPoint3D_128; inline;
begin
  with Result do
  begin
    x := -v.x;
    y := -v.y;
    z := -v.z;
    t := 0;
  end;
end;
229
{ Equality on the spatial components only: x, y and z must all match.
  The t component does not take part in the comparison. }
operator=(const v1, v2: TPoint3D_128): boolean; inline;
begin
  Result := false;
  if not (v1.x = v2.x) then exit;
  if not (v1.y = v2.y) then exit;
  Result := (v1.z = v2.z);
end;
234
{ Sets all four components of v (x, y, z and t) to zero.

  The previous implementation was platform dependent: the x86_64 assembler
  zeroed the whole 16-byte record, whereas the i386 assembler and the portable
  fallback zeroed only the first 12 bytes and left t of the "out" parameter
  undefined. A single portable body now gives the same, fully initialised
  result on every target; an all-zero bit pattern is what 0.0 stores for a
  single, so the x86_64 result is unchanged. }
procedure ClearPoint3D_128(out v: TPoint3D_128);
begin
  v.x := 0;
  v.y := 0;
  v.z := 0;
  v.t := 0;
end;
268
{ Sets all four components of v to zero.

  With BGRASSE_AVAILABLE the record is cleared by one aligned 16-byte SSE
  store (movaps), so v must be 16-byte aligned.
  Without it, a plain Pascal fallback is compiled. That fallback used to
  clear only x, y and z, which disagreed with the SSE path; it now clears t
  as well so that both variants leave the record in the same state. }
procedure ClearPoint3D_128_AlignedSSE(out v: TPoint3D_128);
{$ifdef BGRASSE_AVAILABLE} assembler;
 asm
   xorps xmm0,xmm0        // xmm0 := four zero singles
   {$ifdef cpux86_64}
   mov rax,v
   movaps [rax],xmm0      // aligned 16-byte store
   {$else}
   mov eax,v
   movaps [eax],xmm0      // aligned 16-byte store
   {$endif}
 end;
{$else}
begin
  v.x := 0;
  v.y := 0;
  v.z := 0;
  v.t := 0;
end;
{$endif}
292
{ Returns true when x, y and z of v are all equal to zero.
  The t component is not examined. }
function IsPoint3D_128_Zero(const v: TPoint3D_128): boolean;
begin
  Result := false;
  if not (v.x = 0) then exit;
  if not (v.y = 0) then exit;
  Result := (v.z = 0);
end;
297
// Multiplies x, y and z of v1 by factor; the t component of the result is set to 0.
// On i386 the x87 instructions are emitted as raw opcode bytes. Per the inline
// comments, eax holds the address of v1, edx the address of the result, and
// factor is read from the stack at [ebp+8].
// NOTE(review): reading [ebp+8] assumes an ebp-based stack frame is set up for
// this assembler routine - confirm for the compiler version and options in use.
298operator * (const v1: TPoint3D_128; const factor: single): TPoint3D_128;
299{$ifdef CPUI386} assembler;
300asm
301 db $d9, $00 //flds [eax]          st0 := v1.x
302 db $d8, $4d, $08 //fmuls [ebp+8]  st0 := st0 * factor
303 db $d9, $1a //fstps [edx]         result.x := st0, pop
304
305 db $d9, $40, $04 //flds [eax+4]   same for y (byte offset 4)
306 db $d8, $4d, $08 //fmuls [ebp+8]
307 db $d9, $5a, $04 //fstps [edx+4]
308
309 db $d9, $40, $08 //flds [eax+8]   same for z (byte offset 8)
310 db $d8, $4d, $08 //fmuls [ebp+8]
311 db $d9, $5a, $08 //fstps [edx+8]
312
313 xor eax,eax
314 mov [edx+12],eax //result.t := 0 (fourth single, byte offset 12)
315end;
316{$else}
// Portable version for every other target.
317begin
318 result.x := v1.x*factor;
319 result.y := v1.y*factor;
320 result.z := v1.z*factor;
321 result.t := 0;
322end;
323{$endif}
324
325{$ifdef BGRASSE_AVAILABLE}
// Dot product using SSE3 horizontal adds.
// All four lanes are multiplied and summed, so the product of the two t
// components is included; DotProduct3D_128_NoSSE sums x, y and z only. The two
// routines therefore agree only when at least one operand has t = 0.
// The initialization section of this unit does not select this routine
// (the scalar version is assigned even when SSE3 is available).
// NOTE(review): the result is stored through [result]; how that maps to the
// function-result location of an assembler function should be confirmed per target.
326function DotProduct3D_128_SSE3(constref v1,v2: TPoint3D_128): single; assembler;
327asm
328 movups xmm0, [v1] //unaligned loads of both operands
329 movups xmm1, [v2]
330 mulps xmm0, xmm1 //four lane-wise products
331
332 haddps xmm0,xmm0 //pairwise sums
333 haddps xmm0,xmm0 //every lane now holds the sum of the four products
334 movss [result], xmm0 //store the low lane
335end;
336{$endif}
337
// Scalar dot product of v1 and v2 over x, y and z; the t components are ignored.
338function DotProduct3D_128_NoSSE(constref v1,v2: TPoint3D_128): single;
339begin
340 result := v1.x*v2.x + v1.y*v2.y + v1.z*v2.z;
341end;
342
{ Scales x, y and z of v in place so that the vector has unit length.
  A vector whose squared length is exactly zero is left unchanged;
  the t component is never modified. }
procedure Normalize3D_128_NoSSE(var v: TPoint3D_128);
var
  sqNorm, invNorm: single;
begin
  sqNorm := DotProduct3D_128_NoSSE(v,v);
  if sqNorm = 0 then exit;

  invNorm := 1/sqrt(sqNorm);
  v.x := v.x * invNorm;
  v.y := v.y * invNorm;
  v.z := v.z * invNorm;
end;
353
354{$ifdef BGRASSE_AVAILABLE}
// Normalizes v in place using SSE1 instructions only (shuffles for the horizontal sum).
// A vector with zero squared length is left unchanged. Squared lengths below 1e-6
// are handled with scalar code; otherwise the approximate rsqrtps instruction is used.
// NOTE(review): the code reads xmm1 after the SSE_LOADV include and stores after
// SSE_SAVEV; presumably bgrasse.inc loads v into xmm1 and writes xmm1 back to v -
// the include file is not visible here. If all four lanes are loaded, the t
// component contributes to the computed length.
// NOTE(review): xmm1 and xmm2 set in the first asm block are used again in the
// second one, so the compiled Pascal comparisons in between must not alter them.
355procedure Normalize3D_128_SSE1(var v: TPoint3D_128);
356var len: single; //receives the sum of squares (squared length), not the length
357begin
358 asm
359 {$DEFINE SSE_LOADV}{$i bgrasse.inc}
360 movaps xmm2, xmm1
361 mulps xmm2, xmm2 //square every lane
362
363 //mix1: add the record's high and low halves (shuffle $4e swaps the two 64-bit halves)
364 movaps xmm7, xmm2
365 shufps xmm7, xmm7, $4e
366 addps xmm2, xmm7
367 //mix2: add neighbouring lanes (shuffle $11 selects lanes 1,0,1,0); all lanes now hold the total
368 movaps xmm7, xmm2
369 shufps xmm7, xmm7, $11
370 addps xmm2, xmm7
371
372 movss len, xmm2
373 end;
374 if (len = 0) then exit; //zero vector: nothing to scale
375 if len < 1e-6 then //out of bounds for SSE instruction
376 begin
377 len := 1/sqrt(len);
378 v.x *= len;
379 v.y *= len;
380 v.z *= len;
381 end else
382 asm
383 rsqrtps xmm2, xmm2 //approximate 1/sqrt of the total in every lane
384 mulps xmm1, xmm2 //apply
385 {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
386 end;
387end;
388{$endif}
389
390{$ifdef BGRASSE_AVAILABLE}
// Normalizes v in place; same algorithm as the SSE1 variant, but the horizontal
// sum of the squared lanes is done with two SSE3 haddps instructions.
// A vector with zero squared length is left unchanged. Squared lengths below 1e-6
// are handled with scalar code; otherwise the approximate rsqrtps instruction is used.
// NOTE(review): the code reads xmm1 after the SSE_LOADV include and stores after
// SSE_SAVEV; presumably bgrasse.inc loads v into xmm1 and writes xmm1 back to v -
// the include file is not visible here. If all four lanes are loaded, the t
// component contributes to the computed length.
// NOTE(review): xmm1 and xmm2 set in the first asm block are used again in the
// second one, so the compiled Pascal comparisons in between must not alter them.
391procedure Normalize3D_128_SSE3(var v: TPoint3D_128);
392var len: single; //receives the sum of squares (squared length), not the length
393begin
394 asm
395 {$DEFINE SSE_LOADV}{$i bgrasse.inc}
396 movaps xmm2, xmm1
397 mulps xmm2, xmm2 //square every lane
398
399 haddps xmm2,xmm2 //pairwise sums
400 haddps xmm2,xmm2 //every lane now holds the total
401
402 movss len, xmm2
403 end;
404 if (len = 0) then exit; //zero vector: nothing to scale
405 if len < 1e-6 then //out of bounds for SSE instruction
406 begin
407 len := 1/sqrt(len);
408 v.x *= len;
409 v.y *= len;
410 v.z *= len;
411 end else
412 asm
413 rsqrtps xmm2, xmm2 //approximate 1/sqrt of the total in every lane
414 mulps xmm1, xmm2 //apply
415 {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
416 end;
417end;
418{$endif}
419
// Normalizes v in place and returns in SqLen the squared length that was
// computed before scaling. When SqLen is zero, v is left unchanged.
// With BGRASSE_AVAILABLE and UseSSE set, the SSE path is taken (haddps when
// UseSSE3 is set, shuffles otherwise); in every other case the scalar path runs.
// NOTE(review): the SSE path reads xmm1 after the SSE_LOADV include and stores
// after SSE_SAVEV; presumably bgrasse.inc loads v into xmm1 and writes it back -
// the include file is not visible here. If all four lanes are loaded, the t
// component contributes to SqLen on the SSE path but not on the scalar path.
// NOTE(review): xmm1 and xmm2 are carried across several asm blocks, so the
// compiled Pascal tests in between must not alter them.
420procedure Normalize3D_128_SqLen(var v: TPoint3D_128; out SqLen: single);
421var InvLen: single;
422begin
423 {$ifdef BGRASSE_AVAILABLE}
424 if UseSSE then
425 begin
426 asm
427 {$DEFINE SSE_LOADV}{$i bgrasse.inc}
428 movaps xmm2, xmm1
429 mulps xmm2, xmm2 //square every lane
430 end;
431 if UseSSE3 then
432 asm
433 haddps xmm2,xmm2 //pairwise sums
434 haddps xmm2,xmm2 //every lane now holds the total
435 movss SqLen, xmm2
436 end else
437 asm
438 //mix1: add the record's high and low halves (shuffle $4e swaps the two 64-bit halves)
439 movaps xmm7, xmm2
440 shufps xmm7, xmm7, $4e
441 addps xmm2, xmm7
442 //mix2: add neighbouring lanes (shuffle $11 selects lanes 1,0,1,0)
443 movaps xmm7, xmm2
444 shufps xmm7, xmm7, $11
445 addps xmm2, xmm7
446 movss SqLen, xmm2
447 end;
448 if SqLen = 0 then exit; //zero vector: nothing to scale
449 if SqLen < 1e-6 then //out of bounds for SSE instruction
450 begin
451 InvLen := 1/sqrt(SqLen);
452 v.x *= InvLen;
453 v.y *= InvLen;
454 v.z *= InvLen;
455 end else
456 asm
457 rsqrtps xmm2, xmm2 //approximate 1/sqrt of the total in every lane
458 mulps xmm1, xmm2 //apply
459 {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
460 end;
461 end
462 else
463{$endif}
//scalar path: squared length over x, y and z only
464 begin
465 SqLen := DotProduct3D_128_NoSSE(v,v);
466 if SqLen = 0 then exit;
467 InvLen := 1/sqrt(SqLen);
468 v.x *= InvLen;
469 v.y *= InvLen;
470 v.z *= InvLen;
471 end;
472end;
473
{ Scalar cross product: w := u x v on x, y and z; w.t is set to 0.

  The three components are computed into locals before anything is written
  to w. The previous version stored w.x and w.y while u and v were still
  being read, so a call where w is the same variable as u or v (and the
  const parameters are passed by reference) produced a wrong y and z.
  For non-overlapping arguments the result is unchanged. }
procedure VectProduct3D_128_NoSSE(const u,v: TPoint3D_128; out w: TPoint3D_128);
var
  cx, cy, cz: single;
begin
  cx := u.y*v.z-u.z*v.y;
  cy := u.z*v.x-u.x*v.z;
  cz := u.x*v.y-u.y*v.x;

  // all inputs have been read; it is now safe to overwrite w
  w.x := cx;
  w.y := cy;
  w.z := cz;
  w.t := 0;
end;
481
482{$ifdef BGRASSE_AVAILABLE}
// SSE cross product: w := u x v, computed as
//   shuffle231(u) * shuffle312(v) - shuffle312(u) * shuffle231(v)
// where shuffle231 gives (y,z,x,x) and shuffle312 gives (z,x,y,x). The fourth
// lane is u.x*v.x - u.x*v.x. All loads and stores are unaligned (movups).
// The initialization section of this unit does not select this routine
// (the scalar version is assigned, with a remark that this one is slower).
// NOTE(review): u and v are declared constref here, but const in the type of the
// VectProduct3D_128 variable - confirm the signatures match before assigning it.
483procedure VectProduct3D_128_SSE(constref u,v: TPoint3D_128; out w: TPoint3D_128); assembler;
484asm
485 {$ifdef cpux86_64}
486 mov rax,u
487 movups xmm6,[rax]
488 {$else}
489 mov eax,u
490 movups xmm6,[eax]
491 {$endif}
492 movaps xmm4, xmm6 //xmm4 := copy of u, shuffled further down
493 shufps xmm6, xmm6, Shift231 //xmm6 := (u.y, u.z, u.x, u.x)
494
495 {$ifdef cpux86_64}
496 mov rax,v
497 movups xmm7,[rax]
498 {$else}
499 mov eax,v
500 movups xmm7,[eax]
501 {$endif}
502 movaps xmm5,xmm7 //xmm5 := copy of v, shuffled further down
503 shufps xmm7, xmm7, Shift312 //xmm7 := (v.z, v.x, v.y, v.x)
504
505 movaps xmm3,xmm6
506 mulps xmm3,xmm7 //xmm3 := first product term
507
508 shufps xmm4, xmm4, Shift312 //xmm4 := (u.z, u.x, u.y, u.x)
509 shufps xmm5, xmm5, Shift231 //xmm5 := (v.y, v.z, v.x, v.x)
510
511 mulps xmm4,xmm5 //xmm4 := second product term
512 subps xmm3,xmm4 //xmm3 := first term - second term
513
514 {$ifdef cpux86_64}
515 mov rax,w
516 movups [rax],xmm3
517 {$else}
518 mov eax,w
519 movups [eax],xmm3
520 {$endif}
521end;
522{$endif}
523
524{ TMemoryBlockAlign128 }
525
526{$hints off}
{ Allocates a block able to hold "size" bytes at Data.
  With BGRASSE_AVAILABLE, 15 extra bytes are requested and Data is the first
  address inside the block that is a multiple of 16; otherwise Data is simply
  the start of the block. }
constructor TMemoryBlockAlign128.Create(size: integer);
{$IFDEF BGRASSE_AVAILABLE}
var
  misalign: PtrUInt;
begin
  GetMem(FContainer, size+15);
  // number of bytes by which the block start exceeds a 16-byte boundary
  misalign := PtrUInt(FContainer) and 15;
  if misalign = 0 then
    FData := FContainer
  else
    FData := PByte(FContainer) + (16 - misalign);
end;
{$ELSE}
begin
  GetMem(FContainer, size);
  FData := FContainer;
end;
{$ENDIF}
543{$hints on}
544
{ Releases the block obtained in Create (the original, unaligned pointer),
  then runs the inherited destructor. }
destructor TMemoryBlockAlign128.Destroy;
begin
  FreeMem(FContainer);
  inherited;
end;
550
551{$ifdef BGRASSE_AVAILABLE}
// Returns true when the CPU reports SSE3: executes CPUID with eax = 1 and tests
// bit 0 of the value returned in ecx.
// On i386 the instruction is only executed when cpuid_support is true;
// otherwise the function returns false. On the other target compiled here
// (the 64-bit register names are used) CPUID is executed unconditionally.
552function sse3_support : boolean;
553
554 var
555 _ecx : longint; //copy of ecx as returned by CPUID
556
557 begin
558 {$IFDEF CPUI386}
559 if cpuid_support then
560 begin
561 asm
562 push ebx //CPUID overwrites ebx: save it
563 mov eax,1 //CPUID function 1
564 cpuid
565 mov _ecx,ecx
566 pop ebx //restore ebx
567 end;
568 sse3_support:=(_ecx and 1)<>0; //bit 0 of ecx
569 end
570 else
571 sse3_support:=false;
572 {$ELSE}
573 asm
574 push rbx //CPUID overwrites ebx: save the full register
575 mov eax,1 //CPUID function 1
576 cpuid
577 mov _ecx,ecx
578 pop rbx //restore rbx
579 end;
580 sse3_support:=(_ecx and 1)<>0; //bit 0 of ecx
581 {$ENDIF}
582 end;
583{$endif}
584
initialization

  { These two always use the scalar routine, whatever the CPU supports:
    VectProduct3D_128_SSE is slower (due to access penalty?)
    DotProduct3D_128_SSE3 is slower (due to access penalty?) }
  VectProduct3D_128 := @VectProduct3D_128_NoSSE;
  DotProduct3D_128 := @DotProduct3D_128_NoSSE;

  { Base SSE availability: detected at run time on i386, taken for granted
    on x86_64, absent elsewhere; FLAG_ENABLED_SSE can switch it off. }
  {$ifdef CPUI386}
  UseSSE := is_sse_cpu and FLAG_ENABLED_SSE;
  {$else}
    {$ifdef cpux86_64}
  UseSSE := FLAG_ENABLED_SSE;
    {$else}
  UseSSE := false;
    {$endif}
  {$endif}

  {$IFDEF BGRASSE_AVAILABLE}
  if UseSSE then
  begin
    {$ifdef cpux86_64}
    UseSSE2 := true;
    {$else}
    UseSSE2 := is_sse2_cpu;
    {$endif}
    UseSSE3 := sse3_support;

    Add3D_Aligned := @Add3D_AlignedSSE;
    // pick the normalization routine matching the best instruction set
    if UseSSE3 then
      Normalize3D_128 := @Normalize3D_128_SSE3
    else
      Normalize3D_128 := @Normalize3D_128_SSE1;
  end
  else
  {$ENDIF}
  begin
    // no usable SSE (or SSE code not compiled in): scalar routines everywhere
    UseSSE := false;
    UseSSE2 := false;
    UseSSE3 := false;

    Add3D_Aligned := @Add3D_NoSSE;
    Normalize3D_128 := @Normalize3D_128_NoSSE;
  end;

end.
634
Note: See TracBrowser for help on using the repository browser.