1 | unit BGRASSE;
|
---|
2 |
|
---|
3 | {$mode objfpc}{$H+}
|
---|
4 |
|
---|
5 | {$i bgrasse.inc}
|
---|
6 |
|
---|
7 | interface
|
---|
8 |
|
---|
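{ Notes on the x87 instructions that the i386 routines of this unit emit as raw bytes ("db").
  Parameters are received in eax, edx, ecx (in that order).
  In the second opcode byte the base register is encoded as eax=0, ecx=1, edx=2.
    flds   [reg] : $d9, reg
    fadds  [reg] : $d8, reg
    fstps  [reg] : $d9, reg+$18
    fmuls  [reg] : $d8, reg+$08
    fsubrs [reg] : $d8, reg+$28
  To address [reg+offset], add $40 to the second byte and append the 8-bit offset. }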
|
---|
16 | uses
|
---|
17 | BGRABitmapTypes {$ifdef CPUI386}, cpu, mmx{$endif};
|
---|
18 |
|
---|
// Compile-time switch: when false, the initialization section never sets UseSSE
const FLAG_ENABLED_SSE = true;

// Filled in by the initialization section of this unit
var UseSSE, UseSSE2, UseSSE3 : boolean;

{$ifdef CPUI386}
  {$asmmode intel}
{$ENDIF}
{$ifdef cpux86_64}
  {$asmmode intel}
{$ENDIF}

{$ifdef BGRASSE_AVAILABLE}
//SSE rotate singles
// shufps immediates: two bits per destination lane, lowest lane first
const Shift231 = 1 + 8;   // source lanes (1,2,0,0): (x,y,z,t) becomes (y,z,x,x)
      Shift312 = 2 + 16;  // source lanes (2,0,1,0): (x,y,z,t) becomes (z,x,y,x)
{$endif}
|
---|
35 |
|
---|
type
  // Point made of four singles: x, y, z and an extra component t (16 bytes, packed)
  TPoint3D_128 = packed record x,y,z,t: single; end;
  PPoint3D_128 = ^TPoint3D_128;

// Conversions; the overloads that do not receive t set it to zero
function Point3D(const point3D_128: TPoint3D_128): TPoint3D; inline; overload;
function Point3D_128(const point3D: TPoint3D): TPoint3D_128; inline; overload;
function Point3D_128(const pointF: TPointF): TPoint3D_128; inline; overload;
function Point3D_128(x,y,z: single): TPoint3D_128; inline; overload;
function Point3D_128(x,y,z,t: single): TPoint3D_128; inline; overload;
// Normalizes v and returns in SqLen the squared length computed before scaling
procedure Normalize3D_128_SqLen(var v: TPoint3D_128; out SqLen: single);
// Arithmetic operators: computed on x, y and z; t of the result is set to zero
operator * (const v1: TPoint3D_128; const factor: single): TPoint3D_128;
operator + (constref v1,v2: TPoint3D_128): TPoint3D_128;
operator - (const v1,v2: TPoint3D_128): TPoint3D_128;
operator - (const v: TPoint3D_128): TPoint3D_128; inline;
// Compares x, y and z only; t is ignored
operator = (const v1,v2: TPoint3D_128): boolean; inline;
// Zeroes the coordinates of v
procedure ClearPoint3D_128(out v: TPoint3D_128);
{$IFDEF BGRASSE_AVAILABLE}
// Zeroes v with an aligned SSE store (movaps), so v must be 16-byte aligned
procedure ClearPoint3D_128_AlignedSSE(out v: TPoint3D_128);
{$ENDIF}
// True when x, y and z are all zero; t is not examined
function IsPoint3D_128_Zero(const v: TPoint3D_128): boolean; inline;

// Implementations chosen by the initialization section of this unit
var
  Add3D_Aligned : procedure (var dest: TPoint3D_128; constref src: TPoint3D_128);
  Normalize3D_128 : procedure (var v: TPoint3D_128);
  VectProduct3D_128 : procedure (const u,v: TPoint3D_128; out w: TPoint3D_128);
  DotProduct3D_128 : function (constref v1,v2: TPoint3D_128): single;

const
  Point3D_128_Zero : TPoint3D_128 = (x:0; y:0; z:0; t:0);
|
---|
65 |
|
---|
66 | type
|
---|
67 |
|
---|
{ TMemoryBlockAlign128 }

// Owns a heap block; when BGRASSE_AVAILABLE is defined, Data is rounded up
// to a 16-byte (128-bit) boundary inside that block
TMemoryBlockAlign128 = class
private
  FContainer: Pointer; // block obtained from getmem, released in Destroy
  FData: Pointer;      // address exposed through the Data property
public
  constructor Create(size: integer);
  destructor Destroy; override;
  property Data: pointer read FData;
end;
|
---|
79 |
|
---|
PBasicLightingContext = ^TBasicLightingContext;
// Packed record of 128 bytes; the number written before each field is its byte offset.
// NOTE(review): the dummy fields look like space reserved up to
// ExtendedLightingContextSize - confirm with the code that uses this record.
TBasicLightingContext = packed record
  {0} Position, {16} Normal: TPoint3D_128;
  {32} PositionInvZ, {48} NormalInvZ: TPoint3D_128;
  {64} PositionStepInvZ, {80} NormalStepInvZ: TPoint3D_128;
  {96} dummy4: single;
  {100} dummy3: LongBool;
  {104} dummy1: longword;
  {108} dummy2: longword;
  {112} dummy: packed array[0..15]of byte;
end; {128}
|
---|
91 |
|
---|
92 | const ExtendedLightingContextSize = 128;
|
---|
93 |
|
---|
94 | implementation
|
---|
95 |
|
---|
{ Converts a 128-bit point into a TPoint3D by copying x, y and z (t is dropped) }
function Point3D(const point3D_128: TPoint3D_128): TPoint3D; inline; overload;
begin
  with point3D_128 do
  begin
    result.x := x;
    result.y := y;
    result.z := z;
  end;
end;
|
---|
102 |
|
---|
{ Builds a 128-bit point from a TPoint3D; the extra component t is zero }
function Point3D_128(const point3D: TPoint3D): TPoint3D_128; inline; overload;
begin
  with result do
  begin
    t := 0;
    x := point3D.x;
    y := point3D.y;
    z := point3D.z;
  end;
end;
|
---|
110 |
|
---|
{ Builds a 128-bit point from a 2D point; z and t are zero }
function Point3D_128(const pointF: TPointF): TPoint3D_128;
begin
  with result do
  begin
    z := 0;
    t := 0;
    x := pointF.x;
    y := pointF.y;
  end;
end;
|
---|
118 |
|
---|
{ Builds a 128-bit point from three coordinates; t is zero }
function Point3D_128(x,y,z: single): TPoint3D_128; inline; overload;
begin
  result.t := 0;
  result.z := z;
  result.y := y;
  result.x := x;
end;
|
---|
126 |
|
---|
{ Builds a 128-bit point from its four components }
function Point3D_128(x,y,z,t: single): TPoint3D_128; inline; overload;
begin
  result.t := t;
  result.z := z;
  result.y := y;
  result.x := x;
end;
|
---|
134 |
|
---|
{ Sum of v1 and v2 computed on x, y and z; t of the result is set to zero }
operator + (constref v1,v2: TPoint3D_128): TPoint3D_128;
{$ifdef CPUI386} assembler;
asm
  // x87 opcodes given as raw bytes: v1 is read through eax,
  // v2 through edx and the result is written through ecx
  db $d9, $00 //flds [eax]
  db $d8, $02 //fadds [edx]
  db $d9, $19 //fstps [ecx]

  db $d9, $40, $04 //flds [eax+4]
  db $d8, $42, $04 //fadds [edx+4]
  db $d9, $59, $04 //fstps [ecx+4]

  db $d9, $40, $08 //flds [eax+8]
  db $d8, $42, $08 //fadds [edx+8]
  db $d9, $59, $08 //fstps [ecx+8]

  // fourth component (t) of the result := 0
  xor eax,eax
  mov [ecx+12],eax
end;
{$else}
begin
  with result do
  begin
    t := 0;
    x := v1.x+v2.x;
    y := v1.y+v2.y;
    z := v1.z+v2.z;
  end;
end;
{$endif}
|
---|
161 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Adds the four lanes of src to dest with one SSE addition.
  dest is accessed with movaps and therefore has to be 16-byte aligned;
  src is read with movups and may be unaligned. }
procedure Add3D_AlignedSSE(var dest: TPoint3D_128; constref src: TPoint3D_128); assembler;
asm
  movups xmm1, [src]
  movaps xmm0, [dest]
  addps xmm0, xmm1
  movaps [dest], xmm0
end;
{$endif}
|
---|
171 |
|
---|
{ Adds x, y and z of src to dest without SSE; dest.t is left untouched }
procedure Add3D_NoSSE(var dest: TPoint3D_128; constref src: TPoint3D_128);
{$ifdef CPUI386} assembler;
asm
  // x87 opcodes given as raw bytes: dest is accessed through eax, src through edx
  db $d9, $00 //flds [eax]
  db $d8, $02 //fadds [edx]
  db $d9, $18 //fstps [eax]

  db $d9, $40, $04 //flds [eax+4]
  db $d8, $42, $04 //fadds [edx+4]
  db $d9, $58, $04 //fstps [eax+4]

  db $d9, $40, $08 //flds [eax+8]
  db $d8, $42, $08 //fadds [edx+8]
  db $d9, $58, $08 //fstps [eax+8]
end;
{$else}
begin
  dest.x := dest.x + src.x;
  dest.y := dest.y + src.y;
  dest.z := dest.z + src.z;
end;
{$endif}
|
---|
194 |
|
---|
{ Difference v1 - v2 computed on x, y and z; t of the result is set to zero }
operator - (const v1,v2: TPoint3D_128): TPoint3D_128;
{$ifdef CPUI386} assembler;
asm
  // x87 opcodes given as raw bytes: v2 is loaded through edx, then the
  // reversed subtraction with [eax] gives v1 - v2, stored through ecx
  db $d9, $02 //flds [edx]
  db $d8, $28 //fsubrs [eax]
  db $d9, $19 //fstps [ecx]

  db $d9, $42, $04 //flds [edx+4]
  db $d8, $68, $04 //fsubrs [eax+4]
  db $d9, $59, $04 //fstps [ecx+4]

  db $d9, $42, $08 //flds [edx+8]
  db $d8, $68, $08 //fsubrs [eax+8]
  db $d9, $59, $08 //fstps [ecx+8]

  // fourth component (t) of the result := 0
  xor eax,eax
  mov [ecx+12],eax
end;
{$else}
begin
  with result do
  begin
    t := 0;
    x := v1.x-v2.x;
    y := v1.y-v2.y;
    z := v1.z-v2.z;
  end;
end;
{$endif}
|
---|
221 |
|
---|
{ Opposite vector: negates x, y and z; t of the result is set to zero }
operator-(const v: TPoint3D_128): TPoint3D_128; inline;
begin
  with result do
  begin
    t := 0;
    x := -v.x;
    y := -v.y;
    z := -v.z;
  end;
end;
|
---|
229 |
|
---|
{ Two points are equal when their x, y and z match; t does not take part }
operator=(const v1, v2: TPoint3D_128): boolean; inline;
begin
  result := not ((v1.x<>v2.x) or (v1.y<>v2.y) or (v1.z<>v2.z));
end;
|
---|
234 |
|
---|
{ Sets the four components of v (x, y, z and t) to zero.
  v is an "out" parameter, so its previous content is undefined: every
  target clears all 16 bytes so that t never keeps a stale value. }
procedure ClearPoint3D_128(out v: TPoint3D_128);
{$ifdef cpux86_64} assembler;
asm
  push rbx
  mov rax,v
  xor rbx,rbx
  mov [rax],rbx     // x and y
  mov [rax+8],rbx   // z and t
  pop rbx
end;
{$else}
  {$ifdef CPUI386} assembler;
  asm
    push ebx
    mov eax,v
    xor ebx,ebx
    mov [eax],ebx     // x
    mov [eax+4],ebx   // y
    mov [eax+8],ebx   // z
    mov [eax+12],ebx  // t
    pop ebx
  end;
  {$else}
  begin
    v.x := 0;
    v.y := 0;
    v.z := 0;
    v.t := 0;
  end;
  {$endif}
{$endif}
|
---|
268 |
|
---|
{ Sets the four components of v to zero.
  With BGRASSE_AVAILABLE this is a single aligned SSE store (movaps), so v
  must be 16-byte aligned. The portable fallback clears the same 16 bytes,
  t included, so both variants leave v in the same state. }
procedure ClearPoint3D_128_AlignedSSE(out v: TPoint3D_128);
{$ifdef BGRASSE_AVAILABLE} assembler;
asm
  xorps xmm0,xmm0
  {$ifdef cpux86_64}
  mov rax,v
  movaps [rax],xmm0
  {$else}
  mov eax,v
  movaps [eax],xmm0
  {$endif}
end;
{$else}
begin
  v.x := 0;
  v.y := 0;
  v.z := 0;
  v.t := 0;
end;
{$endif}
|
---|
292 |
|
---|
{ True when x, y and z of v are all zero; t is not examined }
function IsPoint3D_128_Zero(const v: TPoint3D_128): boolean;
begin
  result := not ((v.x<>0) or (v.y<>0) or (v.z<>0));
end;
|
---|
297 |
|
---|
{ Multiplies x, y and z of v1 by factor; t of the result is set to zero }
operator * (const v1: TPoint3D_128; const factor: single): TPoint3D_128;
{$ifdef CPUI386} assembler;
asm
  // x87 opcodes given as raw bytes: v1 is read through eax, factor is
  // taken from [ebp+8] and the result is written through edx
  db $d9, $00 //flds [eax]
  db $d8, $4d, $08 //fmuls [ebp+8]
  db $d9, $1a //fstps [edx]

  db $d9, $40, $04 //flds [eax+4]
  db $d8, $4d, $08 //fmuls [ebp+8]
  db $d9, $5a, $04 //fstps [edx+4]

  db $d9, $40, $08 //flds [eax+8]
  db $d8, $4d, $08 //fmuls [ebp+8]
  db $d9, $5a, $08 //fstps [edx+8]

  // fourth component (t) of the result := 0
  xor eax,eax
  mov [edx+12],eax
end;
{$else}
begin
  with result do
  begin
    t := 0;
    x := v1.x*factor;
    y := v1.y*factor;
    z := v1.z*factor;
  end;
end;
{$endif}
|
---|
324 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Dot product using SSE3 horizontal additions.
  All four lanes are multiplied and summed, so the product of the t
  components is part of the result (unlike DotProduct3D_128_NoSSE). }
function DotProduct3D_128_SSE3(constref v1,v2: TPoint3D_128): single; assembler;
asm
  movups xmm1, [v2]
  movups xmm0, [v1]
  mulps xmm0, xmm1

  // two horizontal additions leave the sum of the four products in every lane
  haddps xmm0,xmm0
  haddps xmm0,xmm0
  movss [result], xmm0
end;
{$endif}
|
---|
337 |
|
---|
{ Dot product of v1 and v2 over x, y and z; t is ignored }
function DotProduct3D_128_NoSSE(constref v1,v2: TPoint3D_128): single;
begin
  with v1 do
    result := x*v2.x + y*v2.y + z*v2.z;
end;
|
---|
342 |
|
---|
{ Scales x, y and z of v so that the vector has unit length.
  A vector whose squared length is zero is left unchanged; t is never modified. }
procedure Normalize3D_128_NoSSE(var v: TPoint3D_128);
var scale: single;
begin
  scale := DotProduct3D_128_NoSSE(v,v);  // squared length at this point
  if scale <> 0 then
  begin
    scale := 1/sqrt(scale);
    v.x := v.x*scale;
    v.y := v.y*scale;
    v.z := v.z*scale;
  end;
end;
|
---|
353 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Normalizes v using SSE1 instructions only.
  Nothing is done when the computed squared length is zero.
  NOTE(review): the squared length is the sum of the four squared lanes, so it
  presumably includes t*t - confirm what SSE_LOADV loads in bgrasse.inc.
  NOTE(review): xmm1 and xmm2 are expected to keep their values across the
  Pascal statements placed between the two asm blocks - confirm that the
  generated code does not touch them. }
procedure Normalize3D_128_SSE1(var v: TPoint3D_128);
var len: single;  // holds the squared length, then its inverse square root
begin
  asm
    // presumably loads v into xmm1, which is read just below - see bgrasse.inc
    {$DEFINE SSE_LOADV}{$i bgrasse.inc}
    movaps xmm2, xmm1
    mulps xmm2, xmm2       // square of each lane

    //mix1
    movaps xmm7, xmm2
    shufps xmm7, xmm7, $4e // swap the two 64-bit halves
    addps xmm2, xmm7
    //mix2
    movaps xmm7, xmm2
    shufps xmm7, xmm7, $11 // swap neighbouring lanes
    addps xmm2, xmm7       // every lane now holds the sum of the four squares

    movss len, xmm2
  end;
  if (len = 0) then exit;
  if len < 1e-6 then //out of bounds for SSE instruction
  begin
    // scalar path for very small vectors
    len := 1/sqrt(len);
    v.x *= len;
    v.y *= len;
    v.z *= len;
  end else
  asm
    rsqrtps xmm2, xmm2     // approximate 1/sqrt of the squared length, in each lane
    mulps xmm1, xmm2  //apply
    // presumably stores xmm1 back into v - see bgrasse.inc
    {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
  end;
end;
{$endif}
|
---|
389 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Normalizes v, using SSE3 horizontal additions to compute the squared length.
  Nothing is done when the computed squared length is zero.
  NOTE(review): the squared length is the sum of the four squared lanes, so it
  presumably includes t*t - confirm what SSE_LOADV loads in bgrasse.inc.
  NOTE(review): xmm1 and xmm2 are expected to keep their values across the
  Pascal statements placed between the two asm blocks - confirm that the
  generated code does not touch them. }
procedure Normalize3D_128_SSE3(var v: TPoint3D_128);
var len: single;  // holds the squared length, then its inverse square root
begin
  asm
    // presumably loads v into xmm1, which is read just below - see bgrasse.inc
    {$DEFINE SSE_LOADV}{$i bgrasse.inc}
    movaps xmm2, xmm1
    mulps xmm2, xmm2   // square of each lane

    // two horizontal additions leave the sum of the four squares in every lane
    haddps xmm2,xmm2
    haddps xmm2,xmm2

    movss len, xmm2
  end;
  if (len = 0) then exit;
  if len < 1e-6 then //out of bounds for SSE instruction
  begin
    // scalar path for very small vectors
    len := 1/sqrt(len);
    v.x *= len;
    v.y *= len;
    v.z *= len;
  end else
  asm
    rsqrtps xmm2, xmm2 // approximate 1/sqrt of the squared length, in each lane
    mulps xmm1, xmm2  //apply
    // presumably stores xmm1 back into v - see bgrasse.inc
    {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
  end;
end;
{$endif}
|
---|
419 |
|
---|
{ Normalizes v and returns in SqLen the squared length measured before scaling.
  When SqLen is zero, v is left unchanged.
  The SSE path is taken when UseSSE is set (and BGRASSE_AVAILABLE is defined),
  otherwise the computation is done in plain Pascal on x, y and z.
  NOTE(review): in the SSE path the squared length is the sum of the four
  squared lanes, so it presumably includes t*t - confirm in bgrasse.inc.
  NOTE(review): xmm1 and xmm2 are expected to keep their values across the
  Pascal statements placed between the asm blocks - confirm that the
  generated code does not touch them. }
procedure Normalize3D_128_SqLen(var v: TPoint3D_128; out SqLen: single);
var InvLen: single;
begin
{$ifdef BGRASSE_AVAILABLE}
  if UseSSE then
  begin
    asm
      // presumably loads v into xmm1, which is read just below - see bgrasse.inc
      {$DEFINE SSE_LOADV}{$i bgrasse.inc}
      movaps xmm2, xmm1
      mulps xmm2, xmm2   // square of each lane
    end;
    if UseSSE3 then
    asm
      // SSE3: two horizontal additions sum the four squares
      haddps xmm2,xmm2
      haddps xmm2,xmm2
      movss SqLen, xmm2
    end else
    asm
      //mix1
      movaps xmm7, xmm2
      shufps xmm7, xmm7, $4e // swap the two 64-bit halves
      addps xmm2, xmm7
      //mix2
      movaps xmm7, xmm2
      shufps xmm7, xmm7, $11 // swap neighbouring lanes
      addps xmm2, xmm7       // every lane now holds the sum of the four squares
      movss SqLen, xmm2
    end;
    if SqLen = 0 then exit;
    if SqLen < 1e-6 then  //out of bounds for SSE instruction
    begin
      // scalar path for very small vectors
      InvLen := 1/sqrt(SqLen);
      v.x *= InvLen;
      v.y *= InvLen;
      v.z *= InvLen;
    end else
    asm
      rsqrtps xmm2, xmm2 // approximate 1/sqrt of the squared length, in each lane
      mulps xmm1, xmm2  //apply
      // presumably stores xmm1 back into v - see bgrasse.inc
      {$DEFINE SSE_SAVEV}{$i bgrasse.inc}
    end;
  end
  else
{$endif}
  begin
    // portable path: squared length over x, y and z only
    SqLen := DotProduct3D_128_NoSSE(v,v);
    if SqLen = 0 then exit;
    InvLen := 1/sqrt(SqLen);
    v.x *= InvLen;
    v.y *= InvLen;
    v.z *= InvLen;
  end;
end;
|
---|
473 |
|
---|
{ Cross product w = u x v computed on x, y and z; w.t is set to zero.
  The three components are computed into locals before w is written, so the
  result is correct even when w is the same variable as u or v (records
  passed as const may be passed by reference). }
procedure VectProduct3D_128_NoSSE(const u,v: TPoint3D_128; out w: TPoint3D_128);
var cx, cy, cz: single;
begin
  cx := u.y*v.z-u.z*v.y;
  cy := u.z*v.x-u.x*v.z;
  cz := u.x*v.Y-u.y*v.x;
  w.x := cx;
  w.y := cy;
  w.z := cz;
  w.t := 0;
end;
|
---|
481 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Cross product w = u x v with SSE instructions.
  u, v and w are accessed with movups and need not be aligned.
  The fourth lane of the result is u.x*v.x - u.x*v.x, i.e. zero for finite inputs.
  Only xmm0..xmm3 are used: xmm6 and above have to be preserved by the callee
  under the Microsoft x64 calling convention, so they are avoided here. }
procedure VectProduct3D_128_SSE(constref u,v: TPoint3D_128; out w: TPoint3D_128); assembler;
asm
  {$ifdef cpux86_64}
  mov rax,u
  movups xmm0,[rax]
  {$else}
  mov eax,u
  movups xmm0,[eax]
  {$endif}
  movaps xmm2, xmm0
  shufps xmm0, xmm0, Shift231   // (u.y, u.z, u.x, u.x)

  {$ifdef cpux86_64}
  mov rax,v
  movups xmm1,[rax]
  {$else}
  mov eax,v
  movups xmm1,[eax]
  {$endif}
  movaps xmm3, xmm1
  shufps xmm1, xmm1, Shift312   // (v.z, v.x, v.y, v.x)

  mulps xmm0, xmm1              // first products: u.y*v.z, u.z*v.x, u.x*v.y

  shufps xmm2, xmm2, Shift312   // (u.z, u.x, u.y, u.x)
  shufps xmm3, xmm3, Shift231   // (v.y, v.z, v.x, v.x)

  mulps xmm2, xmm3              // second products: u.z*v.y, u.x*v.z, u.y*v.x
  subps xmm0, xmm2

  {$ifdef cpux86_64}
  mov rax,w
  movups [rax],xmm0
  {$else}
  mov eax,w
  movups [eax],xmm0
  {$endif}
end;
{$endif}
|
---|
523 |
|
---|
524 | { TMemoryBlockAlign128 }
|
---|
525 |
|
---|
{$hints off}
{ Allocates a block of size bytes.
  With BGRASSE_AVAILABLE, 15 extra bytes are requested so that Data can be
  placed on the first 16-byte boundary inside the allocated block. }
constructor TMemoryBlockAlign128.Create(size: integer);
{$IFDEF BGRASSE_AVAILABLE}
var
  padding: PtrUInt;
begin
  GetMem(FContainer, size+15);
  // bytes to skip to reach the next multiple of 16 (0 when already aligned)
  padding := (16 - (PtrUInt(FContainer) and 15)) and 15;
  FData := pbyte(FContainer)+padding;
end;
{$ELSE}
begin
  GetMem(FContainer, size);
  FData := FContainer;
end;
{$ENDIF}
{$hints on}
|
---|
544 |
|
---|
{ Releases the block obtained in Create (the unaligned container pointer) }
destructor TMemoryBlockAlign128.Destroy;
begin
  FreeMem(FContainer);
  inherited;
end;
|
---|
550 |
|
---|
{$ifdef BGRASSE_AVAILABLE}
{ Returns true when CPUID function 1 reports bit 0 of ECX (the SSE3 flag).
  On i386 the CPUID instruction is only executed when cpuid_support says it
  exists; otherwise the function returns false. }
function sse3_support : boolean;
var
  featureBits : longint;  // ECX as returned by CPUID function 1
begin
  {$IFDEF CPUI386}
  result := false;
  if not cpuid_support then exit;
  asm
     push ebx          // cpuid overwrites ebx
     mov eax,1
     cpuid
     mov featureBits,ecx
     pop ebx
  end;
  {$ELSE}
  asm
     push rbx          // cpuid overwrites rbx
     mov eax,1
     cpuid
     mov featureBits,ecx
     pop rbx
  end;
  {$ENDIF}
  result := (featureBits and 1)<>0;
end;
{$endif}
|
---|
584 |
|
---|
initialization

  { Is SSE usable at all on this target? }
  {$if defined(CPUI386)}
  UseSSE := is_sse_cpu and FLAG_ENABLED_SSE;
  {$elseif defined(cpux86_64)}
  UseSSE := FLAG_ENABLED_SSE;
  {$else}
  UseSSE := false;
  {$endif}

  { Start with the portable implementations }
  UseSSE2 := false;
  UseSSE3 := false;
  Add3D_Aligned := @Add3D_NoSSE;
  Normalize3D_128 := @Normalize3D_128_NoSSE;
  VectProduct3D_128 := @VectProduct3D_128_NoSSE; //VectProduct3D_128_SSE is slower (due to access penalty?)
  DotProduct3D_128 := @DotProduct3D_128_NoSSE;   //DotProduct3D_128_SSE3 is slower (due to access penalty?)

  {$IFDEF BGRASSE_AVAILABLE}
  { Switch to the SSE routines when the processor supports them }
  if UseSSE then
  begin
    {$ifdef cpux86_64}
    UseSSE2 := true;
    {$else}
    UseSSE2 := is_sse2_cpu;
    {$endif}
    UseSSE3 := sse3_support;

    Add3D_Aligned := @Add3D_AlignedSSE;
    if UseSSE3 then
      Normalize3D_128 := @Normalize3D_128_SSE3
    else
      Normalize3D_128 := @Normalize3D_128_SSE1;
  end;
  {$ELSE}
  { No SSE code was compiled in: never report SSE as usable }
  UseSSE := false;
  {$ENDIF}

end.
|
---|
634 |
|
---|