Context Navigation

GR32_Math.pas

Last change on this file was 2, checked in by chronos, 5 years ago
Source code taken from https://svn.zdechov.net/PascalClassLibrary/GraphicTest/ rev 543.
File size: 30.5 KB

Line
1	unit GR32_Math;
2
3	(* *** BEGIN LICENSE BLOCK ***
4	* Version: MPL 1.1 or LGPL 2.1 with linking exception
5	*
6	* The contents of this file are subject to the Mozilla Public License Version
7	* 1.1 (the "License"); you may not use this file except in compliance with
8	* the License. You may obtain a copy of the License at
9	* http://www.mozilla.org/MPL/
10	*
11	* Software distributed under the License is distributed on an "AS IS" basis,
12	* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13	* for the specific language governing rights and limitations under the
14	* License.
15	*
16	* Alternatively, the contents of this file may be used under the terms of the
17	* Free Pascal modified version of the GNU Lesser General Public License
18	* Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
19	* of this license are applicable instead of those above.
20	* Please see the file LICENSE.txt for additional information concerning this
21	* license.
22	*
23	* The Original Code is Additional Math Routines for Graphics32
24	*
25	* The Initial Developer of the Original Code is
26	* Mattias Andersson <mattias@centaurix.com>
27	* (parts of this unit were moved here from GR32_System.pas and GR32.pas by Alex A. Denisov)
28	*
29	* Portions created by the Initial Developer are Copyright (C) 2005-2009
30	* the Initial Developer. All Rights Reserved.
31	*
32	* Contributor(s):
33	* Michael Hansen <dyster_tid@hotmail.com>
34	*
35	* *** END LICENSE BLOCK *** *)
36
37	interface
38
39	{$I GR32.inc}
40
41	uses GR32;
42
43	{ Fixed point math routines }
44	function FixedFloor(A: TFixed): Integer;
45	function FixedCeil(A: TFixed): Integer;
46	function FixedMul(A, B: TFixed): TFixed;
47	function FixedDiv(A, B: TFixed): TFixed;
48	function OneOver(Value: TFixed): TFixed;
49	function FixedRound(A: TFixed): Integer;
50	function FixedSqr(Value: TFixed): TFixed;
51	function FixedSqrtLP(Value: TFixed): TFixed; // 8-bit precision
52	function FixedSqrtHP(Value: TFixed): TFixed; // 16-bit precision
53	// Fixed point interpolation
54	function FixedCombine(W, X, Y: TFixed): TFixed;
55
56
57	{ Trigonometric routines }
58
59	procedure SinCos(const Theta: TFloat; out Sin, Cos: TFloat); overload;
60	procedure SinCos(const Theta, Radius: Single; out Sin, Cos: Single); overload;
61	procedure SinCos(const Theta, ScaleX, ScaleY: TFloat; out Sin, Cos: Single); overload;
62	function Hypot(const X, Y: TFloat): TFloat; overload;
63	function Hypot(const X, Y: Integer): Integer; overload;
64	function FastSqrt(const Value: TFloat): TFloat;
65	function FastSqrtBab1(const Value: TFloat): TFloat;
66	function FastSqrtBab2(const Value: TFloat): TFloat;
67	function FastInvSqrt(const Value: Single): Single; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF} overload;
68
69
70	{ Misc. Routines }
71
72	{ MulDiv a faster implementation of Windows.MulDiv funtion }
73	function MulDiv(Multiplicand, Multiplier, Divisor: Integer): Integer;
74
75	// tells if X is a power of 2, returns true when X = 1,2,4,8,16 etc.
76	function IsPowerOf2(Value: Integer): Boolean; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
77	// returns X rounded down to the nearest power of two
78	function PrevPowerOf2(Value: Integer): Integer;
79	// returns X rounded down to the nearest power of two, i.e. 5 -> 8, 7 -> 8, 15 -> 16
80	function NextPowerOf2(Value: Integer): Integer;
81
82	// fast average without overflow, useful for e.g. fixed point math
83	function Average(A, B: Integer): Integer;
84	// fast sign function
85	function Sign(Value: Integer): Integer;
86
87	function FloatMod(x, y: Double): Double; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
88
89	function DivMod(Dividend, Divisor: Integer; var Remainder: Integer): Integer;
90
91
92	{$IFDEF FPC}
93	{$IFDEF TARGET_X64}
94	(*
95	FPC has no similar {$EXCESSPRECISION OFF} directive,
96	but we can easily emulate that by overriding some internal math functions
97	*)
98	function PI: Single; [internproc: fpc_in_pi_real];
99	//function Abs(D: Single): Single; [internproc: fpc_in_abs_real];
100	//function Sqr(D: Single): Single; [internproc: fpc_in_sqr_real];
101	function Sqrt(D: Single): Single; [internproc: fpc_in_sqrt_real];
102	function ArcTan(D: Single): Single; [internproc: fpc_in_arctan_real];
103	function Ln(D: Single): Single; [internproc: fpc_in_ln_real];
104	function Sin(D: Single): Single; [internproc: fpc_in_sin_real];
105	function Cos(D: Single): Single; [internproc: fpc_in_cos_real];
106	function Exp(D: Single): Single; [internproc: fpc_in_exp_real];
107	function Round(D: Single): Int64; [internproc: fpc_in_round_real];
108	function Frac(D: Single): Single; [internproc: fpc_in_frac_real];
109	function Int(D: Single): Single; [internproc: fpc_in_int_real];
110	function Trunc(D: Single): Int64; [internproc: fpc_in_trunc_real];
111
112	function Ceil(X: Single): Integer; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
113	function Floor(X: Single): Integer; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF}
114	{$ENDIF}
115	{$ENDIF}
116
117	type
118	TCumSumProc = procedure(Values: PSingleArray; Count: Integer);
119
120	var
121	CumSum: TCumSumProc;
122
123	implementation
124
125	uses
126	Math, GR32_System;
127
128	{$IFDEF PUREPASCAL}
129	const
130	FixedOneS: Single = 65536;
131	{$ENDIF}
132
133
134	{$IFDEF FPC}
135	{$IFDEF TARGET_X64}
136	function Ceil(X: Single): Integer;
137	begin
138	Result := Trunc(X);
139	if (X - Result) > 0 then
140	Inc(Result);
141	end;
142
143	function Floor(X: Single): Integer;
144	begin
145	Result := Trunc(X);
146	if (X - Result) < 0 then
147	Dec(Result);
148	end;
149	{$ENDIF}
150	{$ENDIF}
151
152
153	{ Fixed-point math }
154
155	function FixedFloor(A: TFixed): Integer;
156	{$IFDEF PUREPASCAL}
157	begin
158	Result := A div FIXEDONE;
159	{$ELSE}
160	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
161	asm
162	{$IFDEF TARGET_x86}
163	SAR EAX, 16
164	{$ENDIF}
165	{$IFDEF TARGET_x64}
166	MOV EAX, ECX
167	SAR EAX, 16
168	{$ENDIF}
169	{$ENDIF}
170	end;
171
172	function FixedCeil(A: TFixed): Integer;
173	{$IFDEF PUREPASCAL}
174	begin
175	Result := (A + $FFFF) div FIXEDONE;
176	{$ELSE}
177	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
178	asm
179	{$IFDEF TARGET_x86}
180	ADD EAX, $0000FFFF
181	SAR EAX, 16
182	{$ENDIF}
183	{$IFDEF TARGET_x64}
184	MOV EAX, ECX
185	ADD EAX, $0000FFFF
186	SAR EAX, 16
187	{$ENDIF}
188	{$ENDIF}
189	end;
190
191	function FixedRound(A: TFixed): Integer;
192	{$IFDEF PUREPASCAL}
193	begin
194	Result := (A + $7FFF) div FIXEDONE;
195	{$ELSE}
196	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
197	asm
198	{$IFDEF TARGET_x86}
199	ADD EAX, $00007FFF
200	SAR EAX, 16
201	{$ENDIF}
202	{$IFDEF TARGET_x64}
203	MOV EAX, ECX
204	ADD EAX, $00007FFF
205	SAR EAX, 16
206	{$ENDIF}
207	{$ENDIF}
208	end;
209
210	function FixedMul(A, B: TFixed): TFixed;
211	{$IFDEF PUREPASCAL}
212	begin
213	Result := Round(A * FixedToFloat * B);
214	{$ELSE}
215	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
216	asm
217	{$IFDEF TARGET_x86}
218	IMUL EDX
219	SHRD EAX, EDX, 16
220	{$ENDIF}
221	{$IFDEF TARGET_x64}
222	MOV EAX, ECX
223	IMUL EDX
224	SHRD EAX, EDX, 16
225	{$ENDIF}
226	{$ENDIF}
227	end;
228
229	function FixedDiv(A, B: TFixed): TFixed;
230	{$IFDEF PUREPASCAL}
231	begin
232	Result := Round(A / B * FixedOne);
233	{$ELSE}
234	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
235	asm
236	{$IFDEF TARGET_x86}
237	MOV ECX, B
238	CDQ
239	SHLD EDX, EAX, 16
240	SHL EAX, 16
241	IDIV ECX
242	{$ENDIF}
243	{$IFDEF TARGET_x64}
244	MOV EAX, ECX
245	MOV ECX, EDX
246	CDQ
247	SHLD EDX, EAX, 16
248	SHL EAX, 16
249	IDIV ECX
250	{$ENDIF}
251	{$ENDIF}
252	end;
253
254	function OneOver(Value: TFixed): TFixed;
255	{$IFDEF PUREPASCAL}
256	const
257	Dividend: Single = 4294967296; // FixedOne * FixedOne
258	begin
259	Result := Round(Dividend / Value);
260	{$ELSE}
261	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
262	asm
263	{$IFDEF TARGET_x86}
264	MOV ECX, Value
265	XOR EAX, EAX
266	MOV EDX, 1
267	IDIV ECX
268	{$ENDIF}
269	{$IFDEF TARGET_x64}
270	XOR EAX, EAX
271	MOV EDX, 1
272	IDIV ECX
273	{$ENDIF}
274	{$ENDIF}
275	end;
276
277	function FixedSqr(Value: TFixed): TFixed;
278	{$IFDEF PUREPASCAL}
279	begin
280	Result := Round(Value * FixedToFloat * Value);
281	{$ELSE}
282	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
283	asm
284	{$IFDEF TARGET_x86}
285	IMUL EAX
286	SHRD EAX, EDX, 16
287	{$ENDIF}
288	{$IFDEF TARGET_x64}
289	MOV EAX, Value
290	IMUL EAX
291	SHRD EAX, EDX, 16
292	{$ENDIF}
293	{$ENDIF}
294	end;
295
296	function FixedSqrtLP(Value: TFixed): TFixed;
297	{$IFDEF PUREPASCAL}
298	begin
299	Result := Round(Sqrt(Value * FixedOneS));
300	{$ELSE}
301	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
302	asm
303	{$IFDEF TARGET_x86}
304	PUSH EBX
305	MOV ECX, EAX
306	XOR EAX, EAX
307	MOV EBX, $40000000
308	@SqrtLP1:
309	MOV EDX, ECX
310	SUB EDX, EBX
311	JL @SqrtLP2
312	SUB EDX, EAX
313	JL @SqrtLP2
314	MOV ECX,EDX
315	SHR EAX, 1
316	OR EAX, EBX
317	SHR EBX, 2
318	JNZ @SqrtLP1
319	SHL EAX, 8
320	JMP @SqrtLP3
321	@SqrtLP2:
322	SHR EAX, 1
323	SHR EBX, 2
324	JNZ @SqrtLP1
325	SHL EAX, 8
326	@SqrtLP3:
327	POP EBX
328	{$ENDIF}
329	{$IFDEF TARGET_x64}
330	PUSH RBX
331	XOR EAX, EAX
332	MOV EBX, $40000000
333	@SqrtLP1:
334	MOV EDX, ECX
335	SUB EDX, EBX
336	JL @SqrtLP2
337	SUB EDX, EAX
338	JL @SqrtLP2
339	MOV ECX,EDX
340	SHR EAX, 1
341	OR EAX, EBX
342	SHR EBX, 2
343	JNZ @SqrtLP1
344	SHL EAX, 8
345	JMP @SqrtLP3
346	@SqrtLP2:
347	SHR EAX, 1
348	SHR EBX, 2
349	JNZ @SqrtLP1
350	SHL EAX, 8
351	@SqrtLP3:
352	POP RBX
353	{$ENDIF}
354	{$ENDIF}
355	end;
356
357	function FixedSqrtHP(Value: TFixed): TFixed;
358	{$IFDEF PUREPASCAL}
359	begin
360	Result := Round(Sqrt(Value * FixedOneS));
361	{$ELSE}
362	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
363	asm
364	{$IFDEF TARGET_x86}
365	PUSH EBX
366	MOV ECX, EAX
367	XOR EAX, EAX
368	MOV EBX, $40000000
369	@SqrtHP1:
370	MOV EDX, ECX
371	SUB EDX, EBX
372	jb @SqrtHP2
373	SUB EDX, EAX
374	jb @SqrtHP2
375	MOV ECX,EDX
376	SHR EAX, 1
377	OR EAX, EBX
378	SHR EBX, 2
379	JNZ @SqrtHP1
380	JZ @SqrtHP5
381	@SqrtHP2:
382	SHR EAX, 1
383	SHR EBX, 2
384	JNZ @SqrtHP1
385	@SqrtHP5:
386	MOV EBX, $00004000
387	SHL EAX, 16
388	SHL ECX, 16
389	@SqrtHP3:
390	MOV EDX, ECX
391	SUB EDX, EBX
392	jb @SqrtHP4
393	SUB EDX, EAX
394	jb @SqrtHP4
395	MOV ECX, EDX
396	SHR EAX, 1
397	OR EAX, EBX
398	SHR EBX, 2
399	JNZ @SqrtHP3
400	JMP @SqrtHP6
401	@SqrtHP4:
402	SHR EAX, 1
403	SHR EBX, 2
404	JNZ @SqrtHP3
405	@SqrtHP6:
406	POP EBX
407	{$ENDIF}
408	{$IFDEF TARGET_x64}
409	PUSH RBX
410	XOR EAX, EAX
411	MOV EBX, $40000000
412	@SqrtHP1:
413	MOV EDX, ECX
414	SUB EDX, EBX
415	jb @SqrtHP2
416	SUB EDX, EAX
417	jb @SqrtHP2
418	MOV ECX,EDX
419	SHR EAX, 1
420	OR EAX, EBX
421	SHR EBX, 2
422	JNZ @SqrtHP1
423	JZ @SqrtHP5
424	@SqrtHP2:
425	SHR EAX, 1
426	SHR EBX, 2
427	JNZ @SqrtHP1
428	@SqrtHP5:
429	MOV EBX, $00004000
430	SHL EAX, 16
431	SHL ECX, 16
432	@SqrtHP3:
433	MOV EDX, ECX
434	SUB EDX, EBX
435	jb @SqrtHP4
436	SUB EDX, EAX
437	jb @SqrtHP4
438	MOV ECX, EDX
439	SHR EAX, 1
440	OR EAX, EBX
441	SHR EBX, 2
442	JNZ @SqrtHP3
443	JMP @SqrtHP6
444	@SqrtHP4:
445	SHR EAX, 1
446	SHR EBX, 2
447	JNZ @SqrtHP3
448	@SqrtHP6:
449	POP RBX
450	{$ENDIF}
451	{$ENDIF}
452	end;
453
454	function FixedCombine(W, X, Y: TFixed): TFixed;
455	// EAX <- W, EDX <- X, ECX <- Y
456	// combine fixed value X and fixed value Y with the weight of X given in W
457	// Result Z = W * X + (1 - W) * Y = Y + (X - Y) * W
458	// Fixed Point Version: Result Z = Y + (X - Y) * W / 65536
459	{$IFDEF PUREPASCAL}
460	begin
461	Result := Round(Y + (X - Y) * FixedToFloat * W);
462	{$ELSE}
463	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
464	asm
465	{$IFDEF TARGET_x86}
466	SUB EDX, ECX
467	IMUL EDX
468	SHRD EAX, EDX, 16
469	ADD EAX, ECX
470	{$ENDIF}
471	{$IFDEF TARGET_x64}
472	MOV EAX, ECX
473	SUB EDX, R8D
474	IMUL EDX
475	SHRD EAX, EDX, 16
476	ADD EAX, R8D
477	{$ENDIF}
478	{$ENDIF}
479	end;
480
481	{ Trigonometry }
482
483	procedure SinCos(const Theta: TFloat; out Sin, Cos: TFloat);
484	{$IFDEF NATIVE_SINCOS}
485	var
486	S, C: Extended;
487	begin
488	Math.SinCos(Theta, S, C);
489	Sin := S;
490	Cos := C;
491	{$ELSE}
492	{$IFDEF TARGET_x64}
493	var
494	Temp: TFloat;
495	{$ENDIF}
496	asm
497	{$IFDEF TARGET_x86}
498	FLD Theta
499	FSINCOS
500	FSTP DWORD PTR [EDX] // cosine
501	FSTP DWORD PTR [EAX] // sine
502	{$ENDIF}
503	{$IFDEF TARGET_x64}
504	MOVD Temp, Theta
505	FLD Temp
506	FSINCOS
507	FSTP [Sin] // cosine
508	FSTP [Cos] // sine
509	{$ENDIF}
510	{$ENDIF}
511	end;
512
513	procedure SinCos(const Theta, Radius: TFloat; out Sin, Cos: TFloat);
514	{$IFDEF NATIVE_SINCOS}
515	var
516	S, C: Extended;
517	begin
518	Math.SinCos(Theta, S, C);
519	Sin := S * Radius;
520	Cos := C * Radius;
521	{$ELSE}
522	{$IFDEF TARGET_x64}
523	var
524	Temp: TFloat;
525	{$ENDIF}
526	asm
527	{$IFDEF TARGET_x86}
528	FLD Theta
529	FSINCOS
530	FMUL Radius
531	FSTP DWORD PTR [EDX] // cosine
532	FMUL Radius
533	FSTP DWORD PTR [EAX] // sine
534	{$ENDIF}
535	{$IFDEF TARGET_x64}
536	MOVD Temp, Theta
537	FLD Temp
538	MOVD Temp, Radius
539	FSINCOS
540	FMUL Temp
541	FSTP [Cos]
542	FMUL Temp
543	FSTP [Sin]
544	{$ENDIF}
545	{$ENDIF}
546	end;
547
548	procedure SinCos(const Theta, ScaleX, ScaleY: TFloat; out Sin, Cos: Single); overload;
549	{$IFDEF NATIVE_SINCOS}
550	var
551	S, C: Extended;
552	begin
553	Math.SinCos(Theta, S, C);
554	Sin := S * ScaleX;
555	Cos := C * ScaleY;
556	{$ELSE}
557	{$IFDEF TARGET_x64}
558	var
559	Temp: TFloat;
560	{$ENDIF}
561	asm
562	{$IFDEF TARGET_x86}
563	FLD Theta
564	FSINCOS
565	FMUL ScaleX
566	FSTP DWORD PTR [EDX] // cosine
567	FMUL ScaleY
568	FSTP DWORD PTR [EAX] // sine
569	{$ENDIF}
570	{$IFDEF TARGET_x64}
571	MOVD Temp, Theta
572	FLD Temp
573	FSINCOS
574	MOVD Temp, ScaleX
575	FMUL Temp
576	FSTP [Cos]
577	MOVD Temp, ScaleY
578	FMUL Temp
579	FSTP [Sin]
580	{$ENDIF}
581	{$ENDIF}
582	end;
583
584	function Hypot(const X, Y: TFloat): TFloat;
585	{$IFDEF PUREPASCAL}
586	begin
587	Result := Sqrt(Sqr(X) + Sqr(Y));
588	{$ELSE}
589	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
590	asm
591	{$IFDEF TARGET_x86}
592	FLD X
593	FMUL ST,ST
594	FLD Y
595	FMUL ST,ST
596	FADDP ST(1),ST
597	FSQRT
598	FWAIT
599	{$ENDIF}
600	{$IFDEF TARGET_x64}
601	MULSS XMM0, XMM0
602	MULSS XMM1, XMM1
603	ADDSS XMM0, XMM1
604	SQRTSS XMM0, XMM0
605	{$ENDIF}
606	{$ENDIF}
607	end;
608
609	function Hypot(const X, Y: Integer): Integer;
610	//{$IFDEF PUREPASCAL}
611	begin
612	Result := Round(Math.Hypot(X, Y));
613	(*
614	{$ELSE}
615	asm
616	{$IFDEF TARGET_x64}
617	IMUL RAX, RCX, RDX
618	{$ELSE}
619	FLD X
620	FMUL ST,ST
621	FLD Y
622	FMUL ST,ST
623	FADDP ST(1),ST
624	FSQRT
625	FISTP [ESP - 4]
626	MOV EAX, [ESP - 4]
627	FWAIT
628	{$ENDIF}
629	{$ENDIF}
630	*)
631	end;
632
633	function FastSqrt(const Value: TFloat): TFloat;
634	// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
635	{$IFDEF PUREPASCAL}
636	var
637	I: Integer absolute Value;
638	J: Integer absolute Result;
639	begin
640	J := (I - $3F800000) div 2 + $3F800000;
641	{$ELSE}
642	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
643	asm
644	{$IFDEF TARGET_x86}
645	MOV EAX, DWORD PTR Value
646	SUB EAX, $3F800000
647	SAR EAX, 1
648	ADD EAX, $3F800000
649	MOV DWORD PTR [ESP - 4], EAX
650	FLD DWORD PTR [ESP - 4]
651	{$ENDIF}
652	{$IFDEF TARGET_x64}
653	SQRTSS XMM0, XMM0
654	{$ENDIF}
655	{$ENDIF}
656	end;
657
658	function FastSqrtBab1(const Value: TFloat): TFloat;
659	// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
660	// additionally one babylonian step added
661	{$IFNDEF PUREPASCAL}
662	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
663	{$ENDIF}
664	const
665	CHalf : TFloat = 0.5;
666	{$IFDEF PUREPASCAL}
667	var
668	I: Integer absolute Value;
669	J: Integer absolute Result;
670	begin
671	J := (I - $3F800000) div 2 + $3F800000;
672	Result := CHalf * (Result + Value / Result);
673	{$ELSE}
674	asm
675	{$IFDEF TARGET_x86}
676	MOV EAX, Value
677	SUB EAX, $3F800000
678	SAR EAX, 1
679	ADD EAX, $3F800000
680	MOV DWORD PTR [ESP - 4], EAX
681	FLD Value
682	FDIV DWORD PTR [ESP - 4]
683	FADD DWORD PTR [ESP - 4]
684	FMUL CHalf
685	{$ENDIF}
686	{$IFDEF TARGET_x64}
687	SQRTSS XMM0, XMM0
688	{$ENDIF}
689	{$ENDIF}
690	end;
691
692	function FastSqrtBab2(const Value: TFloat): TFloat;
693	// see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation
694	// additionally two babylonian steps added
695	{$IFDEF PUREPASCAL}
696	const
697	CQuarter : TFloat = 0.25;
698	var
699	J: Integer absolute Result;
700	begin
701	Result := Value;
702	J := ((J - (1 shl 23)) shr 1) + (1 shl 29);
703	Result := Result + Value / Result;
704	Result := CQuarter * Result + Value / Result;
705	{$ELSE}
706	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
707	const
708	CHalf : TFloat = 0.5;
709	asm
710	{$IFDEF TARGET_x86}
711	MOV EAX, Value
712	SUB EAX, $3F800000
713	SAR EAX, 1
714	ADD EAX, $3F800000
715	MOV DWORD PTR [ESP - 4], EAX
716	FLD Value
717	FDIV DWORD PTR [ESP - 4]
718	FADD DWORD PTR [ESP - 4]
719	FMUL CHalf
720	{$ENDIF}
721	{$IFDEF TARGET_x64}
722	MOVD EAX, Value
723	SUB EAX, $3F800000
724	SAR EAX, 1
725	ADD EAX, $3F800000
726	MOVD XMM1, EAX
727	DIVSS XMM0, XMM1
728	ADDSS XMM0, XMM1
729	MOVD XMM1, [RIP + CHalf]
730	MULSS XMM0, XMM1
731	{$ENDIF}
732	{$ENDIF}
733	end;
734
735	function FastInvSqrt(const Value: Single): Single;
736	var
737	IntCst : Cardinal absolute result;
738	begin
739	Result := Value;
740	IntCst := ($BE6EB50C - IntCst) shr 1;
741	Result := 0.5 * Result * (3 - Value * Sqr(Result));
742	end;
743
744	{ Misc. }
745
746	function MulDiv(Multiplicand, Multiplier, Divisor: Integer): Integer;
747	{$IFDEF PUREPASCAL}
748	begin
749	Result := Int64(Multiplicand) * Int64(Multiplier) div Divisor;
750	{$ELSE}
751	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
752	asm
753	{$IFDEF TARGET_x86}
754	PUSH EBX // Imperative save
755	PUSH ESI // of EBX and ESI
756
757	MOV EBX, EAX // Result will be negative or positive so set rounding direction
758	XOR EBX, EDX // Negative: substract 1 in case of rounding
759	XOR EBX, ECX // Positive: add 1
760
761	OR EAX, EAX // Make all operands positive, ready for unsigned operations
762	JNS @m1Ok // minimizing branching
763	NEG EAX
764	@m1Ok:
765	OR EDX, EDX
766	JNS @m2Ok
767	NEG EDX
768	@m2Ok:
769	OR ECX, ECX
770	JNS @DivOk
771	NEG ECX
772	@DivOK:
773	MUL EDX // Unsigned multiply (Multiplicand*Multiplier)
774
775	MOV ESI, EDX // Check for overflow, by comparing
776	SHL ESI, 1 // 2 times the high-order 32 bits of the product (EDX)
777	CMP ESI, ECX // with the Divisor.
778	JAE @Overfl // If equal or greater than overflow with division anticipated
779
780	DIV ECX // Unsigned divide of product by Divisor
781
782	SUB ECX, EDX // Check if the result must be adjusted by adding or substracting
783	CMP ECX, EDX // 1 (*.5 -> nearest integer), by comparing the difference of
784	JA @NoAdd // Divisor and remainder with the remainder. If it is greater then
785	INC EAX // no rounding needed; add 1 to result otherwise
786	@NoAdd:
787	OR EBX, EDX // From unsigned operations back the to original sign of the result
788	JNS @Exit // must be positive
789	NEG EAX // must be negative
790	JMP @Exit
791	@Overfl:
792	OR EAX, -1 // 3 bytes alternative for MOV EAX,-1. Windows.MulDiv "overflow"
793	// and "zero-divide" return value
794	@Exit:
795	POP ESI // Restore
796	POP EBX // esi and EBX
797	{$ENDIF}
798	{$IFDEF TARGET_x64}
799	MOV EAX, ECX // Result will be negative or positive so set rounding direction
800	XOR ECX, EDX // Negative: substract 1 in case of rounding
801	XOR ECX, R8D // Positive: add 1
802
803	OR EAX, EAX // Make all operands positive, ready for unsigned operations
804	JNS @m1Ok // minimizing branching
805	NEG EAX
806	@m1Ok:
807	OR EDX, EDX
808	JNS @m2Ok
809	NEG EDX
810	@m2Ok:
811	OR R8D, R8D
812	JNS @DivOk
813	NEG R8D
814	@DivOK:
815	MUL EDX // Unsigned multiply (Multiplicand*Multiplier)
816
817	MOV R9D, EDX // Check for overflow, by comparing
818	SHL R9D, 1 // 2 times the high-order 32 bits of the product (EDX)
819	CMP R9D, R8D // with the Divisor.
820	JAE @Overfl // If equal or greater than overflow with division anticipated
821
822	DIV R8D // Unsigned divide of product by Divisor
823
824	SUB R8D, EDX // Check if the result must be adjusted by adding or substracting
825	CMP R8D, EDX // 1 (*.5 -> nearest integer), by comparing the difference of
826	JA @NoAdd // Divisor and remainder with the remainder. If it is greater then
827	INC EAX // no rounding needed; add 1 to result otherwise
828	@NoAdd:
829	OR ECX, EDX // From unsigned operations back the to original sign of the result
830	JNS @Exit // must be positive
831	NEG EAX // must be negative
832	JMP @Exit
833	@Overfl:
834	OR EAX, -1 // 3 bytes alternative for MOV EAX,-1. Windows.MulDiv "overflow"
835	// and "zero-divide" return value
836	@Exit:
837	{$ENDIF}
838	{$ENDIF}
839	end;
840
841	function IsPowerOf2(Value: Integer): Boolean;
842	//returns true when X = 1,2,4,8,16 etc.
843	begin
844	Result := Value and (Value - 1) = 0;
845	end;
846
847	function PrevPowerOf2(Value: Integer): Integer;
848	//returns X rounded down to the power of two
849	{$IFDEF PUREPASCAL}
850	begin
851	Result := 1;
852	while Value shr 1 > 0 do
853	Result := Result shl 1;
854	{$ELSE}
855	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
856	asm
857	{$IFDEF TARGET_x86}
858	BSR ECX, EAX
859	SHR EAX, CL
860	SHL EAX, CL
861	{$ENDIF}
862	{$IFDEF TARGET_x64}
863	MOV EAX, Value
864	BSR ECX, EAX
865	SHR EAX, CL
866	SHL EAX, CL
867	{$ENDIF}
868	{$ENDIF}
869	end;
870
871	function NextPowerOf2(Value: Integer): Integer;
872	//returns X rounded up to the power of two, i.e. 5 -> 8, 7 -> 8, 15 -> 16
873	{$IFDEF PUREPASCAL}
874	begin
875	Result := 2;
876	while Value shr 1 > 0 do
877	Result := Result shl 1;
878	{$ELSE}
879	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
880	asm
881	{$IFDEF TARGET_x86}
882	DEC EAX
883	JLE @1
884	BSR ECX, EAX
885	MOV EAX, 2
886	SHL EAX, CL
887	RET
888	@1:
889	MOV EAX, 1
890	{$ENDIF}
891	{$IFDEF TARGET_x64}
892	MOV EAX, Value
893	DEC EAX
894	JLE @1
895	BSR ECX, EAX
896	MOV EAX, 2
897	SHL EAX, CL
898	RET
899	@1:
900	MOV EAX, 1
901	{$ENDIF}
902	{$ENDIF}
903	end;
904
905	function Average(A, B: Integer): Integer;
906	//fast average without overflow, useful e.g. for fixed point math
907	//(A + B)/2 = (A and B) + (A xor B)/2
908	{$IFDEF PUREPASCAL}
909	begin
910	Result := (A and B) + (A xor B) div 2;
911	{$ELSE}
912	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
913	asm
914	{$IFDEF TARGET_x86}
915	MOV ECX, EDX
916	XOR EDX, EAX
917	SAR EDX, 1
918	AND EAX, ECX
919	ADD EAX, EDX
920	{$ENDIF}
921	{$IFDEF TARGET_x64}
922	MOV EAX, A
923	MOV ECX, EDX
924	XOR EDX, EAX
925	SAR EDX, 1
926	AND EAX, ECX
927	ADD EAX, EDX
928	{$ENDIF}
929	{$ENDIF}
930	end;
931
932	function Sign(Value: Integer): Integer;
933	{$IFDEF PUREPASCAL}
934	begin
935	//Assumes 32 bit integer
936	Result := (- Value) shr 31 - (Value shr 31);
937	{$ELSE}
938	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
939	asm
940	{$IFDEF TARGET_x64}
941	MOV EAX, Value
942	{$ENDIF}
943	CDQ
944	NEG EAX
945	ADC EDX, EDX
946	MOV EAX, EDX
947	{$ENDIF}
948	end;
949
950	function FloatMod(x, y: Double): Double;
951	begin
952	if (y = 0) then
953	Result := X
954	else
955	Result := x - y * Floor(x / y);
956	end;
957
958	function DivMod(Dividend, Divisor: Integer; var Remainder: Integer): Integer;
959	{$IFDEF PUREPASCAL}
960	begin
961	Result := Dividend div Divisor;
962	Remainder := Dividend mod Divisor;
963	{$ELSE}
964	{$IFDEF FPC} assembler; nostackframe; {$ENDIF}
965	asm
966	{$IFDEF TARGET_x86}
967	PUSH EDX
968	CDQ
969	IDIV DWORD PTR [ESP]
970	ADD ESP, $04
971	MOV DWORD PTR [ECX], edx
972	{$ENDIF}
973	{$IFDEF TARGET_x64}
974	MOV RAX, RCX
975	MOV R9, RDX
976	CDQ
977	IDIV R9
978	MOV DWORD PTR [R8], EDX
979	{$ENDIF}
980	{$ENDIF}
981	end;
982
983	procedure CumSum_Pas(Values: PSingleArray; Count: Integer);
984	var
985	I: Integer;
986	V: TFloat;
987	begin
988	V := Values[0];
989	for I := 1 to Count - 1 do
990	begin
991	if PInteger(@Values[I])^ <> 0 then
992	V := V + Values[I];
993	Values[I] := V;
994	end;
995	end;
996
997	{$IFNDEF PUREPASCAL}
998	// Aligned SSE2 version -- Credits: Sanyin <prevodilac@hotmail.com>
999	procedure CumSum_SSE2(Values: PSingleArray; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
1000	asm
1001	{$IFDEF TARGET_x86}
1002	MOV ECX,EDX
1003	CMP ECX,2 // if count < 2, exit
1004	JL @END
1005	CMP ECX,32 // if count < 32, avoid SSE2 overhead
1006	JL @SMALL
1007
1008	{--- align memory ---}
1009	PUSH EBX
1010	PXOR XMM4,XMM4
1011	MOV EBX,EAX
1012	AND EBX,15 // get aligned count
1013	JZ @ENDALIGNING // already aligned
1014	ADD EBX,-16
1015	NEG EBX // get bytes to advance
1016	JZ @ENDALIGNING // already aligned
1017
1018	MOV ECX,EBX
1019	SAR ECX,2 // div with 4 to get cnt
1020	SUB EDX,ECX
1021
1022	ADD EAX,4
1023	DEC ECX
1024	JZ @SETUPLAST // one element
1025
1026	@ALIGNINGLOOP:
1027	FLD DWORD PTR [EAX-4]
1028	FADD DWORD PTR [EAX]
1029	FSTP DWORD PTR [EAX]
1030	ADD EAX,4
1031	DEC ECX
1032	JNZ @ALIGNINGLOOP
1033
1034	@SETUPLAST:
1035	MOVUPS XMM4,[EAX-4]
1036	PSLLDQ XMM4,12
1037	PSRLDQ XMM4,12
1038
1039	@ENDALIGNING:
1040	POP EBX
1041	PUSH EBX
1042	MOV ECX,EDX
1043	SAR ECX,2
1044	@LOOP:
1045	MOVAPS XMM0,[EAX]
1046	PXOR XMM5,XMM5
1047	PCMPEQD XMM5,XMM0
1048	PMOVMSKB EBX,XMM5
1049	CMP EBX,$0000FFFF
1050	JNE @NORMAL
1051	PSHUFD XMM0,XMM4,0
1052	JMP @SKIP
1053
1054	@NORMAL:
1055	ADDPS XMM0,XMM4
1056	PSHUFD XMM1,XMM0,$e4
1057	PSLLDQ XMM1,4
1058	PSHUFD XMM2,XMM1,$90
1059	PSHUFD XMM3,XMM1,$40
1060	ADDPS XMM2,XMM3
1061	ADDPS XMM1,XMM2
1062	ADDPS XMM0,XMM1
1063
1064	PSHUFLW XMM4,XMM0,$E4
1065	PSRLDQ XMM4,12
1066
1067	@SKIP:
1068	PREFETCHNTA [eax+16162]
1069	MOVAPS [EAX],XMM0
1070	ADD EAX,16
1071	SUB ECX,1
1072	JNZ @LOOP
1073	POP EBX
1074	MOV ECX,EDX
1075	SAR ECX,2
1076	SHL ECX,2
1077	SUB EDX,ECX
1078	MOV ECX,EDX
1079	JZ @END
1080
1081	@LOOP2:
1082	FLD DWORD PTR [EAX-4]
1083	FADD DWORD PTR [EAX]
1084	FSTP DWORD PTR [EAX]
1085	ADD EAX,4
1086	DEC ECX
1087	JNZ @LOOP2
1088	JMP @END
1089
1090	@SMALL:
1091	MOV ECX,EDX
1092	ADD EAX,4
1093	DEC ECX
1094	@LOOP3:
1095	FLD DWORD PTR [EAX-4]
1096	FADD DWORD PTR [EAX]
1097	FSTP DWORD PTR [EAX]
1098	ADD EAX,4
1099	DEC ECX
1100	JNZ @LOOP3
1101	{$ENDIF}
1102	{$IFDEF TARGET_x64}
1103	CMP EDX,2 // if count < 2, exit
1104	JL @END
1105
1106	MOV RAX,RCX
1107	MOV ECX,EDX
1108
1109	CMP ECX,32 // if count < 32, avoid SSE2 overhead
1110	JL @SMALL
1111
1112	{--- align memory ---}
1113	PXOR XMM4,XMM4
1114	MOV R8D,EAX
1115	AND R8D,15 // get aligned count
1116	JZ @ENDALIGNING // already aligned
1117	ADD R8D,-16
1118	NEG R8D // get bytes to advance
1119	JZ @ENDALIGNING // already aligned
1120
1121	MOV ECX,R8D
1122	SAR ECX,2 // div with 4 to get cnt
1123	SUB EDX,ECX
1124
1125	ADD RAX,4
1126	DEC ECX
1127	JZ @SETUPLAST // one element
1128
1129	@ALIGNINGLOOP:
1130	FLD DWORD PTR [RAX - 4]
1131	FADD DWORD PTR [RAX]
1132	FSTP DWORD PTR [RAX]
1133	ADD RAX,4
1134	DEC ECX
1135	JNZ @ALIGNINGLOOP
1136
1137	@SETUPLAST:
1138	MOVUPS XMM4,[RAX - 4]
1139	PSLLDQ XMM4,12
1140	PSRLDQ XMM4,12
1141
1142	@ENDALIGNING:
1143	MOV ECX,EDX
1144	SAR ECX,2
1145	@LOOP:
1146	MOVAPS XMM0,[RAX]
1147	PXOR XMM5,XMM5
1148	PCMPEQD XMM5,XMM0
1149	PMOVMSKB R8D,XMM5
1150	CMP R8D,$0000FFFF
1151	JNE @NORMAL
1152	PSHUFD XMM0,XMM4,0
1153	JMP @SKIP
1154
1155	@NORMAL:
1156	ADDPS XMM0,XMM4
1157	PSHUFD XMM1,XMM0,$e4
1158	PSLLDQ XMM1,4
1159	PSHUFD XMM2,XMM1,$90
1160	PSHUFD XMM3,XMM1,$40
1161	ADDPS XMM2,XMM3
1162	ADDPS XMM1,XMM2
1163	ADDPS XMM0,XMM1
1164
1165	PSHUFLW XMM4,XMM0,$E4
1166	PSRLDQ XMM4,12
1167
1168	@SKIP:
1169	PREFETCHNTA [RAX + 32 * 2]
1170	MOVAPS [RAX],XMM0
1171	ADD RAX,16
1172	SUB ECX,1
1173	JNZ @LOOP
1174	MOV ECX,EDX
1175	SAR ECX,2
1176	SHL ECX,2
1177	SUB EDX,ECX
1178	MOV ECX,EDX
1179	JZ @END
1180
1181	@LOOP2:
1182	FLD DWORD PTR [RAX - 4]
1183	FADD DWORD PTR [RAX]
1184	FSTP DWORD PTR [RAX]
1185	ADD RAX,4
1186	DEC ECX
1187	JNZ @LOOP2
1188	JMP @END
1189
1190	@SMALL:
1191	ADD RAX,4
1192	DEC ECX
1193	@LOOP3:
1194	FLD DWORD PTR [RAX - 4]
1195	FADD DWORD PTR [RAX]
1196	FSTP DWORD PTR [RAX]
1197	ADD RAX,4
1198	DEC ECX
1199	JNZ @LOOP3
1200	{$ENDIF}
1201	@END:
1202	end;
1203	{$ENDIF}
1204
1205
1206	initialization
1207	{$IFNDEF PUREPASCAL}
1208	if HasInstructionSet(ciSSE2) then
1209	CumSum := CumSum_SSE2
1210	else
1211	{$ENDIF}
1212	CumSum := CumSum_Pas;
1213
1214	end.

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/Packages/Graphics32/GR32_Math.pas

Download in other formats: