Changeset 522 for GraphicTest/Packages/Graphics32/GR32_Math.pas
- Timestamp:
- Apr 17, 2019, 10:42:18 AM (5 years ago)
- Location:
- GraphicTest/Packages/Graphics32
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
GraphicTest/Packages/Graphics32
-
Property svn:ignore
set to
lib
-
Property svn:ignore
set to
-
GraphicTest/Packages/Graphics32/GR32_Math.pas
r450 r522 59 59 procedure SinCos(const Theta: TFloat; out Sin, Cos: TFloat); overload; 60 60 procedure SinCos(const Theta, Radius: Single; out Sin, Cos: Single); overload; 61 procedure SinCos(const Theta, ScaleX, ScaleY: TFloat; out Sin, Cos: Single); overload; 61 62 function Hypot(const X, Y: TFloat): TFloat; overload; 62 63 function Hypot(const X, Y: Integer): Integer; overload; … … 86 87 function FloatMod(x, y: Double): Double; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF} 87 88 89 function DivMod(Dividend, Divisor: Integer; var Remainder: Integer): Integer; 90 91 92 {$IFDEF FPC} 93 {$IFDEF TARGET_X64} 94 (* 95 FPC has no similar {$EXCESSPRECISION OFF} directive, 96 but we can easily emulate that by overriding some internal math functions 97 *) 98 function PI: Single; [internproc: fpc_in_pi_real]; 99 //function Abs(D: Single): Single; [internproc: fpc_in_abs_real]; 100 //function Sqr(D: Single): Single; [internproc: fpc_in_sqr_real]; 101 function Sqrt(D: Single): Single; [internproc: fpc_in_sqrt_real]; 102 function ArcTan(D: Single): Single; [internproc: fpc_in_arctan_real]; 103 function Ln(D: Single): Single; [internproc: fpc_in_ln_real]; 104 function Sin(D: Single): Single; [internproc: fpc_in_sin_real]; 105 function Cos(D: Single): Single; [internproc: fpc_in_cos_real]; 106 function Exp(D: Single): Single; [internproc: fpc_in_exp_real]; 107 function Round(D: Single): Int64; [internproc: fpc_in_round_real]; 108 function Frac(D: Single): Single; [internproc: fpc_in_frac_real]; 109 function Int(D: Single): Single; [internproc: fpc_in_int_real]; 110 function Trunc(D: Single): Int64; [internproc: fpc_in_trunc_real]; 111 112 function Ceil(X: Single): Integer; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF} 113 function Floor(X: Single): Integer; {$IFDEF INLININGSUPPORTED} inline; {$ENDIF} 114 {$ENDIF} 115 {$ENDIF} 116 117 type 118 TCumSumProc = procedure(Values: PSingleArray; Count: Integer); 119 120 var 121 CumSum: TCumSumProc; 122 88 123 implementation 89 124 90 125 uses 91 Math ;126 Math, GR32_System; 92 127 93 128 {$IFDEF PUREPASCAL} … … 96 131 {$ENDIF} 97 132 133 134 {$IFDEF FPC} 135 {$IFDEF TARGET_X64} 136 function Ceil(X: Single): Integer; 137 begin 138 Result := Trunc(X); 139 if (X - Result) > 0 then 140 Inc(Result); 141 end; 142 143 function Floor(X: Single): Integer; 144 begin 145 Result := Trunc(X); 146 if (X - Result) < 0 then 147 Dec(Result); 148 end; 149 {$ENDIF} 150 {$ENDIF} 151 152 98 153 { Fixed-point math } 99 154 … … 103 158 Result := A div FIXEDONE; 104 159 {$ELSE} 160 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 105 161 asm 106 162 {$IFDEF TARGET_x86} … … 119 175 Result := (A + $FFFF) div FIXEDONE; 120 176 {$ELSE} 177 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 121 178 asm 122 179 {$IFDEF TARGET_x86} … … 137 194 Result := (A + $7FFF) div FIXEDONE; 138 195 {$ELSE} 196 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 139 197 asm 140 198 {$IFDEF TARGET_x86} … … 155 213 Result := Round(A * FixedToFloat * B); 156 214 {$ELSE} 215 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 157 216 asm 158 217 {$IFDEF TARGET_x86} … … 173 232 Result := Round(A / B * FixedOne); 174 233 {$ELSE} 234 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 175 235 asm 176 236 {$IFDEF TARGET_x86} … … 199 259 Result := Round(Dividend / Value); 200 260 {$ELSE} 261 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 201 262 asm 202 263 {$IFDEF TARGET_x86} … … 219 280 Result := Round(Value * FixedToFloat * Value); 220 281 {$ELSE} 282 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 221 283 asm 222 284 {$IFDEF TARGET_x86} … … 237 299 Result := Round(Sqrt(Value * FixedOneS)); 238 300 {$ELSE} 301 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 239 302 asm 240 303 {$IFDEF TARGET_x86} … … 297 360 Result := Round(Sqrt(Value * FixedOneS)); 298 361 {$ELSE} 362 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 299 363 asm 300 364 {$IFDEF TARGET_x86} … … 397 461 Result := Round(Y + (X - Y) * FixedToFloat * W); 398 462 {$ELSE} 463 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 399 464 asm 400 465 {$IFDEF TARGET_x86} … … 427 492 {$IFDEF TARGET_x64} 428 493 var 429 Temp: DWord = 0;494 Temp: TFloat; 430 495 {$ENDIF} 431 496 asm … … 455 520 Cos := C * Radius; 456 521 {$ELSE} 522 {$IFDEF TARGET_x64} 523 var 524 Temp: TFloat; 525 {$ENDIF} 457 526 asm 458 527 {$IFDEF TARGET_x86} … … 477 546 end; 478 547 548 procedure SinCos(const Theta, ScaleX, ScaleY: TFloat; out Sin, Cos: Single); overload; 549 {$IFDEF NATIVE_SINCOS} 550 var 551 S, C: Extended; 552 begin 553 Math.SinCos(Theta, S, C); 554 Sin := S * ScaleX; 555 Cos := C * ScaleY; 556 {$ELSE} 557 {$IFDEF TARGET_x64} 558 var 559 Temp: TFloat; 560 {$ENDIF} 561 asm 562 {$IFDEF TARGET_x86} 563 FLD Theta 564 FSINCOS 565 FMUL ScaleX 566 FSTP DWORD PTR [EDX] // cosine 567 FMUL ScaleY 568 FSTP DWORD PTR [EAX] // sine 569 {$ENDIF} 570 {$IFDEF TARGET_x64} 571 MOVD Temp, Theta 572 FLD Temp 573 FSINCOS 574 MOVD Temp, ScaleX 575 FMUL Temp 576 FSTP [Cos] 577 MOVD Temp, ScaleY 578 FMUL Temp 579 FSTP [Sin] 580 {$ENDIF} 581 {$ENDIF} 582 end; 583 479 584 function Hypot(const X, Y: TFloat): TFloat; 480 585 {$IFDEF PUREPASCAL} … … 482 587 Result := Sqrt(Sqr(X) + Sqr(Y)); 483 588 {$ELSE} 589 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 484 590 asm 485 591 {$IFDEF TARGET_x86} … … 534 640 J := (I - $3F800000) div 2 + $3F800000; 535 641 {$ELSE} 642 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 536 643 asm 537 644 {$IFDEF TARGET_x86} … … 552 659 // see http://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Approximations_that_depend_on_IEEE_representation 553 660 // additionally one babylonian step added 661 {$IFNDEF PUREPASCAL} 662 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 663 {$ENDIF} 554 664 const 555 665 CHalf : TFloat = 0.5; … … 594 704 Result := CQuarter * Result + Value / Result; 595 705 {$ELSE} 706 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 596 707 const 597 708 CHalf : TFloat = 0.5; … … 616 727 DIVSS XMM0, XMM1 617 728 ADDSS XMM0, XMM1 618 MOVD XMM1, CHalf729 MOVD XMM1, [RIP + CHalf] 619 730 MULSS XMM0, XMM1 620 731 {$ENDIF} … … 638 749 Result := Int64(Multiplicand) * Int64(Multiplier) div Divisor; 639 750 {$ELSE} 751 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 640 752 asm 641 753 {$IFDEF TARGET_x86} … … 741 853 Result := Result shl 1; 742 854 {$ELSE} 855 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 743 856 asm 744 857 {$IFDEF TARGET_x86} … … 764 877 Result := Result shl 1; 765 878 {$ELSE} 879 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 766 880 asm 767 881 {$IFDEF TARGET_x86} … … 796 910 Result := (A and B) + (A xor B) div 2; 797 911 {$ELSE} 912 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 798 913 asm 799 914 {$IFDEF TARGET_x86} … … 821 936 Result := (- Value) shr 31 - (Value shr 31); 822 937 {$ELSE} 938 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 823 939 asm 824 940 {$IFDEF TARGET_x64} … … 840 956 end; 841 957 958 function DivMod(Dividend, Divisor: Integer; var Remainder: Integer): Integer; 959 {$IFDEF PUREPASCAL} 960 begin 961 Result := Dividend div Divisor; 962 Remainder := Dividend mod Divisor; 963 {$ELSE} 964 {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 965 asm 966 {$IFDEF TARGET_x86} 967 PUSH EDX 968 CDQ 969 IDIV DWORD PTR [ESP] 970 ADD ESP, $04 971 MOV DWORD PTR [ECX], edx 972 {$ENDIF} 973 {$IFDEF TARGET_x64} 974 MOV RAX, RCX 975 MOV R9, RDX 976 CDQ 977 IDIV R9 978 MOV DWORD PTR [R8], EDX 979 {$ENDIF} 980 {$ENDIF} 981 end; 982 983 procedure CumSum_Pas(Values: PSingleArray; Count: Integer); 984 var 985 I: Integer; 986 V: TFloat; 987 begin 988 V := Values[0]; 989 for I := 1 to Count - 1 do 990 begin 991 if PInteger(@Values[I])^ <> 0 then 992 V := V + Values[I]; 993 Values[I] := V; 994 end; 995 end; 996 997 {$IFNDEF PUREPASCAL} 998 // Aligned SSE2 version -- Credits: Sanyin <prevodilac@hotmail.com> 999 procedure CumSum_SSE2(Values: PSingleArray; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF} 1000 asm 1001 {$IFDEF TARGET_x86} 1002 MOV ECX,EDX 1003 CMP ECX,2 // if count < 2, exit 1004 JL @END 1005 CMP ECX,32 // if count < 32, avoid SSE2 overhead 1006 JL @SMALL 1007 1008 {--- align memory ---} 1009 PUSH EBX 1010 PXOR XMM4,XMM4 1011 MOV EBX,EAX 1012 AND EBX,15 // get aligned count 1013 JZ @ENDALIGNING // already aligned 1014 ADD EBX,-16 1015 NEG EBX // get bytes to advance 1016 JZ @ENDALIGNING // already aligned 1017 1018 MOV ECX,EBX 1019 SAR ECX,2 // div with 4 to get cnt 1020 SUB EDX,ECX 1021 1022 ADD EAX,4 1023 DEC ECX 1024 JZ @SETUPLAST // one element 1025 1026 @ALIGNINGLOOP: 1027 FLD DWORD PTR [EAX-4] 1028 FADD DWORD PTR [EAX] 1029 FSTP DWORD PTR [EAX] 1030 ADD EAX,4 1031 DEC ECX 1032 JNZ @ALIGNINGLOOP 1033 1034 @SETUPLAST: 1035 MOVUPS XMM4,[EAX-4] 1036 PSLLDQ XMM4,12 1037 PSRLDQ XMM4,12 1038 1039 @ENDALIGNING: 1040 POP EBX 1041 PUSH EBX 1042 MOV ECX,EDX 1043 SAR ECX,2 1044 @LOOP: 1045 MOVAPS XMM0,[EAX] 1046 PXOR XMM5,XMM5 1047 PCMPEQD XMM5,XMM0 1048 PMOVMSKB EBX,XMM5 1049 CMP EBX,$0000FFFF 1050 JNE @NORMAL 1051 PSHUFD XMM0,XMM4,0 1052 JMP @SKIP 1053 1054 @NORMAL: 1055 ADDPS XMM0,XMM4 1056 PSHUFD XMM1,XMM0,$e4 1057 PSLLDQ XMM1,4 1058 PSHUFD XMM2,XMM1,$90 1059 PSHUFD XMM3,XMM1,$40 1060 ADDPS XMM2,XMM3 1061 ADDPS XMM1,XMM2 1062 ADDPS XMM0,XMM1 1063 1064 PSHUFLW XMM4,XMM0,$E4 1065 PSRLDQ XMM4,12 1066 1067 @SKIP: 1068 PREFETCHNTA [eax+16*16*2] 1069 MOVAPS [EAX],XMM0 1070 ADD EAX,16 1071 SUB ECX,1 1072 JNZ @LOOP 1073 POP EBX 1074 MOV ECX,EDX 1075 SAR ECX,2 1076 SHL ECX,2 1077 SUB EDX,ECX 1078 MOV ECX,EDX 1079 JZ @END 1080 1081 @LOOP2: 1082 FLD DWORD PTR [EAX-4] 1083 FADD DWORD PTR [EAX] 1084 FSTP DWORD PTR [EAX] 1085 ADD EAX,4 1086 DEC ECX 1087 JNZ @LOOP2 1088 JMP @END 1089 1090 @SMALL: 1091 MOV ECX,EDX 1092 ADD EAX,4 1093 DEC ECX 1094 @LOOP3: 1095 FLD DWORD PTR [EAX-4] 1096 FADD DWORD PTR [EAX] 1097 FSTP DWORD PTR [EAX] 1098 ADD EAX,4 1099 DEC ECX 1100 JNZ @LOOP3 1101 {$ENDIF} 1102 {$IFDEF TARGET_x64} 1103 CMP EDX,2 // if count < 2, exit 1104 JL @END 1105 1106 MOV RAX,RCX 1107 MOV ECX,EDX 1108 1109 CMP ECX,32 // if count < 32, avoid SSE2 overhead 1110 JL @SMALL 1111 1112 {--- align memory ---} 1113 PXOR XMM4,XMM4 1114 MOV R8D,EAX 1115 AND R8D,15 // get aligned count 1116 JZ @ENDALIGNING // already aligned 1117 ADD R8D,-16 1118 NEG R8D // get bytes to advance 1119 JZ @ENDALIGNING // already aligned 1120 1121 MOV ECX,R8D 1122 SAR ECX,2 // div with 4 to get cnt 1123 SUB EDX,ECX 1124 1125 ADD RAX,4 1126 DEC ECX 1127 JZ @SETUPLAST // one element 1128 1129 @ALIGNINGLOOP: 1130 FLD DWORD PTR [RAX - 4] 1131 FADD DWORD PTR [RAX] 1132 FSTP DWORD PTR [RAX] 1133 ADD RAX,4 1134 DEC ECX 1135 JNZ @ALIGNINGLOOP 1136 1137 @SETUPLAST: 1138 MOVUPS XMM4,[RAX - 4] 1139 PSLLDQ XMM4,12 1140 PSRLDQ XMM4,12 1141 1142 @ENDALIGNING: 1143 MOV ECX,EDX 1144 SAR ECX,2 1145 @LOOP: 1146 MOVAPS XMM0,[RAX] 1147 PXOR XMM5,XMM5 1148 PCMPEQD XMM5,XMM0 1149 PMOVMSKB R8D,XMM5 1150 CMP R8D,$0000FFFF 1151 JNE @NORMAL 1152 PSHUFD XMM0,XMM4,0 1153 JMP @SKIP 1154 1155 @NORMAL: 1156 ADDPS XMM0,XMM4 1157 PSHUFD XMM1,XMM0,$e4 1158 PSLLDQ XMM1,4 1159 PSHUFD XMM2,XMM1,$90 1160 PSHUFD XMM3,XMM1,$40 1161 ADDPS XMM2,XMM3 1162 ADDPS XMM1,XMM2 1163 ADDPS XMM0,XMM1 1164 1165 PSHUFLW XMM4,XMM0,$E4 1166 PSRLDQ XMM4,12 1167 1168 @SKIP: 1169 PREFETCHNTA [RAX + 32 * 2] 1170 MOVAPS [RAX],XMM0 1171 ADD RAX,16 1172 SUB ECX,1 1173 JNZ @LOOP 1174 MOV ECX,EDX 1175 SAR ECX,2 1176 SHL ECX,2 1177 SUB EDX,ECX 1178 MOV ECX,EDX 1179 JZ @END 1180 1181 @LOOP2: 1182 FLD DWORD PTR [RAX - 4] 1183 FADD DWORD PTR [RAX] 1184 FSTP DWORD PTR [RAX] 1185 ADD RAX,4 1186 DEC ECX 1187 JNZ @LOOP2 1188 JMP @END 1189 1190 @SMALL: 1191 ADD RAX,4 1192 DEC ECX 1193 @LOOP3: 1194 FLD DWORD PTR [RAX - 4] 1195 FADD DWORD PTR [RAX] 1196 FSTP DWORD PTR [RAX] 1197 ADD RAX,4 1198 DEC ECX 1199 JNZ @LOOP3 1200 {$ENDIF} 1201 @END: 1202 end; 1203 {$ENDIF} 1204 1205 1206 initialization 1207 {$IFNDEF PUREPASCAL} 1208 if HasInstructionSet(ciSSE2) then 1209 CumSum := CumSum_SSE2 1210 else 1211 {$ENDIF} 1212 CumSum := CumSum_Pas; 1213 842 1214 end.
Note:
See TracChangeset
for help on using the changeset viewer.