| 1 | unit GR32_BlendASM;
|
|---|
| 2 |
|
|---|
| 3 | (* ***** BEGIN LICENSE BLOCK *****
|
|---|
| 4 | * Version: MPL 1.1 or LGPL 2.1 with linking exception
|
|---|
| 5 | *
|
|---|
| 6 | * The contents of this file are subject to the Mozilla Public License Version
|
|---|
| 7 | * 1.1 (the "License"); you may not use this file except in compliance with
|
|---|
| 8 | * the License. You may obtain a copy of the License at
|
|---|
| 9 | * http://www.mozilla.org/MPL/
|
|---|
| 10 | *
|
|---|
| 11 | * Software distributed under the License is distributed on an "AS IS" basis,
|
|---|
| 12 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|---|
| 13 | * for the specific language governing rights and limitations under the
|
|---|
| 14 | * License.
|
|---|
| 15 | *
|
|---|
| 16 | * Alternatively, the contents of this file may be used under the terms of the
|
|---|
| 17 | * Free Pascal modified version of the GNU Lesser General Public License
|
|---|
| 18 | * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
|
|---|
| 19 | * of this license are applicable instead of those above.
|
|---|
| 20 | * Please see the file LICENSE.txt for additional information concerning this
|
|---|
| 21 | * license.
|
|---|
| 22 | *
|
|---|
| 23 | * The Original Code is Graphics32
|
|---|
| 24 | *
|
|---|
| 25 | * The Initial Developer of the Original Code is
|
|---|
| 26 | * Alex A. Denisov
|
|---|
| 27 | *
|
|---|
| 28 | * Portions created by the Initial Developer are Copyright (C) 2000-2009
|
|---|
| 29 | * the Initial Developer. All Rights Reserved.
|
|---|
| 30 | *
|
|---|
| 31 | * Contributor(s):
|
|---|
| 32 | * Christian-W. Budde
|
|---|
| 33 | * - 2019/04/01 - Refactoring
|
|---|
| 34 | *
|
|---|
| 35 | * ***** END LICENSE BLOCK ***** *)
|
|---|
| 36 |
|
|---|
| 37 | interface
|
|---|
| 38 |
|
|---|
| 39 | {$I GR32.inc}
|
|---|
| 40 |
|
|---|
| 41 | uses
|
|---|
| 42 | GR32;
|
|---|
| 43 |
|
|---|
| 44 | function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF} // returns F blended over B, weighted by the alpha byte of F
|
|---|
| 45 | procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; {$ENDIF} // same blend as BlendReg_ASM, result written back to B
|
|---|
| 46 | procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF} // loops over Count pixels at B; NOTE(review): F is declared as a colour value - confirm the body treats it as one
|
|---|
| 47 |
|
|---|
| 48 | function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF} // as BlendReg_ASM, with the alpha of F additionally scaled by master alpha M
|
|---|
| 49 | procedure BlendMemEx_ASM(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; {$ENDIF} // as BlendMem_ASM, with the alpha of F additionally scaled by master alpha M
|
|---|
| 50 |
|
|---|
| 51 | procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF} // blends Count pixels read from Src onto the pixels at Dst, in place
|
|---|
| 52 | procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF} // presumably blends the single colour Src onto Count pixels at Dst - body not fully visible here, confirm
|
|---|
| 53 |
|
|---|
| 54 | function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF} // implementation not visible in this part of the file
|
|---|
| 55 | procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF} // implementation not visible in this part of the file
|
|---|
| 56 |
|
|---|
| 57 | {$IFDEF TARGET_x86}
|
|---|
| 58 | function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF} // declared for 32-bit x86 targets only; implementation not visible in this part of the file
|
|---|
| 59 | {$ENDIF}
|
|---|
| 60 |
|
|---|
| 61 | procedure EMMS_ASM; {$IFDEF FPC} assembler; {$ENDIF} // implementation not visible here; NOTE(review): name suggests it issues the EMMS instruction - confirm
|
|---|
| 62 |
|
|---|
| 63 | implementation
|
|---|
| 64 |
|
|---|
| 65 | uses
|
|---|
| 66 | GR32_Blend,
|
|---|
| 67 | GR32_LowLevel,
|
|---|
| 68 | GR32_System;
|
|---|
| 69 |
|
|---|
| 70 | { ASM versions }
|
|---|
| 71 |
|
|---|
| 72 | { Assembler versions }
|
|---|
| 73 |
|
|---|
| 74 | const
|
|---|
| 75 | bias = $00800080; // $80 per 16-bit lane: added to each packed byte*weight product before only the high byte of the lane is kept, so the implicit shr 8 rounds instead of truncating
|
|---|
| 76 |
|
|---|
| 77 |
|
|---|
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the foreground colour F over the background colour B, using the
  // alpha byte of F as the weight, and returns the blended colour:
  //   Z = Fa * Fargb + (1 - Fa) * Bargb        (evaluated as Z = P + Q)
  // Shortcuts: Fa = 255 returns F as is, Fa = 0 returns B as is.
  // On the blended path the alpha byte of the result is forced to $FF.
  // Two channels are processed per multiply by keeping them in separate
  // 16-bit lanes (00 xx 00 yy); `bias` rounds each lane before its high
  // byte is kept.

{$IFDEF TARGET_x86}
  // In:  EAX = F, EDX = B        Out: EAX = result

  // Opaque foreground: EAX >= $FF000000 means Fa = 255, F is the result
        CMP     EAX,$FF000000
        JNC     @Done

  // Transparent foreground: Fa = 0, B is the result
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // Weight W = Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = 00 00 00 Fa

        PUSH    EBX             // EBX must be preserved for the caller

  // P = W * F -- red/blue lanes in EAX
        MOV     EBX,EAX         // EBX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,ECX         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Fa 00 Fg 00
        SHR     EBX,8           // EBX = 00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX = Pa ** Pg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Pa 00 Pg 00
        OR      EAX,EBX         // EAX = Pa Pr Pg Pb

  // W = 255 - W
        XOR     ECX,$000000FF

  // Q = W * B -- red/blue lanes in EDX
        MOV     EBX,EDX         // EBX = Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX = 00 Br 00 Bb
        IMUL    EDX,ECX         // EDX = Qr ** Qb **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = Qr 00 Qb 00
        SHR     EDX,8           // EDX = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Ba 00 Bg 00
        SHR     EBX,8           // EBX = 00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX = Qa ** Qg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Qa 00 Qg 00
        OR      EBX,EDX         // EBX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,EBX         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb

        POP     EBX
        RET

@UseBackground:
        MOV     EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = F, EDX = B        Out: EAX = result
        MOV     RAX,RCX

  // Opaque foreground: Fa = 255, F is the result
        CMP     EAX,$FF000000
        JNC     @Done

  // Transparent foreground: Fa = 0, B is the result
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // Weight W = Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = 00 00 00 Fa

  // P = W * F -- red/blue lanes in EAX
        MOV     R9D,EAX         // R9D = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,ECX         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in R9D
        AND     R9D,$FF00FF00   // R9D = Fa 00 Fg 00
        SHR     R9D,8           // R9D = 00 Fa 00 Fg
        IMUL    R9D,ECX         // R9D = Pa ** Pg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = Pa 00 Pg 00
        OR      EAX,R9D         // EAX = Pa Pr Pg Pb

  // W = 255 - W
        XOR     ECX,$000000FF

  // Q = W * B -- red/blue lanes in EDX
        MOV     R9D,EDX         // R9D = Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX = 00 Br 00 Bb
        IMUL    EDX,ECX         // EDX = Qr ** Qb **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = Qr 00 Qb 00
        SHR     EDX,8           // EDX = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in R9D
        AND     R9D,$FF00FF00   // R9D = Ba 00 Bg 00
        SHR     R9D,8           // R9D = 00 Ba 00 Bg
        IMUL    R9D,ECX         // R9D = Qa ** Qg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = Qa 00 Qg 00
        OR      R9D,EDX         // R9D = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,R9D         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb
        RET

@UseBackground:
        MOV     EAX,EDX
@Done:
{$ENDIF}
end;
|
|---|
| 200 |
|
|---|
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the foreground colour F into the pixel B in place, weighted by
  // the alpha byte of F:
  //   Fa = 0   -> B is not written at all
  //   Fa = 255 -> B := F
  //   else     -> B := P + Q, with P = Fa * F and Q = (255 - Fa) * B,
  //               and the alpha byte of the stored value forced to $FF
{$IFDEF TARGET_x86}
  // In:  EAX = F, EDX = address of B

  // Transparent foreground: nothing to store
        TEST    EAX,$FF000000
        JZ      @Done

  // Weight W = Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = 00 00 00 Fa

  // Opaque foreground: store F unchanged
        CMP     ECX,$FF
        JZ      @StoreForeground

        PUSH    EBX             // callee-saved scratch registers
        PUSH    ESI

  // P = W * F -- red/blue lanes in EAX
        MOV     EBX,EAX         // EBX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,ECX         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Fa 00 Fg 00
        SHR     EBX,8           // EBX = 00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX = Pa ** Pg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Pa 00 Pg 00
        OR      EAX,EBX         // EAX = Pa Pr Pg Pb

  // Fetch the background pixel and flip the weight: W = 255 - W
        MOV     ESI,[EDX]
        XOR     ECX,$000000FF

  // Q = W * B -- red/blue lanes in ESI
        MOV     EBX,ESI         // EBX = Ba Br Bg Bb
        AND     ESI,$00FF00FF   // ESI = 00 Br 00 Bb
        IMUL    ESI,ECX         // ESI = Qr ** Qb **
        ADD     ESI,bias
        AND     ESI,$FF00FF00   // ESI = Qr 00 Qb 00
        SHR     ESI,8           // ESI = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Ba 00 Bg 00
        SHR     EBX,8           // EBX = 00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX = Qa ** Qg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Qa 00 Qg 00
        OR      EBX,ESI         // EBX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,EBX         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb

        MOV     [EDX],EAX
        POP     ESI
        POP     EBX
        RET

@StoreForeground:
        MOV     [EDX],EAX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = F, RDX = address of B

  // Transparent foreground: nothing to store
        TEST    ECX,$FF000000
        JZ      @Done

        MOV     EAX,ECX         // EAX = Fa Fr Fg Fb

  // Weight W = Fa
        SHR     ECX,24          // ECX = 00 00 00 Fa

  // Opaque foreground: store F unchanged
        CMP     ECX,$FF
        JZ      @StoreForeground

  // P = W * F -- red/blue lanes in EAX
        MOV     R8D,EAX         // R8D = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,ECX         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in R8D
        AND     R8D,$FF00FF00   // R8D = Fa 00 Fg 00
        SHR     R8D,8           // R8D = 00 Fa 00 Fg
        IMUL    R8D,ECX         // R8D = Pa ** Pg **
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = Pa 00 Pg 00
        OR      EAX,R8D         // EAX = Pa Pr Pg Pb

  // Fetch the background pixel and flip the weight: W = 255 - W
        MOV     R9D,[RDX]
        XOR     ECX,$000000FF

  // Q = W * B -- red/blue lanes in R9D
        MOV     R8D,R9D         // R8D = Ba Br Bg Bb
        AND     R9D,$00FF00FF   // R9D = 00 Br 00 Bb
        IMUL    R9D,ECX         // R9D = Qr ** Qb **
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = Qr 00 Qb 00
        SHR     R9D,8           // R9D = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in R8D
        AND     R8D,$FF00FF00   // R8D = Ba 00 Bg 00
        SHR     R8D,8           // R8D = 00 Ba 00 Bg
        IMUL    R8D,ECX         // R8D = Qa ** Qg **
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = Qa 00 Qg 00
        OR      R8D,R9D         // R8D = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,R8D         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb

        MOV     [RDX],EAX
        RET

@StoreForeground:
        MOV     [RDX],EAX
@Done:
{$ENDIF}
end;
|
|---|
| 328 |
|
|---|
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the single foreground colour F onto Count consecutive pixels
  // starting at B, in place, weighted by the alpha byte of F:
  //   Count <= 0 or Fa = 0 -> nothing is written
  //   Fa = 255             -> every pixel is overwritten with F
  //   else                 -> pixel := P + Q with P = Fa * F (computed once)
  //                           and Q = (255 - Fa) * pixel, alpha forced to $FF
  //
  // F is a colour VALUE, not a pointer: it must never be dereferenced or
  // advanced. Because F is constant, P and the inverted weight are hoisted
  // out of the per-pixel loop.
{$IFDEF TARGET_x86}
  // In:  EAX = F, EDX = B, ECX = Count

  // Reject zero and negative counts (TEST clears OF, so JLE = ZF or SF)
        TEST    ECX,ECX
        JLE     @Done

  // Transparent foreground: nothing to do
        TEST    EAX,$FF000000
        JZ      @Done

        PUSH    EBX             // callee-saved registers used below
        PUSH    ESI
        PUSH    EDI

        MOV     EDI,EDX         // EDI = current destination pixel
        MOV     ESI,EAX
        SHR     ESI,24          // ESI = 00 00 00 Fa  (weight W)

  // Opaque foreground: plain fill
        CMP     ESI,$FF
        JZ      @Fill

  // P = W * F, computed once
        MOV     EBX,EAX         // EBX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX = Fa 00 Fg 00
        IMUL    EAX,ESI         // EAX = Pr ** Pb **
        SHR     EBX,8           // EBX = 00 Fa 00 Fg
        IMUL    EBX,ESI         // EBX = Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Pa 00 Pg 00
        OR      EAX,EBX         // EAX = Pa Pr Pg Pb  (kept for the whole loop)

  // W = 255 - W, also constant for the whole loop
        XOR     ESI,$000000FF

@Loop:
  // Q = W * B
        MOV     EDX,[EDI]       // EDX = Ba Br Bg Bb
        MOV     EBX,EDX         // EBX = Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX = 00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX = Ba 00 Bg 00
        IMUL    EDX,ESI         // EDX = Qr ** Qb **
        SHR     EBX,8           // EBX = 00 Ba 00 Bg
        IMUL    EBX,ESI         // EBX = Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = Qr 00 Qb 00
        SHR     EDX,8           // EDX = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Qa 00 Qg 00
        OR      EBX,EDX         // EBX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EBX,EAX         // EBX = Za Zr Zg Zb
        OR      EBX,$FF000000   // EBX = FF Zr Zg Zb
        MOV     [EDI],EBX

        ADD     EDI,4
        DEC     ECX
        JNZ     @Loop
        JMP     @Restore

@Fill:
        MOV     [EDI],EAX       // Fa = 255: pixel := F
        ADD     EDI,4
        DEC     ECX
        JNZ     @Fill

@Restore:
        POP     EDI
        POP     ESI
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = F, RDX = B, R8D = Count
  // Only volatile registers (RAX, RCX, RDX, R8-R10) are used, so nothing
  // has to be pushed in this frameless routine.

  // Reject zero and negative counts (TEST clears OF, so JLE = ZF or SF)
        TEST    R8D,R8D
        JLE     @Done

  // Transparent foreground: nothing to do
        TEST    ECX,$FF000000
        JZ      @Done

        MOV     R9D,ECX
        SHR     R9D,24          // R9D = 00 00 00 Fa  (weight W)

  // Opaque foreground: plain fill
        CMP     R9D,$FF
        JZ      @Fill

  // P = W * F, computed once
        MOV     EAX,ECX         // EAX = Fa Fr Fg Fb
        AND     ECX,$00FF00FF   // ECX = 00 Fr 00 Fb
        AND     EAX,$FF00FF00   // EAX = Fa 00 Fg 00
        IMUL    ECX,R9D         // ECX = Pr ** Pb **
        SHR     EAX,8           // EAX = 00 Fa 00 Fg
        IMUL    EAX,R9D         // EAX = Pa ** Pg **
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = Pr 00 Pb 00
        SHR     ECX,8           // ECX = 00 Pr 00 Pb
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pa 00 Pg 00
        OR      ECX,EAX         // ECX = Pa Pr Pg Pb  (kept for the whole loop)

  // W = 255 - W, also constant for the whole loop
        XOR     R9D,$000000FF

@Loop:
  // Q = W * B
        MOV     EAX,[RDX]       // EAX = Ba Br Bg Bb
        MOV     R10D,EAX        // R10D = Ba Br Bg Bb
        AND     EAX,$00FF00FF   // EAX = 00 Br 00 Bb
        AND     R10D,$FF00FF00  // R10D = Ba 00 Bg 00
        IMUL    EAX,R9D         // EAX = Qr ** Qb **
        SHR     R10D,8          // R10D = 00 Ba 00 Bg
        IMUL    R10D,R9D        // R10D = Qa ** Qg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Qr 00 Qb 00
        SHR     EAX,8           // EAX = 00 Qr 00 Qb
        ADD     R10D,bias
        AND     R10D,$FF00FF00  // R10D = Qa 00 Qg 00
        OR      EAX,R10D        // EAX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,ECX         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb
        MOV     [RDX],EAX

        ADD     RDX,4
        DEC     R8D
        JNZ     @Loop
        JMP     @Done

@Fill:
        MOV     [RDX],ECX       // Fa = 255: pixel := F
        ADD     RDX,4
        DEC     R8D
        JNZ     @Fill

@Done:
{$ENDIF}
end;
|
|---|
| 484 |
|
|---|
function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the foreground colour F over the background colour B, using the
  // alpha byte of F scaled by the master alpha M as the weight:
  //   W = (Fa * (M + 1)) shr 8
  //   Z = W * Fargb + (1 - W) * Bargb          (evaluated as Z = P + Q)
  // Fa = 0 or W = 0 returns B unchanged. M = $FF is not special-cased here
  // (per the original note, Graphics32 uses BlendReg in that case).
  // On the blended path the alpha byte of the result is forced to $FF.

{$IFDEF TARGET_x86}
  // In:  EAX = F, EDX = B, ECX = M        Out: EAX = result

  // Transparent foreground: B is the result
        TEST    EAX,$FF000000
        JZ      @UseBackground

        PUSH    EBX             // EBX must be preserved for the caller

  // Weight W = Fa * M
        MOV     EBX,EAX         // EBX = Fa Fr Fg Fb
        SHR     EBX,24          // EBX = 00 00 00 Fa
        INC     ECX             // map 0..255 onto 1..256 so the shr 8 can reach 255
        IMUL    ECX,EBX         // ECX = 00 00  W **
        SHR     ECX,8           // ECX = 00 00 00  W   (sets ZF when W = 0)
        JZ      @RestoreUseBackground

  // P = W * F -- red/blue lanes in EAX
        MOV     EBX,EAX         // EBX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,ECX         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Fa 00 Fg 00
        SHR     EBX,8           // EBX = 00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX = Pa ** Pg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Pa 00 Pg 00
        OR      EAX,EBX         // EAX = Pa Pr Pg Pb

  // W = 255 - W
        XOR     ECX,$000000FF

  // Q = W * B -- red/blue lanes in EDX
        MOV     EBX,EDX         // EBX = Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX = 00 Br 00 Bb
        IMUL    EDX,ECX         // EDX = Qr ** Qb **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = Qr 00 Qb 00
        SHR     EDX,8           // EDX = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in EBX
        AND     EBX,$FF00FF00   // EBX = Ba 00 Bg 00
        SHR     EBX,8           // EBX = 00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX = Qa ** Qg **
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = Qa 00 Qg 00
        OR      EBX,EDX         // EBX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,EBX         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb

        POP     EBX
        RET

@RestoreUseBackground:
        POP     EBX

@UseBackground:
        MOV     EAX,EDX
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = F, EDX = B, R8D = M        Out: EAX = result
        MOV     EAX,ECX         // EAX = Fa Fr Fg Fb

  // Transparent foreground: B is the result
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // Weight W = Fa * M
        SHR     ECX,24          // ECX = 00 00 00 Fa
        INC     R8D             // map 0..255 onto 1..256 so the shr 8 can reach 255
        IMUL    R8D,ECX         // R8D = 00 00  W **
        SHR     R8D,8           // R8D = 00 00 00  W   (sets ZF when W = 0)
        JZ      @UseBackground

  // P = W * F -- red/blue lanes in EAX
        MOV     ECX,EAX         // ECX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX = 00 Fr 00 Fb
        IMUL    EAX,R8D         // EAX = Pr ** Pb **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = Pr 00 Pb 00
        SHR     EAX,8           // EAX = 00 Pr 00 Pb
  // P = W * F -- alpha/green lanes in ECX
        AND     ECX,$FF00FF00   // ECX = Fa 00 Fg 00
        SHR     ECX,8           // ECX = 00 Fa 00 Fg
        IMUL    ECX,R8D         // ECX = Pa ** Pg **
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = Pa 00 Pg 00
        OR      EAX,ECX         // EAX = Pa Pr Pg Pb

  // W = 255 - W
        XOR     R8D,$000000FF

  // Q = W * B -- red/blue lanes in EDX
        MOV     ECX,EDX         // ECX = Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX = 00 Br 00 Bb
        IMUL    EDX,R8D         // EDX = Qr ** Qb **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = Qr 00 Qb 00
        SHR     EDX,8           // EDX = 00 Qr 00 Qb
  // Q = W * B -- alpha/green lanes in ECX
        AND     ECX,$FF00FF00   // ECX = Ba 00 Bg 00
        SHR     ECX,8           // ECX = 00 Ba 00 Bg
        IMUL    ECX,R8D         // ECX = Qa ** Qg **
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = Qa 00 Qg 00
        OR      ECX,EDX         // ECX = Qa Qr Qg Qb

  // Z = P + Q (no byte is expected to overflow), alpha forced opaque
        ADD     EAX,ECX         // EAX = Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX = FF Zr Zg Zb

        RET

@UseBackground:
        MOV     EAX,EDX
{$ENDIF}
end;
|
|---|
| 606 |
|
|---|
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the foreground colour F into the pixel B in place, using the
  // alpha byte of F scaled by the master alpha M as the weight:
  //   W = (Fa * (M + 1)) shr 8
  //   Fa = 0 or W = 0 -> B is not written
  //   else            -> B := P + Q with P = W * F and Q = (255 - W) * B,
  //                      alpha byte of the stored value forced to $FF
  //
  // The weight is computed exactly as in BlendRegEx_ASM. `bias` must NOT be
  // added to the weight product: its upper word would survive the shr 8 as
  // $80 in bits 8..15, making W never zero and corrupting every multiply
  // that follows.
{$IFDEF TARGET_x86}
  // In:  EAX = F, EDX = address of B, ECX = M

  // Check Fa > 0 ?
        TEST    EAX,$FF000000   // Fa = 0? => write nothing
        JZ      @2

        PUSH    EBX

  // Get weight W = Fa * M
        MOV     EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        INC     ECX             // 255:256 range bias
        SHR     EBX,24          // EBX  <-  00 00 00 Fa
        IMUL    ECX,EBX         // ECX  <-  00 00  W **
        SHR     ECX,8           // ECX  <-  00 00 00  W
        JZ      @1              // W = 0 ?  => write nothing

        PUSH    ESI

  // P = W * F
        MOV     EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX  <-  Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX  <-  Pr ** Pb **
        SHR     EBX,8           // EBX  <-  00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX  <-  Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR      EAX,EBX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV     ESI,[EDX]       // ESI  <-  Ba Br Bg Bb
        XOR     ECX,$000000FF   // ECX  <-  1 - ECX

  // Q = W * B
        MOV     EBX,ESI         // EBX  <-  Ba Br Bg Bb
        AND     ESI,$00FF00FF   // ESI  <-  00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX  <-  Ba 00 Bg 00
        IMUL    ESI,ECX         // ESI  <-  Qr ** Qb **
        SHR     EBX,8           // EBX  <-  00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX  <-  Qa ** Qg **
        ADD     ESI,bias
        AND     ESI,$FF00FF00   // ESI  <-  Qr 00 Qb 00
        SHR     ESI,8           // ESI  <-  00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR      EBX,ESI         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX         // EAX  <-  Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX  <-  FF Zr Zg Zb

        MOV     [EDX],EAX
        POP     ESI

@1:     POP     EBX
@2:
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = F, RDX = address of B, R8D = M

  // Check Fa > 0 ?
        TEST    ECX,$FF000000   // Fa = 0? => write nothing
        JZ      @1

  // Get weight W = Fa * M
        MOV     EAX,ECX         // EAX  <-  Fa Fr Fg Fb
        INC     R8D             // 255:256 range bias
        SHR     EAX,24          // EAX  <-  00 00 00 Fa
        IMUL    R8D,EAX         // R8D  <-  00 00  W **
        SHR     R8D,8           // R8D  <-  00 00 00  W
        JZ      @1              // W = 0 ?  => write nothing

  // P = W * F
        MOV     EAX,ECX         // EAX  <-  Fa Fr Fg Fb
        AND     ECX,$00FF00FF   // ECX  <-  00 Fr 00 Fb
        AND     EAX,$FF00FF00   // EAX  <-  Fa 00 Fg 00
        IMUL    ECX,R8D         // ECX  <-  Pr ** Pb **
        SHR     EAX,8           // EAX  <-  00 Fa 00 Fg
        IMUL    EAX,R8D         // EAX  <-  Pa ** Pg **
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX  <-  Pr 00 Pb 00
        SHR     ECX,8           // ECX  <-  00 Pr 00 Pb
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Pa 00 Pg 00
        OR      ECX,EAX         // ECX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV     R9D,[RDX]       // R9D  <-  Ba Br Bg Bb
        XOR     R8D,$000000FF   // R8D  <-  1 - R8D

  // Q = W * B
        MOV     EAX,R9D         // EAX  <-  Ba Br Bg Bb
        AND     R9D,$00FF00FF   // R9D  <-  00 Br 00 Bb
        AND     EAX,$FF00FF00   // EAX  <-  Ba 00 Bg 00
        IMUL    R9D,R8D         // R9D  <-  Qr ** Qb **
        SHR     EAX,8           // EAX  <-  00 Ba 00 Bg
        IMUL    EAX,R8D         // EAX  <-  Qa ** Qg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D  <-  Qr 00 Qb 00
        SHR     R9D,8           // R9D  <-  00 Qr 00 Qb
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Qa 00 Qg 00
        OR      EAX,R9D         // EAX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     ECX,EAX         // ECX  <-  Za Zr Zg Zb
        OR      ECX,$FF000000   // ECX  <-  FF Zr Zg Zb (same as the x86 path)

        MOV     [RDX],ECX

@1:
{$ENDIF}
end;
|
|---|
| 734 |
|
|---|
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends Count pixels read from Src onto the Count pixels at Dst, in
  // place, each weighted by the alpha byte of its own source pixel:
  //   Count <= 0 -> nothing is read or written
  //   Fa = 0     -> destination pixel is left untouched
  //   Fa = 255   -> destination pixel := source pixel
  //   else       -> destination := P + Q with P = Fa * F and
  //                 Q = (255 - Fa) * B, alpha byte forced to $FF
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // test the counter for zero or negativity
  // (TEST clears OF, so JLE fires on ZF or SF; JS alone would let Count = 0
  //  into the loop, where DEC wraps and the loop runs ~2^32 times)
        TEST    ECX,ECX
        JLE     @4

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     ESI,EAX         // ESI <- Src
        MOV     EDI,EDX         // EDI <- Dst

  // loop start
@1:     MOV     EAX,[ESI]
        TEST    EAX,$FF000000
        JZ      @3              // complete transparency, proceed to next point

        PUSH    ECX             // store counter

  // Get weight W = Fa
        MOV     ECX,EAX         // ECX  <-  Fa Fr Fg Fb
        SHR     ECX,24          // ECX  <-  00 00 00 Fa

  // Test Fa = 255 ?
        CMP     ECX,$FF
        JZ      @2

  // P = W * F
        MOV     EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX  <-  Fa 00 Fg 00
        IMUL    EAX,ECX         // EAX  <-  Pr ** Pb **
        SHR     EBX,8           // EBX  <-  00 Fa 00 Fg
        IMUL    EBX,ECX         // EBX  <-  Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR      EAX,EBX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV     EDX,[EDI]       // EDX  <-  Ba Br Bg Bb
        XOR     ECX,$000000FF   // ECX  <-  1 - ECX

  // Q = W * B
        MOV     EBX,EDX         // EBX  <-  Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX  <-  Ba 00 Bg 00
        IMUL    EDX,ECX         // EDX  <-  Qr ** Qb **
        SHR     EBX,8           // EBX  <-  00 Ba 00 Bg
        IMUL    EBX,ECX         // EBX  <-  Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR     EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR      EBX,EDX         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX         // EAX  <-  Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX  <-  FF Zr Zg Zb
@2:
        MOV     [EDI],EAX

        POP     ECX             // restore counter

@3:
        ADD     ESI,4
        ADD     EDI,4

  // loop end
        DEC     ECX
        JNZ     @1

        POP     EDI
        POP     ESI
        POP     EBX

@4:
{$ENDIF}

{$IFDEF TARGET_x64}
  // RCX <- Src
  // RDX <- Dst
  // R8D <- Count

  // test the counter for zero or negativity
  // (JLE, not JS: Count = 0 must not enter the DEC/JNZ loop)
        TEST    R8D,R8D
        JLE     @4

        MOV     R10,RCX         // R10 <- Src
        MOV     R11,RDX         // R11 <- Dst
        MOV     ECX,R8D         // ECX <- Count

  // loop start
@1:
        MOV     EAX,[R10]
        TEST    EAX,$FF000000
        JZ      @3              // complete transparency, proceed to next point

  // Get weight W = Fa
        MOV     R9D,EAX         // R9D  <-  Fa Fr Fg Fb
        SHR     R9D,24          // R9D  <-  00 00 00 Fa

  // Test Fa = 255 ?
        CMP     R9D,$FF
        JZ      @2

  // P = W * F
        MOV     R8D,EAX         // R8D  <-  Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND     R8D,$FF00FF00   // R8D  <-  Fa 00 Fg 00
        IMUL    EAX,R9D         // EAX  <-  Pr ** Pb **
        SHR     R8D,8           // R8D  <-  00 Fa 00 Fg
        IMUL    R8D,R9D         // R8D  <-  Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D  <-  Pa 00 Pg 00
        OR      EAX,R8D         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV     EDX,[R11]       // EDX  <-  Ba Br Bg Bb
        XOR     R9D,$000000FF   // R9D  <-  1 - R9D

  // Q = W * B
        MOV     R8D,EDX         // R8D  <-  Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND     R8D,$FF00FF00   // R8D  <-  Ba 00 Bg 00
        IMUL    EDX,R9D         // EDX  <-  Qr ** Qb **
        SHR     R8D,8           // R8D  <-  00 Ba 00 Bg
        IMUL    R8D,R9D         // R8D  <-  Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR     EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D  <-  Qa 00 Qg 00
        OR      R8D,EDX         // R8D  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,R8D         // EAX  <-  Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX  <-  FF Zr Zg Zb
@2:
        MOV     [R11],EAX

@3:
        ADD     R10,4
        ADD     R11,4

  // loop end
        DEC     ECX
        JNZ     @1

@4:
{$ENDIF}
end;
|
|---|
| 897 |
|
|---|
// Blends the single color Src over Count consecutive pixels starting at Dst.
//
// The alpha byte of Src (Fa) is the blend weight W. For every destination
// pixel B each channel is computed as
//   ((F * W + bias) shr 8) + ((B * (W xor $FF) + bias) shr 8)
// and the alpha byte of the stored pixel is forced to $FF.
//
// Special cases:
//   Count <= 0  -> nothing is written
//   Fa = 0      -> nothing is written
//   Fa = $FF    -> Src is stored unchanged into every pixel (plain fill)
procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // leave for a zero or negative count; this must happen before anything is
  // pushed and before the loops, which count ECX down to exactly zero
        TEST      ECX,ECX
        JLE       @Done

  // leave if the source is fully transparent (still nothing pushed)
        TEST      EAX,$FF000000
        JZ        @Done

        PUSH      EBX
        PUSH      ESI
        PUSH      EDI

        MOV       ESI,EAX         // ESI  <-  Src
        MOV       EDI,EDX         // EDI  <-  Dst

  // Get weight W = Fa
        SHR       ESI,24          // ESI  <-  W

  // fully opaque source: no blending needed, just fill with Src
        CMP       ESI,$FF
        JZ        @Fill

  // P = W * F (loop invariant, computed once)
        MOV       EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        AND       EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND       EBX,$FF00FF00   // EBX  <-  Fa 00 Fg 00
        IMUL      EAX,ESI         // EAX  <-  Pr ** Pb **
        SHR       EBX,8           // EBX  <-  00 Fa 00 Fg
        IMUL      EBX,ESI         // EBX  <-  Pa ** Pg **
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR       EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR        EAX,EBX         // EAX  <-  Pa Pr Pg Pb
        XOR       ESI,$000000FF   // ESI  <-  1 - Fa

  // blend loop: Q = (1 - W) * B, Z = P + Q
@Blend:
        MOV       EDX,[EDI]
        MOV       EBX,EDX         // EBX  <-  Ba Br Bg Bb
        AND       EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND       EBX,$FF00FF00   // EBX  <-  Ba 00 Bg 00
        IMUL      EDX,ESI         // EDX  <-  Qr ** Qb **
        SHR       EBX,8           // EBX  <-  00 Ba 00 Bg
        IMUL      EBX,ESI         // EBX  <-  Qa ** Qg **
        ADD       EDX,bias
        AND       EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR       EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR        EBX,EDX         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte); EAX (P) stays untouched
        ADD       EBX,EAX         // EBX  <-  Za Zr Zg Zb
        OR        EBX,$FF000000   // EBX  <-  FF Zr Zg Zb
        MOV       [EDI],EBX

        ADD       EDI,4

        DEC       ECX
        JNZ       @Blend
        JMP       @Restore

  // opaque fill loop: EAX still holds the unmodified Src here
@Fill:
        MOV       [EDI],EAX
        ADD       EDI,4

        DEC       ECX
        JNZ       @Fill

@Restore:
        POP       EDI
        POP       ESI
        POP       EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- Src
  // RDX <- Dst
  // R8D <- Count

  // leave for a zero or negative count (the loops count R8D down to zero)
        TEST      R8D,R8D
        JLE       @Done

  // leave if the source is fully transparent
        TEST      ECX,$FF000000
        JZ        @Done

  // Only volatile registers are used (R10 as destination pointer), so no
  // register has to be saved and the stack pointer is never modified.
        MOV       R10,RDX         // R10  <-  Dst (EDX is scratch below)
        MOV       R9D,ECX         // R9D  <-  Src

  // Get weight W = Fa
        SHR       R9D,24          // R9D  <-  W

  // fully opaque source: no blending needed, just fill with Src
        CMP       R9D,$FF
        JZ        @Fill

  // P = W * F (loop invariant, computed once)
        MOV       EAX,ECX         // EAX  <-  Fa Fr Fg Fb
        AND       ECX,$00FF00FF   // ECX  <-  00 Fr 00 Fb
        AND       EAX,$FF00FF00   // EAX  <-  Fa 00 Fg 00
        IMUL      ECX,R9D         // ECX  <-  Pr ** Pb **
        SHR       EAX,8           // EAX  <-  00 Fa 00 Fg
        IMUL      EAX,R9D         // EAX  <-  Pa ** Pg **
        ADD       ECX,bias
        AND       ECX,$FF00FF00   // ECX  <-  Pr 00 Pb 00
        SHR       ECX,8           // ECX  <-  00 Pr 00 Pb
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pa 00 Pg 00
        OR        ECX,EAX         // ECX  <-  Pa Pr Pg Pb
        XOR       R9D,$000000FF   // R9D  <-  1 - Fa

  // blend loop: Q = (1 - W) * B, Z = P + Q
@Blend:
        MOV       EDX,[R10]
        MOV       EAX,EDX         // EAX  <-  Ba Br Bg Bb
        AND       EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND       EAX,$FF00FF00   // EAX  <-  Ba 00 Bg 00
        IMUL      EDX,R9D         // EDX  <-  Qr ** Qb **
        SHR       EAX,8           // EAX  <-  00 Ba 00 Bg
        IMUL      EAX,R9D         // EAX  <-  Qa ** Qg **
        ADD       EDX,bias
        AND       EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR       EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Qa 00 Qg 00
        OR        EAX,EDX         // EAX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD       EAX,ECX         // EAX  <-  Za Zr Zg Zb
        OR        EAX,$FF000000   // EAX  <-  FF Zr Zg Zb
        MOV       [R10],EAX

        ADD       R10,4

        DEC       R8D
        JNZ       @Blend
        JMP       @Done

  // opaque fill loop: ECX still holds the unmodified Src here
@Fill:
        MOV       [R10],ECX
        ADD       R10,4

        DEC       R8D
        JNZ       @Fill

@Done:
{$ENDIF}
end;
|
|---|
| 1075 |
|
|---|
| 1076 | {$IFDEF TARGET_x86}
|
|---|
| 1077 |
|
|---|
// Merges foreground color F over background color B and returns the merged
// color, including a merged alpha channel (x86 only).
function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  { Implementation of the merge formula described in a paper by
    Bruce Wallace in 1981. Merging is associative, i.e.
    A over (B over C) = (A over B) over C. The formula is

      Ra = Fa + Ba * (1 - Fa)
      Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra

    with

      Rc, Ra : resulting color / alpha
      Fc, Fa : foreground color / alpha
      Bc, Ba : background color / alpha

    The products and the division are taken from the lookup tables
    DivTable and RcTable, which are indexed as [row * 256 + column].

    Scratch area on the stack (12 bytes):
      [ESP+$00..$03]  copy of F     (B, G, R, A bytes at +0, +1, +2, +3)
      [ESP+$04..$07]  copy of B     (B, G, R, A bytes at +4, +5, +6, +7)
      [ESP+$08..$0B]  Result        (B, G, R, A bytes at +8, +9, +A, +B)
  }

  // EAX <- F
  // EDX <- B

  // F.A = 0 ?  => Result := B
        TEST      EAX,$FF000000
        JZ        @ReturnB

  // B.A = 255 ?  => hand over to BlendReg
        CMP       EDX,$FF000000
        JNC       @OpaqueBack

  // F.A = 255 ?  => Result := F (already in EAX)
        CMP       EAX,$FF000000
        JNC       @Done

  // B.A = 0 ?  => Result := F (already in EAX)
        TEST      EDX,$FF000000
        JZ        @Done

  // general case: save registers and set up the scratch area
        PUSH      EBX
        PUSH      ESI
        PUSH      EDI
        ADD       ESP,-$0C
        MOV       [ESP+$04],EDX
        MOV       [ESP],EAX

  // AH     <- F.A
  // DL, CL <- B.A
        SHR       EAX,16
        AND       EAX,$0000FF00
        SHR       EDX,24
        MOV       CL,DL
  // three NOPs, kept from the original instruction stream
        NOP
        NOP
        NOP

  // EDI <- PF
  // EDX <- PB
  // ESI <- PR

  // PF := @DivTable[F.A];
        LEA       EDI,[EAX+DivTable]
  // PB := @DivTable[B.A];
        SHL       EDX,$08
        LEA       EDX,[EDX+DivTable]

  // Result.A := B.A + F.A - PB[F.A];
  // (only the low byte CL of the 32 bit arithmetic is used afterwards)
        SHR       EAX,8
        ADD       ECX,EAX
        SUB       ECX,[EDX+EAX]
        MOV       [ESP+$0B],CL
  // PR := @RcTable[Result.A];
        SHL       ECX,$08
        AND       ECX,$0000FFFF
        LEA       ESI,[ECX+RcTable]

  { --- red channel --- }

  // Result.R := PB[B.R];
        XOR       EAX,EAX
        MOV       AL,[ESP+$06]
        MOV       CL,[EDX+EAX]
        MOV       [ESP+$0a],CL
  // X := F.R - Result.R;
        MOV       AL,[ESP+$02]
        XOR       EBX,EBX
        MOV       BL,CL
        SUB       EAX,EBX
  // X < 0 ?  => take the subtracting variant
        JL        @RedBelow
  // Result.R := PR[PF[X] + Result.R]
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        AND       ECX,$000000FF
        ADD       EAX,ECX
        MOV       AL,[ESI+EAX]
        MOV       [ESP+$0A],AL
        JMP       @Green
@RedBelow:
  // Result.R := PR[Result.R - PF[-X]];
        NEG       EAX
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        XOR       ECX,ECX
        MOV       CL,[ESP+$0A]
        SUB       ECX,EAX
        MOV       AL,[ESI+ECX]
        MOV       [ESP+$0A],AL

  { --- green channel --- }

@Green:
  // Result.G := PB[B.G];
        XOR       EAX,EAX
        MOV       AL,[ESP+$05]
        MOV       CL,[EDX+EAX]
        MOV       [ESP+$09],CL
  // X := F.G - Result.G;
        MOV       AL,[ESP+$01]
        XOR       EBX,EBX
        MOV       BL,CL
        SUB       EAX,EBX
  // X < 0 ?  => take the subtracting variant
        JL        @GreenBelow
  // Result.G := PR[PF[X] + Result.G]
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        AND       ECX,$000000FF
        ADD       EAX,ECX
        MOV       AL,[ESI+EAX]
        MOV       [ESP+$09],AL
        JMP       @Blue
@GreenBelow:
  // Result.G := PR[Result.G - PF[-X]];
        NEG       EAX
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        XOR       ECX,ECX
        MOV       CL,[ESP+$09]
        SUB       ECX,EAX
        MOV       AL,[ESI+ECX]
        MOV       [ESP+$09],AL

  { --- blue channel --- }

@Blue:
  // Result.B := PB[B.B];
        XOR       EAX,EAX
        MOV       AL,[ESP+$04]
        MOV       CL,[EDX+EAX]
        MOV       [ESP+$08],CL
  // X := F.B - Result.B;
  // (EDX is reused as scratch from here on, PB is no longer needed)
        MOV       AL,[ESP]
        XOR       EDX,EDX
        MOV       DL,CL
        SUB       EAX,EDX
  // X < 0 ?  => take the subtracting variant
        JL        @BlueBelow
  // Result.B := PR[PF[X] + Result.B]
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        XOR       EDX,EDX
        MOV       DL,CL
        ADD       EAX,EDX
        MOV       AL,[ESI+EAX]
        MOV       [ESP+$08],AL
        JMP       @Finish
@BlueBelow:
  // Result.B := PR[Result.B - PF[-X]];
        NEG       EAX
        MOVZX     EAX,BYTE PTR[EDI+EAX]
        XOR       EDX,EDX
        MOV       DL,CL
        SUB       EDX,EAX
        MOV       AL,[ESI+EDX]
        MOV       [ESP+$08],AL

@Finish:
  // EAX <- Result, then release the scratch area and restore registers
        MOV       EAX,[ESP+$08]

        ADD       ESP,$0C
        POP       EDI
        POP       ESI
        POP       EBX
        RET

@OpaqueBack:
  // opaque background: call through BlendReg and force the alpha to $FF
        CALL      DWORD PTR [BlendReg]
        OR        EAX,$FF000000
        RET

@ReturnB:
        MOV       EAX,EDX
@Done:
end;
|
|---|
| 1269 |
|
|---|
| 1270 | {$ENDIF}
|
|---|
| 1271 |
|
|---|
// Combines all four channels (alpha included) of the colors X and Y, where W
// is the weight of X:
//   Z = ((X * W + bias) shr 8) + ((Y * (W xor $FF) + bias) shr 8)  per channel
// W = 0 returns Y unchanged, W = $FF returns X unchanged.
function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- X
  // EDX <- Y
  // ECX <- W

  // Shortcuts for the extreme weights.
  // Note: JCXZ inspects CX (the low 16 bits of W), CMP the full ECX.
        JCXZ      @ReturnY        // CX = 0 ?    =>  Result := Y (EDX)
        CMP       ECX,$FF         // ECX = $FF ? =>  Result := X (already in EAX)
        JE        @Done

        PUSH      EBX

  // P = W * X
        MOV       EBX,EAX         // EBX  <-  Xa Xr Xg Xb
        AND       EAX,$00FF00FF   // EAX  <-  00 Xr 00 Xb
        AND       EBX,$FF00FF00   // EBX  <-  Xa 00 Xg 00
        IMUL      EAX,ECX         // EAX  <-  Pr ** Pb **
        SHR       EBX,8           // EBX  <-  00 Xa 00 Xg
        IMUL      EBX,ECX         // EBX  <-  Pa ** Pg **
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR       EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR        EAX,EBX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        XOR       ECX,$000000FF   // ECX  <-  1 - ECX
        MOV       EBX,EDX         // EBX  <-  Ya Yr Yg Yb

  // Q = W * Y
        AND       EDX,$00FF00FF   // EDX  <-  00 Yr 00 Yb
        AND       EBX,$FF00FF00   // EBX  <-  Ya 00 Yg 00
        IMUL      EDX,ECX         // EDX  <-  Qr ** Qb **
        SHR       EBX,8           // EBX  <-  00 Ya 00 Yg
        IMUL      EBX,ECX         // EBX  <-  Qa ** Qg **
        ADD       EDX,bias
        AND       EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR       EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR        EBX,EDX         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD       EAX,EBX         // EAX  <-  Za Zr Zg Zb

        POP       EBX
        RET

@ReturnY:
        MOV       EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- X
  // EDX <- Y
  // R8D <- W

  // Shortcuts for the extreme weights.
  // Note: TEST inspects the full R8D, CMP only the low byte R8B.
        TEST      R8D,R8D
        JZ        @ReturnY        // W = 0 ?    =>  Result := Y (EDX)
        MOV       EAX,ECX         // EAX  <-  Xa Xr Xg Xb
        CMP       R8B,$FF         // W = $FF ?  =>  Result := X (now in EAX)
        JE        @Done

  // P = W * X
        AND       EAX,$00FF00FF   // EAX  <-  00 Xr 00 Xb
        AND       ECX,$FF00FF00   // ECX  <-  Xa 00 Xg 00
        IMUL      EAX,R8D         // EAX  <-  Pr ** Pb **
        SHR       ECX,8           // ECX  <-  00 Xa 00 Xg
        IMUL      ECX,R8D         // ECX  <-  Pa ** Pg **
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR       EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD       ECX,bias
        AND       ECX,$FF00FF00   // ECX  <-  Pa 00 Pg 00
        OR        EAX,ECX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        XOR       R8D,$000000FF   // R8D  <-  1 - R8D
        MOV       ECX,EDX         // ECX  <-  Ya Yr Yg Yb

  // Q = W * Y
        AND       EDX,$00FF00FF   // EDX  <-  00 Yr 00 Yb
        AND       ECX,$FF00FF00   // ECX  <-  Ya 00 Yg 00
        IMUL      EDX,R8D         // EDX  <-  Qr ** Qb **
        SHR       ECX,8           // ECX  <-  00 Ya 00 Yg
        IMUL      ECX,R8D         // ECX  <-  Qa ** Qg **
        ADD       EDX,bias
        AND       EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR       EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD       ECX,bias
        AND       ECX,$FF00FF00   // ECX  <-  Qa 00 Qg 00
        OR        ECX,EDX         // ECX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD       EAX,ECX         // EAX  <-  Za Zr Zg Zb

        RET

@ReturnY:
        MOV       EAX,EDX
@Done:
{$ENDIF}
end;
|
|---|
| 1378 |
|
|---|
// Combines all four channels (alpha included) of color X with the color stored
// in Y and writes the result back to Y. W is the weight of X:
//   Y := ((X * W + bias) shr 8) + ((Y * (W xor $FF) + bias) shr 8)  per channel
// W = 0 leaves Y untouched, W = $FF stores X unchanged.
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX   <- F (= X)
  // [EDX] <- B (= Y)
  // ECX   <- W

  // Shortcuts for the extreme weights.
  // Note: JCXZ inspects CX (the low 16 bits of W), CMP the full ECX.
        JCXZ      @Done           // W = 0 ?   =>  write nothing
        CMP       ECX,$FF         // W = 255 ? =>  write F
{$IFDEF FPC}
  // Hand-encoded conditional jump ($74 = JZ rel8) with a displacement of
  // $76 = 118 bytes, used in place of "JZ @StoreF". The displacement is
  // fixed, so the instructions between here and @StoreF must keep their
  // exact encoded size.
        DB        $74,$76         // Prob with FPC 2.2.2 and below
{$ELSE}
        JZ        @StoreF
{$ENDIF}


        PUSH      EBX
        PUSH      ESI

  // P = W * F
        MOV       EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        AND       EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND       EBX,$FF00FF00   // EBX  <-  Fa 00 Fg 00
        IMUL      EAX,ECX         // EAX  <-  Pr ** Pb **
        SHR       EBX,8           // EBX  <-  00 Fa 00 Fg
        IMUL      EBX,ECX         // EBX  <-  Pa ** Pg **
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR       EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR        EAX,EBX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV       ESI,[EDX]
        XOR       ECX,$000000FF   // ECX  <-  1 - ECX

  // Q = W * B
        MOV       EBX,ESI         // EBX  <-  Ba Br Bg Bb
        AND       ESI,$00FF00FF   // ESI  <-  00 Br 00 Bb
        AND       EBX,$FF00FF00   // EBX  <-  Ba 00 Bg 00
        IMUL      ESI,ECX         // ESI  <-  Qr ** Qb **
        SHR       EBX,8           // EBX  <-  00 Ba 00 Bg
        IMUL      EBX,ECX         // EBX  <-  Qa ** Qg **
        ADD       ESI,bias
        AND       ESI,$FF00FF00   // ESI  <-  Qr 00 Qb 00
        SHR       ESI,8           // ESI  <-  00 Qr 00 Qb
        ADD       EBX,bias
        AND       EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR        EBX,ESI         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD       EAX,EBX         // EAX  <-  Za Zr Zg Zb

        MOV       [EDX],EAX

        POP       ESI
        POP       EBX
@Done:
        RET

@StoreF:
        MOV       [EDX],EAX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX   <- F (= X)
  // [RDX] <- B (= Y)
  // R8D   <- W

  // Shortcuts for the extreme weights.
  // Note: TEST inspects the full R8D, CMP only the low byte R8B.
        TEST      R8D,R8D         // set flags for W
        JZ        @Done           // W = 0 ?   =>  write nothing
        MOV       EAX,ECX         // EAX  <-  Fa Fr Fg Fb
        CMP       R8B,$FF         // W = 255 ? =>  write F
        JZ        @Store

  // P = W * F
        AND       EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND       ECX,$FF00FF00   // ECX  <-  Fa 00 Fg 00
        IMUL      EAX,R8D         // EAX  <-  Pr ** Pb **
        SHR       ECX,8           // ECX  <-  00 Fa 00 Fg
        IMUL      ECX,R8D         // ECX  <-  Pa ** Pg **
        ADD       EAX,bias
        AND       EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR       EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD       ECX,bias
        AND       ECX,$FF00FF00   // ECX  <-  Pa 00 Pg 00
        OR        EAX,ECX         // EAX  <-  Pa Pr Pg Pb

  // W = 1 - W
        MOV       R9D,[RDX]
        XOR       R8D,$000000FF   // R8D  <-  1 - R8D

  // Q = W * B
        MOV       ECX,R9D         // ECX  <-  Ba Br Bg Bb
        AND       R9D,$00FF00FF   // R9D  <-  00 Br 00 Bb
        AND       ECX,$FF00FF00   // ECX  <-  Ba 00 Bg 00
        IMUL      R9D,R8D         // R9D  <-  Qr ** Qb **
        SHR       ECX,8           // ECX  <-  00 Ba 00 Bg
        IMUL      ECX,R8D         // ECX  <-  Qa ** Qg **
        ADD       R9D,bias
        AND       R9D,$FF00FF00   // R9D  <-  Qr 00 Qb 00
        SHR       R9D,8           // R9D  <-  00 Qr 00 Qb
        ADD       ECX,bias
        AND       ECX,$FF00FF00   // ECX  <-  Qa 00 Qg 00
        OR        ECX,R9D         // ECX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD       EAX,ECX         // EAX  <-  Za Zr Zg Zb

@Store:
        MOV       [RDX],EAX
@Done:

{$ENDIF}
end;
|
|---|
| 1492 |
|
|---|
// Counterpart of the EMMS routines for the plain assembler code path.
// The body is intentionally empty: it executes no instruction of its own.
// NOTE(review): presumably a no-op because the routines in this unit use
// general-purpose registers only - confirm against the rest of the unit.
procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // nothing to do
end;
|
|---|
| 1496 |
|
|---|
| 1497 | end.
|
|---|