source: trunk/Packages/Graphics32/GR32_BlendASM.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 44.3 KB
Line 
1unit GR32_BlendASM;
2
3(* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1 or LGPL 2.1 with linking exception
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Alternatively, the contents of this file may be used under the terms of the
17 * Free Pascal modified version of the GNU Lesser General Public License
18 * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
19 * of this license are applicable instead of those above.
20 * Please see the file LICENSE.txt for additional information concerning this
21 * license.
22 *
23 * The Original Code is Graphics32
24 *
25 * The Initial Developer of the Original Code is
26 * Alex A. Denisov
27 *
28 * Portions created by the Initial Developer are Copyright (C) 2000-2009
29 * the Initial Developer. All Rights Reserved.
30 *
31 * Contributor(s):
32 * Christian-W. Budde
33 * - 2019/04/01 - Refactoring
34 *
35 * ***** END LICENSE BLOCK ***** *)
36
37interface
38
39{$I GR32.inc}
40
41uses
42 GR32;
43
// Blend a foreground colour F over a background B using the alpha byte of F.
// "Reg" variants return the result, "Mem" variants write it back through B.
44function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
45procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
// Same idea for a run of Count pixels starting at B, with F passed by value.
46procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
47
// "Ex" variants additionally scale the alpha of F by a master alpha M.
48function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
49procedure BlendMemEx_ASM(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
50
// Line variants: BlendLine reads one source pixel per destination pixel,
// BlendLine1 applies the single colour Src to every destination pixel.
51procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
52procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
53
// Weighted combination of all four channels of X and Y (weight W applies to X).
54function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
55procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
56
// Table-driven merge that also computes a resulting alpha; 32-bit targets only.
57{$IFDEF TARGET_x86}
58function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
59{$ENDIF}
60
61procedure EMMS_ASM; {$IFDEF FPC} assembler; {$ENDIF}
62
63implementation
64
65uses
66 GR32_Blend,
67 GR32_LowLevel,
68 GR32_System;
69
70{ ASM versions }
71
72{ Assembler versions }
73
74const
// Rounding term used by the blend routines in this unit: $80 in each of the
// two 16-bit lanes. It is added to a pair of 8-bit x 8-bit products before the
// high byte of every lane is kept.
75 bias = $00800080;
76
77
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends foreground F over background B, weighted by the alpha byte of F.
  //   Fa = $FF : F is returned unchanged
  //   Fa = 0   : B is returned unchanged
  //   else     : Result = P + Q with the alpha byte forced to $FF, where
  //              P is F scaled by Fa and Q is B scaled by (Fa xor $FF).
  // Scaling handles two channels at a time as 16-bit lanes: multiply by the
  // weight, add `bias`, keep the high byte of every lane.

{$IFDEF TARGET_x86}
  // in: EAX = F, EDX = B   out: EAX   scratch: ECX, EDX (EBX is saved/restored)

        CMP     EAX,$FF000000
        JNC     @Done                   // Fa = $FF: EAX already is the result
        TEST    EAX,$FF000000
        JZ      @ReturnB                // Fa = 0

        PUSH    EBX

        MOV     ECX,EAX
        SHR     ECX,24                  // ECX = Fa, the weight applied to F

        // P = F scaled by Fa
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     EBX,$FF00FF00           // alpha/green lanes
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = Fa xor $FF, the weight applied to B

        // Q = B scaled by the complementary weight
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,ECX
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     EBX,$FF00FF00
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,EBX
        OR      EAX,$FF000000           // result is always fully opaque

        POP     EBX
        RET

@ReturnB:
        MOV     EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = F, EDX = B   out: EAX   scratch: ECX, EDX, R9D

        MOV     RAX,RCX

        CMP     EAX,$FF000000
        JNC     @Done                   // Fa = $FF: EAX already is the result
        TEST    EAX,$FF000000
        JZ      @ReturnB                // Fa = 0

        MOV     ECX,EAX
        SHR     ECX,24                  // ECX = Fa, the weight applied to F

        // P = F scaled by Fa
        MOV     R9D,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     R9D,$FF00FF00           // alpha/green lanes
        SHR     R9D,8
        IMUL    R9D,ECX
        ADD     R9D,bias
        AND     R9D,$FF00FF00           // R9D = Pa 00 Pg 00
        OR      EAX,R9D                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = Fa xor $FF, the weight applied to B

        // Q = B scaled by the complementary weight
        MOV     R9D,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,ECX
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     R9D,$FF00FF00
        SHR     R9D,8
        IMUL    R9D,ECX
        ADD     R9D,bias
        AND     R9D,$FF00FF00           // R9D = Qa 00 Qg 00
        OR      R9D,EDX                 // R9D = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,R9D
        OR      EAX,$FF000000           // result is always fully opaque
        RET

@ReturnB:
        MOV     EAX,EDX
@Done:
{$ENDIF}
end;
200
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // In-place variant of the register blend: B := F over B, weighted by the
  // alpha byte of F.
  //   Fa = 0   : B is left untouched (no write at all)
  //   Fa = $FF : B := F
  //   else     : B := P + Q with alpha forced to $FF, where P is F scaled by
  //              Fa and Q is the old B scaled by (Fa xor $FF).

{$IFDEF TARGET_x86}
  // in: EAX = F, EDX = address of B   scratch: EAX, ECX (EBX, ESI are saved)

        TEST    EAX,$FF000000
        JZ      @Done                   // fully transparent: nothing to write

        MOV     ECX,EAX
        SHR     ECX,24                  // ECX = Fa
        CMP     ECX,$FF
        JZ      @StoreF                 // fully opaque: plain store

        PUSH    EBX
        PUSH    ESI

        // P = F scaled by Fa
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     EBX,$FF00FF00           // alpha/green lanes
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = weight of the background
        MOV     ESI,[EDX]               // ESI = current B

        // Q = B scaled by the complementary weight
        MOV     EBX,ESI
        AND     ESI,$00FF00FF
        IMUL    ESI,ECX
        ADD     ESI,bias
        AND     ESI,$FF00FF00
        SHR     ESI,8                   // ESI = 00 Qr 00 Qb
        AND     EBX,$FF00FF00
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,ESI                 // EBX = Qa Qr Qg Qb

        // B := P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,EBX
        OR      EAX,$FF000000
        MOV     [EDX],EAX

        POP     ESI
        POP     EBX
        RET

@StoreF:
        MOV     [EDX],EAX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = F, RDX = address of B   scratch: EAX, ECX, R8D, R9D

        TEST    ECX,$FF000000
        JZ      @Done                   // fully transparent: nothing to write

        MOV     EAX,ECX                 // EAX = F
        SHR     ECX,24                  // ECX = Fa
        CMP     ECX,$FF
        JZ      @StoreF                 // fully opaque: plain store

        // P = F scaled by Fa
        MOV     R8D,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     R8D,$FF00FF00           // alpha/green lanes
        SHR     R8D,8
        IMUL    R8D,ECX
        ADD     R8D,bias
        AND     R8D,$FF00FF00           // R8D = Pa 00 Pg 00
        OR      EAX,R8D                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = weight of the background
        MOV     R9D,[RDX]               // R9D = current B

        // Q = B scaled by the complementary weight
        MOV     R8D,R9D
        AND     R9D,$00FF00FF
        IMUL    R9D,ECX
        ADD     R9D,bias
        AND     R9D,$FF00FF00
        SHR     R9D,8                   // R9D = 00 Qr 00 Qb
        AND     R8D,$FF00FF00
        SHR     R8D,8
        IMUL    R8D,ECX
        ADD     R8D,bias
        AND     R8D,$FF00FF00           // R8D = Qa 00 Qg 00
        OR      R8D,R9D                 // R8D = Qa Qr Qg Qb

        // B := P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,R8D
        OR      EAX,$FF000000
        MOV     [RDX],EAX
        RET

@StoreF:
        MOV     [RDX],EAX
@Done:
{$ENDIF}
end;
328
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the single colour F over Count consecutive pixels starting at B,
  // weighted by the alpha byte of F.
  //   Count <= 0 : nothing is written
  //   Fa = 0     : nothing is written
  //   Fa = $FF   : every pixel is set to F
  //   else       : pixel := P + Q with alpha forced to $FF, where P is F scaled
  //                by Fa (computed once) and Q is the old pixel scaled by
  //                (Fa xor $FF).
  //
  // Fixes relative to the previous revision:
  //   * F is a colour value, not a pointer. The old x86 code dereferenced it
  //     ([ESI]) and advanced it by 4 per pixel; the old x64 code read from RSI,
  //     which was never initialised.
  //   * The old x86 code merged the source "pointer" into the pixel
  //     (OR EBX,ESI) instead of the red/blue lanes held in EDX.
  //   * A negative Count no longer sends the DEC/JNZ loop around the full
  //     32-bit range.
  //   * The x64 path keeps its pointer in volatile R10 instead of pushing RDI.

{$IFDEF TARGET_x86}
  // in: EAX = F, EDX = B, ECX = Count
  // scratch: EAX, ECX, EDX (EBX, ESI, EDI are saved/restored)

        TEST    ECX,ECX
        JLE     @Done                   // Count <= 0
        TEST    EAX,$FF000000
        JZ      @Done                   // fully transparent colour

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     EDI,EDX                 // EDI = current destination pixel
        MOV     ESI,EAX
        SHR     ESI,24                  // ESI = Fa
        CMP     ESI,$FF
        JZ      @FillOpaque

        // P = F scaled by Fa; loop invariant, kept in EAX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        AND     EBX,$FF00FF00           // alpha/green lanes
        IMUL    EAX,ESI
        SHR     EBX,8
        IMUL    EBX,ESI
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ESI,$FF                 // ESI = weight of the background

@NextPixel:
        // Q = pixel scaled by the complementary weight
        MOV     EDX,[EDI]
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        AND     EBX,$FF00FF00
        IMUL    EDX,ESI
        SHR     EBX,8
        IMUL    EBX,ESI
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // pixel := P + Q; the per-byte sums are assumed not to overflow
        ADD     EBX,EAX
        OR      EBX,$FF000000
        MOV     [EDI],EBX

        ADD     EDI,4
        DEC     ECX
        JNZ     @NextPixel
        JMP     @Restore

@FillOpaque:
        // Fa = $FF, so F already carries an opaque alpha byte
        MOV     [EDI],EAX
        ADD     EDI,4
        DEC     ECX
        JNZ     @FillOpaque

@Restore:
        POP     EDI
        POP     ESI
        POP     EBX

@Done:
        RET
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = F, RDX = B, R8D = Count
  // scratch: EAX, ECX, EDX, R8D, R9D, R10

        TEST    R8D,R8D
        JLE     @Done                   // Count <= 0
        TEST    ECX,$FF000000
        JZ      @Done                   // fully transparent colour

        MOV     R10,RDX                 // R10 = current destination pixel
        MOV     R9D,ECX
        SHR     R9D,24                  // R9D = Fa
        CMP     R9D,$FF
        JZ      @FillOpaque

        // P = F scaled by Fa; loop invariant, kept in ECX
        MOV     EAX,ECX
        AND     ECX,$00FF00FF           // red/blue lanes
        AND     EAX,$FF00FF00           // alpha/green lanes
        IMUL    ECX,R9D
        SHR     EAX,8
        IMUL    EAX,R9D
        ADD     ECX,bias
        AND     ECX,$FF00FF00
        SHR     ECX,8                   // ECX = 00 Pr 00 Pb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pa 00 Pg 00
        OR      ECX,EAX                 // ECX = Pa Pr Pg Pb

        XOR     R9D,$FF                 // R9D = weight of the background

@NextPixel:
        // Q = pixel scaled by the complementary weight
        MOV     EDX,[R10]
        MOV     EAX,EDX
        AND     EDX,$00FF00FF
        AND     EAX,$FF00FF00
        IMUL    EDX,R9D
        SHR     EAX,8
        IMUL    EAX,R9D
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Qa 00 Qg 00
        OR      EAX,EDX                 // EAX = Qa Qr Qg Qb

        // pixel := P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,ECX
        OR      EAX,$FF000000
        MOV     [R10],EAX

        ADD     R10,4
        DEC     R8D
        JNZ     @NextPixel
        RET

@FillOpaque:
        // Fa = $FF, so F (still intact in ECX) already carries an opaque alpha
        MOV     [R10],ECX
        ADD     R10,4
        DEC     R8D
        JNZ     @FillOpaque

@Done:
        RET
{$ENDIF}
end;
484
function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends foreground F over background B using the alpha of F scaled by the
  // master alpha M.
  //   W = (Fa * (M + 1)) shr 8
  //   Fa = 0 or W = 0 : B is returned unchanged
  //   else            : Result = P + Q with alpha forced to $FF, where P is F
  //                     scaled by W and Q is B scaled by (W xor $FF).
  // M = $FF gets no shortcut here; the original notes say callers use the
  // plain register blend for that case.

{$IFDEF TARGET_x86}
  // in: EAX = F, EDX = B, ECX = M   out: EAX
  // scratch: ECX, EDX (EBX is saved/restored)

        TEST    EAX,$FF000000
        JZ      @ReturnB                // Fa = 0

        PUSH    EBX

        // W = Fa * (M + 1) / 256
        INC     ECX                     // map 0..255 onto 1..256
        MOV     EBX,EAX
        SHR     EBX,24                  // EBX = Fa
        IMUL    ECX,EBX
        SHR     ECX,8                   // ECX = W; ZF is consumed by the next jump
        JZ      @PopReturnB             // W = 0

        // P = F scaled by W
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     EBX,$FF00FF00           // alpha/green lanes
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = weight of the background

        // Q = B scaled by the complementary weight
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,ECX
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     EBX,$FF00FF00
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,EBX
        OR      EAX,$FF000000

        POP     EBX
        RET

@PopReturnB:
        POP     EBX

@ReturnB:
        MOV     EAX,EDX
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = F, EDX = B, R8D = M   out: EAX
  // scratch: ECX, EDX, R8D

        MOV     EAX,ECX                 // EAX = F
        TEST    EAX,$FF000000
        JZ      @ReturnB                // Fa = 0

        // W = Fa * (M + 1) / 256
        INC     R8D                     // map 0..255 onto 1..256
        SHR     ECX,24                  // ECX = Fa
        IMUL    R8D,ECX
        SHR     R8D,8                   // R8D = W; ZF is consumed by the next jump
        JZ      @ReturnB                // W = 0

        // P = F scaled by W
        MOV     ECX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,R8D
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     ECX,$FF00FF00           // alpha/green lanes
        SHR     ECX,8
        IMUL    ECX,R8D
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Pa 00 Pg 00
        OR      EAX,ECX                 // EAX = Pa Pr Pg Pb

        XOR     R8D,$FF                 // R8D = weight of the background

        // Q = B scaled by the complementary weight
        MOV     ECX,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,R8D
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     ECX,$FF00FF00
        SHR     ECX,8
        IMUL    ECX,R8D
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Qa 00 Qg 00
        OR      ECX,EDX                 // ECX = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,ECX
        OR      EAX,$FF000000

        RET

@ReturnB:
        MOV     EAX,EDX
{$ENDIF}
end;
606
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // In-place blend of F over B using the alpha of F scaled by the master
  // alpha M.
  //   W = (Fa * (M + 1)) shr 8
  //   Fa = 0 or W = 0 : B is left untouched (no write)
  //   else            : B := P + Q with alpha forced to $FF, where P is F
  //                     scaled by W and Q is the old B scaled by (W xor $FF).
  // NOTE(review): M is presumably expected in 0..255 - confirm against callers.
  //
  // Fixes relative to the previous revision:
  //   * The weight computation added `bias` ($00800080) to Fa * (M + 1) before
  //     the shift. That left bit 15 set in W, so W was never 0..255, the
  //     "W = 0" exit could never be taken and both channel multiplies were
  //     corrupted. W is now computed exactly as in BlendRegEx_ASM.
  //   * The x64 path now forces the result alpha to $FF, matching the x86 path
  //     and BlendRegEx_ASM / BlendMem_ASM.

{$IFDEF TARGET_x86}
  // in: EAX = F, EDX = address of B, ECX = M
  // scratch: EAX, ECX (EBX, ESI are saved/restored)

        TEST    EAX,$FF000000
        JZ      @Done                   // Fa = 0: write nothing

        PUSH    EBX

        // W = Fa * (M + 1) / 256
        MOV     EBX,EAX
        INC     ECX                     // map 0..255 onto 1..256
        SHR     EBX,24                  // EBX = Fa
        IMUL    ECX,EBX
        SHR     ECX,8                   // ECX = W; ZF is consumed by the next jump
        JZ      @RestoreEBX             // W = 0: write nothing

        PUSH    ESI

        // P = F scaled by W
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        AND     EBX,$FF00FF00           // alpha/green lanes
        IMUL    EAX,ECX
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        MOV     ESI,[EDX]               // ESI = current B
        XOR     ECX,$FF                 // ECX = weight of the background

        // Q = B scaled by the complementary weight
        MOV     EBX,ESI
        AND     ESI,$00FF00FF
        AND     EBX,$FF00FF00
        IMUL    ESI,ECX
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     ESI,bias
        AND     ESI,$FF00FF00
        SHR     ESI,8                   // ESI = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,ESI                 // EBX = Qa Qr Qg Qb

        // B := P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,EBX
        OR      EAX,$FF000000

        MOV     [EDX],EAX
        POP     ESI

@RestoreEBX:
        POP     EBX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = F, RDX = address of B, R8D = M
  // scratch: EAX, ECX, R8D, R9D

        TEST    ECX,$FF000000
        JZ      @Done                   // Fa = 0: write nothing

        // W = Fa * (M + 1) / 256
        MOV     EAX,ECX
        INC     R8D                     // map 0..255 onto 1..256
        SHR     EAX,24                  // EAX = Fa
        IMUL    R8D,EAX
        SHR     R8D,8                   // R8D = W; ZF is consumed by the next jump
        JZ      @Done                   // W = 0: write nothing

        // P = F scaled by W
        MOV     EAX,ECX
        AND     ECX,$00FF00FF           // red/blue lanes
        AND     EAX,$FF00FF00           // alpha/green lanes
        IMUL    ECX,R8D
        SHR     EAX,8
        IMUL    EAX,R8D
        ADD     ECX,bias
        AND     ECX,$FF00FF00
        SHR     ECX,8                   // ECX = 00 Pr 00 Pb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pa 00 Pg 00
        OR      ECX,EAX                 // ECX = Pa Pr Pg Pb

        MOV     R9D,[RDX]               // R9D = current B
        XOR     R8D,$FF                 // R8D = weight of the background

        // Q = B scaled by the complementary weight
        MOV     EAX,R9D
        AND     R9D,$00FF00FF
        AND     EAX,$FF00FF00
        IMUL    R9D,R8D
        SHR     EAX,8
        IMUL    EAX,R8D
        ADD     R9D,bias
        AND     R9D,$FF00FF00
        SHR     R9D,8                   // R9D = 00 Qr 00 Qb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Qa 00 Qg 00
        OR      EAX,R9D                 // EAX = Qa Qr Qg Qb

        // B := P + Q; the per-byte sums are assumed not to overflow
        ADD     ECX,EAX
        OR      ECX,$FF000000           // same opaque result as the x86 path

        MOV     [RDX],ECX

@Done:
{$ENDIF}
end;
734
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends Count source pixels over Count destination pixels, each source
  // pixel weighted by its own alpha byte.
  //   Count <= 0 : nothing is read or written
  //   Fa = 0     : destination pixel is left untouched
  //   Fa = $FF   : destination pixel := source pixel
  //   else       : destination := P + Q with alpha forced to $FF, where P is
  //                the source scaled by Fa and Q is the destination scaled by
  //                (Fa xor $FF).
  //
  // Fix relative to the previous revision: the entry guard used JS, which only
  // rejects negative counts. With Count = 0 the loop body ran once and the
  // DEC/JNZ pair then wrapped around the full 32-bit range. JLE rejects both
  // zero and negative counts, as the original comment intended.

{$IFDEF TARGET_x86}
  // in: EAX = Src, EDX = Dst, ECX = Count
  // scratch: EAX, ECX, EDX (EBX, ESI, EDI are saved/restored)

        TEST    ECX,ECX
        JLE     @Done                   // Count <= 0

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     ESI,EAX                 // ESI = current source pixel
        MOV     EDI,EDX                 // EDI = current destination pixel

@NextPixel:
        MOV     EAX,[ESI]
        TEST    EAX,$FF000000
        JZ      @Advance                // fully transparent: skip

        PUSH    ECX                     // ECX doubles as the weight below

        MOV     ECX,EAX
        SHR     ECX,24                  // ECX = Fa
        CMP     ECX,$FF
        JZ      @Store                  // fully opaque: plain copy

        // P = source scaled by Fa
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        AND     EBX,$FF00FF00           // alpha/green lanes
        IMUL    EAX,ECX
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        MOV     EDX,[EDI]               // EDX = destination pixel
        XOR     ECX,$FF                 // ECX = weight of the destination

        // Q = destination scaled by the complementary weight
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        AND     EBX,$FF00FF00
        IMUL    EDX,ECX
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // destination := P + Q; per-byte sums are assumed not to overflow
        ADD     EAX,EBX
        OR      EAX,$FF000000

@Store:
        MOV     [EDI],EAX
        POP     ECX                     // ECX = remaining count again

@Advance:
        ADD     ESI,4
        ADD     EDI,4
        DEC     ECX
        JNZ     @NextPixel

        POP     EDI
        POP     ESI
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: RCX = Src, RDX = Dst, R8D = Count
  // scratch: EAX, ECX, EDX, R8D, R9D, R10, R11

        TEST    R8D,R8D
        JLE     @Done                   // Count <= 0

        MOV     R10,RCX                 // R10 = current source pixel
        MOV     R11,RDX                 // R11 = current destination pixel
        MOV     ECX,R8D                 // ECX = remaining count

@NextPixel:
        MOV     EAX,[R10]
        TEST    EAX,$FF000000
        JZ      @Advance                // fully transparent: skip

        MOV     R9D,EAX
        SHR     R9D,24                  // R9D = Fa
        CMP     R9D,$FF
        JZ      @Store                  // fully opaque: plain copy

        // P = source scaled by Fa
        MOV     R8D,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        AND     R8D,$FF00FF00           // alpha/green lanes
        IMUL    EAX,R9D
        SHR     R8D,8
        IMUL    R8D,R9D
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     R8D,bias
        AND     R8D,$FF00FF00           // R8D = Pa 00 Pg 00
        OR      EAX,R8D                 // EAX = Pa Pr Pg Pb

        MOV     EDX,[R11]               // EDX = destination pixel
        XOR     R9D,$FF                 // R9D = weight of the destination

        // Q = destination scaled by the complementary weight
        MOV     R8D,EDX
        AND     EDX,$00FF00FF
        AND     R8D,$FF00FF00
        IMUL    EDX,R9D
        SHR     R8D,8
        IMUL    R8D,R9D
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     R8D,bias
        AND     R8D,$FF00FF00           // R8D = Qa 00 Qg 00
        OR      R8D,EDX                 // R8D = Qa Qr Qg Qb

        // destination := P + Q; per-byte sums are assumed not to overflow
        ADD     EAX,R8D
        OR      EAX,$FF000000

@Store:
        MOV     [R11],EAX

@Advance:
        ADD     R10,4
        ADD     R11,4
        DEC     ECX
        JNZ     @NextPixel

@Done:
{$ENDIF}
end;
897
procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Blends the single colour Src over Count consecutive pixels starting at
  // Dst, weighted by the alpha byte of Src.
  //   Count <= 0 : nothing is written
  //   Fa = 0     : nothing is written
  //   Fa = $FF   : every pixel is set to Src
  //   else       : pixel := P + Q with alpha forced to $FF, where P is Src
  //                scaled by Fa (computed once) and Q is the old pixel scaled
  //                by (Fa xor $FF).
  //
  // Fixes relative to the previous revision:
  //   * x86: the "negative count" and "fully transparent" exits jumped to the
  //     opaque copy loop. At that point EDI had not been loaded and nothing had
  //     been pushed, so the code wrote through a stale pointer and then popped
  //     three registers that were never saved. Both exits now go straight to
  //     the return.
  //   * x86 used JS and x64 used JZ as the count guard; both now use JLE so
  //     that zero and negative counts are rejected on either target.
  //   * The redundant "OR EAX,$FF000000" on the loop-invariant P (x86) and the
  //     duplicated OR on the result (x64) are gone; the stored pixel is the same.
  //   * The x64 path keeps its pointer in volatile R10 instead of pushing RDI,
  //     like BlendLine_ASM does.

{$IFDEF TARGET_x86}
  // in: EAX = Src, EDX = Dst, ECX = Count
  // scratch: EAX, ECX, EDX (EBX, ESI, EDI are saved/restored)

        TEST    ECX,ECX
        JLE     @Done                   // Count <= 0
        TEST    EAX,$FF000000
        JZ      @Done                   // fully transparent colour

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     EDI,EDX                 // EDI = current destination pixel
        MOV     ESI,EAX
        SHR     ESI,24                  // ESI = Fa
        CMP     ESI,$FF
        JZ      @FillOpaque

        // P = Src scaled by Fa; loop invariant, kept in EAX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        AND     EBX,$FF00FF00           // alpha/green lanes
        IMUL    EAX,ESI
        SHR     EBX,8
        IMUL    EBX,ESI
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ESI,$FF                 // ESI = weight of the destination

@NextPixel:
        // Q = pixel scaled by the complementary weight
        MOV     EDX,[EDI]
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        AND     EBX,$FF00FF00
        IMUL    EDX,ESI
        SHR     EBX,8
        IMUL    EBX,ESI
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // pixel := P + Q; the per-byte sums are assumed not to overflow
        ADD     EBX,EAX
        OR      EBX,$FF000000
        MOV     [EDI],EBX

        ADD     EDI,4
        DEC     ECX
        JNZ     @NextPixel
        JMP     @Restore

@FillOpaque:
        // Fa = $FF: store Src as is
        MOV     [EDI],EAX
        ADD     EDI,4
        DEC     ECX
        JNZ     @FillOpaque

@Restore:
        POP     EDI
        POP     ESI
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = Src, RDX = Dst, R8D = Count
  // scratch: EAX, ECX, EDX, R8D, R9D, R10

        TEST    R8D,R8D
        JLE     @Done                   // Count <= 0
        TEST    ECX,$FF000000
        JZ      @Done                   // fully transparent colour

        MOV     R10,RDX                 // R10 = current destination pixel
        MOV     R9D,ECX
        SHR     R9D,24                  // R9D = Fa
        CMP     R9D,$FF
        JZ      @FillOpaque

        // P = Src scaled by Fa; loop invariant, kept in ECX
        MOV     EAX,ECX
        AND     ECX,$00FF00FF           // red/blue lanes
        AND     EAX,$FF00FF00           // alpha/green lanes
        IMUL    ECX,R9D
        SHR     EAX,8
        IMUL    EAX,R9D
        ADD     ECX,bias
        AND     ECX,$FF00FF00
        SHR     ECX,8                   // ECX = 00 Pr 00 Pb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pa 00 Pg 00
        OR      ECX,EAX                 // ECX = Pa Pr Pg Pb

        XOR     R9D,$FF                 // R9D = weight of the destination

@NextPixel:
        // Q = pixel scaled by the complementary weight
        MOV     EDX,[R10]
        MOV     EAX,EDX
        AND     EDX,$00FF00FF
        AND     EAX,$FF00FF00
        IMUL    EDX,R9D
        SHR     EAX,8
        IMUL    EAX,R9D
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Qa 00 Qg 00
        OR      EAX,EDX                 // EAX = Qa Qr Qg Qb

        // pixel := P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,ECX
        OR      EAX,$FF000000
        MOV     [R10],EAX

        ADD     R10,4
        DEC     R8D
        JNZ     @NextPixel
        RET

@FillOpaque:
        // Fa = $FF: Src is still intact in ECX, store it as is
        MOV     [R10],ECX
        ADD     R10,4
        DEC     R8D
        JNZ     @FillOpaque

@Done:
{$ENDIF}
end;
1075
1076{$IFDEF TARGET_x86}
1077
// Merges foreground F over background B and returns a colour whose alpha is
// computed as well (unlike the Blend* routines, which force alpha to $FF).
// Shortcuts taken before the table-driven path:
//   F.A = 0    -> B is returned
//   B.A = $FF  -> the routine behind the BlendReg pointer is called and the
//                 alpha of its result is forced to $FF
//   F.A = $FF  -> F is returned
//   B.A = 0    -> F is returned
// The general path uses the byte tables DivTable and RcTable, addressed as
// rows of 256 bytes. NOTE(review): both tables are declared outside this
// file (presumably in GR32_Blend) - confirm their exact definition there.
// in: EAX = F, EDX = B   out: EAX   (EBX, ESI, EDI are saved/restored)
1078function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
1079asm
1080 { This is an implementation of the merge formula, as described
1081 in a paper by Bruce Wallace in 1981. Merging is associative,
1082 that is, A over (B over C) = (A over B) over C. The formula is,
1083
1084 Ra = Fa + Ba * (1 - Fa)
1085 Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra
1086
1087 where
1088
1089 Rc is the resultant color,
1090 Ra is the resultant alpha,
1091 Fc is the foreground color,
1092 Fa is the foreground alpha,
1093 Bc is the background color,
1094 Ba is the background alpha.
1095 }
1096
1097 // EAX <- F
1098 // EDX <- B
1099
1100 // if F.A = 0 then
1101 TEST EAX,$FF000000
1102 JZ @exit0 // result is B
1103
1104 // else if B.A = 255 then
1105 CMP EDX,$FF000000
1106 JNC @blend // opaque background: delegate to BlendReg
1107
1108 // else if F.A = 255 then
1109 CMP EAX,$FF000000
1110 JNC @Exit // result is F (already in EAX)
1111
1112 // else if B.A = 0 then
1113 TEST EDX,$FF000000
1114 JZ @Exit // result is F (already in EAX)
1115
1116@4:
1117 PUSH EBX
1118 PUSH ESI
1119 PUSH EDI
1120 ADD ESP,-$0C // 12 bytes of locals: [ESP] = F, [ESP+4] = B, [ESP+8] = Result
1121 MOV [ESP+$04],EDX
1122 MOV [ESP],EAX
1123
1124 // AH <- F.A
1125 // DL, CL <- B.A
1126 SHR EAX,16
1127 AND EAX,$0000FF00 // EAX = F.A * 256, i.e. the byte offset of table row F.A
1128 SHR EDX,24
1129 MOV CL,DL
1130 NOP // NOTE(review): purpose of the three NOPs is not evident from this file
1131 NOP
1132 NOP
1133
1134 // EDI <- PF
1135 // EDX <- PB
1136 // ESI <- PR
1137
1138 // PF := @DivTable[F.A];
1139 LEA EDI,[EAX+DivTable]
1140 // PB := @DivTable[B.A];
1141 SHL EDX,$08
1142 LEA EDX,[EDX+DivTable]
1143
1144 // Result.A := B.A + F.A - PB[F.A];
1145 SHR EAX,8 // EAX = F.A
1146 ADD ECX,EAX
1147 SUB ECX,[EDX+EAX] // dword read; only the low byte (CL) of the difference is used below
1148 MOV [ESP+$0B],CL // top byte of the result = alpha
1149 // PR := @RcTable[Result.A];
1150 SHL ECX,$08
1151 AND ECX,$0000FFFF // keep Result.A * 256 only
1152 LEA ESI,[ECX+RcTable]
1153
1154 { Red component }
1155
1156 // Result.R := PB[B.R];
1157 XOR EAX,EAX
1158 MOV AL,[ESP+$06] // B.R
1159 MOV CL,[EDX+EAX]
1160 MOV [ESP+$0a],CL
1161 // X := F.R - Result.R;
1162 MOV AL,[ESP+$02] // F.R
1163 XOR EBX,EBX
1164 MOV BL,CL
1165 SUB EAX,EBX
1166 // if X >= 0 then
1167 JL @5
1168 // Result.R := PR[PF[X] + Result.R]
1169 MOVZX EAX,BYTE PTR[EDI+EAX]
1170 AND ECX,$000000FF
1171 ADD EAX,ECX
1172 MOV AL,[ESI+EAX]
1173 MOV [ESP+$0A],AL
1174 JMP @6
1175@5:
1176 // Result.R := PR[Result.R - PF[-X]];
1177 NEG EAX
1178 MOVZX EAX,BYTE PTR[EDI+EAX]
1179 XOR ECX,ECX
1180 MOV CL,[ESP+$0A]
1181 SUB ECX,EAX
1182 MOV AL,[ESI+ECX]
1183 MOV [ESP+$0A],AL
1184
1185
1186 { Green component }
1187
1188@6:
1189 // Result.G := PB[B.G];
1190 XOR EAX,EAX
1191 MOV AL,[ESP+$05] // B.G
1192 MOV CL,[EDX+EAX]
1193 MOV [ESP+$09],CL
1194 // X := F.G - Result.G;
1195 MOV AL,[ESP+$01] // F.G
1196 XOR EBX,EBX
1197 MOV BL,CL
1198 SUB EAX,EBX
1199 // if X >= 0 then
1200 JL @7
1201 // Result.G := PR[PF[X] + Result.G]
1202 MOVZX EAX,BYTE PTR[EDI+EAX]
1203 AND ECX,$000000FF
1204 ADD EAX,ECX
1205 MOV AL,[ESI+EAX]
1206 MOV [ESP+$09],AL
1207 JMP @8
1208@7:
1209 // Result.G := PR[Result.G - PF[-X]];
1210 NEG EAX
1211 MOVZX EAX,BYTE PTR[EDI+EAX]
1212 XOR ECX,ECX
1213 MOV CL,[ESP+$09]
1214 SUB ECX,EAX
1215 MOV AL,[ESI+ECX]
1216 MOV [ESP+$09],AL
1217
1218
1219 { Blue component }
1220@8:
1221 // Result.B := PB[B.B];
1222 XOR EAX,EAX
1223 MOV AL,[ESP+$04] // B.B
1224 MOV CL,[EDX+EAX] // last use of PB; EDX is reused as scratch below
1225 MOV [ESP+$08],CL
1226 // X := F.B - Result.B;
1227 MOV AL,[ESP] // F.B
1228 XOR EDX,EDX
1229 MOV DL,CL
1230 SUB EAX,EDX
1231 // if X >= 0 then
1232 JL @9
1233 // Result.B := PR[PF[X] + Result.B]
1234 MOVZX EAX,BYTE PTR[EDI+EAX]
1235 XOR EDX,EDX
1236 MOV DL,CL
1237 ADD EAX,EDX
1238 MOV AL,[ESI+EAX]
1239 MOV [ESP+$08],AL
1240 JMP @10
1241@9:
1242 // Result.B := PR[Result.B - PF[-X]];
1243 NEG EAX
1244 MOVZX EAX,BYTE PTR[EDI+EAX]
1245 XOR EDX,EDX
1246 MOV DL,CL
1247 SUB EDX,EAX
1248 MOV AL,[ESI+EDX]
1249 MOV [ESP+$08],AL
1250
1251@10:
1252 // EAX <- Result
1253 MOV EAX,[ESP+$08]
1254
1255 // end;
1256 ADD ESP,$0C // release the 12 bytes of locals
1257 POP EDI
1258 POP ESI
1259 POP EBX
1260 RET
1261@blend:
1262 CALL DWORD PTR [BlendReg] // indirect call; EAX = F and EDX = B are still intact here
1263 OR EAX,$FF000000
1264 RET
1265@exit0:
1266 MOV EAX,EDX
1267@Exit:
1268end;
1269
1270{$ENDIF}
1271
function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Weighted mix of the colours X and Y over all four channels (alpha is
  // mixed like any other channel; it is not forced to $FF).
  //   W = 0   : Y is returned
  //   W = $FF : X is returned
  //   else    : Result = P + Q, where P is X scaled by W and Q is Y scaled by
  //             (W xor $FF).
  // Scaling handles two channels at a time as 16-bit lanes: multiply by the
  // weight, add `bias`, keep the high byte of every lane.

{$IFDEF TARGET_x86}
  // in: EAX = X, EDX = Y, ECX = W   out: EAX
  // scratch: ECX, EDX (EBX is saved/restored)

        // JCXZ looks at the low 16 bits of W only and has a short-range
        // encoding, so its target has to stay close to this point.
        JCXZ    @ReturnY                // W = 0: result is Y
        CMP     ECX,$FF
        JE      @Done                   // W = $FF: result is X, already in EAX

        PUSH    EBX

        // P = X scaled by W
        MOV     EBX,EAX
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,ECX
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     EBX,$FF00FF00           // alpha/green lanes
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

        XOR     ECX,$FF                 // ECX = weight applied to Y

        // Q = Y scaled by the complementary weight
        MOV     EBX,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,ECX
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     EBX,$FF00FF00
        SHR     EBX,8
        IMUL    EBX,ECX
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,EBX

        POP     EBX
        RET

@ReturnY:
        MOV     EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in: ECX = X, EDX = Y, R8D = W   out: EAX
  // scratch: ECX, EDX, R8D

        MOV     EAX,ECX                 // EAX = X (overwritten at @ReturnY if W = 0)
        TEST    R8D,R8D
        JZ      @ReturnY                // W = 0: result is Y
        CMP     R8B,$FF                 // only the low byte of W is compared here
        JE      @Done                   // W = $FF: result is X, already in EAX

        // P = X scaled by W
        AND     EAX,$00FF00FF           // red/blue lanes
        IMUL    EAX,R8D
        ADD     EAX,bias
        AND     EAX,$FF00FF00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        AND     ECX,$FF00FF00           // alpha/green lanes
        SHR     ECX,8
        IMUL    ECX,R8D
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Pa 00 Pg 00
        OR      EAX,ECX                 // EAX = Pa Pr Pg Pb

        XOR     R8D,$FF                 // R8D = weight applied to Y

        // Q = Y scaled by the complementary weight
        MOV     ECX,EDX
        AND     EDX,$00FF00FF
        IMUL    EDX,R8D
        ADD     EDX,bias
        AND     EDX,$FF00FF00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        AND     ECX,$FF00FF00
        SHR     ECX,8
        IMUL    ECX,R8D
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Qa 00 Qg 00
        OR      ECX,EDX                 // ECX = Qa Qr Qg Qb

        // Result = P + Q; the per-byte sums are assumed not to overflow
        ADD     EAX,ECX

        RET

@ReturnY:
        MOV     EAX,EDX
@Done:
{$ENDIF}
end;
1378
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // In-place weighted mix: Y := W * X + (W xor $FF) * Y, applied to all four
  // bytes (alpha included) of the packed 32-bit colors.
  //
  //   W = 0    : Y is left untouched (nothing is written)
  //   W = $FF  : Y := X
  //   otherwise: each byte product is rounded with bias and shifted right
  //              by 8, then both weighted colors are added.
  //
  // Two bytes are processed per multiply by spreading them into the 16-bit
  // halves of a 32-bit register ($00FF00FF / $FF00FF00 masks).
  // bias is declared outside this routine; presumably the per-half rounding
  // constant - confirm against its declaration.
  // W is expected in 0..$FF; the final ADD relies on the two weighted colors
  // not overflowing any byte when summed.

{$IFDEF TARGET_x86}
  // In:  EAX = X, EDX = address of Y, ECX = W
  // EBX and ESI are used as scratch and are saved/restored.

  // Check W
        JCXZ    @Exit                   // CX = 0  => write nothing
        CMP     ECX,$FF
        // Symbolic branch for every compiler: the assembler computes the
        // displacement, so changes to the code below cannot silently
        // invalidate the jump target.
        JZ      @StoreX                 // W = 255 => Y := X

        PUSH    EBX
        PUSH    ESI

  // P = W * X
        MOV     EBX,EAX                 // EBX  <-  Xa Xr Xg Xb
        AND     EAX,$00FF00FF           // EAX  <-  00 Xr 00 Xb
        AND     EBX,$FF00FF00           // EBX  <-  Xa 00 Xg 00
        IMUL    EAX,ECX                 // EAX  <-  Pr ** Pb **
        SHR     EBX,8                   // EBX  <-  00 Xa 00 Xg
        IMUL    EBX,ECX                 // EBX  <-  Pa ** Pg **
        ADD     EAX,bias                // round before dropping the low bytes
        AND     EAX,$FF00FF00           // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8                   // EAX  <-  00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX  <-  Pa 00 Pg 00
        OR      EAX,EBX                 // EAX  <-  Pa Pr Pg Pb

  // W = complementary weight
        MOV     ESI,[EDX]               // ESI  <-  Ya Yr Yg Yb
        XOR     ECX,$000000FF           // ECX  <-  W xor $FF

  // Q = W * Y
        MOV     EBX,ESI                 // EBX  <-  Ya Yr Yg Yb
        AND     ESI,$00FF00FF           // ESI  <-  00 Yr 00 Yb
        AND     EBX,$FF00FF00           // EBX  <-  Ya 00 Yg 00
        IMUL    ESI,ECX                 // ESI  <-  Qr ** Qb **
        SHR     EBX,8                   // EBX  <-  00 Ya 00 Yg
        IMUL    EBX,ECX                 // EBX  <-  Qa ** Qg **
        ADD     ESI,bias
        AND     ESI,$FF00FF00           // ESI  <-  Qr 00 Qb 00
        SHR     ESI,8                   // ESI  <-  00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX  <-  Qa 00 Qg 00
        OR      EBX,ESI                 // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX                 // EAX  <-  Za Zr Zg Zb

        MOV     [EDX],EAX               // Y := Z

        POP     ESI
        POP     EBX
@Exit:
        RET

@StoreX:
        MOV     [EDX],EAX               // Y := X
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = X, RDX = address of Y, R8D = W
  // Only EAX, ECX, R8D and R9D are modified; no stack is used.

  // Check W
        TEST    R8D,R8D
        JZ      @Exit                   // W = 0   => write nothing
        MOV     EAX,ECX                 // EAX  <-  Xa Xr Xg Xb
        CMP     R8B,$FF
        JZ      @Store                  // W = 255 => Y := X (now in EAX)

  // P = W * X
        AND     EAX,$00FF00FF           // EAX  <-  00 Xr 00 Xb
        AND     ECX,$FF00FF00           // ECX  <-  Xa 00 Xg 00
        IMUL    EAX,R8D                 // EAX  <-  Pr ** Pb **
        SHR     ECX,8                   // ECX  <-  00 Xa 00 Xg
        IMUL    ECX,R8D                 // ECX  <-  Pa ** Pg **
        ADD     EAX,bias                // round before dropping the low bytes
        AND     EAX,$FF00FF00           // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8                   // EAX  <-  00 Pr 00 Pb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX  <-  Pa 00 Pg 00
        OR      EAX,ECX                 // EAX  <-  Pa Pr Pg Pb

  // W = complementary weight
        MOV     R9D,[RDX]               // R9D  <-  Ya Yr Yg Yb
        XOR     R8D,$000000FF           // R8D  <-  W xor $FF

  // Q = W * Y
        MOV     ECX,R9D                 // ECX  <-  Ya Yr Yg Yb
        AND     R9D,$00FF00FF           // R9D  <-  00 Yr 00 Yb
        AND     ECX,$FF00FF00           // ECX  <-  Ya 00 Yg 00
        IMUL    R9D,R8D                 // R9D  <-  Qr ** Qb **
        SHR     ECX,8                   // ECX  <-  00 Ya 00 Yg
        IMUL    ECX,R8D                 // ECX  <-  Qa ** Qg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00           // R9D  <-  Qr 00 Qb 00
        SHR     R9D,8                   // R9D  <-  00 Qr 00 Qb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX  <-  Qa 00 Qg 00
        OR      ECX,R9D                 // ECX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,ECX                 // EAX  <-  Za Zr Zg Zb

@Store:
        MOV     [RDX],EAX               // Y := EAX (Z, or X when W = 255)
@Exit:

{$ENDIF}
end;
1492
procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Deliberately contains no instructions: calling this routine does nothing
  // beyond the compiler-generated return.
  // NOTE(review): presumably a placeholder so the plain-assembler bindings
  // expose an EMMS entry point like the other variants - confirm at the
  // registration site.
end;
1496
1497end.
Note: See TracBrowser for help on using the repository browser.