1 | unit GR32_BlendASM;
|
---|
2 |
|
---|
3 | (* ***** BEGIN LICENSE BLOCK *****
|
---|
4 | * Version: MPL 1.1 or LGPL 2.1 with linking exception
|
---|
5 | *
|
---|
6 | * The contents of this file are subject to the Mozilla Public License Version
|
---|
7 | * 1.1 (the "License"); you may not use this file except in compliance with
|
---|
8 | * the License. You may obtain a copy of the License at
|
---|
9 | * http://www.mozilla.org/MPL/
|
---|
10 | *
|
---|
11 | * Software distributed under the License is distributed on an "AS IS" basis,
|
---|
12 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
---|
13 | * for the specific language governing rights and limitations under the
|
---|
14 | * License.
|
---|
15 | *
|
---|
16 | * Alternatively, the contents of this file may be used under the terms of the
|
---|
17 | * Free Pascal modified version of the GNU Lesser General Public License
|
---|
18 | * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
|
---|
19 | * of this license are applicable instead of those above.
|
---|
20 | * Please see the file LICENSE.txt for additional information concerning this
|
---|
21 | * license.
|
---|
22 | *
|
---|
23 | * The Original Code is Graphics32
|
---|
24 | *
|
---|
25 | * The Initial Developer of the Original Code is
|
---|
26 | * Alex A. Denisov
|
---|
27 | *
|
---|
28 | * Portions created by the Initial Developer are Copyright (C) 2000-2009
|
---|
29 | * the Initial Developer. All Rights Reserved.
|
---|
30 | *
|
---|
31 | * Contributor(s):
|
---|
32 | * Christian-W. Budde
|
---|
33 | * - 2019/04/01 - Refactoring
|
---|
34 | *
|
---|
35 | * ***** END LICENSE BLOCK ***** *)
|
---|
36 |
|
---|
37 | interface
|
---|
38 |
|
---|
39 | {$I GR32.inc}
|
---|
40 |
|
---|
41 | uses
|
---|
42 | GR32;
|
---|
43 |
|
---|
// Plain (non-SIMD) assembler blend primitives exported by this unit.
// BlendReg/BlendMem blend a foreground colour F over a background B using
// F's own alpha; the *Reg form returns the result, the *Mem form writes it
// back through B. BlendMems takes a pixel pointer B plus a Count.
44 | function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
|
---|
45 | procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
46 | procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
47 |
|
---|
// The *Ex variants additionally scale F's alpha by a master alpha M.
48 | function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
|
---|
49 | procedure BlendMemEx_ASM(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
50 |
|
---|
// Line variants process Count consecutive pixels at Dst. BlendLine reads a
// source pixel per destination pixel; BlendLine1 takes one colour value.
51 | procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
52 | procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
53 |
|
---|
// NOTE(review): the implementations of the Combine*, MergeReg and EMMS
// routines are not part of the section reviewed here; presumably W is a
// blend weight - confirm against their bodies.
54 | function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
|
---|
55 | procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
|
---|
56 |
|
---|
// MergeReg_ASM is only declared for 32-bit x86 targets.
57 | {$IFDEF TARGET_x86}
|
---|
58 | function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
|
---|
59 | {$ENDIF}
|
---|
60 |
|
---|
61 | procedure EMMS_ASM; {$IFDEF FPC} assembler; {$ENDIF}
|
---|
62 |
|
---|
63 | implementation
|
---|
64 |
|
---|
65 | uses
|
---|
66 | GR32_Blend,
|
---|
67 | GR32_LowLevel,
|
---|
68 | GR32_System;
|
---|
69 |
|
---|
70 | { ASM versions }
|
---|
71 |
|
---|
72 | { Assembler versions }
|
---|
73 |
|
---|
// Rounding term for the packed channel multiplies in this unit. The blend
// routines keep two 8-bit channels in the low bytes of two 16-bit lanes,
// multiply the register by a weight, add `bias` ($80 in each lane) and then
// keep only the high byte of each lane (AND $FF00FF00, optionally SHR 8).
// It is a two-lane constant, so it must only be added to two-lane products.
74 | const
|
---|
75 | bias = $00800080;
|
---|
76 |
|
---|
77 |
|
---|
{ BlendReg_ASM

  Blends the foreground colour F over the background colour B, weighted by
  the alpha byte of F (Fa):

      Result = P + Q    with    P = Fa * F    and    Q = (255 - Fa) * B

  Both products are formed on two channels at a time: the channels sit in
  the low bytes of two 16-bit lanes, the register is multiplied by the
  weight, `bias` is added and the high byte of every lane is kept.

  Shortcuts: Fa = 255 returns F untouched, Fa = 0 returns B untouched.
  On the general path the alpha byte of the result is forced to $FF. }
function BlendReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = F, EDX = B
  // out: EAX = blended colour

  // opaque foreground: EAX >= $FF000000 (unsigned) means Fa = 255
        CMP     EAX,$FF000000
        JNC     @Done

  // fully transparent foreground: hand back the background
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // weight W := Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = [00 00 00 Fa]

        PUSH    EBX             // EBX is used as scratch below

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in EBX
        MOV     EBX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    EDX,ECX         // EDX = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,EDX         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX
        OR      EAX,$FF000000   // result alpha is always $FF

        POP     EBX
        RET

@UseBackground:
        MOV     EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : ECX = F (copied to EAX first), EDX = B
  // out: EAX = blended colour
        MOV     RAX, RCX

  // opaque foreground: EAX >= $FF000000 (unsigned) means Fa = 255
        CMP     EAX,$FF000000
        JNC     @Done

  // fully transparent foreground: hand back the background
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // weight W := Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = [00 00 00 Fa]

  // P := W * F, red/blue in EAX, alpha/green in R9D
        MOV     R9D,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     R9D,$FF00FF00   // R9D = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     R9D,8           // R9D = [00 Fa 00 Fg]
        IMUL    R9D,ECX         // R9D = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = [Pa 00 Pg 00]
        OR      EAX,R9D         // EAX = [Pa Pr Pg Pb]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in R9D
        MOV     R9D,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     R9D,$FF00FF00   // R9D = [Ba 00 Bg 00]
        IMUL    EDX,ECX         // EDX = [Qr ** Qb **]
        SHR     R9D,8           // R9D = [00 Ba 00 Bg]
        IMUL    R9D,ECX         // R9D = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = [Qa 00 Qg 00]
        OR      R9D,EDX         // R9D = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,R9D
        OR      EAX,$FF000000   // result alpha is always $FF
        RET

@UseBackground:
        MOV     EAX,EDX
@Done:
{$ENDIF}
end;
|
---|
200 |
|
---|
{ BlendMem_ASM

  In-place variant of the register blend: blends the foreground colour F
  over the pixel referenced by B and writes the result back to B.

      B := P + Q    with    P = Fa * F    and    Q = (255 - Fa) * B

  Fa = 0 leaves B untouched, Fa = 255 stores F verbatim. Otherwise the
  stored pixel gets its alpha byte forced to $FF. }
procedure BlendMem_ASM(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = F, EDX = address of B

  // transparent foreground: nothing to write
        TEST    EAX,$FF000000
        JZ      @Done

  // weight W := Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = [00 00 00 Fa]

  // opaque foreground: plain store
        CMP     ECX,$FF
        JZ      @StoreForeground

        PUSH    EBX             // scratch registers for the blend
        PUSH    ESI

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel
        MOV     ESI,[EDX]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in ESI, alpha/green in EBX
        MOV     EBX,ESI
        AND     ESI,$00FF00FF   // ESI = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    ESI,ECX         // ESI = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     ESI,bias
        AND     ESI,$FF00FF00   // ESI = [Qr 00 Qb 00]
        SHR     ESI,8           // ESI = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,ESI         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX
        OR      EAX,$FF000000   // stored alpha is always $FF

        MOV     [EDX],EAX
        POP     ESI
        POP     EBX
        RET

@StoreForeground:
        MOV     [EDX],EAX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : ECX = F, RDX = address of B

  // transparent foreground: nothing to write
        TEST    ECX,$FF000000
        JZ      @Done

        MOV     EAX, ECX        // EAX keeps the full colour

  // weight W := Fa
        SHR     ECX,24          // ECX = [00 00 00 Fa]

  // opaque foreground: plain store
        CMP     ECX,$FF
        JZ      @StoreForeground

  // P := W * F, red/blue in EAX, alpha/green in R8D
        MOV     R8D,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     R8D,$FF00FF00   // R8D = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     R8D,8           // R8D = [00 Fa 00 Fg]
        IMUL    R8D,ECX         // R8D = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = [Pa 00 Pg 00]
        OR      EAX,R8D         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel
        MOV     R9D,[RDX]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in R9D, alpha/green in R8D
        MOV     R8D,R9D
        AND     R9D,$00FF00FF   // R9D = [00 Br 00 Bb]
        AND     R8D,$FF00FF00   // R8D = [Ba 00 Bg 00]
        IMUL    R9D,ECX         // R9D = [Qr ** Qb **]
        SHR     R8D,8           // R8D = [00 Ba 00 Bg]
        IMUL    R8D,ECX         // R8D = [Qa ** Qg **]
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = [Qr 00 Qb 00]
        SHR     R9D,8           // R9D = [00 Qr 00 Qb]
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = [Qa 00 Qg 00]
        OR      R8D,R9D         // R8D = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,R8D
        OR      EAX,$FF000000   // stored alpha is always $FF

        MOV     [RDX],EAX
        RET

@StoreForeground:
        MOV     [RDX],EAX
@Done:
{$ENDIF}
end;
|
---|
328 |
|
---|
{ BlendMems_ASM

  Blends the single foreground colour F over Count consecutive pixels
  starting at B, in place. Per pixel:

      B^ := P + Q    with    P = Fa * F    and    Q = (255 - Fa) * B^

  F is a colour VALUE (TColor32), so it is kept in a register for the whole
  run and is never dereferenced or advanced; only the destination pointer
  moves. Q is merged from the registers that actually hold it.

  Count <= 0 or Fa = 0 leaves memory untouched. Fa = 255 stores F (with its
  alpha byte, which is already $FF) into every pixel. Every blended pixel
  gets alpha $FF. }
procedure BlendMems_ASM(F: TColor32; B: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = F, EDX = B, ECX = Count

  // nothing to do for an empty or negative run
  // (TEST clears OF, so JLE fires on ZF = 1 or SF = 1)
        TEST    ECX,ECX
        JLE     @Done

  // F is constant for the run: a transparent F never changes any pixel
        TEST    EAX,$FF000000
        JZ      @Done

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     ESI,EAX         // ESI = F, preserved across iterations
        MOV     EDI,EDX         // EDI = current destination pixel

@Loop:
        MOV     EAX,ESI         // EAX = [Fa Fr Fg Fb]
        PUSH    ECX             // ECX doubles as the weight below

  // weight W := Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = [00 00 00 Fa]

  // opaque foreground: store F as is
        CMP     ECX,$FF
        JZ      @Store

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel
        MOV     EDX,[EDI]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in EBX
        MOV     EBX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    EDX,ECX         // EDX = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,EDX         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX

@Store:
        OR      EAX,$FF000000   // stored alpha is always $FF
        MOV     [EDI],EAX
        POP     ECX             // restore the pixel counter

        ADD     EDI,4           // next destination pixel
        DEC     ECX
        JNZ     @Loop

        POP     EDI
        POP     ESI
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : ECX = F, RDX = B, R8D = Count
  // Only RAX, RCX, RDX and R8-R11 are used, so nothing is pushed.

  // nothing to do for an empty or negative run
        TEST    R8D,R8D
        JLE     @Done

  // F is constant for the run: a transparent F never changes any pixel
        TEST    ECX,$FF000000
        JZ      @Done

        MOV     R9D,ECX         // R9D = F, preserved across iterations
        MOV     R10,RDX         // R10 = current destination pixel

@Loop:
        MOV     ECX,R9D         // ECX = [Fa Fr Fg Fb]

  // weight W := Fa
        MOV     R11D,ECX
        SHR     R11D,24         // R11D = [00 00 00 Fa]

  // opaque foreground: store F as is
        CMP     R11D,$FF
        JZ      @Store

  // P := W * F, red/blue in ECX, alpha/green in EAX
        MOV     EAX,ECX
        AND     ECX,$00FF00FF   // ECX = [00 Fr 00 Fb]
        AND     EAX,$FF00FF00   // EAX = [Fa 00 Fg 00]
        IMUL    ECX,R11D        // ECX = [Pr ** Pb **]
        SHR     EAX,8           // EAX = [00 Fa 00 Fg]
        IMUL    EAX,R11D        // EAX = [Pa ** Pg **]
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = [Pr 00 Pb 00]
        SHR     ECX,8           // ECX = [00 Pr 00 Pb]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pa 00 Pg 00]
        OR      ECX,EAX         // ECX = [Pa Pr Pg Pb]

  // fetch the background pixel
        MOV     EDX,[R10]

  // W := 255 - W
        XOR     R11D,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in EAX
        MOV     EAX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     EAX,$FF00FF00   // EAX = [Ba 00 Bg 00]
        IMUL    EDX,R11D        // EDX = [Qr ** Qb **]
        SHR     EAX,8           // EAX = [00 Ba 00 Bg]
        IMUL    EAX,R11D        // EAX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Qa 00 Qg 00]
        OR      EAX,EDX         // EAX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     ECX,EAX

@Store:
        OR      ECX,$FF000000   // stored alpha is always $FF
        MOV     [R10],ECX

        ADD     R10,4           // next destination pixel
        DEC     R8D
        JNZ     @Loop

@Done:
{$ENDIF}
end;
|
---|
484 |
|
---|
{ BlendRegEx_ASM

  Blends the foreground colour F over the background colour B, using the
  alpha byte of F scaled by the master alpha M as weight:

      W      = (Fa * (M + 1)) shr 8
      Result = P + Q    with    P = W * F    and    Q = (255 - W) * B

  There is no shortcut for M = $FF. Fa = 0 or W = 0 returns B unchanged;
  otherwise the alpha byte of the result is forced to $FF. }
function BlendRegEx_ASM(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = F, EDX = B, ECX = M
  // out: EAX = blended colour

  // transparent foreground: hand back the background
        TEST    EAX,$FF000000
        JZ      @UseBackground

        PUSH    EBX             // EBX is used as scratch below

  // weight W := (Fa * (M + 1)) shr 8
        MOV     EBX,EAX
        INC     ECX             // M + 1 maps the 0..255 range onto 1..256
        SHR     EBX,24          // EBX = [00 00 00 Fa]
        IMUL    ECX,EBX         // ECX = [00 00 W  **]
        SHR     ECX,8           // ECX = [00 00 00 W ], ZF set when W = 0
        JZ      @PopUseBackground

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // W := 255 - W
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in EBX
        MOV     EBX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    EDX,ECX         // EDX = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,EDX         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX
        OR      EAX,$FF000000   // result alpha is always $FF

        POP     EBX
        RET

@PopUseBackground:
        POP     EBX

@UseBackground:
        MOV     EAX,EDX
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : ECX = F, EDX = B, R8D = M
  // out: EAX = blended colour
        MOV     EAX,ECX         // EAX = [Fa Fr Fg Fb]

  // transparent foreground: hand back the background
        TEST    EAX,$FF000000
        JZ      @UseBackground

  // weight W := (Fa * (M + 1)) shr 8
        INC     R8D             // M + 1 maps the 0..255 range onto 1..256
        SHR     ECX,24          // ECX = [00 00 00 Fa]
        IMUL    R8D,ECX         // R8D = [00 00 W  **]
        SHR     R8D,8           // R8D = [00 00 00 W ], ZF set when W = 0
        JZ      @UseBackground

  // P := W * F, red/blue in EAX, alpha/green in ECX
        MOV     ECX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     ECX,$FF00FF00   // ECX = [Fa 00 Fg 00]
        IMUL    EAX,R8D         // EAX = [Pr ** Pb **]
        SHR     ECX,8           // ECX = [00 Fa 00 Fg]
        IMUL    ECX,R8D         // ECX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = [Pa 00 Pg 00]
        OR      EAX,ECX         // EAX = [Pa Pr Pg Pb]

  // W := 255 - W
        XOR     R8D,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in ECX
        MOV     ECX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     ECX,$FF00FF00   // ECX = [Ba 00 Bg 00]
        IMUL    EDX,R8D         // EDX = [Qr ** Qb **]
        SHR     ECX,8           // ECX = [00 Ba 00 Bg]
        IMUL    ECX,R8D         // ECX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = [Qa 00 Qg 00]
        OR      ECX,EDX         // ECX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,ECX
        OR      EAX,$FF000000   // result alpha is always $FF

        RET

@UseBackground:
        MOV     EAX,EDX
{$ENDIF}
end;
|
---|
606 |
|
---|
{ BlendMemEx_ASM

  In-place blend with master alpha: blends the foreground colour F over the
  pixel referenced by B, using the alpha byte of F scaled by M as weight,
  and writes the result back to B.

      W  = (Fa * (M + 1)) shr 8        (same weight as BlendRegEx_ASM)
      B := P + Q    with    P = W * F    and    Q = (255 - W) * B

  Fa = 0 or W = 0 leaves B untouched. The stored pixel always gets its alpha
  byte forced to $FF, on both x86 and x64.

  The weight is a single 8-bit quantity, so the two-lane constant `bias`
  ($00800080) must NOT be added to it: doing so leaves $8000 in the weight
  after the shift, which makes the W = 0 test unreachable and lets the
  channel multiplies spill across their 16-bit lanes. }
procedure BlendMemEx_ASM(F: TColor32; var B: TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = F, EDX = address of B, ECX = M

  // transparent foreground: nothing to write
        TEST    EAX,$FF000000
        JZ      @Done

        PUSH    EBX

  // weight W := (Fa * (M + 1)) shr 8
        MOV     EBX,EAX
        INC     ECX             // M + 1 maps the 0..255 range onto 1..256
        SHR     EBX,24          // EBX = [00 00 00 Fa]
        IMUL    ECX,EBX         // ECX = [00 00 W  **]
        SHR     ECX,8           // ECX = [00 00 00 W ], ZF set when W = 0
        JZ      @PopDone        // W = 0: nothing to write

        PUSH    ESI

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel, W := 255 - W
        MOV     ESI,[EDX]
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in ESI, alpha/green in EBX
        MOV     EBX,ESI
        AND     ESI,$00FF00FF   // ESI = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    ESI,ECX         // ESI = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     ESI,bias
        AND     ESI,$FF00FF00   // ESI = [Qr 00 Qb 00]
        SHR     ESI,8           // ESI = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,ESI         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX
        OR      EAX,$FF000000   // stored alpha is always $FF

        MOV     [EDX],EAX
        POP     ESI

@PopDone:
        POP     EBX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : ECX = F, RDX = address of B, R8D = M

  // transparent foreground: nothing to write
        TEST    ECX,$FF000000
        JZ      @Done

  // weight W := (Fa * (M + 1)) shr 8
        MOV     EAX,ECX
        INC     R8D             // M + 1 maps the 0..255 range onto 1..256
        SHR     EAX,24          // EAX = [00 00 00 Fa]
        IMUL    R8D,EAX         // R8D = [00 00 W  **]
        SHR     R8D,8           // R8D = [00 00 00 W ], ZF set when W = 0
        JZ      @Done           // W = 0: nothing to write

  // P := W * F, red/blue in ECX, alpha/green in EAX
        MOV     EAX,ECX
        AND     ECX,$00FF00FF   // ECX = [00 Fr 00 Fb]
        AND     EAX,$FF00FF00   // EAX = [Fa 00 Fg 00]
        IMUL    ECX,R8D         // ECX = [Pr ** Pb **]
        SHR     EAX,8           // EAX = [00 Fa 00 Fg]
        IMUL    EAX,R8D         // EAX = [Pa ** Pg **]
        ADD     ECX,bias
        AND     ECX,$FF00FF00   // ECX = [Pr 00 Pb 00]
        SHR     ECX,8           // ECX = [00 Pr 00 Pb]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pa 00 Pg 00]
        OR      ECX,EAX         // ECX = [Pa Pr Pg Pb]

  // fetch the background pixel, W := 255 - W
        MOV     R9D,[RDX]
        XOR     R8D,$000000FF

  // Q := W * B, red/blue in R9D, alpha/green in EAX
        MOV     EAX,R9D
        AND     R9D,$00FF00FF   // R9D = [00 Br 00 Bb]
        AND     EAX,$FF00FF00   // EAX = [Ba 00 Bg 00]
        IMUL    R9D,R8D         // R9D = [Qr ** Qb **]
        SHR     EAX,8           // EAX = [00 Ba 00 Bg]
        IMUL    EAX,R8D         // EAX = [Qa ** Qg **]
        ADD     R9D,bias
        AND     R9D,$FF00FF00   // R9D = [Qr 00 Qb 00]
        SHR     R9D,8           // R9D = [00 Qr 00 Qb]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Qa 00 Qg 00]
        OR      EAX,R9D         // EAX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     ECX,EAX
        OR      ECX,$FF000000   // stored alpha is always $FF, as on x86

        MOV     [RDX],ECX

@Done:
{$ENDIF}
end;
|
---|
734 |
|
---|
{ BlendLine_ASM

  Blends Count source pixels at Src over the Count destination pixels at
  Dst, in place, each source pixel weighted by its own alpha byte (Fa):

      Dst^ := P + Q    with    P = Fa * Src^    and    Q = (255 - Fa) * Dst^

  Per pixel: Fa = 0 leaves the destination untouched, Fa = 255 copies the
  source pixel, anything else is blended and stored with alpha $FF.

  Count <= 0 is a no-op. The guard uses JLE rather than JS: the loop is
  bottom-tested with DEC/JNZ, so entering it with Count = 0 would wrap the
  counter to $FFFFFFFF and walk far past both buffers. }
procedure BlendLine_ASM(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // in : EAX = Src, EDX = Dst, ECX = Count

  // reject zero and negative counts
  // (TEST clears OF, so JLE fires on ZF = 1 or SF = 1)
        TEST    ECX,ECX
        JLE     @Done

        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     ESI,EAX         // ESI = current source pixel
        MOV     EDI,EDX         // EDI = current destination pixel

@Loop:
        MOV     EAX,[ESI]
        TEST    EAX,$FF000000
        JZ      @Next           // fully transparent: skip this pixel

        PUSH    ECX             // ECX doubles as the weight below

  // weight W := Fa
        MOV     ECX,EAX
        SHR     ECX,24          // ECX = [00 00 00 Fa]

  // opaque source pixel: plain copy
        CMP     ECX,$FF
        JZ      @Store

  // P := W * F, red/blue in EAX, alpha/green in EBX
        MOV     EBX,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     EBX,$FF00FF00   // EBX = [Fa 00 Fg 00]
        IMUL    EAX,ECX         // EAX = [Pr ** Pb **]
        SHR     EBX,8           // EBX = [00 Fa 00 Fg]
        IMUL    EBX,ECX         // EBX = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Pa 00 Pg 00]
        OR      EAX,EBX         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel, W := 255 - W
        MOV     EDX,[EDI]
        XOR     ECX,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in EBX
        MOV     EBX,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     EBX,$FF00FF00   // EBX = [Ba 00 Bg 00]
        IMUL    EDX,ECX         // EDX = [Qr ** Qb **]
        SHR     EBX,8           // EBX = [00 Ba 00 Bg]
        IMUL    EBX,ECX         // EBX = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX = [Qa 00 Qg 00]
        OR      EBX,EDX         // EBX = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,EBX
        OR      EAX,$FF000000   // blended pixels get alpha $FF

@Store:
        MOV     [EDI],EAX
        POP     ECX             // restore the pixel counter

@Next:
        ADD     ESI,4
        ADD     EDI,4

        DEC     ECX
        JNZ     @Loop

        POP     EDI
        POP     ESI
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // in : RCX = Src, RDX = Dst, R8D = Count

  // reject zero and negative counts
  // (TEST clears OF, so JLE fires on ZF = 1 or SF = 1)
        TEST    R8D,R8D
        JLE     @Done

        MOV     R10,RCX         // R10 = current source pixel
        MOV     R11,RDX         // R11 = current destination pixel
        MOV     ECX,R8D         // ECX = pixel counter (R8D becomes scratch)

@Loop:
        MOV     EAX,[R10]
        TEST    EAX,$FF000000
        JZ      @Next           // fully transparent: skip this pixel

  // weight W := Fa
        MOV     R9D,EAX
        SHR     R9D,24          // R9D = [00 00 00 Fa]

  // opaque source pixel: plain copy
        CMP     R9D,$FF
        JZ      @Store

  // P := W * F, red/blue in EAX, alpha/green in R8D
        MOV     R8D,EAX
        AND     EAX,$00FF00FF   // EAX = [00 Fr 00 Fb]
        AND     R8D,$FF00FF00   // R8D = [Fa 00 Fg 00]
        IMUL    EAX,R9D         // EAX = [Pr ** Pb **]
        SHR     R8D,8           // R8D = [00 Fa 00 Fg]
        IMUL    R8D,R9D         // R8D = [Pa ** Pg **]
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX = [Pr 00 Pb 00]
        SHR     EAX,8           // EAX = [00 Pr 00 Pb]
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = [Pa 00 Pg 00]
        OR      EAX,R8D         // EAX = [Pa Pr Pg Pb]

  // fetch the background pixel, W := 255 - W
        MOV     EDX,[R11]
        XOR     R9D,$000000FF

  // Q := W * B, red/blue in EDX, alpha/green in R8D
        MOV     R8D,EDX
        AND     EDX,$00FF00FF   // EDX = [00 Br 00 Bb]
        AND     R8D,$FF00FF00   // R8D = [Ba 00 Bg 00]
        IMUL    EDX,R9D         // EDX = [Qr ** Qb **]
        SHR     R8D,8           // R8D = [00 Ba 00 Bg]
        IMUL    R8D,R9D         // R8D = [Qa ** Qg **]
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX = [Qr 00 Qb 00]
        SHR     EDX,8           // EDX = [00 Qr 00 Qb]
        ADD     R8D,bias
        AND     R8D,$FF00FF00   // R8D = [Qa 00 Qg 00]
        OR      R8D,EDX         // R8D = [Qa Qr Qg Qb]

  // Z := P + Q; the per-byte sums are assumed not to carry
        ADD     EAX,R8D
        OR      EAX,$FF000000   // blended pixels get alpha $FF

@Store:
        MOV     [R11],EAX

@Next:
        ADD     R10,4
        ADD     R11,4

        DEC     ECX
        JNZ     @Loop

@Done:
{$ENDIF}
end;
|
---|
897 |
|
---|
procedure BlendLine1_ASM(Src: TColor32; Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
// Blends the single colour Src over Count consecutive pixels starting at Dst.
//
//   Src   - foreground colour; its top byte (alpha) is used as the weight W
//   Dst   - pointer to the first destination pixel, updated in place
//   Count - number of pixels to process; values <= 0 leave Dst untouched
//
// Per pixel:  Z = ((W * F + bias) shr 8) + (((255 - W) * B + bias) shr 8)
// computed on two channels at a time, with the result alpha forced to $FF.
// Special cases: W = 0 writes nothing, W = 255 stores Src unchanged.
// `bias` is the rounding constant declared elsewhere in this unit.
asm
{$IFDEF TARGET_x86}
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // Nothing to do for an empty or negative run. TEST clears OF, so JLE
  // fires for both zero and negative counts.
        TEST    ECX,ECX
        JLE     @Exit

  // A fully transparent source leaves the destination unchanged.
        TEST    EAX,$FF000000
        JZ      @Exit

  // From here on callee-saved registers are in use; every path below must
  // pop them again before returning.
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI

        MOV     ESI,EAX         // ESI <- Src
        MOV     EDI,EDX         // EDI <- Dst

  // Weight W = Fa
        SHR     ESI,24          // ESI <- W

  // A fully opaque source is simply copied.
        CMP     ESI,$FF
        JZ      @Copy

  // P = W * F  (loop invariant, computed once)
        MOV     EBX,EAX         // EBX  <-  Fa Fr Fg Fb
        AND     EAX,$00FF00FF   // EAX  <-  00 Fr 00 Fb
        AND     EBX,$FF00FF00   // EBX  <-  Fa 00 Fg 00
        IMUL    EAX,ESI         // EAX  <-  Pr ** Pb **
        SHR     EBX,8           // EBX  <-  00 Fa 00 Fg
        IMUL    EBX,ESI         // EBX  <-  Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00   // EAX  <-  Pr 00 Pb 00
        SHR     EAX,8           // EAX  <-  00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Pa 00 Pg 00
        OR      EAX,EBX         // EAX  <-  Pa Pr Pg Pb
        XOR     ESI,$000000FF   // ESI  <-  255 - W

@Loop:
  // Q = (255 - W) * B
        MOV     EDX,[EDI]
        MOV     EBX,EDX         // EBX  <-  Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND     EBX,$FF00FF00   // EBX  <-  Ba 00 Bg 00
        IMUL    EDX,ESI         // EDX  <-  Qr ** Qb **
        SHR     EBX,8           // EBX  <-  00 Ba 00 Bg
        IMUL    EBX,ESI         // EBX  <-  Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR     EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00   // EBX  <-  Qa 00 Qg 00
        OR      EBX,EDX         // EBX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte); EAX (P) stays untouched
  // so it can be reused by the next iteration.
        ADD     EBX,EAX         // EBX  <-  Za Zr Zg Zb
        OR      EBX,$FF000000   // EBX  <-  FF Zr Zg Zb
        MOV     [EDI],EBX

        ADD     EDI,4

        DEC     ECX
        JNZ     @Loop

        POP     EDI
        POP     ESI
        POP     EBX

@Exit:
        RET

@Copy:
  // Opaque source: store Src into every destination pixel.
        MOV     [EDI],EAX
        ADD     EDI,4

        DEC     ECX
        JNZ     @Copy

        POP     EDI
        POP     ESI
        POP     EBX

{$ENDIF}

{$IFDEF TARGET_x64}
  // RCX <- Src
  // RDX <- Dst
  // R8  <- Count
  // NOTE(review): this register assignment matches the Win64 calling
  // convention - confirm how non-Windows x64 targets are handled.

  // Nothing to do for an empty or negative run (TEST clears OF, so JLE
  // covers both zero and negative counts).
        TEST    R8D,R8D         // R8D <- Count
        JLE     @Exit

  // A fully transparent source leaves the destination unchanged.
        TEST    ECX,$FF000000
        JZ      @Exit

        PUSH    RDI

        MOV     RDI,RDX         // RDI <- Dst
        MOV     R9D,ECX         // R9D <- Src

  // Weight W = Fa
        SHR     R9D,24          // R9D <- W

  // A fully opaque source is simply copied.
        CMP     R9D,$FF
        JZ      @Copy

  // P = W * F  (loop invariant, computed once)
        MOV     EAX,ECX         // EAX  <-  Fa Fr Fg Fb
        AND     ECX,$00FF00FF   // ECX  <-  00 Fr 00 Fb
        AND     EAX,$FF00FF00   // EAX  <-  Fa 00 Fg 00
        IMUL    ECX,R9D         // ECX  <-  Pr ** Pb **
        SHR     EAX,8           // EAX  <-  00 Fa 00 Fg
        IMUL    EAX,R9D         // EAX  <-  Pa ** Pg **
        ADD     ECX,Bias
        AND     ECX,$FF00FF00   // ECX  <-  Pr 00 Pb 00
        SHR     ECX,8           // ECX  <-  00 Pr 00 Pb
        ADD     EAX,Bias
        AND     EAX,$FF00FF00   // EAX  <-  Pa 00 Pg 00
        OR      ECX,EAX         // ECX  <-  Pa Pr Pg Pb
        XOR     R9D,$000000FF   // R9D  <-  255 - W

@Loop:
  // Q = (255 - W) * B
        MOV     EDX,[RDI]
        MOV     EAX,EDX         // EAX  <-  Ba Br Bg Bb
        AND     EDX,$00FF00FF   // EDX  <-  00 Br 00 Bb
        AND     EAX,$FF00FF00   // EAX  <-  Ba 00 Bg 00
        IMUL    EDX,R9D         // EDX  <-  Qr ** Qb **
        SHR     EAX,8           // EAX  <-  00 Ba 00 Bg
        IMUL    EAX,R9D         // EAX  <-  Qa ** Qg **
        ADD     EDX,Bias
        AND     EDX,$FF00FF00   // EDX  <-  Qr 00 Qb 00
        SHR     EDX,8           // EDX  <-  00 Qr 00 Qb
        ADD     EAX,Bias
        AND     EAX,$FF00FF00   // EAX  <-  Qa 00 Qg 00
        OR      EAX,EDX         // EAX  <-  Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,ECX         // EAX  <-  Za Zr Zg Zb
        OR      EAX,$FF000000   // EAX  <-  FF Zr Zg Zb
        MOV     [RDI],EAX

        ADD     RDI,4

        DEC     R8D
        JNZ     @Loop

        POP     RDI

@Exit:
        RET

@Copy:
  // Opaque source: ECX still holds the unmodified Src here.
        MOV     [RDI],ECX
        ADD     RDI,4

        DEC     R8D
        JNZ     @Copy

        POP     RDI
{$ENDIF}
end;
1075 |
|
---|
1076 | {$IFDEF TARGET_x86}
|
---|
1077 |
|
---|
function MergeReg_ASM(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  { Table-driven merge of foreground F over background B, following the
    merge formula described in a paper by Bruce Wallace (1981). Merging is
    associative: A over (B over C) = (A over B) over C.

      Ra = Fa + Ba * (1 - Fa)
      Rc = (Fa * (Fc - Bc * Ba) + Bc * Ba) / Ra

    with  Rc / Ra  the resulting colour / alpha,
          Fc / Fa  the foreground colour / alpha,
          Bc / Ba  the background colour / alpha.

    Shortcuts taken before the general path:
      F.A = 0    -> result is B
      B.A = 255  -> result is BlendReg(F, B) with alpha forced to $FF
      F.A = 255  -> result is F
      B.A = 0    -> result is F

    The general path uses three dwords of stack scratch space:
      [ESP+$00..$03]  copy of F   (bytes: B, G, R, A)
      [ESP+$04..$07]  copy of B   (bytes: B, G, R, A)
      [ESP+$08..$0B]  Result      (bytes: B, G, R, A)
    DivTable and RcTable are 256-byte-per-row lookup tables declared
    elsewhere (presumably multiply / divide tables - confirm at their
    definition). }

  // EAX <- F
  // EDX <- B

  // F.A = 0 ?  -> return B
        TEST    EAX,$FF000000
        JZ      @ReturnB

  // B.A = 255 ?  -> plain blend is sufficient
        CMP     EDX,$FF000000
        JNC     @OpaqueBackground

  // F.A = 255 ?  -> return F (already in EAX)
        CMP     EAX,$FF000000
        JNC     @Done

  // B.A = 0 ?  -> return F (already in EAX)
        TEST    EDX,$FF000000
        JZ      @Done

  // General case: save registers and spill F and B to the scratch area
        PUSH    EBX
        PUSH    ESI
        PUSH    EDI
        ADD     ESP,-$0C
        MOV     [ESP+$04],EDX           // scratch B
        MOV     [ESP],EAX               // scratch F

  // EAX <- F.A shl 8  (row offset into DivTable)
  // EDX, CL <- B.A
        SHR     EAX,16
        AND     EAX,$0000FF00
        SHR     EDX,24
        MOV     CL,DL
        NOP                             // padding only, no functional effect
        NOP
        NOP

  // EDI <- PF := @DivTable[F.A]
        LEA     EDI,[EAX+DivTable]
  // EDX <- PB := @DivTable[B.A]
        SHL     EDX,$08
        LEA     EDX,[EDX+DivTable]

  // Result.A := B.A + F.A - PB[F.A]   (only the low byte of ECX matters)
        SHR     EAX,8
        ADD     ECX,EAX
        SUB     ECX,[EDX+EAX]
        MOV     [ESP+$0B],CL
  // ESI <- PR := @RcTable[Result.A]
        SHL     ECX,$08
        AND     ECX,$0000FFFF
        LEA     ESI,[ECX+RcTable]

  { ---- Red ---- }

  // Result.R := PB[B.R]
        MOVZX   EAX,BYTE PTR [ESP+$06]
        MOV     CL,[EDX+EAX]
        MOV     [ESP+$0A],CL
  // X := F.R - Result.R
        MOV     AL,[ESP+$02]
        MOVZX   EBX,CL
        SUB     EAX,EBX
        JL      @RedNegative
  // X >= 0:  Result.R := PR[PF[X] + Result.R]
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        AND     ECX,$000000FF
        ADD     EAX,ECX
        MOV     AL,[ESI+EAX]
        MOV     [ESP+$0A],AL
        JMP     @Green
@RedNegative:
  // X < 0:  Result.R := PR[Result.R - PF[-X]]
        NEG     EAX
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        MOVZX   ECX,BYTE PTR [ESP+$0A]
        SUB     ECX,EAX
        MOV     AL,[ESI+ECX]
        MOV     [ESP+$0A],AL

  { ---- Green ---- }

@Green:
  // Result.G := PB[B.G]
        MOVZX   EAX,BYTE PTR [ESP+$05]
        MOV     CL,[EDX+EAX]
        MOV     [ESP+$09],CL
  // X := F.G - Result.G
        MOV     AL,[ESP+$01]
        MOVZX   EBX,CL
        SUB     EAX,EBX
        JL      @GreenNegative
  // X >= 0:  Result.G := PR[PF[X] + Result.G]
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        AND     ECX,$000000FF
        ADD     EAX,ECX
        MOV     AL,[ESI+EAX]
        MOV     [ESP+$09],AL
        JMP     @Blue
@GreenNegative:
  // X < 0:  Result.G := PR[Result.G - PF[-X]]
        NEG     EAX
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        MOVZX   ECX,BYTE PTR [ESP+$09]
        SUB     ECX,EAX
        MOV     AL,[ESI+ECX]
        MOV     [ESP+$09],AL

  { ---- Blue ---- }

@Blue:
  // Result.B := PB[B.B]
        MOVZX   EAX,BYTE PTR [ESP+$04]
        MOV     CL,[EDX+EAX]
        MOV     [ESP+$08],CL
  // X := F.B - Result.B   (PB is no longer needed, so EDX becomes a temp)
        MOV     AL,[ESP]
        MOVZX   EDX,CL
        SUB     EAX,EDX
        JL      @BlueNegative
  // X >= 0:  Result.B := PR[PF[X] + Result.B]
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        MOVZX   EDX,CL
        ADD     EAX,EDX
        MOV     AL,[ESI+EAX]
        MOV     [ESP+$08],AL
        JMP     @Collect
@BlueNegative:
  // X < 0:  Result.B := PR[Result.B - PF[-X]]
        NEG     EAX
        MOVZX   EAX,BYTE PTR [EDI+EAX]
        MOVZX   EDX,CL
        SUB     EDX,EAX
        MOV     AL,[ESI+EDX]
        MOV     [ESP+$08],AL

@Collect:
  // EAX <- Result, then release scratch space and restore registers
        MOV     EAX,[ESP+$08]

        ADD     ESP,$0C
        POP     EDI
        POP     ESI
        POP     EBX
        RET

@OpaqueBackground:
  // Opaque background: delegate to the BlendReg routine pointer and make
  // sure the returned alpha is $FF.
        CALL    DWORD PTR [BlendReg]
        OR      EAX,$FF000000
        RET

@ReturnB:
        MOV     EAX,EDX
@Done:
end;
1269 |
|
---|
1270 | {$ENDIF}
|
---|
1271 |
|
---|
function CombineReg_ASM(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // Linear interpolation between two colours, alpha channel included:
  //
  //   Result = W * X + (255 - W) * Y      (per channel, scaled by 1/256)
  //
  // W is the weight of X. W = 0 returns Y and W = $FF returns X without
  // doing any arithmetic. Channels are processed in pairs (R/B and A/G);
  // `bias` is the rounding constant declared elsewhere in this unit.
{$IFDEF TARGET_x86}
  // In:  EAX = X, EDX = Y, ECX = W
  // Out: EAX = Result

  // Trivial weights first.
  // NOTE(review): JCXZ looks at CX only (low 16 bits) - assumes W is 0..255.
        JCXZ    @TakeY                  // W = 0    => Result := Y
        CMP     ECX,$FF                 // W = $FF  => Result := X (in EAX)
        JE      @Done

        PUSH    EBX

  // P = W * X
        MOV     EBX,EAX                 // EBX = Xa Xr Xg Xb
        AND     EAX,$00FF00FF           // EAX = 00 Xr 00 Xb
        AND     EBX,$FF00FF00           // EBX = Xa 00 Xg 00
        IMUL    EAX,ECX                 // EAX = Pr ** Pb **
        SHR     EBX,8                   // EBX = 00 Xa 00 Xg
        IMUL    EBX,ECX                 // EBX = Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pr 00 Pb 00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

  // Q = (255 - W) * Y
        XOR     ECX,$000000FF           // ECX = 255 - W
        MOV     EBX,EDX                 // EBX = Ya Yr Yg Yb
        AND     EDX,$00FF00FF           // EDX = 00 Yr 00 Yb
        AND     EBX,$FF00FF00           // EBX = Ya 00 Yg 00
        IMUL    EDX,ECX                 // EDX = Qr ** Qb **
        SHR     EBX,8                   // EBX = 00 Ya 00 Yg
        IMUL    EBX,ECX                 // EBX = Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00           // EDX = Qr 00 Qb 00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,EDX                 // EBX = Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX                 // EAX = Za Zr Zg Zb

        POP     EBX
        RET

@TakeY: MOV     EAX,EDX
@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = X, EDX = Y, R8D = W
  // Out: EAX = Result

  // Trivial weights first.
        TEST    R8D,R8D
        JZ      @TakeY                  // W = 0    => Result := Y
        MOV     EAX,ECX                 // EAX = Xa Xr Xg Xb
        CMP     R8B,$FF                 // W = $FF  => Result := X (in EAX)
        JE      @Done

  // P = W * X
        AND     EAX,$00FF00FF           // EAX = 00 Xr 00 Xb
        AND     ECX,$FF00FF00           // ECX = Xa 00 Xg 00
        IMUL    EAX,R8D                 // EAX = Pr ** Pb **
        SHR     ECX,8                   // ECX = 00 Xa 00 Xg
        IMUL    ECX,R8D                 // ECX = Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pr 00 Pb 00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Pa 00 Pg 00
        OR      EAX,ECX                 // EAX = Pa Pr Pg Pb

  // Q = (255 - W) * Y
        XOR     R8D,$000000FF           // R8D = 255 - W
        MOV     ECX,EDX                 // ECX = Ya Yr Yg Yb
        AND     EDX,$00FF00FF           // EDX = 00 Yr 00 Yb
        AND     ECX,$FF00FF00           // ECX = Ya 00 Yg 00
        IMUL    EDX,R8D                 // EDX = Qr ** Qb **
        SHR     ECX,8                   // ECX = 00 Ya 00 Yg
        IMUL    ECX,R8D                 // ECX = Qa ** Qg **
        ADD     EDX,bias
        AND     EDX,$FF00FF00           // EDX = Qr 00 Qb 00
        SHR     EDX,8                   // EDX = 00 Qr 00 Qb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Qa 00 Qg 00
        OR      ECX,EDX                 // ECX = Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,ECX                 // EAX = Za Zr Zg Zb

        RET

@TakeY: MOV     EAX,EDX
@Done:
{$ENDIF}
end;
1378 |
|
---|
procedure CombineMem_ASM(X: TColor32; var Y: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // In-place variant of the weighted colour combine:
  //
  //   Y := W * X + (255 - W) * Y          (per channel, scaled by 1/256)
  //
  // W = 0 leaves Y untouched, W = $FF stores X into Y unchanged. Channels
  // are processed in pairs (R/B and A/G); `bias` is the rounding constant
  // declared elsewhere in this unit.
{$IFDEF TARGET_x86}
  // In:  EAX = X (called F below), EDX = @Y (called B below), ECX = W

  // Trivial weights first.
  // NOTE(review): JCXZ looks at CX only (low 16 bits) - assumes W is 0..255.
        JCXZ    @Done                   // W = 0   => write nothing
        CMP     ECX,$FF                 // W = 255 => store F as is
{$IFDEF FPC}
  // Hand-encoded "JZ +$76" aimed at the @Store label. The displacement is
  // tied to the exact encoding of every instruction between here and
  // @Store, so it must be recomputed if anything in between changes.
        DB      $74,$76                 // Prob with FPC 2.2.2 and below
{$ELSE}
        JZ      @Store
{$ENDIF}


        PUSH    EBX
        PUSH    ESI

  // P = W * F
        MOV     EBX,EAX                 // EBX = Fa Fr Fg Fb
        AND     EAX,$00FF00FF           // EAX = 00 Fr 00 Fb
        AND     EBX,$FF00FF00           // EBX = Fa 00 Fg 00
        IMUL    EAX,ECX                 // EAX = Pr ** Pb **
        SHR     EBX,8                   // EBX = 00 Fa 00 Fg
        IMUL    EBX,ECX                 // EBX = Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pr 00 Pb 00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Pa 00 Pg 00
        OR      EAX,EBX                 // EAX = Pa Pr Pg Pb

  // Q = (255 - W) * B
        MOV     ESI,[EDX]               // ESI = current value of Y
        XOR     ECX,$000000FF           // ECX = 255 - W
        MOV     EBX,ESI                 // EBX = Ba Br Bg Bb
        AND     ESI,$00FF00FF           // ESI = 00 Br 00 Bb
        AND     EBX,$FF00FF00           // EBX = Ba 00 Bg 00
        IMUL    ESI,ECX                 // ESI = Qr ** Qb **
        SHR     EBX,8                   // EBX = 00 Ba 00 Bg
        IMUL    EBX,ECX                 // EBX = Qa ** Qg **
        ADD     ESI,bias
        AND     ESI,$FF00FF00           // ESI = Qr 00 Qb 00
        SHR     ESI,8                   // ESI = 00 Qr 00 Qb
        ADD     EBX,bias
        AND     EBX,$FF00FF00           // EBX = Qa 00 Qg 00
        OR      EBX,ESI                 // EBX = Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,EBX                 // EAX = Za Zr Zg Zb

        MOV     [EDX],EAX

        POP     ESI
        POP     EBX
@Done:  RET

@Store: MOV     [EDX],EAX
{$ENDIF}

{$IFDEF TARGET_x64}
  // In:  ECX = X (called F below), RDX = @Y (called B below), R8D = W

  // Trivial weights first.
        TEST    R8D,R8D                 // set flags for W
        JZ      @Done                   // W = 0   => write nothing
        MOV     EAX,ECX                 // EAX = Fa Fr Fg Fb
        CMP     R8B,$FF                 // W = 255 => store F as is
        JZ      @Store

  // P = W * F
        AND     EAX,$00FF00FF           // EAX = 00 Fr 00 Fb
        AND     ECX,$FF00FF00           // ECX = Fa 00 Fg 00
        IMUL    EAX,R8D                 // EAX = Pr ** Pb **
        SHR     ECX,8                   // ECX = 00 Fa 00 Fg
        IMUL    ECX,R8D                 // ECX = Pa ** Pg **
        ADD     EAX,bias
        AND     EAX,$FF00FF00           // EAX = Pr 00 Pb 00
        SHR     EAX,8                   // EAX = 00 Pr 00 Pb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Pa 00 Pg 00
        OR      EAX,ECX                 // EAX = Pa Pr Pg Pb

  // Q = (255 - W) * B
        MOV     R9D,[RDX]               // R9D = current value of Y
        XOR     R8D,$000000FF           // R8D = 255 - W
        MOV     ECX,R9D                 // ECX = Ba Br Bg Bb
        AND     R9D,$00FF00FF           // R9D = 00 Br 00 Bb
        AND     ECX,$FF00FF00           // ECX = Ba 00 Bg 00
        IMUL    R9D,R8D                 // R9D = Qr ** Qb **
        SHR     ECX,8                   // ECX = 00 Ba 00 Bg
        IMUL    ECX,R8D                 // ECX = Qa ** Qg **
        ADD     R9D,bias
        AND     R9D,$FF00FF00           // R9D = Qr 00 Qb 00
        SHR     R9D,8                   // R9D = 00 Qr 00 Qb
        ADD     ECX,bias
        AND     ECX,$FF00FF00           // ECX = Qa 00 Qg 00
        OR      ECX,R9D                 // ECX = Qa Qr Qg Qb

  // Z = P + Q (assuming no overflow at each byte)
        ADD     EAX,ECX                 // EAX = Za Zr Zg Zb

@Store: MOV     [RDX],EAX
@Done:

{$ENDIF}
end;
1492 |
|
---|
procedure EMMS_ASM; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
// Empty routine: the body contains no instructions of its own.
// NOTE(review): presumably a placeholder so that this backend offers the
// same EMMS entry point as the others - confirm against where it is bound.
asm
end;
1496 |
|
---|
1497 | end.
|
---|