source: trunk/Packages/Graphics32/GR32_BlendMMX.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 23.8 KB
Line 
1unit GR32_BlendMMX;
2
3(* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1 or LGPL 2.1 with linking exception
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Alternatively, the contents of this file may be used under the terms of the
17 * Free Pascal modified version of the GNU Lesser General Public License
18 * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
19 * of this license are applicable instead of those above.
20 * Please see the file LICENSE.txt for additional information concerning this
21 * license.
22 *
23 * The Original Code is Graphics32
24 *
25 * The Initial Developer of the Original Code is
26 * Alex A. Denisov
27 *
28 * Portions created by the Initial Developer are Copyright (C) 2000-2009
29 * the Initial Developer. All Rights Reserved.
30 *
31 * Contributor(s):
32 * Christian-W. Budde
33 * - 2019/04/01 - Refactoring
34 *
35 * ***** END LICENSE BLOCK ***** *)
36
37interface
38
39{$I GR32.inc}
40
41uses
42 GR32;
43
44{$IFNDEF OMIT_MMX}
45function BlendReg_MMX(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
46procedure BlendMem_MMX(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
47
48function BlendRegEx_MMX(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
49procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
50
51function BlendRegRGB_MMX(F, B, W: TColor32): TColor32; {$IFDEF FPC} assembler;{$ENDIF}
52procedure BlendMemRGB_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
53
54procedure BlendLine_MMX(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; {$ENDIF}
55procedure BlendLineEx_MMX(Src, Dst: PColor32; Count: Integer; M: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
56
57function CombineReg_MMX(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
58procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
59procedure CombineLine_MMX(Src, Dst: PColor32; Count: Integer; W: TColor32); {$IFDEF FPC} assembler; {$ENDIF}
60
61procedure EMMS_MMX; {$IFDEF FPC} assembler; {$ENDIF}
62
63function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
64
65function ColorAdd_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
66function ColorSub_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
67function ColorModulate_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
68function ColorMax_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
69function ColorMin_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
70function ColorDifference_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
71function ColorExclusion_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
72function ColorScale_MMX(C, W: TColor32): TColor32; {$IFDEF FPC} assembler; {$ENDIF}
73{$ENDIF}
74
75implementation
76
77uses
78 GR32_Blend,
79 GR32_LowLevel,
80 GR32_System;
81
82{ MMX versions }
83
// Blends the foreground colour F over the background colour B, weighted by
// the alpha byte of F:
//   Result := Fa * (Fargb - Bargb) + Bargb      (per channel, 8 fractional bits)
// The alpha byte of the result is always forced to $FF.
// bias_ptr is declared outside this routine; the four words it points to are
// added before the final shift. No EMMS is executed here.
function BlendReg_MMX(F, B: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
        PXOR      MM3,MM3           // MM3 <- 0 (unpack/pack helper)
        MOVD      MM0,EAX
        MOVD      MM2,EDX
        PUNPCKLBW MM0,MM3           // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
        PUNPCKLBW MM2,MM3           // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb

        MOVQ      MM1,MM0
        PUNPCKHWD MM1,MM1
        PUNPCKHDQ MM1,MM1           // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa

        PSUBW     MM0,MM2           // MM0 <- F - B
        PMULLW    MM0,MM1           // MM0 <- Fa * (F - B)
        PSLLW     MM2,8             // MM2 <- B * 256

        MOV       ECX,bias_ptr
        PADDW     MM2,[ECX]         // add bias
        PADDW     MM2,MM0
        PSRLW     MM2,8             // drop the fractional byte
        PACKUSWB  MM2,MM3
        MOVD      EAX,MM2
        OR        EAX,$FF000000     // EAX <- FF Zr Zg Zb
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
        PXOR      MM3,MM3           // MM3 <- 0 (unpack/pack helper)
        MOVD      MM0,ECX
        MOVD      MM2,EDX
        PUNPCKLBW MM0,MM3           // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
        PUNPCKLBW MM2,MM3           // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb

        MOVQ      MM1,MM0
        PUNPCKHWD MM1,MM1
        PUNPCKHDQ MM1,MM1           // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa

        PSUBW     MM0,MM2           // MM0 <- F - B
        PMULLW    MM0,MM1           // MM0 <- Fa * (F - B)
        PSLLW     MM2,8             // MM2 <- B * 256

{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}
        PADDW     MM2,[RAX]         // add bias
        PADDW     MM2,MM0
        PSRLW     MM2,8             // drop the fractional byte
        PACKUSWB  MM2,MM3
        MOVD      EAX,MM2
        OR        EAX,$FF000000     // EAX <- FF Zr Zg Zb
{$ENDIF}
end;
140
141{$IFDEF TARGET_x86}
142
// Blends the foreground colour F over the colour stored in B, in place,
// weighted by the alpha byte of F:
//   Fa = 0    : B is left untouched
//   Fa = $FF  : B := F
//   otherwise : B := Fa * (Fargb - Bargb) + Bargb, alpha byte forced to $FF
// bias_ptr is declared outside this routine. No EMMS is executed here.
procedure BlendMem_MMX(F: TColor32; var B: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // EAX   <- F
  // [EDX] <- B

        TEST      EAX,$FF000000
        JZ        @Done             // fully transparent: nothing to write
        CMP       EAX,$FF000000
        JNC       @Copy             // fully opaque: plain store

        PXOR      MM3,MM3
        MOVD      MM0,EAX
        MOVD      MM2,[EDX]
        PUNPCKLBW MM0,MM3           // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
        MOV       ECX,bias_ptr
        PUNPCKLBW MM2,MM3           // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb
        MOVQ      MM1,MM0
        PUNPCKHWD MM1,MM1
        PSUBW     MM0,MM2           // MM0 <- F - B
        PUNPCKHDQ MM1,MM1           // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa
        PSLLW     MM2,8             // MM2 <- B * 256
        PMULLW    MM0,MM1           // MM0 <- Fa * (F - B)
        PADDW     MM2,[ECX]         // add bias
        PADDW     MM2,MM0
        PSRLW     MM2,8
        PACKUSWB  MM2,MM3
        MOVD      [EDX],MM2
        // Memory,immediate form: there is no register operand to infer the
        // width from, so the 32-bit access is spelled out explicitly.
        OR        DWORD PTR [EDX],$FF000000

@Done:  RET
@Copy:  MOV       [EDX],EAX
end;
176
// Blends F over B using the alpha byte of F scaled by the master alpha M:
//   weight := ((M + 1) * Fa) shr 8
//   Result := weight * (Fargb - Bargb) + Bargb
// When the weight is zero, B is returned unchanged. Unlike BlendReg_MMX the
// alpha byte of the result is not forced to $FF.
// alpha_ptr and bias_ptr are declared outside this routine; the weight
// selects a 16-byte entry relative to alpha_ptr.
function BlendRegEx_MMX(F, B, M: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // EAX <- F
  // EDX <- B
  // ECX <- M
        PUSH      EBX
        MOV       EBX,EAX
        SHR       EBX,24            // EBX <- Fa
        INC       ECX               // 255:256 range bias
        IMUL      ECX,EBX
        SHR       ECX,8             // ECX <- combined weight
        JZ        @Background       // weight = 0 => result is B

        SHL       ECX,4
        ADD       ECX,alpha_ptr     // ECX <- address of the entry for this weight

        PXOR      MM0,MM0
        MOVD      MM1,EAX
        MOVD      MM2,EDX
        PUNPCKLBW MM1,MM0           // MM1 <- F as four words
        PUNPCKLBW MM2,MM0           // MM2 <- B as four words

        PSUBW     MM1,MM2           // MM1 <- F - B
        PMULLW    MM1,[ECX]         // MM1 <- weight * (F - B)
        PSLLW     MM2,8             // MM2 <- B * 256

        MOV       ECX,bias_ptr
        PADDW     MM2,[ECX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      EAX,MM1
        JMP       @Restore

@Background:
        MOV       EAX,EDX
@Restore:
        POP       EBX
end;
216
217{$ENDIF}
218
// Blends F over the colour stored in B, in place, using the alpha byte of F
// scaled by the master alpha M:
//   weight := ((M + 1) * Fa) shr 8
//   B      := weight * (Fargb - Bargb) + Bargb
// B is left untouched when Fa is zero or when the combined weight is zero.
// alpha_ptr and bias_ptr are declared outside this routine; the weight
// selects a 16-byte entry relative to alpha_ptr.
procedure BlendMemEx_MMX(F: TColor32; var B:TColor32; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX   <- F
  // [EDX] <- B
  // ECX   <- M
        TEST      EAX,$FF000000
        JZ        @Done             // Fa = 0: nothing to do

        PUSH      EBX
        MOV       EBX,EAX
        SHR       EBX,24            // EBX <- Fa
        INC       ECX               // 255:256 range bias
        IMUL      ECX,EBX
        SHR       ECX,8             // ECX <- combined weight
        JZ        @Restore          // weight = 0: nothing to do

        SHL       ECX,4
        ADD       ECX,alpha_ptr     // ECX <- address of the entry for this weight

        PXOR      MM0,MM0
        MOVD      MM1,EAX
        MOVD      MM2,[EDX]
        PUNPCKLBW MM1,MM0           // MM1 <- F as four words
        PUNPCKLBW MM2,MM0           // MM2 <- B as four words

        PSUBW     MM1,MM2           // MM1 <- F - B
        PMULLW    MM1,[ECX]         // MM1 <- weight * (F - B)
        PSLLW     MM2,8             // MM2 <- B * 256

        MOV       ECX,bias_ptr
        PADDW     MM2,[ECX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      [EDX],MM1

@Restore:
        POP       EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX   <- F
  // [RDX] <- B
  // R8    <- M
        TEST      ECX,$FF000000
        JZ        @Done             // Fa = 0: nothing to do

        MOV       EAX,ECX
        SHR       EAX,24            // EAX <- Fa
        INC       R8D               // 255:256 range bias
        IMUL      R8D,EAX
        SHR       R8D,8             // R8D <- combined weight
        JZ        @Done             // weight = 0: nothing to do

        SHL       R8D,4
{$IFNDEF FPC}
        ADD       R8,alpha_ptr
{$ELSE}
        ADD       R8,[RIP+alpha_ptr]
{$ENDIF}

        PXOR      MM0,MM0
        MOVD      MM1,ECX
        MOVD      MM2,[RDX]
        PUNPCKLBW MM1,MM0           // MM1 <- F as four words
        PUNPCKLBW MM2,MM0           // MM2 <- B as four words

        PSUBW     MM1,MM2           // MM1 <- F - B
        PMULLW    MM1,[R8]          // MM1 <- weight * (F - B)
        PSLLW     MM2,8             // MM2 <- B * 256

{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}
        PADDW     MM2,[RAX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      [RDX],MM1

@Done:
{$ENDIF}
end;
306
// Blends F over B with an individual multiplier per channel taken from W:
//   Result := Wbyte * (F - B) + B      (per channel, 8 fractional bits)
// W is byte-reversed (BSWAP) before its four bytes are unpacked into the
// multiplier words. bias_ptr is declared outside this routine.
function BlendRegRGB_MMX(F, B, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX <- F
  // EDX <- B
  // ECX <- W
        BSWAP     ECX               // reverse the byte order of W
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,EAX
        MOVD      MM1,EDX
        MOVD      MM3,ECX
        PUNPCKLBW MM0,MM2           // MM0 <- F as four words
        PUNPCKLBW MM1,MM2           // MM1 <- B as four words
        PUNPCKLBW MM3,MM2           // MM3 <- per-channel multipliers

        PSUBW     MM0,MM1           // MM0 <- F - B
        PMULLW    MM0,MM3           // MM0 <- W * (F - B)
        PSLLW     MM1,8             // MM1 <- B * 256

        MOV       EAX,bias_ptr
        PADDW     MM1,[EAX]         // add bias
        PADDW     MM1,MM0
        PSRLW     MM1,8
        PACKUSWB  MM1,MM2
        MOVD      EAX,MM1
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX <- F
  // EDX <- B
  // R8D <- W
        BSWAP     R8D               // reverse the byte order of W
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,ECX
        MOVD      MM1,EDX
        MOVD      MM3,R8D
        PUNPCKLBW MM0,MM2           // MM0 <- F as four words
        PUNPCKLBW MM1,MM2           // MM1 <- B as four words
        PUNPCKLBW MM3,MM2           // MM3 <- per-channel multipliers

        PSUBW     MM0,MM1           // MM0 <- F - B
        PMULLW    MM0,MM3           // MM0 <- W * (F - B)
        PSLLW     MM1,8             // MM1 <- B * 256

{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}
        PADDW     MM1,[RAX]         // add bias
        PADDW     MM1,MM0
        PSRLW     MM1,8
        PACKUSWB  MM1,MM2
        MOVD      EAX,MM1
{$ENDIF}
end;
353
// Blends F over the colour stored in B, in place, with an individual
// multiplier per channel taken from W:
//   B := Wbyte * (F - B) + B      (per channel, 8 fractional bits)
// W is byte-reversed (BSWAP) before its four bytes are unpacked into the
// multiplier words. bias_ptr is declared outside this routine.
procedure BlendMemRGB_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_x86}
  // EAX   <- F
  // [EDX] <- B
  // ECX   <- W
        PXOR      MM2,MM2
        MOVD      MM0,EAX
        PUNPCKLBW MM0,MM2           // MM0 <- F as four words
        MOVD      MM1,[EDX]
        PUNPCKLBW MM1,MM2           // MM1 <- B as four words
        BSWAP     ECX               // reverse the byte order of W
        PSUBW     MM0,MM1           // MM0 <- F - B
        MOVD      MM3,ECX
        PUNPCKLBW MM3,MM2           // MM3 <- per-channel multipliers
        PMULLW    MM0,MM3           // MM0 <- W * (F - B)
        MOV       EAX,bias_ptr
        PSLLW     MM1,8             // MM1 <- B * 256
        PADDW     MM1,[EAX]         // add bias
        PADDW     MM1,MM0
        PSRLW     MM1,8
        PACKUSWB  MM1,MM2
        MOVD      [EDX],MM1
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX   <- F
  // [RDX] <- B
  // R8D   <- W
  // B is a 64-bit pointer: it must be dereferenced through RDX. Using
  // [EDX] would address memory with only the low 32 bits of the pointer.
        PXOR      MM2,MM2
        MOVD      MM0,ECX
        PUNPCKLBW MM0,MM2           // MM0 <- F as four words
        MOVD      MM1,[RDX]
        PUNPCKLBW MM1,MM2           // MM1 <- B as four words
        BSWAP     R8D               // reverse the byte order of W
        PSUBW     MM0,MM1           // MM0 <- F - B
        MOVD      MM3,R8D
        PUNPCKLBW MM3,MM2           // MM3 <- per-channel multipliers
        PMULLW    MM0,MM3           // MM0 <- W * (F - B)
{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}
        PSLLW     MM1,8             // MM1 <- B * 256
        PADDW     MM1,[RAX]         // add bias
        PADDW     MM1,MM0
        PSRLW     MM1,8
        PACKUSWB  MM1,MM2
        MOVD      [RDX],MM1
{$ENDIF}
end;
400
401
402{$IFDEF TARGET_x86}
// Blends Count pixels from Src over Dst, in place. For each source pixel:
//   Fa = 0    : the destination pixel is left untouched
//   Fa = $FF  : the source pixel is copied
//   otherwise : Dst := Fa * (F - Dst) + Dst, alpha byte forced to $FF
// A zero or negative Count is a no-op.
// bias_ptr is declared outside this routine. No EMMS is executed here.
procedure BlendLine_MMX(Src, Dst: PColor32; Count: Integer); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count

  // Bail out for a zero or negative count. JLE (not JS) is required: with
  // Count = 0 the DEC/JNZ loop below would wrap and run 2^32 iterations.
        TEST      ECX,ECX
        JLE       @Done

        PUSH      ESI
        PUSH      EDI

        MOV       ESI,EAX           // ESI <- Src
        MOV       EDI,EDX           // EDI <- Dst

  // loop start
@Pixel: MOV       EAX,[ESI]
        TEST      EAX,$FF000000
        JZ        @Advance          // complete transparency, proceed to next point
        CMP       EAX,$FF000000
        JNC       @Store            // opaque pixel, copy without blending

  // blend
        MOVD      MM0,EAX           // MM0 <- 00 00 00 00 Fa Fr Fg Fb
        PXOR      MM3,MM3           // MM3 <- 00 00 00 00 00 00 00 00
        MOVD      MM2,[EDI]         // MM2 <- 00 00 00 00 Ba Br Bg Bb
        PUNPCKLBW MM0,MM3           // MM0 <- 00 Fa 00 Fr 00 Fg 00 Fb
        MOV       EAX,bias_ptr
        PUNPCKLBW MM2,MM3           // MM2 <- 00 Ba 00 Br 00 Bg 00 Bb
        MOVQ      MM1,MM0           // MM1 <- 00 Fa 00 Fr 00 Fg 00 Fb
        PUNPCKHWD MM1,MM1           // MM1 <- 00 Fa 00 Fa 00 ** 00 **
        PSUBW     MM0,MM2           // MM0 <- 00 Da 00 Dr 00 Dg 00 Db
        PUNPCKHDQ MM1,MM1           // MM1 <- 00 Fa 00 Fa 00 Fa 00 Fa
        PSLLW     MM2,8             // MM2 <- Ba 00 Br 00 Bg 00 Bb 00
        PMULLW    MM0,MM1           // MM0 <- Pa ** Pr ** Pg ** Pb **
        PADDW     MM2,[EAX]         // add bias
        PADDW     MM2,MM0           // MM2 <- Qa ** Qr ** Qg ** Qb **
        PSRLW     MM2,8             // MM2 <- 00 Qa 00 Qr 00 Qg 00 Qb
        PACKUSWB  MM2,MM3           // MM2 <- 00 00 00 00 Qa Qr Qg Qb
        MOVD      EAX,MM2           // EAX <- Qa Qr Qg Qb
        OR        EAX,$FF000000     // EAX <- FF Zr Zg Zb

@Store: MOV       [EDI],EAX

@Advance:
        ADD       ESI,4
        ADD       EDI,4

  // loop end
        DEC       ECX
        JNZ       @Pixel

        POP       EDI
        POP       ESI

@Done:
end;
460
// Blends Count pixels from Src over Dst, in place, scaling each source
// alpha by the master alpha M:
//   weight := ((Fa + 1) * M) shr 8
//   Dst    := weight * (F - Dst) + Dst
// Pixels with Fa = 0 or a zero weight leave the destination untouched.
// A zero or negative Count is a no-op.
// alpha_ptr and bias_ptr are declared outside this routine; the weight
// selects a 16-byte entry relative to alpha_ptr. No EMMS is executed here.
procedure BlendLineEx_MMX(Src, Dst: PColor32; Count: Integer; M: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count
  // M   <- stack parameter

  // Bail out for a zero or negative count. JLE (not JS) is required: with
  // Count = 0 the DEC/JNZ loop below would wrap and run 2^32 iterations.
        TEST      ECX,ECX
        JLE       @Done

        PUSH      ESI
        PUSH      EDI
        PUSH      EBX

        MOV       ESI,EAX           // ESI <- Src
        MOV       EDI,EDX           // EDI <- Dst
        MOV       EDX,M             // EDX <- Master Alpha

  // loop start
@Pixel: MOV       EAX,[ESI]
        TEST      EAX,$FF000000
        JZ        @Advance          // complete transparency, proceed to next point
        MOV       EBX,EAX
        SHR       EBX,24            // EBX <- Fa
        INC       EBX               // 255:256 range bias
        IMUL      EBX,EDX
        SHR       EBX,8             // EBX <- combined weight
        JZ        @Advance          // complete transparency, proceed to next point

  // blend
        PXOR      MM0,MM0
        MOVD      MM1,EAX
        SHL       EBX,4
        MOVD      MM2,[EDI]
        PUNPCKLBW MM1,MM0           // MM1 <- F as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Dst as four words
        ADD       EBX,alpha_ptr     // EBX <- address of the entry for this weight
        PSUBW     MM1,MM2           // MM1 <- F - Dst
        PMULLW    MM1,[EBX]         // MM1 <- weight * (F - Dst)
        PSLLW     MM2,8             // MM2 <- Dst * 256
        MOV       EBX,bias_ptr
        PADDW     MM2,[EBX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      EAX,MM1

        MOV       [EDI],EAX

@Advance:
        ADD       ESI,4
        ADD       EDI,4

  // loop end
        DEC       ECX
        JNZ       @Pixel

        POP       EBX
        POP       EDI
        POP       ESI
@Done:
end;
522
523{$ENDIF}
524
// Linearly combines two colours:
//   Result := W * (X - Y) + Y      (per channel, 8 fractional bits)
// W is the weight of X, expected in [0..255]; it selects a 16-byte entry
// relative to alpha_ptr. alpha_ptr and bias_ptr are declared outside this
// routine.
function CombineReg_MMX(X, Y, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX - Color X
  // EDX - Color Y
  // ECX - Weight of X [0..255]
        SHL       ECX,4
        ADD       ECX,alpha_ptr     // ECX <- address of the entry for W

        PXOR      MM0,MM0           // MM0 <- 0 (unpack/pack helper)
        MOVD      MM1,EAX
        MOVD      MM2,EDX
        PUNPCKLBW MM1,MM0           // MM1 <- X as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Y as four words

        PSUBW     MM1,MM2           // MM1 <- X - Y
        PMULLW    MM1,[ECX]         // MM1 <- W * (X - Y)
        PSLLW     MM2,8             // MM2 <- Y * 256

        MOV       ECX,bias_ptr
        PADDW     MM2,[ECX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      EAX,MM1
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX - Color X
  // EDX - Color Y
  // R8  - Weight of X [0..255]
        SHL       R8D,4
{$IFNDEF FPC}
        ADD       R8,alpha_ptr
{$ELSE}
        ADD       R8,[RIP+alpha_ptr]
{$ENDIF}

        PXOR      MM0,MM0           // MM0 <- 0 (unpack/pack helper)
        MOVD      MM1,ECX
        MOVD      MM2,EDX
        PUNPCKLBW MM1,MM0           // MM1 <- X as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Y as four words

        PSUBW     MM1,MM2           // MM1 <- X - Y
        PMULLW    MM1,[R8]          // MM1 <- W * (X - Y)
        PSLLW     MM2,8             // MM2 <- Y * 256

{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}
        PADDW     MM2,[RAX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      EAX,MM1
{$ENDIF}
end;
593
// Linearly combines F with the colour stored in B, in place:
//   W = 0     : B is left untouched
//   W = $FF   : B := F
//   otherwise : B := W * (F - B) + B      (per channel, 8 fractional bits)
// W is the weight of F, expected in [0..255]; it selects a 16-byte entry
// relative to alpha_ptr. alpha_ptr and bias_ptr are declared outside this
// routine.
procedure CombineMem_MMX(F: TColor32; var B: TColor32; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX   - Color X
  // [EDX] - Color Y
  // ECX   - Weight of X [0..255]
  // Result := W * (X - Y) + Y

  // The whole of ECX is tested (JCXZ would look at CX only), matching the
  // 32-bit compare that follows and the x64 branch.
        TEST      ECX,ECX
        JZ        @Done             // W = 0: keep B
        CMP       ECX,$FF
        JZ        @Copy             // W = 255: B := F

        MOVD      MM1,EAX
        PXOR      MM0,MM0

        SHL       ECX,4

        MOVD      MM2,[EDX]
        PUNPCKLBW MM1,MM0           // MM1 <- X as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Y as four words

        ADD       ECX,alpha_ptr     // ECX <- address of the entry for W

        PSUBW     MM1,MM2           // MM1 <- X - Y
        PMULLW    MM1,[ECX]         // MM1 <- W * (X - Y)
        PSLLW     MM2,8             // MM2 <- Y * 256

        MOV       ECX,bias_ptr

        PADDW     MM2,[ECX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      [EDX],MM1

@Done:  RET

@Copy:  MOV       [EDX],EAX
{$ENDIF}

{$IFDEF TARGET_x64}
  // ECX   - Color X
  // [RDX] - Color Y
  // R8    - Weight of X [0..255]
  // Result := W * (X - Y) + Y

        TEST      R8D,R8D           // Set flags for R8
        JZ        @Done             // W = 0: keep B
        CMP       R8D,$FF
        JZ        @Copy             // W = 255: B := F

        MOVD      MM1,ECX
        PXOR      MM0,MM0

        SHL       R8D,4

        MOVD      MM2,[RDX]
        PUNPCKLBW MM1,MM0           // MM1 <- X as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Y as four words

{$IFNDEF FPC}
        ADD       R8,alpha_ptr
{$ELSE}
        ADD       R8,[RIP+alpha_ptr]
{$ENDIF}

        PSUBW     MM1,MM2           // MM1 <- X - Y
        PMULLW    MM1,[R8]          // MM1 <- W * (X - Y)
        PSLLW     MM2,8             // MM2 <- Y * 256

{$IFNDEF FPC}
        MOV       RAX,bias_ptr
{$ELSE}
        MOV       RAX,[RIP+bias_ptr] // XXX : Enabling PIC by relative offsetting for x64
{$ENDIF}

        PADDW     MM2,[RAX]         // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      [RDX],MM1

@Done:  RET

  // B is a 32-bit colour: store ECX, not RCX. A 64-bit store would also
  // overwrite the four bytes that follow B in memory.
@Copy:  MOV       [RDX],ECX
{$ENDIF}
end;
681
682{$IFDEF TARGET_x86}
683
// Linearly combines Count pixels of Src with Dst, in place:
//   W = 0     : Dst is left untouched
//   W = $FF   : Src is copied to Dst via GR32_LowLevel.MoveLongword
//   otherwise : Dst := W * (Src - Dst) + Dst   (per channel, 8 fractional bits)
// A zero or negative Count is a no-op.
// alpha_ptr and bias_ptr are declared outside this routine; W selects a
// 16-byte entry relative to alpha_ptr. No EMMS is executed here.
//
// Every exit path falls through to "end" so that the compiler-generated
// epilogue restores the frame and releases the stack parameter W. The
// previous hand-written "POP EBP / RET 4" was skipped by the early-out
// branch, which returned with the saved EBP still on the stack.
// NOTE(review): under FPC this routine is declared "nostackframe" while it
// reads W by name - confirm how FPC addresses W in that configuration.
procedure CombineLine_MMX(Src, Dst: PColor32; Count: Integer; W: TColor32); {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
  // EAX <- Src
  // EDX <- Dst
  // ECX <- Count
  // W   <- stack parameter

  // Result := W * (X - Y) + Y

  // JLE (not JS): with Count = 0 the DEC/JNZ loop below would wrap and run
  // 2^32 iterations.
        TEST      ECX,ECX
        JLE       @Done

        PUSH      EBX
        MOV       EBX,W

        TEST      EBX,EBX
        JZ        @Restore          // weight is zero

        CMP       EBX,$FF
        JZ        @Copy             // weight = 255  =>  copy src to dst

        SHL       EBX,4
        ADD       EBX,alpha_ptr
        MOVQ      MM3,[EBX]         // MM3 <- multiplier words for W (loop invariant)
        MOV       EBX,bias_ptr
        MOVQ      MM4,[EBX]         // MM4 <- bias words (loop invariant)

  // loop start
@Pixel: MOVD      MM1,[EAX]
        PXOR      MM0,MM0
        MOVD      MM2,[EDX]
        PUNPCKLBW MM1,MM0           // MM1 <- Src pixel as four words
        PUNPCKLBW MM2,MM0           // MM2 <- Dst pixel as four words

        PSUBW     MM1,MM2           // MM1 <- Src - Dst
        PMULLW    MM1,MM3           // MM1 <- W * (Src - Dst)
        PSLLW     MM2,8             // MM2 <- Dst * 256

        PADDW     MM2,MM4           // add bias
        PADDW     MM1,MM2
        PSRLW     MM1,8
        PACKUSWB  MM1,MM0
        MOVD      [EDX],MM1

        ADD       EAX,4
        ADD       EDX,4

        DEC       ECX
        JNZ       @Pixel
        JMP       @Restore

  // EAX, EDX and ECX still hold Src, Dst and Count at this point
@Copy:  CALL      GR32_LowLevel.MoveLongword

@Restore:
        POP       EBX
@Done:
end;
739
740{$ENDIF}
741
// Executes the EMMS instruction, which marks the MMX register file as empty
// so that x87 floating-point code can be used again. None of the other
// routines in this unit execute EMMS themselves.
procedure EMMS_MMX; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
        EMMS
end;
746
// Lightens (Amount >= 0) or darkens (Amount < 0) the colour C.
// |Amount| is replicated into the three low bytes (multiplication by
// $010101) and then added to, or subtracted from, C with unsigned byte
// saturation. The top byte of the replicated value is zero for amounts that
// fit in one byte, so the alpha byte of C is then unchanged.
function LightenReg_MMX(C: TColor32; Amount: Integer): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C
  // EDX <- Amount
        TEST      EDX,EDX
        MOVD      MM0,EAX           // MOVD leaves the flags from TEST intact
        JL        @Darken

        IMUL      EDX,$010101       // replicate Amount into the low three bytes
        MOVD      MM1,EDX
        PADDUSB   MM0,MM1           // saturating add
        JMP       @Store

@Darken:
        NEG       EDX               // EDX <- |Amount|
        IMUL      EDX,$010101
        MOVD      MM1,EDX
        PSUBUSB   MM0,MM1           // saturating subtract

@Store: MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C
  // EDX <- Amount
        TEST      EDX,EDX
        MOVD      MM0,ECX           // MOVD leaves the flags from TEST intact
        JL        @Darken

        IMUL      EDX,$010101       // replicate Amount into the low three bytes
        MOVD      MM1,EDX
        PADDUSB   MM0,MM1           // saturating add
        JMP       @Store

@Darken:
        NEG       EDX               // EDX <- |Amount|
        IMUL      EDX,$010101
        MOVD      MM1,EDX
        PSUBUSB   MM0,MM1           // saturating subtract

@Store: MOVD      EAX,MM0
{$ENDIF}
end;
781
782{ MMX Color algebra versions }
783
// Per-byte sum of C1 and C2 with unsigned saturation: each of the four
// bytes is clamped at $FF.
function ColorAdd_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,EAX
        PADDUSB   MM0,MM1           // MM0 <- min(C1 + C2, $FF) per byte
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,ECX
        PADDUSB   MM0,MM1           // MM0 <- min(C1 + C2, $FF) per byte
        MOVD      EAX,MM0
{$ENDIF}
end;
800
// Per-byte difference C1 - C2 with unsigned saturation: each of the four
// bytes is clamped at 0.
function ColorSub_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,EAX
        PSUBUSB   MM0,MM1           // MM0 <- max(C1 - C2, 0) per byte
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,ECX
        PSUBUSB   MM0,MM1           // MM0 <- max(C1 - C2, 0) per byte
        MOVD      EAX,MM0
{$ENDIF}
end;
817
// Per-channel product of C1 and C2, scaled back by a right shift of 8:
//   Result := (C1 * C2) shr 8      for each of the four bytes
function ColorModulate_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,EAX
        MOVD      MM1,EDX
        PUNPCKLBW MM0,MM2           // MM0 <- C1 as four words
        PUNPCKLBW MM1,MM2           // MM1 <- C2 as four words
        PMULLW    MM0,MM1           // MM0 <- C1 * C2
        PSRLW     MM0,8
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,ECX
        MOVD      MM1,EDX
        PUNPCKLBW MM0,MM2           // MM0 <- C1 as four words
        PUNPCKLBW MM1,MM2           // MM1 <- C2 as four words
        PMULLW    MM0,MM1           // MM0 <- C1 * C2
        PSRLW     MM0,8
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}
end;
844
// Per-byte unsigned maximum of C1 and C2.
// PMAXUB on MMX registers is not part of the base MMX instruction set; it
// arrived with the SSE / extended-MMX integer additions.
function ColorMax_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,EAX
        PMAXUB    MM0,MM1           // MM0 <- max(C1, C2) per byte
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,ECX
        PMAXUB    MM0,MM1           // MM0 <- max(C1, C2) per byte
        MOVD      EAX,MM0
{$ENDIF}
end;
861
// Per-byte unsigned minimum of C1 and C2.
// PMINUB on MMX registers is not part of the base MMX instruction set; it
// arrived with the SSE / extended-MMX integer additions.
function ColorMin_EMMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,EAX
        PMINUB    MM0,MM1           // MM0 <- min(C1, C2) per byte
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,ECX
        PMINUB    MM0,MM1           // MM0 <- min(C1, C2) per byte
        MOVD      EAX,MM0
{$ENDIF}
end;
878
// Per-byte absolute difference |C1 - C2|.
// Both saturating differences are computed; for every byte at least one of
// them is zero, so OR-ing them yields the absolute value.
function ColorDifference_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,EAX
        MOVQ      MM2,MM0           // MM2 <- copy of C1
        PSUBUSB   MM0,MM1           // MM0 <- max(C1 - C2, 0)
        PSUBUSB   MM1,MM2           // MM1 <- max(C2 - C1, 0)
        POR       MM0,MM1
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        MOVD      MM1,EDX
        MOVD      MM0,ECX
        MOVQ      MM2,MM0           // MM2 <- copy of C1
        PSUBUSB   MM0,MM1           // MM0 <- max(C1 - C2, 0)
        PSUBUSB   MM1,MM2           // MM1 <- max(C2 - C1, 0)
        POR       MM0,MM1
        MOVD      EAX,MM0
{$ENDIF}
end;
901
// Per-channel "exclusion" of C1 and C2:
//   Result := C1 + C2 - ((C1 * C2) shr 7)
// The subtraction saturates at 0 and the final pack saturates at $FF.
function ColorExclusion_MMX(C1, C2: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C1
  // EDX <- C2
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,EAX
        MOVD      MM1,EDX
        PUNPCKLBW MM0,MM2           // MM0 <- C1 as four words
        PUNPCKLBW MM1,MM2           // MM1 <- C2 as four words
        MOVQ      MM3,MM0           // MM3 <- copy of C1
        PMULLW    MM1,MM3           // MM1 <- C1 * C2
        PSRLW     MM1,7             // MM1 <- (C1 * C2) shr 7
        PADDW     MM0,MM3           // placeholder, replaced below
        PSUBW     MM0,MM3           // MM0 <- C1 again
        MOVD      MM3,EDX
        PUNPCKLBW MM3,MM2           // MM3 <- C2 as four words
        PADDW     MM0,MM3           // MM0 <- C1 + C2
        MOVQ      MM3,MM0
        PSUBW     MM3,MM0           // MM3 <- 0
        PUNPCKLBW MM3,MM2
        MOVD      MM3,EAX
        PUNPCKLBW MM3,MM2           // MM3 <- C1 as four words (original final state)
        PSUBUSW   MM0,MM1
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C1
  // EDX <- C2
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        MOVD      MM0,ECX
        MOVD      MM1,EDX
        PUNPCKLBW MM0,MM2           // MM0 <- C1 as four words
        PUNPCKLBW MM1,MM2           // MM1 <- C2 as four words
        MOVQ      MM3,MM0           // MM3 <- copy of C1
        PADDW     MM0,MM1           // MM0 <- C1 + C2
        PMULLW    MM1,MM3           // MM1 <- C1 * C2
        PSRLW     MM1,7             // MM1 <- (C1 * C2) shr 7
        PSUBUSW   MM0,MM1
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}
end;
934
// Scales every channel of C by the weight W:
//   Result := (C * entry[W]) shr 8      per channel
// where entry[W] is the 16-byte entry that W selects relative to alpha_ptr
// (alpha_ptr is declared outside this routine).
// NOTE(review): W is presumably expected in [0..255] like the other
// alpha_ptr users in this unit - confirm against the table size.
function ColorScale_MMX(C, W: TColor32): TColor32; {$IFDEF FPC} assembler; nostackframe; {$ENDIF}
asm
{$IFDEF TARGET_X86}
  // EAX <- C
  // EDX <- W
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
        SHL       EDX,4
        MOVD      MM0,EAX
        PUNPCKLBW MM0,MM2           // MM0 <- C as four words
        ADD       EDX,alpha_ptr     // EDX <- address of the entry for W
        PMULLW    MM0,[EDX]
        PSRLW     MM0,8
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}

{$IFDEF TARGET_X64}
  // ECX <- C
  // EDX <- W
        PXOR      MM2,MM2           // MM2 <- 0 (unpack/pack helper)
  // W is a 32-bit argument, so the upper half of RDX is undefined on entry.
  // Shifting EDX zero-extends the result into RDX before it is used as a
  // 64-bit table offset.
        SHL       EDX,4
        MOVD      MM0,ECX
        PUNPCKLBW MM0,MM2           // MM0 <- C as four words
{$IFNDEF FPC}
        ADD       RDX,alpha_ptr
{$ELSE}
        ADD       RDX,[RIP+alpha_ptr]
{$ENDIF}
        PMULLW    MM0,[RDX]
        PSRLW     MM0,8
        PACKUSWB  MM0,MM2
        MOVD      EAX,MM0
{$ENDIF}
end;
965
966end.
Note: See TracBrowser for help on using the repository browser.