source: trunk/Packages/Graphics32/GR32_Filters.pas

Last change on this file was 2, checked in by chronos, 5 years ago
File size: 43.7 KB
Line 
1unit GR32_Filters;
2
3(* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1 or LGPL 2.1 with linking exception
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * Alternatively, the contents of this file may be used under the terms of the
17 * Free Pascal modified version of the GNU Lesser General Public License
18 * Version 2.1 (the "FPC modified LGPL License"), in which case the provisions
19 * of this license are applicable instead of those above.
20 * Please see the file LICENSE.txt for additional information concerning this
21 * license.
22 *
23 * The Original Code is Graphics32
24 *
25 * The Initial Developer of the Original Code is
26 * Alex A. Denisov
27 *
28 * Portions created by the Initial Developer are Copyright (C) 2000-2009
29 * the Initial Developer. All Rights Reserved.
30 *
31 * Contributor(s):
32 * Michael Hansen <dyster_tid@hotmail.com>
33 * - 2007/02/25 - Logical Mask Operations and related types
34 * - 2007/02/27 - CopyComponents
35 * - 2007/05/10 - Logical Mask Operation functions in pascal versions
36 *
37 * ***** END LICENSE BLOCK ***** *)
38
39interface
40
41{$I GR32.inc}
42
43{$IFDEF TARGET_X64}
44{$DEFINE PUREPASCAL}
45{$ENDIF}
46
47uses
48{$IFDEF FPC}
49{$ELSE}
50 Windows,
51{$ENDIF}
52 Classes, SysUtils, GR32;
53
54{ Basic processing }
55type
56 TLUT8 = array [Byte] of Byte;
57 TLogicalOperator = (loXOR, loAND, loOR);
58
59procedure CopyComponents(Dst, Src: TCustomBitmap32; Components: TColor32Components);overload;
60procedure CopyComponents(Dst: TCustomBitmap32; DstX, DstY: Integer; Src: TCustomBitmap32;
61 SrcRect: TRect; Components: TColor32Components); overload;
62
63procedure AlphaToGrayscale(Dst, Src: TCustomBitmap32);
64procedure ColorToGrayscale(Dst, Src: TCustomBitmap32; PreserveAlpha: Boolean = False);
65procedure IntensityToAlpha(Dst, Src: TCustomBitmap32);
66
67procedure Invert(Dst, Src: TCustomBitmap32; Components : TColor32Components = [ccAlpha, ccRed, ccGreen, ccBlue]);
68procedure InvertRGB(Dst, Src: TCustomBitmap32);
69
70procedure ApplyLUT(Dst, Src: TCustomBitmap32; const LUT: TLUT8; PreserveAlpha: Boolean = False);
71procedure ChromaKey(ABitmap: TCustomBitmap32; TrColor: TColor32);
72
73function CreateBitmask(Components: TColor32Components): TColor32;
74
75procedure ApplyBitmask(Dst: TCustomBitmap32; DstX, DstY: Integer; Src: TCustomBitmap32;
76 SrcRect: TRect; Bitmask: TColor32; LogicalOperator: TLogicalOperator); overload;
77procedure ApplyBitmask(ABitmap: TCustomBitmap32; ARect: TRect; Bitmask: TColor32;
78 LogicalOperator: TLogicalOperator); overload;
79
80procedure CheckParams(Dst, Src: TCustomBitmap32; ResizeDst: Boolean = True);
81
82implementation
83
84uses
85 {$IFDEF COMPILERXE2_UP}Types, {$ENDIF} GR32_System, GR32_Bindings,
86 GR32_Lowlevel;
87
88const
89 SEmptyBitmap = 'The bitmap is nil';
90 SEmptySource = 'The source is nil';
91 SEmptyDestination = 'Destination is nil';
92
type
  { Function prototypes for the masked logical line routines }

  // In-place form: combines Count pixels starting at Dst with Mask
  TLogicalMaskLine = procedure(Dst: PColor32; Mask: TColor32; Count: Integer);

  // "Src to Dst" form: combines Count pixels read from Src with Mask and
  // stores the result at Dst
  TLogicalMaskLineEx = procedure(Src, Dst: PColor32; Count: Integer; Mask: TColor32);

{$HINTS OFF}
var
  { Masked logical operation functions (in-place).
    These are procedure variables; nothing in this part of the unit assigns
    them, so they are presumably bound elsewhere. }
  LogicalMaskLineXor: TLogicalMaskLine;
  LogicalMaskLineOr: TLogicalMaskLine;
  LogicalMaskLineAnd: TLogicalMaskLine;

  { Masked logical operation functions (Src -> Dst) }
  LogicalMaskLineXorEx: TLogicalMaskLineEx;
  LogicalMaskLineOrEx: TLogicalMaskLineEx;
  LogicalMaskLineAndEx: TLogicalMaskLineEx;
{$HINTS ON}

{ Lookup tables giving access to the procedure variable that corresponds to a
  logical operation mode. Each entry is a pointer to the variable (not to the
  routine), so callers dereference the entry to obtain the routine currently
  stored in it. Entry order follows TLogicalOperator: loXOR, loAND, loOR. }
const
  LOGICAL_MASK_LINE: array [TLogicalOperator] of ^TLogicalMaskLine = (
    (@@LogicalMaskLineXor),   // loXOR
    (@@LogicalMaskLineAnd),   // loAND
    (@@LogicalMaskLineOr)     // loOR
  );

  LOGICAL_MASK_LINE_EX: array [TLogicalOperator] of ^TLogicalMaskLineEx = (
    (@@LogicalMaskLineXorEx), // loXOR
    (@@LogicalMaskLineAndEx), // loAND
    (@@LogicalMaskLineOrEx)   // loOR
  );
123
{ Validates the bitmap pair handed to a filter routine.

  Raises Exception(SEmptySource) when Src is nil, then
  Exception(SEmptyDestination) when Dst is nil. When ResizeDst is True the
  destination is resized to the width and height of the source. }
procedure CheckParams(Dst, Src: TCustomBitmap32; ResizeDst: Boolean = True);
begin
  if Src = nil then
    raise Exception.Create(SEmptySource);

  if Dst = nil then
    raise Exception.Create(SEmptyDestination);

  if not ResizeDst then
    Exit;

  Dst.SetSize(Src.Width, Src.Height);
end;
134
{ Copies the selected colour components of the whole of Src into Dst.
  Does nothing when Components is empty; otherwise Dst is resized to match
  Src (via CheckParams) before the rectangle overload does the work. }
procedure CopyComponents(Dst, Src: TCustomBitmap32; Components: TColor32Components);
begin
  if Components <> [] then
  begin
    CheckParams(Dst, Src);
    CopyComponents(Dst, 0, 0, Src, Src.BoundsRect, Components);
  end;
end;
141
{ Copies the selected colour components of SrcRect (in Src) to Dst at
  (DstX, DstY); the components that are not selected keep their destination
  value.

  - Does nothing when Components is empty.
  - Raises Exception when Src or Dst is nil (see CheckParams); Dst is not
    resized.
  - SrcRect is clipped to the source bounds. DstX/DstY are clamped to
    [0..Dst.Width] / [0..Dst.Height]; the resulting destination rectangle is
    clipped to the destination bounds and clip rectangle.
  - When that clipping moves the destination's left/top edge, the source origin
    is moved by the same amount so that source and destination pixels stay
    aligned. (The clamp of a negative DstX/DstY does not shift the source.)
  - In measuring mode no pixels are touched, but Changed(DstRect) is still
    reported.

  All bitmap members are qualified explicitly: with nested "with Dst" /
  "with DstRect" blocks, a bare "Width" binds to the rectangle on compilers
  whose TRect has a Width member, which yields a wrong row stride. }
procedure CopyComponents(Dst: TCustomBitmap32; DstX, DstY: Integer; Src: TCustomBitmap32;
  SrcRect: TRect; Components: TColor32Components);
var
  I, J, Count, RowCount, ComponentCount, XOffset: Integer;
  SrcX, SrcY: Integer;
  Mask, KeepMask: TColor32;
  SrcRow, DstRow: PColor32Array;
  PBDst, PBSrc: PByteArray;
  DstRect: TRect;
begin
  if Components = [] then Exit;
  CheckParams(Dst, Src, False);

  // Build the 32-bit mask of the selected components. XOffset ends up as the
  // byte offset used by the single-component path below.
  ComponentCount := 0;
  XOffset := 0;
  Mask := 0;
  if ccAlpha in Components then
  begin
    Inc(ComponentCount);
    Mask := Mask or $FF000000;
    XOffset := 3;
  end;
  if ccRed in Components then
  begin
    Inc(ComponentCount);
    Mask := Mask or $00FF0000;
    XOffset := 2;
  end;
  if ccGreen in Components then
  begin
    Inc(ComponentCount);
    Mask := Mask or $0000FF00;
    XOffset := 1;
  end;
  if ccBlue in Components then
  begin
    Inc(ComponentCount);
    Mask := Mask or $000000FF;
  end;
  KeepMask := not Mask;

  GR32.IntersectRect(SrcRect, SrcRect, Src.BoundsRect);
  if (SrcRect.Right < SrcRect.Left) or (SrcRect.Bottom < SrcRect.Top) then Exit;

  DstX := Clamp(DstX, 0, Dst.Width);
  DstY := Clamp(DstY, 0, Dst.Height);

  DstRect.TopLeft := GR32.Point(DstX, DstY);
  DstRect.Right := DstX + SrcRect.Right - SrcRect.Left;
  DstRect.Bottom := DstY + SrcRect.Bottom - SrcRect.Top;

  GR32.IntersectRect(DstRect, DstRect, Dst.BoundsRect);
  GR32.IntersectRect(DstRect, DstRect, Dst.ClipRect);
  if (DstRect.Right < DstRect.Left) or (DstRect.Bottom < DstRect.Top) then Exit;

  if not Dst.MeasuringMode then
  begin
    Dst.BeginUpdate;
    try
      RowCount := DstRect.Bottom - DstRect.Top;
      Count := DstRect.Right - DstRect.Left;
      if (RowCount > 0) and (Count > 0) then
      begin
        // Shift the source origin by however far clipping moved the
        // destination origin away from (DstX, DstY).
        SrcX := SrcRect.Left + (DstRect.Left - DstX);
        SrcY := SrcRect.Top + (DstRect.Top - DstY);

        SrcRow := Pointer(Src.PixelPtr[SrcX, SrcY]);
        DstRow := Pointer(Dst.PixelPtr[DstRect.Left, DstRect.Top]);

        if Count > 16 then
        begin
          case ComponentCount of
            1: // Byte pointer approach
              begin
                PBSrc := Pointer(SrcRow);
                Inc(PBSrc, XOffset); // shift the pointer to the given component of the first pixel
                PBDst := Pointer(DstRow);
                Inc(PBDst, XOffset);

                // Work in bytes from here on and index backwards from a point
                // 64 bytes (16 pixels) before the end of the row.
                Count := Count * 4 - 64;
                Inc(PBSrc, Count);
                Inc(PBDst, Count);

                for I := 0 to RowCount - 1 do
                begin
                  // 16x unrolled loop
                  J := -Count;
                  repeat
                    PBDst[J] := PBSrc[J];
                    PBDst[J + 4] := PBSrc[J + 4];
                    PBDst[J + 8] := PBSrc[J + 8];
                    PBDst[J + 12] := PBSrc[J + 12];
                    PBDst[J + 16] := PBSrc[J + 16];
                    PBDst[J + 20] := PBSrc[J + 20];
                    PBDst[J + 24] := PBSrc[J + 24];
                    PBDst[J + 28] := PBSrc[J + 28];
                    PBDst[J + 32] := PBSrc[J + 32];
                    PBDst[J + 36] := PBSrc[J + 36];
                    PBDst[J + 40] := PBSrc[J + 40];
                    PBDst[J + 44] := PBSrc[J + 44];
                    PBDst[J + 48] := PBSrc[J + 48];
                    PBDst[J + 52] := PBSrc[J + 52];
                    PBDst[J + 56] := PBSrc[J + 56];
                    PBDst[J + 60] := PBSrc[J + 60];
                    Inc(J, 64);
                  until J > 0;

                  // The rest
                  Dec(J, 64);
                  while J < 0 do
                  begin
                    PBDst[J + 64] := PBSrc[J + 64];
                    Inc(J, 4);
                  end;

                  Inc(PBSrc, Src.Width * 4);
                  Inc(PBDst, Dst.Width * 4);
                end;
              end;

            2, 3: // Masked approach
              begin
                // Index backwards from a point 8 pixels before the end of the row
                Count := Count - 8;
                Inc(DstRow, Count);
                Inc(SrcRow, Count);

                for I := 0 to RowCount - 1 do
                begin
                  // 8x unrolled loop
                  J := -Count;
                  repeat
                    // First clear the selected components in the destination ...
                    DstRow[J] := DstRow[J] and KeepMask;
                    DstRow[J + 1] := DstRow[J + 1] and KeepMask;
                    DstRow[J + 2] := DstRow[J + 2] and KeepMask;
                    DstRow[J + 3] := DstRow[J + 3] and KeepMask;
                    DstRow[J + 4] := DstRow[J + 4] and KeepMask;
                    DstRow[J + 5] := DstRow[J + 5] and KeepMask;
                    DstRow[J + 6] := DstRow[J + 6] and KeepMask;
                    DstRow[J + 7] := DstRow[J + 7] and KeepMask;

                    // ... then merge in the selected components of the source
                    DstRow[J] := DstRow[J] or (SrcRow[J] and Mask);
                    DstRow[J + 1] := DstRow[J + 1] or (SrcRow[J + 1] and Mask);
                    DstRow[J + 2] := DstRow[J + 2] or (SrcRow[J + 2] and Mask);
                    DstRow[J + 3] := DstRow[J + 3] or (SrcRow[J + 3] and Mask);
                    DstRow[J + 4] := DstRow[J + 4] or (SrcRow[J + 4] and Mask);
                    DstRow[J + 5] := DstRow[J + 5] or (SrcRow[J + 5] and Mask);
                    DstRow[J + 6] := DstRow[J + 6] or (SrcRow[J + 6] and Mask);
                    DstRow[J + 7] := DstRow[J + 7] or (SrcRow[J + 7] and Mask);

                    Inc(J, 8);
                  until J > 0;

                  // The rest
                  Dec(J, 8);
                  while J < 0 do
                  begin
                    DstRow[J + 8] := (DstRow[J + 8] and KeepMask) or (SrcRow[J + 8] and Mask);
                    Inc(J);
                  end;

                  Inc(SrcRow, Src.Width);
                  Inc(DstRow, Dst.Width);
                end;
              end;

            4: // Full copy approach, use MoveLongWord
              for I := 0 to RowCount - 1 do
              begin
                MoveLongWord(SrcRow^, DstRow^, Count);
                Inc(SrcRow, Src.Width);
                Inc(DstRow, Dst.Width);
              end;
          end;
        end
        else
        begin
          // Narrow rows: plain masked copy, pixel by pixel
          for I := 0 to RowCount - 1 do
          begin
            for J := 0 to Count - 1 do
              DstRow[J] := (DstRow[J] and KeepMask) or (SrcRow[J] and Mask);
            Inc(SrcRow, Src.Width);
            Inc(DstRow, Dst.Width);
          end;
        end;
      end;
    finally
      Dst.EndUpdate;
    end;
  end;

  Dst.Changed(DstRect);
end;
325
{ Writes the alpha value of every source pixel into the red, green and blue
  channels of the corresponding destination pixel. The destination alpha
  channel is not written. Dst is resized to match Src (via CheckParams), and
  Dst.Changed is called at the end. }
procedure AlphaToGrayscale(Dst, Src: TCustomBitmap32);
var
  Index, PixelCount: Integer;
  SrcPixels, DstPixels: PColor32EntryArray;
  Level: Byte;
begin
  CheckParams(Dst, Src);

  SrcPixels := PColor32EntryArray(@Src.Bits[0]);
  DstPixels := PColor32EntryArray(@Dst.Bits[0]);
  PixelCount := Src.Height * Src.Width;

  for Index := 0 to PixelCount - 1 do
  begin
    Level := SrcPixels[Index].A;
    DstPixels[Index].R := Level;
    DstPixels[Index].G := Level;
    DstPixels[Index].B := Level;
  end;

  Dst.Changed;
end;
347
{ Stores a weighted intensity of every source pixel in the alpha channel of the
  corresponding destination pixel: (R * 61 + G * 174 + B * 21) shr 8.
  The destination colour channels are not written. Dst is resized to match Src
  (via CheckParams), and Dst.Changed is called at the end. }
procedure IntensityToAlpha(Dst, Src: TCustomBitmap32);
var
  Index, PixelCount: Integer;
  SrcPixels, DstPixels: PColor32EntryArray;
begin
  CheckParams(Dst, Src);

  SrcPixels := PColor32EntryArray(@Src.Bits[0]);
  DstPixels := PColor32EntryArray(@Dst.Bits[0]);
  PixelCount := Src.Width * Src.Height;

  for Index := 0 to PixelCount - 1 do
    DstPixels[Index].A :=
      (SrcPixels[Index].R * 61 + SrcPixels[Index].G * 174 + SrcPixels[Index].B * 21) shr 8;

  Dst.Changed;
end;
360
{ Inverts the selected components of Src into Dst by XOR-ing every pixel with
  the bitmask built from Components. Does nothing when Components is empty.
  When Dst and Src are the same bitmap the operation runs in place; otherwise
  Dst is first resized to match Src (via CheckParams). }
procedure Invert(Dst, Src: TCustomBitmap32; Components : TColor32Components = [ccAlpha, ccRed, ccGreen, ccBlue]);
var
  InvertBits: TColor32;
begin
  if Components <> [] then
  begin
    InvertBits := CreateBitmask(Components);
    if Dst <> Src then
    begin
      // Src -> Dst
      CheckParams(Dst, Src);
      ApplyBitmask(Dst, 0, 0, Src, Src.BoundsRect, InvertBits, loXOR);
    end
    else
    begin
      // In place: only validate, never resize
      CheckParams(Dst, Src, False);
      ApplyBitmask(Src, Src.BoundsRect, InvertBits, loXOR);
    end;
  end;
end;
380
{ Inverts the red, green and blue components of Src into Dst; the alpha
  component is copied unchanged (it is XOR-ed with zero).

  The arguments are forwarded in (Dst, Src) order, matching the signature of
  Invert. Passing them swapped would resize and overwrite the source bitmap
  with the inverted destination. }
procedure InvertRGB(Dst, Src: TCustomBitmap32);
begin
  Invert(Dst, Src, [ccRed, ccGreen, ccBlue]);
end;
385
{ Converts every source pixel to a gray value (Gray32 of Intensity) and stores
  it in Dst. With PreserveAlpha the alpha of the source pixel is passed to
  Gray32; otherwise Gray32 is called without an alpha argument. Dst is resized
  to match Src (via CheckParams), and Dst.Changed is called at the end. }
procedure ColorToGrayscale(Dst, Src: TCustomBitmap32; PreserveAlpha: Boolean = False);
var
  Remaining: Integer;
  SrcPixel, DstPixel: PColor32;
begin
  CheckParams(Dst, Src);

  DstPixel := @Dst.Bits[0];
  SrcPixel := @Src.Bits[0];
  Remaining := Src.Width * Src.Height;

  if PreserveAlpha then
  begin
    while Remaining > 0 do
    begin
      DstPixel^ := Gray32(Intensity(SrcPixel^), AlphaComponent(SrcPixel^));
      Inc(SrcPixel);
      Inc(DstPixel);
      Dec(Remaining);
    end;
  end
  else
  begin
    while Remaining > 0 do
    begin
      DstPixel^ := Gray32(Intensity(SrcPixel^));
      Inc(SrcPixel);
      Inc(DstPixel);
      Dec(Remaining);
    end;
  end;

  Dst.Changed;
end;
410
{ Maps the red, green and blue channels of every source pixel through LUT and
  stores the result in Dst.

  PreserveAlpha = True  : the alpha byte already present in the DESTINATION
                          pixel is kept (the source alpha is not read).
  PreserveAlpha = False : the alpha byte is set to $FF.

  Dst is resized to match Src (via CheckParams), and Dst.Changed is called at
  the end. }
procedure ApplyLUT(Dst, Src: TCustomBitmap32; const LUT: TLUT8; PreserveAlpha: Boolean = False);
var
  Remaining: Integer;
  SrcPixel, DstPixel: PColor32Entry;
begin
  CheckParams(Dst, Src);

  DstPixel := @Dst.Bits[0];
  SrcPixel := @Src.Bits[0];
  Remaining := Src.Width * Src.Height;

  if PreserveAlpha then
  begin
    while Remaining > 0 do
    begin
      DstPixel^.ARGB := (DstPixel^.ARGB and $FF000000) + LUT[SrcPixel^.B] +
        (LUT[SrcPixel^.G] shl 8) + (LUT[SrcPixel^.R] shl 16);
      Inc(SrcPixel);
      Inc(DstPixel);
      Dec(Remaining);
    end;
  end
  else
  begin
    while Remaining > 0 do
    begin
      DstPixel^.ARGB := $FF000000 + LUT[SrcPixel^.B] +
        (LUT[SrcPixel^.G] shl 8) + (LUT[SrcPixel^.R] shl 16);
      Inc(SrcPixel);
      Inc(DstPixel);
      Dec(Remaining);
    end;
  end;

  Dst.Changed;
end;
437
{ Clears the alpha byte of every pixel whose red, green and blue bytes equal
  those of TrColor (the alpha byte of TrColor is ignored). All other pixels are
  left untouched. Calls ABitmap.Changed at the end. }
procedure ChromaKey(ABitmap: TCustomBitmap32; TrColor: TColor32);
const
  RGB_BITS = $00FFFFFF;
var
  Pixel: PColor32;
  Remaining: Integer;
begin
  TrColor := TrColor and RGB_BITS;

  Pixel := ABitmap.PixelPtr[0, 0];
  for Remaining := ABitmap.Width * ABitmap.Height downto 1 do
  begin
    // A match is rewritten as the bare RGB value, i.e. with alpha = 0
    if (Pixel^ and RGB_BITS) = TrColor then
      Pixel^ := TrColor;
    Inc(Pixel);
  end;

  ABitmap.Changed;
end;
458
{ Returns a 32-bit mask with $FF in the byte of every component contained in
  Components: alpha = $FF000000, red = $00FF0000, green = $0000FF00,
  blue = $000000FF. An empty set yields 0. }
function CreateBitmask(Components: TColor32Components): TColor32;
begin
  Result := 0;

  if ccAlpha in Components then
    Result := Result or $FF000000;

  if ccRed in Components then
    Result := Result or $00FF0000;

  if ccGreen in Components then
    Result := Result or $0000FF00;

  if ccBlue in Components then
    Result := Result or $000000FF;
end;
467
{ Combines the pixels of SrcRect (in Src) with Bitmask using LogicalOperator
  and stores the result in Dst at (DstX, DstY).

  - Raises Exception when Src or Dst is nil (see CheckParams); Dst is not
    resized.
  - Does nothing when no routine is bound for LogicalOperator.
  - SrcRect is clipped to the source bounds. DstX/DstY are clamped to
    [0..Dst.Width] / [0..Dst.Height]; the resulting destination rectangle is
    clipped to the destination bounds and clip rectangle.
  - When that clipping moves the destination's left/top edge, the source origin
    is moved by the same amount so that source and destination pixels stay
    aligned. (The clamp of a negative DstX/DstY does not shift the source.)
  - In measuring mode no pixels are touched, but Changed(DstRect) is still
    reported. }
procedure ApplyBitmask(Dst: TCustomBitmap32; DstX, DstY: Integer; Src: TCustomBitmap32;
  SrcRect: TRect; Bitmask: TColor32; LogicalOperator: TLogicalOperator);
var
  I, Count, RowCount: Integer;
  SrcX, SrcY: Integer;
  DstRect: TRect;
  MaskProc: TLogicalMaskLineEx;
begin
  CheckParams(Dst, Src, False);

  MaskProc := LOGICAL_MASK_LINE_EX[LogicalOperator]^;
  if not Assigned(MaskProc) then Exit;

  GR32.IntersectRect(SrcRect, SrcRect, Src.BoundsRect);
  if (SrcRect.Right < SrcRect.Left) or (SrcRect.Bottom < SrcRect.Top) then Exit;

  DstX := Clamp(DstX, 0, Dst.Width);
  DstY := Clamp(DstY, 0, Dst.Height);

  DstRect.TopLeft := GR32.Point(DstX, DstY);
  DstRect.Right := DstX + SrcRect.Right - SrcRect.Left;
  DstRect.Bottom := DstY + SrcRect.Bottom - SrcRect.Top;

  GR32.IntersectRect(DstRect, DstRect, Dst.BoundsRect);
  GR32.IntersectRect(DstRect, DstRect, Dst.ClipRect);
  if (DstRect.Right < DstRect.Left) or (DstRect.Bottom < DstRect.Top) then
    Exit;

  if not Dst.MeasuringMode then
  begin
    Dst.BeginUpdate;
    try
      RowCount := DstRect.Bottom - DstRect.Top;
      Count := DstRect.Right - DstRect.Left;
      if (RowCount > 0) and (Count > 0) then
      begin
        // Shift the source origin by however far clipping moved the
        // destination origin away from (DstX, DstY).
        SrcX := SrcRect.Left + (DstRect.Left - DstX);
        SrcY := SrcRect.Top + (DstRect.Top - DstY);

        for I := 0 to RowCount - 1 do
          MaskProc(Src.PixelPtr[SrcX, SrcY + I],
            Dst.PixelPtr[DstRect.Left, DstRect.Top + I], Count, Bitmask);
      end;
    finally
      Dst.EndUpdate;
    end;
  end;

  Dst.Changed(DstRect);
end;
517
{ Combines the pixels of ARect in ABitmap with Bitmask, in place, using
  LogicalOperator.

  - Raises Exception(SEmptyBitmap) when ABitmap is nil.
  - Does nothing when no routine is bound for LogicalOperator.
  - ARect is clipped to the bitmap bounds and clip rectangle.
  - In measuring mode no pixels are touched, but Changed(ARect) is still
    reported.

  The bitmap width is qualified explicitly. Inside "with ABitmap" /
  "with ARect" blocks a bare "Width" binds to the rectangle on compilers whose
  TRect has a Width member; the full-width test would then always succeed and
  the single-call fast path would run over pixels outside the rectangle. }
procedure ApplyBitmask(ABitmap: TCustomBitmap32; ARect: TRect; Bitmask: TColor32;
  LogicalOperator: TLogicalOperator);
var
  I, Count, RowCount: Integer;
  MaskProc: TLogicalMaskLine;
begin
  if not Assigned(ABitmap) then
    raise Exception.Create(SEmptyBitmap);

  MaskProc := LOGICAL_MASK_LINE[LogicalOperator]^;
  if not Assigned(MaskProc) then Exit;

  GR32.IntersectRect(ARect, ARect, ABitmap.BoundsRect);
  GR32.IntersectRect(ARect, ARect, ABitmap.ClipRect);
  if (ARect.Right < ARect.Left) or (ARect.Bottom < ARect.Top) then Exit;

  if not ABitmap.MeasuringMode then
  begin
    ABitmap.BeginUpdate;
    try
      RowCount := ARect.Bottom - ARect.Top;
      Count := ARect.Right - ARect.Left;
      if (RowCount > 0) and (Count > 0) then
      begin
        if Count = ABitmap.Width then
          // Full-width rows are contiguous in memory: process them in one call
          MaskProc(ABitmap.PixelPtr[ARect.Left, ARect.Top], Bitmask, Count * RowCount)
        else
          for I := ARect.Top to ARect.Bottom - 1 do
            MaskProc(ABitmap.PixelPtr[ARect.Left, I], Bitmask, Count);
      end;
    finally
      ABitmap.EndUpdate;
    end;
  end;

  ABitmap.Changed(ARect);
end;
561
562{ In-place logical mask functions }
563{ Non - MMX versions}
564
{ In-place XOR of Count consecutive pixels starting at Dst with Mask.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure XorLine_Pas(Dst: PColor32; Mask: TColor32; Count: Integer);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Dst^ xor Mask;
    Inc(Dst);
  end;
end;
576
{ In-place OR of Count consecutive pixels starting at Dst with Mask.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure OrLine_Pas(Dst: PColor32; Mask: TColor32; Count: Integer);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Dst^ or Mask;
    Inc(Dst);
  end;
end;
588
{ In-place AND of Count consecutive pixels starting at Dst with Mask.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure AndLine_Pas(Dst: PColor32; Mask: TColor32; Count: Integer);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Dst^ and Mask;
    Inc(Dst);
  end;
end;
600
601{$IFNDEF PUREPASCAL}
602
{ In-place XOR of Count pixels at Dst with Mask, assembler version.
  Pixels are processed in blocks of 16 first, then one at a time for the
  remaining Count mod 16. Returns immediately when Count is zero.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled. It also mixes a 64-bit base register with a 32-bit
  index register in its addresses - confirm before enabling it. }
procedure XorLine_ASM(Dst: PColor32; Mask: TColor32; Count: Integer);
// No speedup achievable using MMX
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as the pixel pointer, EDX as the mask, ECX as the count
        TEST    ECX, ECX
        JZ      @Done

        PUSH    EBX
        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail
        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the 16-pixel blocks
        SHL     ECX, 2
        NEG     ECX                       // ECX := negative byte offset from that end

@Block16:
        // 16x unrolled loop
        XOR     [EAX + ECX], EDX
        XOR     [EAX + ECX + 4], EDX
        XOR     [EAX + ECX + 8], EDX
        XOR     [EAX + ECX + 12], EDX

        XOR     [EAX + ECX + 16], EDX
        XOR     [EAX + ECX + 20], EDX
        XOR     [EAX + ECX + 24], EDX
        XOR     [EAX + ECX + 28], EDX

        XOR     [EAX + ECX + 32], EDX
        XOR     [EAX + ECX + 36], EDX
        XOR     [EAX + ECX + 40], EDX
        XOR     [EAX + ECX + 44], EDX

        XOR     [EAX + ECX + 48], EDX
        XOR     [EAX + ECX + 52], EDX
        XOR     [EAX + ECX + 56], EDX
        XOR     [EAX + ECX + 60], EDX

        ADD     ECX, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Restore

        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the line
        NEG     ECX                       // ECX := negative pixel index

@TailLoop:
        XOR     [EAX + ECX * 4], EDX
        INC     ECX
        JNZ     @TailLoop

@Restore:
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as the pixel pointer, EDX as the mask, R8D as the count
        TEST    R8D, R8D
        JZ      @Done

        MOV     EAX, R8D                  // keep the full count for the tail
        SHR     R8D, 4
        SHL     R8D, 4
        JZ      @Tail
        LEA     RCX, [RCX + R8D * 4]
        SHL     R8D, 2
        NEG     R8D

@Block16:
        // 16x unrolled loop
        XOR     [RCX + R8D], EDX
        XOR     [RCX + R8D + 4], EDX
        XOR     [RCX + R8D + 8], EDX
        XOR     [RCX + R8D + 12], EDX

        XOR     [RCX + R8D + 16], EDX
        XOR     [RCX + R8D + 20], EDX
        XOR     [RCX + R8D + 24], EDX
        XOR     [RCX + R8D + 28], EDX

        XOR     [RCX + R8D + 32], EDX
        XOR     [RCX + R8D + 36], EDX
        XOR     [RCX + R8D + 40], EDX
        XOR     [RCX + R8D + 44], EDX

        XOR     [RCX + R8D + 48], EDX
        XOR     [RCX + R8D + 52], EDX
        XOR     [RCX + R8D + 56], EDX
        XOR     [RCX + R8D + 60], EDX

        ADD     R8D, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     R8D, EAX
        SHR     EAX, 4
        SHL     EAX, 4
        SUB     R8D, EAX                  // R8D := Count mod 16
        JZ      @Done

        LEA     RCX, [RCX + R8D * 4]
        NEG     R8D

@TailLoop:
        XOR     [RCX + R8D * 4], EDX
        INC     R8D
        JNZ     @TailLoop

@Done:
{$ENDIF}
end;
720
{ In-place OR of Count pixels at Dst with Mask, assembler version.
  Pixels are processed in blocks of 16 first, then one at a time for the
  remaining Count mod 16. Returns immediately when Count is zero.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled. It also mixes a 64-bit base register with a 32-bit
  index register in its addresses - confirm before enabling it. }
procedure OrLine_ASM(Dst: PColor32; Mask: TColor32; Count: Integer);
// No speedup achievable using MMX
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as the pixel pointer, EDX as the mask, ECX as the count
        TEST    ECX, ECX
        JZ      @Done

        PUSH    EBX
        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail
        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the 16-pixel blocks
        SHL     ECX, 2
        NEG     ECX                       // ECX := negative byte offset from that end

@Block16:
        // 16x unrolled loop
        OR      [EAX + ECX], EDX
        OR      [EAX + ECX + 4], EDX
        OR      [EAX + ECX + 8], EDX
        OR      [EAX + ECX + 12], EDX

        OR      [EAX + ECX + 16], EDX
        OR      [EAX + ECX + 20], EDX
        OR      [EAX + ECX + 24], EDX
        OR      [EAX + ECX + 28], EDX

        OR      [EAX + ECX + 32], EDX
        OR      [EAX + ECX + 36], EDX
        OR      [EAX + ECX + 40], EDX
        OR      [EAX + ECX + 44], EDX

        OR      [EAX + ECX + 48], EDX
        OR      [EAX + ECX + 52], EDX
        OR      [EAX + ECX + 56], EDX
        OR      [EAX + ECX + 60], EDX

        ADD     ECX, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Restore

        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the line
        NEG     ECX                       // ECX := negative pixel index

@TailLoop:
        OR      [EAX + ECX * 4], EDX
        INC     ECX
        JNZ     @TailLoop

@Restore:
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as the pixel pointer, EDX as the mask, R8D as the count
        TEST    R8D, R8D
        JZ      @Done

        MOV     EAX, R8D                  // keep the full count for the tail
        SHR     R8D, 4
        SHL     R8D, 4
        JZ      @Tail
        LEA     RCX, [RCX + R8D * 4]
        SHL     R8D, 2
        NEG     R8D

@Block16:
        // 16x unrolled loop
        OR      [RCX + R8D], EDX
        OR      [RCX + R8D + 4], EDX
        OR      [RCX + R8D + 8], EDX
        OR      [RCX + R8D + 12], EDX

        OR      [RCX + R8D + 16], EDX
        OR      [RCX + R8D + 20], EDX
        OR      [RCX + R8D + 24], EDX
        OR      [RCX + R8D + 28], EDX

        OR      [RCX + R8D + 32], EDX
        OR      [RCX + R8D + 36], EDX
        OR      [RCX + R8D + 40], EDX
        OR      [RCX + R8D + 44], EDX

        OR      [RCX + R8D + 48], EDX
        OR      [RCX + R8D + 52], EDX
        OR      [RCX + R8D + 56], EDX
        OR      [RCX + R8D + 60], EDX

        ADD     R8D, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     R8D, EAX
        SHR     EAX, 4
        SHL     EAX, 4
        SUB     R8D, EAX                  // R8D := Count mod 16
        JZ      @Done

        LEA     RCX, [RCX + R8D * 4]
        NEG     R8D

@TailLoop:
        OR      [RCX + R8D * 4], EDX
        INC     R8D
        JNZ     @TailLoop

@Done:
{$ENDIF}
end;
838
{ In-place AND of Count pixels at Dst with Mask, assembler version.
  Pixels are processed in blocks of 16 first, then one at a time for the
  remaining Count mod 16. Returns immediately when Count is zero.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled. It also mixes a 64-bit base register with a 32-bit
  index register in its addresses - confirm before enabling it. }
procedure AndLine_ASM(Dst: PColor32; Mask: TColor32; Count: Integer);
// No speedup achievable using MMX
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as the pixel pointer, EDX as the mask, ECX as the count
        TEST    ECX, ECX
        JZ      @Done

        PUSH    EBX
        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail
        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the 16-pixel blocks
        SHL     ECX, 2
        NEG     ECX                       // ECX := negative byte offset from that end

@Block16:
        // 16x unrolled loop
        AND     [EAX + ECX], EDX
        AND     [EAX + ECX + 4], EDX
        AND     [EAX + ECX + 8], EDX
        AND     [EAX + ECX + 12], EDX

        AND     [EAX + ECX + 16], EDX
        AND     [EAX + ECX + 20], EDX
        AND     [EAX + ECX + 24], EDX
        AND     [EAX + ECX + 28], EDX

        AND     [EAX + ECX + 32], EDX
        AND     [EAX + ECX + 36], EDX
        AND     [EAX + ECX + 40], EDX
        AND     [EAX + ECX + 44], EDX

        AND     [EAX + ECX + 48], EDX
        AND     [EAX + ECX + 52], EDX
        AND     [EAX + ECX + 56], EDX
        AND     [EAX + ECX + 60], EDX

        ADD     ECX, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Restore

        LEA     EAX, [EAX + ECX * 4]      // EAX := end of the line
        NEG     ECX                       // ECX := negative pixel index

@TailLoop:
        AND     [EAX + ECX * 4], EDX
        INC     ECX
        JNZ     @TailLoop

@Restore:
        POP     EBX

@Done:
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as the pixel pointer, EDX as the mask, R8D as the count
        TEST    R8D, R8D
        JZ      @Done

        MOV     EAX, R8D                  // keep the full count for the tail
        SHR     R8D, 4
        SHL     R8D, 4
        JZ      @Tail
        LEA     RCX, [RCX + R8D * 4]
        SHL     R8D, 2
        NEG     R8D

@Block16:
        // 16x unrolled loop
        AND     [RCX + R8D], EDX
        AND     [RCX + R8D + 4], EDX
        AND     [RCX + R8D + 8], EDX
        AND     [RCX + R8D + 12], EDX

        AND     [RCX + R8D + 16], EDX
        AND     [RCX + R8D + 20], EDX
        AND     [RCX + R8D + 24], EDX
        AND     [RCX + R8D + 28], EDX

        AND     [RCX + R8D + 32], EDX
        AND     [RCX + R8D + 36], EDX
        AND     [RCX + R8D + 40], EDX
        AND     [RCX + R8D + 44], EDX

        AND     [RCX + R8D + 48], EDX
        AND     [RCX + R8D + 52], EDX
        AND     [RCX + R8D + 56], EDX
        AND     [RCX + R8D + 60], EDX

        ADD     R8D, 16 * 4
        JNZ     @Block16

@Tail:
        MOV     R8D, EAX
        SHR     EAX, 4
        SHL     EAX, 4
        SUB     R8D, EAX                  // R8D := Count mod 16
        JZ      @Done

        LEA     RCX, [RCX + R8D * 4]
        NEG     R8D

@TailLoop:
        AND     [RCX + R8D * 4], EDX
        INC     R8D
        JNZ     @TailLoop

@Done:
{$ENDIF}
end;
956
957{$ENDIF}
958
959{ extended logical mask functions Src -> Dst }
960{ Non - MMX versions}
961
{ Reads Count consecutive pixels from Src, XORs each with Mask and stores the
  result at the corresponding position in Dst.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure XorLineEx_Pas(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Src^ xor Mask;
    Inc(Src);
    Inc(Dst);
  end;
end;
975
{ Reads Count consecutive pixels from Src, ORs each with Mask and stores the
  result at the corresponding position in Dst.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure OrLineEx_Pas(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Src^ or Mask;
    Inc(Src);
    Inc(Dst);
  end;
end;
989
{ Reads Count consecutive pixels from Src, ANDs each with Mask and stores the
  result at the corresponding position in Dst.
  Does nothing when Count <= 0. (A repeat..until loop counting up to zero would
  process one pixel and then keep running when Count is zero.) }
procedure AndLineEx_Pas(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  I: Integer;
begin
  for I := 1 to Count do
  begin
    Dst^ := Src^ and Mask;
    Inc(Src);
    Inc(Dst);
  end;
end;
1003
1004{$IFNDEF PUREPASCAL}
1005
{ Src -> Dst XOR of Count pixels with Mask, assembler version (one pixel per
  iteration). Both pointers are advanced to the end of the line and a negative
  index counts up to zero; nothing is written when Count is zero.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled - confirm before relying on it. }
procedure XorLineEx_ASM(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as Src, EDX as Dst and ECX as Count; Mask is
        // loaded by name into EDI
        PUSH    EBX
        PUSH    EDI

        LEA     EAX, [EAX + ECX * 4]      // end of the source line
        LEA     EDX, [EDX + ECX * 4]      // end of the destination line
        NEG     ECX                       // negative pixel index; sets ZF when Count = 0
        JZ      @Done

        MOV     EDI, Mask

@PixelLoop:
        MOV     EBX, [EAX + ECX * 4]
        XOR     EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @PixelLoop

@Done:
        POP     EDI
        POP     EBX
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as Src, RDX as Dst, R8D as Count and R9D as Mask
        LEA     RCX, [RCX + R8D * 4]
        LEA     RDX, [RDX + R8D * 4]
        NEG     R8D
        JZ      @Done

@PixelLoop:
        MOV     EAX, [RCX + R8D * 4]
        XOR     EAX, R9D
        MOV     [RDX + R8D * 4], EAX
        INC     R8D
        JNZ     @PixelLoop

@Done:
{$ENDIF}
end;
1047
{ Src -> Dst OR of Count pixels with Mask, assembler version (one pixel per
  iteration). Both pointers are advanced to the end of the line and a negative
  index counts up to zero; nothing is written when Count is zero.

  The x64 branch uses EAX as its scratch register, like the XOR and AND
  variants. EBX/RBX must be preserved by the callee under the Windows x64
  calling convention, and this branch has no save/restore for it.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled - confirm before relying on it. }
procedure OrLineEx_ASM(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as Src, EDX as Dst and ECX as Count; Mask is
        // loaded by name into EDI
        PUSH    EBX
        PUSH    EDI

        LEA     EAX, [EAX + ECX * 4]      // end of the source line
        LEA     EDX, [EDX + ECX * 4]      // end of the destination line
        NEG     ECX                       // negative pixel index; sets ZF when Count = 0
        JZ      @Exit

        MOV     EDI, Mask

@Loop:
        MOV     EBX, [EAX + ECX * 4]
        OR      EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @Loop

@Exit:
        POP     EDI
        POP     EBX
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as Src, RDX as Dst, R8D as Count and R9D as Mask
        LEA     RCX, [RCX + R8D * 4]
        LEA     RDX, [RDX + R8D * 4]
        NEG     R8D
        JZ      @Exit

@Loop:
        MOV     EAX, [RCX + R8D * 4]      // EAX is a scratch register; RBX is left untouched
        OR      EAX, R9D
        MOV     [RDX + R8D * 4], EAX
        INC     R8D
        JNZ     @Loop

@Exit:
{$ENDIF}
end;
1090
{ Src -> Dst AND of Count pixels with Mask, assembler version (one pixel per
  iteration). Both pointers are advanced to the end of the line and a negative
  index counts up to zero; nothing is written when Count is zero.

  NOTE(review): this routine sits inside IFNDEF PUREPASCAL and the unit defines
  PUREPASCAL whenever TARGET_X64 is defined, so the x64 branch below looks like
  it is never assembled - confirm before relying on it. }
procedure AndLineEx_ASM(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
asm
{$IFDEF TARGET_x86}
        // The code treats EAX as Src, EDX as Dst and ECX as Count; Mask is
        // loaded by name into EDI
        PUSH    EBX
        PUSH    EDI

        LEA     EAX, [EAX + ECX * 4]      // end of the source line
        LEA     EDX, [EDX + ECX * 4]      // end of the destination line
        NEG     ECX                       // negative pixel index; sets ZF when Count = 0
        JZ      @Done

        MOV     EDI, Mask

@PixelLoop:
        MOV     EBX, [EAX + ECX * 4]
        AND     EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @PixelLoop

@Done:
        POP     EDI
        POP     EBX
{$ENDIF}

{$IFDEF TARGET_x64}
        // The code treats RCX as Src, RDX as Dst, R8D as Count and R9D as Mask
        LEA     RCX, [RCX + R8D * 4]
        LEA     RDX, [RDX + R8D * 4]
        NEG     R8D
        JZ      @Done

@PixelLoop:
        MOV     EAX, [RCX + R8D * 4]
        AND     EAX, R9D
        MOV     [RDX + R8D * 4], EAX
        INC     R8D
        JNZ     @PixelLoop

@Done:
{$ENDIF}
end;
1133
1134{ MMX versions}
1135
1136{$IFNDEF OMIT_MMX}
{ Src -> Dst XOR of Count pixels with Mask, MMX version.
  Blocks of 16 pixels are handled with eight 64-bit MMX registers; the remaining
  Count mod 16 pixels are handled one at a time with integer instructions.
  QMask holds Mask duplicated into both halves of a 64-bit value. }
procedure XorLineEx_MMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  QMask: Int64;
asm
        // The code treats EAX as Src, EDX as Dst and ECX as Count
        PUSH    EBX
        PUSH    EDI

        TEST    ECX, ECX
        JZ      @Done

        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail

        SAR     ECX, 1                    // pixels -> quadwords (two pixels each)
        LEA     EAX, [EAX + ECX * 8]      // end of the block region in Src
        LEA     EDX, [EDX + ECX * 8]      // end of the block region in Dst
        NEG     ECX                       // negative quadword index

        MOVD    MM7, Mask
        PUNPCKLDQ MM7, MM7                // duplicate Mask into both dwords
        MOVQ    QMask, MM7
        EMMS

@Block16:
        MOVQ    MM0, [EAX + ECX * 8]
        MOVQ    MM1, [EAX + ECX * 8 + 8]
        MOVQ    MM2, [EAX + ECX * 8 + 16]
        MOVQ    MM3, [EAX + ECX * 8 + 24]
        MOVQ    MM4, [EAX + ECX * 8 + 32]
        MOVQ    MM5, [EAX + ECX * 8 + 40]
        MOVQ    MM6, [EAX + ECX * 8 + 48]
        MOVQ    MM7, [EAX + ECX * 8 + 56]

        PXOR    MM0, QMask
        PXOR    MM1, QMask
        PXOR    MM2, QMask
        PXOR    MM3, QMask
        PXOR    MM4, QMask
        PXOR    MM5, QMask
        PXOR    MM6, QMask
        PXOR    MM7, QMask

        MOVQ    [EDX + ECX * 8], MM0
        MOVQ    [EDX + ECX * 8 + 8], MM1
        MOVQ    [EDX + ECX * 8 + 16], MM2
        MOVQ    [EDX + ECX * 8 + 24], MM3
        MOVQ    [EDX + ECX * 8 + 32], MM4
        MOVQ    [EDX + ECX * 8 + 40], MM5
        MOVQ    [EDX + ECX * 8 + 48], MM6
        MOVQ    [EDX + ECX * 8 + 56], MM7

        ADD     ECX, 8                    // eight quadwords per iteration
        JS      @Block16

        EMMS

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Done

        LEA     EAX, [EAX + ECX * 4]
        LEA     EDX, [EDX + ECX * 4]
        NEG     ECX

        MOV     EDI, Mask

@TailLoop:
        MOV     EBX, [EAX + ECX * 4]
        XOR     EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @TailLoop

@Done:
        POP     EDI
        POP     EBX
end;
1221
{ Src -> Dst OR of Count pixels with Mask, MMX version.
  Blocks of 16 pixels are handled with eight 64-bit MMX registers; the remaining
  Count mod 16 pixels are handled one at a time with integer instructions.
  QMask holds Mask duplicated into both halves of a 64-bit value. }
procedure OrLineEx_MMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  QMask: Int64;
asm
        // The code treats EAX as Src, EDX as Dst and ECX as Count
        PUSH    EBX
        PUSH    EDI

        TEST    ECX, ECX
        JZ      @Done

        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail

        SAR     ECX, 1                    // pixels -> quadwords (two pixels each)
        LEA     EAX, [EAX + ECX * 8]      // end of the block region in Src
        LEA     EDX, [EDX + ECX * 8]      // end of the block region in Dst
        NEG     ECX                       // negative quadword index

        MOVD    MM7, Mask
        PUNPCKLDQ MM7, MM7                // duplicate Mask into both dwords
        MOVQ    QMask, MM7
        EMMS

@Block16:
        MOVQ    MM0, [EAX + ECX * 8]
        MOVQ    MM1, [EAX + ECX * 8 + 8]
        MOVQ    MM2, [EAX + ECX * 8 + 16]
        MOVQ    MM3, [EAX + ECX * 8 + 24]
        MOVQ    MM4, [EAX + ECX * 8 + 32]
        MOVQ    MM5, [EAX + ECX * 8 + 40]
        MOVQ    MM6, [EAX + ECX * 8 + 48]
        MOVQ    MM7, [EAX + ECX * 8 + 56]

        POR     MM0, QMask
        POR     MM1, QMask
        POR     MM2, QMask
        POR     MM3, QMask
        POR     MM4, QMask
        POR     MM5, QMask
        POR     MM6, QMask
        POR     MM7, QMask

        MOVQ    [EDX + ECX * 8], MM0
        MOVQ    [EDX + ECX * 8 + 8], MM1
        MOVQ    [EDX + ECX * 8 + 16], MM2
        MOVQ    [EDX + ECX * 8 + 24], MM3
        MOVQ    [EDX + ECX * 8 + 32], MM4
        MOVQ    [EDX + ECX * 8 + 40], MM5
        MOVQ    [EDX + ECX * 8 + 48], MM6
        MOVQ    [EDX + ECX * 8 + 56], MM7

        ADD     ECX, 8                    // eight quadwords per iteration
        JS      @Block16

        EMMS

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Done

        LEA     EAX, [EAX + ECX * 4]
        LEA     EDX, [EDX + ECX * 4]
        NEG     ECX

        MOV     EDI, Mask

@TailLoop:
        MOV     EBX, [EAX + ECX * 4]
        OR      EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @TailLoop

@Done:
        POP     EDI
        POP     EBX
end;
1306
{ Src -> Dst AND of Count pixels with Mask, MMX version.
  Blocks of 16 pixels are handled with eight 64-bit MMX registers; the remaining
  Count mod 16 pixels are handled one at a time with integer instructions.
  QMask holds Mask duplicated into both halves of a 64-bit value. }
procedure AndLineEx_MMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  QMask: Int64;
asm
        // The code treats EAX as Src, EDX as Dst and ECX as Count
        PUSH    EBX
        PUSH    EDI

        TEST    ECX, ECX
        JZ      @Done

        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail

        SAR     ECX, 1                    // pixels -> quadwords (two pixels each)
        LEA     EAX, [EAX + ECX * 8]      // end of the block region in Src
        LEA     EDX, [EDX + ECX * 8]      // end of the block region in Dst
        NEG     ECX                       // negative quadword index

        MOVD    MM7, Mask
        PUNPCKLDQ MM7, MM7                // duplicate Mask into both dwords
        MOVQ    QMask, MM7
        EMMS

@Block16:
        MOVQ    MM0, [EAX + ECX * 8]
        MOVQ    MM1, [EAX + ECX * 8 + 8]
        MOVQ    MM2, [EAX + ECX * 8 + 16]
        MOVQ    MM3, [EAX + ECX * 8 + 24]
        MOVQ    MM4, [EAX + ECX * 8 + 32]
        MOVQ    MM5, [EAX + ECX * 8 + 40]
        MOVQ    MM6, [EAX + ECX * 8 + 48]
        MOVQ    MM7, [EAX + ECX * 8 + 56]

        PAND    MM0, QMask
        PAND    MM1, QMask
        PAND    MM2, QMask
        PAND    MM3, QMask
        PAND    MM4, QMask
        PAND    MM5, QMask
        PAND    MM6, QMask
        PAND    MM7, QMask

        MOVQ    [EDX + ECX * 8], MM0
        MOVQ    [EDX + ECX * 8 + 8], MM1
        MOVQ    [EDX + ECX * 8 + 16], MM2
        MOVQ    [EDX + ECX * 8 + 24], MM3
        MOVQ    [EDX + ECX * 8 + 32], MM4
        MOVQ    [EDX + ECX * 8 + 40], MM5
        MOVQ    [EDX + ECX * 8 + 48], MM6
        MOVQ    [EDX + ECX * 8 + 56], MM7

        ADD     ECX, 8                    // eight quadwords per iteration
        JS      @Block16

        EMMS

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Done

        LEA     EAX, [EAX + ECX * 4]
        LEA     EDX, [EDX + ECX * 4]
        NEG     ECX

        MOV     EDI, Mask

@TailLoop:
        MOV     EBX, [EAX + ECX * 4]
        AND     EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @TailLoop

@Done:
        POP     EDI
        POP     EBX
end;
1390
1391{ Extended MMX versions}
1392
{ Src -> Dst XOR of Count pixels with Mask, extended MMX version.
  Same structure as the MMX version, except that the 16-pixel blocks are
  written with MOVNTQ instead of MOVQ. The remaining Count mod 16 pixels are
  handled one at a time with integer instructions.
  QMask holds Mask duplicated into both halves of a 64-bit value. }
procedure XorLineEx_EMMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
var
  QMask: Int64;
asm
        // The code treats EAX as Src, EDX as Dst and ECX as Count
        PUSH    EBX
        PUSH    EDI

        TEST    ECX, ECX
        JZ      @Done

        MOV     EBX, ECX                  // keep the full count for the tail
        SHR     ECX, 4
        SHL     ECX, 4                    // ECX := Count rounded down to a multiple of 16
        JZ      @Tail

        SAR     ECX, 1                    // pixels -> quadwords (two pixels each)
        LEA     EAX, [EAX + ECX * 8]      // end of the block region in Src
        LEA     EDX, [EDX + ECX * 8]      // end of the block region in Dst
        NEG     ECX                       // negative quadword index

        MOVD    MM7, Mask
        PUNPCKLDQ MM7, MM7                // duplicate Mask into both dwords
        MOVQ    QMask, MM7
        EMMS

@Block16:
        MOVQ    MM0, [EAX + ECX * 8]
        MOVQ    MM1, [EAX + ECX * 8 + 8]
        MOVQ    MM2, [EAX + ECX * 8 + 16]
        MOVQ    MM3, [EAX + ECX * 8 + 24]
        MOVQ    MM4, [EAX + ECX * 8 + 32]
        MOVQ    MM5, [EAX + ECX * 8 + 40]
        MOVQ    MM6, [EAX + ECX * 8 + 48]
        MOVQ    MM7, [EAX + ECX * 8 + 56]

        PXOR    MM0, QMask
        PXOR    MM1, QMask
        PXOR    MM2, QMask
        PXOR    MM3, QMask
        PXOR    MM4, QMask
        PXOR    MM5, QMask
        PXOR    MM6, QMask
        PXOR    MM7, QMask

        MOVNTQ  [EDX + ECX * 8], MM0
        MOVNTQ  [EDX + ECX * 8 + 8], MM1
        MOVNTQ  [EDX + ECX * 8 + 16], MM2
        MOVNTQ  [EDX + ECX * 8 + 24], MM3
        MOVNTQ  [EDX + ECX * 8 + 32], MM4
        MOVNTQ  [EDX + ECX * 8 + 40], MM5
        MOVNTQ  [EDX + ECX * 8 + 48], MM6
        MOVNTQ  [EDX + ECX * 8 + 56], MM7

        ADD     ECX, 8                    // eight quadwords per iteration
        JS      @Block16

        EMMS

@Tail:
        MOV     ECX, EBX
        SHR     EBX, 4
        SHL     EBX, 4
        SUB     ECX, EBX                  // ECX := Count mod 16
        JZ      @Done

        LEA     EAX, [EAX + ECX * 4]
        LEA     EDX, [EDX + ECX * 4]
        NEG     ECX

        MOV     EDI, Mask

@TailLoop:
        MOV     EBX, [EAX + ECX * 4]
        XOR     EBX, EDI
        MOV     [EDX + ECX * 4], EBX
        INC     ECX
        JNZ     @TailLoop

@Done:
        POP     EDI
        POP     EBX
end;
1477
1478procedure OrLineEx_EMMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
1479//EMMX version
// Stores (Src[i] OR Mask) into Dst[i] for Count consecutive 32-bit values.
//
// The routine reads Src through EAX, Dst through EDX and Count through ECX
// (Delphi register calling convention for the first three parameters);
// Mask is accessed by name.
//
// Work is split in two phases:
//   1. a block loop handling 16 values (64 bytes, eight MMX registers) per pass,
//      writing results with MOVNTQ (non-temporal store);
//   2. a scalar loop handling the remaining Count mod 16 values one at a time.
//
// Count = 0 returns immediately.
// NOTE(review): only zero is filtered (TEST/JZ); a negative Count is not
// rejected here - presumably callers never pass one, confirm.
// NOTE(review): no SFENCE follows the MOVNTQ stores - confirm that no caller
// relies on store ordering of Dst across threads.
1480var
// 64-bit copy of Mask (Mask in both 32-bit halves), used as memory operand of POR.
1481 QMask: Int64;
1482
1483asm
// EBX and EDI are used as scratch below and restored at @Exit.
1484 PUSH EBX
1485 PUSH EDI
1486
// Nothing to do for Count = 0.
1487 TEST ECX, ECX
1488 JZ @Exit
1489
// Keep the original Count in EBX; ECX := Count rounded down to a multiple of 16.
// If that is zero there are fewer than 16 values: go straight to the scalar loop.
1490 MOV EBX, ECX
1491 SHR ECX, 4
1492 SHL ECX, 4
1493 JZ @PrepSingleLoop
1494
// ECX := number of 8-byte quadwords in the block part (two values per quadword).
// Both pointers are moved to the end of the block part and ECX is negated, so
// [ptr + ECX * 8] walks forward while ECX counts up towards zero.
1495 SAR ECX, 1
1496 LEA EAX, [EAX + ECX * 8]
1497 LEA EDX, [EDX + ECX * 8]
1498 NEG ECX
1499
// Build QMask: Mask in the low dword of MM7, duplicated into the high dword,
// then spilled to memory so that MM7 can be reused as a data register in the loop.
// NOTE(review): the EMMS here looks redundant, MMX is used again right below.
1500 MOVD MM7, MASK
1501 PUNPCKLDQ MM7, MM7
1502 MOVQ QMask, MM7
1503 EMMS
1504
1505@Loop:
// Load 8 quadwords (16 values) from the source.
1506 MOVQ MM0, [EAX + ECX * 8]
1507 MOVQ MM1, [EAX + ECX * 8 + 8]
1508 MOVQ MM2, [EAX + ECX * 8 + 16]
1509 MOVQ MM3, [EAX + ECX * 8 + 24]
1510 MOVQ MM4, [EAX + ECX * 8 + 32]
1511 MOVQ MM5, [EAX + ECX * 8 + 40]
1512 MOVQ MM6, [EAX + ECX * 8 + 48]
1513 MOVQ MM7, [EAX + ECX * 8 + 56]
1514
// Apply the mask to every loaded quadword.
1515 POR MM0, QMask
1516 POR MM1, QMask
1517 POR MM2, QMask
1518 POR MM3, QMask
1519 POR MM4, QMask
1520 POR MM5, QMask
1521 POR MM6, QMask
1522 POR MM7, QMask
1523
// Store the 8 results at the same offsets in the destination (non-temporal).
1524 MOVNTQ [EDX + ECX * 8], MM0
1525 MOVNTQ [EDX + ECX * 8 + 8], MM1
1526 MOVNTQ [EDX + ECX * 8 + 16], MM2
1527 MOVNTQ [EDX + ECX * 8 + 24], MM3
1528 MOVNTQ [EDX + ECX * 8 + 32], MM4
1529 MOVNTQ [EDX + ECX * 8 + 40], MM5
1530 MOVNTQ [EDX + ECX * 8 + 48], MM6
1531 MOVNTQ [EDX + ECX * 8 + 56], MM7
1532
// 8 quadwords consumed; repeat while the index is still negative.
1533 ADD ECX, 8
1534 JS @Loop
1535
// Leave MMX state before returning to code that may use the FPU.
1536 EMMS
1537
1538@PrepSingleLoop:
// ECX := Count mod 16 (EBX still holds the original Count at this point).
// EAX/EDX either still hold the original pointers (Count < 16) or already
// point just past the block part handled above.
1539 MOV ECX, EBX
1540 SHR EBX, 4
1541 SHL EBX, 4
1542 SUB ECX, EBX
1543 JZ @Exit
1544
// Same negative-index scheme as the block loop, now with 4-byte elements.
1545 LEA EAX, [EAX + ECX * 4]
1546 LEA EDX, [EDX + ECX * 4]
1547 NEG ECX
1548
1549 MOV EDI, Mask
1550
1551@SingleLoop:
// One value per pass; EBX is reused as the scratch register.
1552 MOV EBX, [EAX + ECX * 4]
1553 OR EBX, EDI
1554 MOV [EDX + ECX * 4], EBX
1555 INC ECX
1556 JNZ @SingleLoop
1557
1558@Exit:
1559 POP EDI
1560 POP EBX
1561end;
1562
1563procedure AndLineEx_EMMX(Src, Dst: PColor32; Count: Integer; Mask: TColor32);
1564//EMMX version
// Stores (Src[i] AND Mask) into Dst[i] for Count consecutive 32-bit values.
//
// The routine reads Src through EAX, Dst through EDX and Count through ECX
// (Delphi register calling convention for the first three parameters);
// Mask is accessed by name.
//
// Work is split in two phases:
//   1. a block loop handling 16 values (64 bytes, eight MMX registers) per pass,
//      writing results with MOVNTQ (non-temporal store);
//   2. a scalar loop handling the remaining Count mod 16 values one at a time.
//
// Count = 0 returns immediately.
// NOTE(review): only zero is filtered (TEST/JZ); a negative Count is not
// rejected here - presumably callers never pass one, confirm.
// NOTE(review): no SFENCE follows the MOVNTQ stores - confirm that no caller
// relies on store ordering of Dst across threads.
1565var
// 64-bit copy of Mask (Mask in both 32-bit halves), used as memory operand of PAND.
1566 QMask: Int64;
1567
1568asm
// EBX and EDI are used as scratch below and restored at @Exit.
1569 PUSH EBX
1570 PUSH EDI
1571
// Nothing to do for Count = 0.
1572 TEST ECX, ECX
1573 JZ @Exit
1574
// Keep the original Count in EBX; ECX := Count rounded down to a multiple of 16.
// If that is zero there are fewer than 16 values: go straight to the scalar loop.
1575 MOV EBX, ECX
1576 SHR ECX, 4
1577 SHL ECX, 4
1578 JZ @PrepSingleLoop
1579
// ECX := number of 8-byte quadwords in the block part (two values per quadword).
// Both pointers are moved to the end of the block part and ECX is negated, so
// [ptr + ECX * 8] walks forward while ECX counts up towards zero.
1580 SAR ECX, 1
1581 LEA EAX, [EAX + ECX * 8]
1582 LEA EDX, [EDX + ECX * 8]
1583 NEG ECX
1584
// Build QMask: Mask in the low dword of MM7, duplicated into the high dword,
// then spilled to memory so that MM7 can be reused as a data register in the loop.
// NOTE(review): the EMMS here looks redundant, MMX is used again right below.
1585 MOVD MM7, MASK
1586 PUNPCKLDQ MM7, MM7
1587 MOVQ QMask, MM7
1588 EMMS
1589
1590@Loop:
// Load 8 quadwords (16 values) from the source.
1591 MOVQ MM0, [EAX + ECX * 8]
1592 MOVQ MM1, [EAX + ECX * 8 + 8]
1593 MOVQ MM2, [EAX + ECX * 8 + 16]
1594 MOVQ MM3, [EAX + ECX * 8 + 24]
1595 MOVQ MM4, [EAX + ECX * 8 + 32]
1596 MOVQ MM5, [EAX + ECX * 8 + 40]
1597 MOVQ MM6, [EAX + ECX * 8 + 48]
1598 MOVQ MM7, [EAX + ECX * 8 + 56]
1599
// Apply the mask to every loaded quadword.
1600 PAND MM0, QMask
1601 PAND MM1, QMask
1602 PAND MM2, QMask
1603 PAND MM3, QMask
1604 PAND MM4, QMask
1605 PAND MM5, QMask
1606 PAND MM6, QMask
1607 PAND MM7, QMask
1608
// Store the 8 results at the same offsets in the destination (non-temporal).
1609 MOVNTQ [EDX + ECX * 8], MM0
1610 MOVNTQ [EDX + ECX * 8 + 8], MM1
1611 MOVNTQ [EDX + ECX * 8 + 16], MM2
1612 MOVNTQ [EDX + ECX * 8 + 24], MM3
1613 MOVNTQ [EDX + ECX * 8 + 32], MM4
1614 MOVNTQ [EDX + ECX * 8 + 40], MM5
1615 MOVNTQ [EDX + ECX * 8 + 48], MM6
1616 MOVNTQ [EDX + ECX * 8 + 56], MM7
1617
// 8 quadwords consumed; repeat while the index is still negative.
1618 ADD ECX, 8
1619 JS @Loop
1620
// Leave MMX state before returning to code that may use the FPU.
1621 EMMS
1622
1623@PrepSingleLoop:
// ECX := Count mod 16 (EBX still holds the original Count at this point).
// EAX/EDX either still hold the original pointers (Count < 16) or already
// point just past the block part handled above.
1624 MOV ECX, EBX
1625 SHR EBX, 4
1626 SHL EBX, 4
1627 SUB ECX, EBX
1628 JZ @Exit
1629
// Same negative-index scheme as the block loop, now with 4-byte elements.
1630 LEA EAX, [EAX + ECX * 4]
1631 LEA EDX, [EDX + ECX * 4]
1632 NEG ECX
1633
1634 MOV EDI, Mask
1635
1636@SingleLoop:
// One value per pass; EBX is reused as the scratch register.
1637 MOV EBX, [EAX + ECX * 4]
1638 AND EBX, EDI
1639 MOV [EDX + ECX * 4], EBX
1640 INC ECX
1641 JNZ @SingleLoop
1642
1643@Exit:
1644 POP EDI
1645 POP EBX
1646end;
1647
1648{$ENDIF}
1649{$ENDIF}
1650
1651{CPU target and feature Function templates}
1652
// Function IDs: one per bindable routine. RegisterBindings uses them both to
// attach a procedure variable (RegisterBinding) and to register each candidate
// implementation (Add) under the same ID.
1653const
1654 FID_ANDLINE = 0;
1655 FID_ORLINE = 1;
1656 FID_XORLINE = 2;
1657 FID_ANDLINEEX = 3;
1658 FID_ORLINEEX = 4;
1659 FID_XORLINEEX = 5;
1660
// Function registry of this unit; created and populated in RegisterBindings.
1661var
1662 Registry: TFunctionRegistry;
1663
// Creates this unit's function registry, attaches the LogicalMaskLine*
// procedure variables to their function IDs, registers every available
// implementation for each ID and finally calls RebindAll.
//
// Which implementations are registered depends on conditional compilation:
//   - the *_Pas versions are always registered;
//   - the *_ASM versions only when PUREPASCAL is not defined
//     (PUREPASCAL is forced for TARGET_X64 at the top of the unit);
//   - the *_MMX / *_EMMX versions additionally require OMIT_MMX to be undefined
//     and exist only for the "Ex" (masked) routines.
//
// NOTE(review): the order of the Add calls may influence which implementation
// wins when several match - verify against TFunctionRegistry before reordering.
1664procedure RegisterBindings;
1665begin
1666 Registry := NewRegistry('GR32_Filters bindings');
// "@@X" passes the address of the procedure variable X itself, not its current
// value. Presumably the registry writes the selected implementation there.
1667 Registry.RegisterBinding(FID_ANDLINE, @@LogicalMaskLineAnd);
1668 Registry.RegisterBinding(FID_ORLINE, @@LogicalMaskLineOr);
1669 Registry.RegisterBinding(FID_XORLINE, @@LogicalMaskLineXor);
1670 Registry.RegisterBinding(FID_ANDLINEEX, @@LogicalMaskLineAndEx);
1671 Registry.RegisterBinding(FID_ORLINEEX, @@LogicalMaskLineOrEx);
1672 Registry.RegisterBinding(FID_XORLINEEX, @@LogicalMaskLineXorEx);
1673
// Pure Pascal implementations, registered unconditionally.
1674 Registry.Add(FID_ANDLINE, @AndLine_Pas);
1675 Registry.Add(FID_ORLINE, @OrLine_Pas);
1676 Registry.Add(FID_XORLINE, @XorLine_Pas);
1677 Registry.Add(FID_ANDLINEEX, @AndLineEx_Pas);
1678 Registry.Add(FID_ORLINEEX, @OrLineEx_Pas);
1679 Registry.Add(FID_XORLINEEX, @XorLineEx_Pas);
1680
1681{$IFNDEF PUREPASCAL}
// Assembler implementations, registered without a feature set argument.
1682 Registry.Add(FID_ANDLINE, @AndLine_ASM);
1683 Registry.Add(FID_ORLINE, @OrLine_ASM);
1684 Registry.Add(FID_XORLINE, @XorLine_ASM);
1685 Registry.Add(FID_ANDLINEEX, @AndLineEx_ASM);
1686 Registry.Add(FID_ORLINEEX, @OrLineEx_ASM);
1687 Registry.Add(FID_XORLINEEX, @XorLineEx_ASM);
1688{$IFNDEF OMIT_MMX}
// MMX and extended-MMX implementations, tagged with [ciMMX] / [ciEMMX]
// (presumably the CPU features they require - confirm in TFunctionRegistry.Add).
1689 Registry.Add(FID_ANDLINEEX, @AndLineEx_MMX, [ciMMX]);
1690 Registry.Add(FID_ORLINEEX, @OrLineEx_MMX, [ciMMX]);
1691 Registry.Add(FID_XORLINEEX, @XorLineEx_MMX, [ciMMX]);
1692 Registry.Add(FID_ANDLINEEX, @AndLineEx_EMMX, [ciEMMX]);
1693 Registry.Add(FID_ORLINEEX, @OrLineEx_EMMX, [ciEMMX]);
1694 Registry.Add(FID_XORLINEEX, @XorLineEx_EMMX, [ciEMMX]);
1695{$ENDIF}
1696{$ENDIF}
1697
// Presumably selects an implementation per function ID and stores it in the
// bound procedure variables - confirm in TFunctionRegistry.RebindAll.
1698 Registry.RebindAll;
1699end;
1700
// Unit start-up: set up the function registry and bindings once, when the unit
// is initialized.
1701initialization
1702 RegisterBindings;
1703
1704end.
Note: See TracBrowser for help on using the repository browser.