00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244 #define PNG_INTERNAL
00245 #include "png.h"
00246
00247 #if defined(PNG_USE_PNGGCCRD)
00248
00249 int PNGAPI png_mmx_support(void);
00250
#ifdef PNG_USE_LOCAL_ARRAYS
/* Per-pass tables, indexed by png_ptr->pass (seven entries each).
 * png_combine_row() uses them, scaled by bytes-per-pixel, as:
 *   png_pass_start - offset of the first pixel copied in the row
 *   png_pass_inc   - distance between successive copies
 *   png_pass_width - number of pixels copied at each position
 */
static const int FARDATA png_pass_start[7] = {
   0, 4, 0, 2, 0, 1, 0
};
static const int FARDATA png_pass_inc[7] = {
   8, 8, 4, 4, 2, 2, 1
};
static const int FARDATA png_pass_width[7] = {
   8, 4, 4, 2, 2, 1, 1
};
#endif /* PNG_USE_LOCAL_ARRAYS */
00256
00257 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
00258
00259
/* On DJGPP, WIN32 and Cygwin, rename the underscore-prefixed C identifiers
 * used in this file to their bare names.  The inline-asm strings below refer
 * to these objects by the underscored spelling (e.g. "movq _mask8_0, %%mm0"),
 * and macros are not expanded inside string literals.
 * NOTE(review): presumably these targets prepend '_' to C symbols, so C
 * `mask8_0` is the assembler symbol `_mask8_0` -- confirm against the
 * toolchain ABI.
 */
#if defined(__DJGPP__) || defined(WIN32) || defined(__CYGWIN__)
#  define _mmx_supported  mmx_supported
#  define _const4         const4
#  define _const6         const6
#  define _mask8_0        mask8_0
#  define _mask16_1       mask16_1
#  define _mask16_0       mask16_0
#  define _mask24_2       mask24_2
#  define _mask24_1       mask24_1
#  define _mask24_0       mask24_0
#  define _mask32_3       mask32_3
#  define _mask32_2       mask32_2
#  define _mask32_1       mask32_1
#  define _mask32_0       mask32_0
#  define _mask48_5       mask48_5
#  define _mask48_4       mask48_4
#  define _mask48_3       mask48_3
#  define _mask48_2       mask48_2
#  define _mask48_1       mask48_1
#  define _mask48_0       mask48_0
#  define _LBCarryMask    LBCarryMask
#  define _HBClearMask    HBClearMask
#  define _ActiveMask     ActiveMask
#  define _ActiveMask2    ActiveMask2
#  define _ActiveMaskEnd  ActiveMaskEnd
#  define _ShiftBpp       ShiftBpp
#  define _ShiftRem       ShiftRem
   /* The following objects only exist in thread-unsafe builds. */
#  ifdef PNG_THREAD_UNSAFE_OK
#    define _unmask       unmask
#    define _FullLength   FullLength
#    define _MMXLength    MMXLength
#    define _dif          dif
#    define _patemp       patemp
#    define _pbtemp       pbtemp
#    define _pctemp       pctemp
#  endif /* PNG_THREAD_UNSAFE_OK */
#endif /* __DJGPP__ || WIN32 || __CYGWIN__ */
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
/* Byte-selector constants for the MMX code in png_combine_row().
 *
 * _maskNN_k is the k-th quadword of an 8-pixel group at NN bits per pixel.
 * Each byte holds the single combine-mask bit (0x80 = first pixel of the
 * group, 0x01 = eighth) of the pixel that byte belongs to; the lowest byte
 * of the constant is the first byte in memory on x86.
 */
static unsigned long long _mask8_0  = 0x0102040810204080ULL;

static unsigned long long _mask16_1 = 0x0101020204040808ULL;
static unsigned long long _mask16_0 = 0x1010202040408080ULL;

static unsigned long long _mask24_2 = 0x0101010202020404ULL;
static unsigned long long _mask24_1 = 0x0408080810101020ULL;
static unsigned long long _mask24_0 = 0x2020404040808080ULL;

static unsigned long long _mask32_3 = 0x0101010102020202ULL;
static unsigned long long _mask32_2 = 0x0404040408080808ULL;
static unsigned long long _mask32_1 = 0x1010101020202020ULL;
static unsigned long long _mask32_0 = 0x4040404080808080ULL;

static unsigned long long _mask48_5 = 0x0101010101010202ULL;
static unsigned long long _mask48_4 = 0x0202020204040404ULL;
static unsigned long long _mask48_3 = 0x0404080808080808ULL;
static unsigned long long _mask48_2 = 0x1010101010102020ULL;
static unsigned long long _mask48_1 = 0x2020202040404040ULL;
static unsigned long long _mask48_0 = 0x4040808080808080ULL;

/* Low-3-byte and low-1-byte selectors.  Not referenced by png_combine_row();
 * NOTE(review): presumably consumed by asm further down the file -- confirm.
 */
static unsigned long long _const4   = 0x0000000000FFFFFFULL;
static unsigned long long _const6   = 0x00000000000000FFULL;

#ifdef PNG_THREAD_UNSAFE_OK
/* Scratch state shared with the inline asm through fixed symbol names.
 * Because these are file-scope statics, the code that uses them is not
 * reentrant, hence the PNG_THREAD_UNSAFE_OK guard.
 */
static int         _unmask;      /* ~mask, written by png_combine_row() */
static png_uint_32 _FullLength;
static png_uint_32 _MMXLength;
static int         _dif;
static int         _patemp;
static int         _pbtemp;
static int         _pctemp;
#endif /* PNG_THREAD_UNSAFE_OK */
00349
00350 void
00351 png_squelch_warnings(void)
00352 {
00353 #ifdef PNG_THREAD_UNSAFE_OK
00354 _dif = _dif;
00355 _patemp = _patemp;
00356 _pbtemp = _pbtemp;
00357 _pctemp = _pctemp;
00358 _MMXLength = _MMXLength;
00359 #endif
00360 _const4 = _const4;
00361 _const6 = _const6;
00362 _mask8_0 = _mask8_0;
00363 _mask16_1 = _mask16_1;
00364 _mask16_0 = _mask16_0;
00365 _mask24_2 = _mask24_2;
00366 _mask24_1 = _mask24_1;
00367 _mask24_0 = _mask24_0;
00368 _mask32_3 = _mask32_3;
00369 _mask32_2 = _mask32_2;
00370 _mask32_1 = _mask32_1;
00371 _mask32_0 = _mask32_0;
00372 _mask48_5 = _mask48_5;
00373 _mask48_4 = _mask48_4;
00374 _mask48_3 = _mask48_3;
00375 _mask48_2 = _mask48_2;
00376 _mask48_1 = _mask48_1;
00377 _mask48_0 = _mask48_0;
00378 }
00379 #endif
00380
00381
/* MMX availability flag.  The initial value 2 is a sentinel: the readers in
 * this file test for `_mmx_supported == 2` on entry and call
 * png_mmx_support() when they see it.
 * NOTE(review): presumably png_mmx_support() then stores the detected
 * zero/non-zero state here -- confirm in its definition.
 */
static int _mmx_supported = 2;
00383
00384
00385
00386
00387
00388
00389
00390 #if defined(PNG_HAVE_ASSEMBLER_COMBINE_ROW)
00391
/* Bytes per pixel for the byte-aligned pixel depths handled by
 * png_combine_row(): 16, 24, 32, 48 and 64 bits respectively.
 */
#define BPP2  2
#define BPP3  3
#define BPP4  4
#define BPP6  6
#define BPP8  8
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412 void
00413 png_combine_row(png_structp png_ptr, png_bytep row, int mask)
00414 {
00415 png_debug(1, "in png_combine_row (pnggccrd.c)\n");
00416
00417 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
00418 if (_mmx_supported == 2) {
00419 #if !defined(PNG_1_0_X)
00420
00421 png_warning(png_ptr, "asm_flags may not have been initialized");
00422 #endif
00423 png_mmx_support();
00424 }
00425 #endif
00426
00427 if (mask == 0xff)
00428 {
00429 png_debug(2,"mask == 0xff: doing single png_memcpy()\n");
00430 png_memcpy(row, png_ptr->row_buf + 1,
00431 (png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,png_ptr->width));
00432 }
00433 else
00434 {
00435 switch (png_ptr->row_info.pixel_depth)
00436 {
00437 case 1:
00438 {
00439 png_bytep sp;
00440 png_bytep dp;
00441 int s_inc, s_start, s_end;
00442 int m;
00443 int shift;
00444 png_uint_32 i;
00445
00446 sp = png_ptr->row_buf + 1;
00447 dp = row;
00448 m = 0x80;
00449 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
00450 if (png_ptr->transformations & PNG_PACKSWAP)
00451 {
00452 s_start = 0;
00453 s_end = 7;
00454 s_inc = 1;
00455 }
00456 else
00457 #endif
00458 {
00459 s_start = 7;
00460 s_end = 0;
00461 s_inc = -1;
00462 }
00463
00464 shift = s_start;
00465
00466 for (i = 0; i < png_ptr->width; i++)
00467 {
00468 if (m & mask)
00469 {
00470 int value;
00471
00472 value = (*sp >> shift) & 0x1;
00473 *dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
00474 *dp |= (png_byte)(value << shift);
00475 }
00476
00477 if (shift == s_end)
00478 {
00479 shift = s_start;
00480 sp++;
00481 dp++;
00482 }
00483 else
00484 shift += s_inc;
00485
00486 if (m == 1)
00487 m = 0x80;
00488 else
00489 m >>= 1;
00490 }
00491 break;
00492 }
00493
00494 case 2:
00495 {
00496 png_bytep sp;
00497 png_bytep dp;
00498 int s_start, s_end, s_inc;
00499 int m;
00500 int shift;
00501 png_uint_32 i;
00502 int value;
00503
00504 sp = png_ptr->row_buf + 1;
00505 dp = row;
00506 m = 0x80;
00507 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
00508 if (png_ptr->transformations & PNG_PACKSWAP)
00509 {
00510 s_start = 0;
00511 s_end = 6;
00512 s_inc = 2;
00513 }
00514 else
00515 #endif
00516 {
00517 s_start = 6;
00518 s_end = 0;
00519 s_inc = -2;
00520 }
00521
00522 shift = s_start;
00523
00524 for (i = 0; i < png_ptr->width; i++)
00525 {
00526 if (m & mask)
00527 {
00528 value = (*sp >> shift) & 0x3;
00529 *dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
00530 *dp |= (png_byte)(value << shift);
00531 }
00532
00533 if (shift == s_end)
00534 {
00535 shift = s_start;
00536 sp++;
00537 dp++;
00538 }
00539 else
00540 shift += s_inc;
00541 if (m == 1)
00542 m = 0x80;
00543 else
00544 m >>= 1;
00545 }
00546 break;
00547 }
00548
00549 case 4:
00550 {
00551 png_bytep sp;
00552 png_bytep dp;
00553 int s_start, s_end, s_inc;
00554 int m;
00555 int shift;
00556 png_uint_32 i;
00557 int value;
00558
00559 sp = png_ptr->row_buf + 1;
00560 dp = row;
00561 m = 0x80;
00562 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
00563 if (png_ptr->transformations & PNG_PACKSWAP)
00564 {
00565 s_start = 0;
00566 s_end = 4;
00567 s_inc = 4;
00568 }
00569 else
00570 #endif
00571 {
00572 s_start = 4;
00573 s_end = 0;
00574 s_inc = -4;
00575 }
00576 shift = s_start;
00577
00578 for (i = 0; i < png_ptr->width; i++)
00579 {
00580 if (m & mask)
00581 {
00582 value = (*sp >> shift) & 0xf;
00583 *dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
00584 *dp |= (png_byte)(value << shift);
00585 }
00586
00587 if (shift == s_end)
00588 {
00589 shift = s_start;
00590 sp++;
00591 dp++;
00592 }
00593 else
00594 shift += s_inc;
00595 if (m == 1)
00596 m = 0x80;
00597 else
00598 m >>= 1;
00599 }
00600 break;
00601 }
00602
00603 case 8:
00604 {
00605 png_bytep srcptr;
00606 png_bytep dstptr;
00607
00608 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
00609 #if !defined(PNG_1_0_X)
00610 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
00611 )
00612 #else
00613 if (_mmx_supported)
00614 #endif
00615 {
00616 png_uint_32 len;
00617 int diff;
00618 int dummy_value_a;
00619 int dummy_value_d;
00620 int dummy_value_c;
00621 int dummy_value_S;
00622 int dummy_value_D;
00623 _unmask = ~mask;
00624 srcptr = png_ptr->row_buf + 1;
00625 dstptr = row;
00626 len = png_ptr->width &~7;
00627 diff = (int) (png_ptr->width & 7);
00628
00629 __asm__ __volatile__ (
00630 "movd _unmask, %%mm7 \n\t"
00631 "psubb %%mm6, %%mm6 \n\t"
00632 "punpcklbw %%mm7, %%mm7 \n\t"
00633 "punpcklwd %%mm7, %%mm7 \n\t"
00634 "punpckldq %%mm7, %%mm7 \n\t"
00635
00636 "movq _mask8_0, %%mm0 \n\t"
00637 "pand %%mm7, %%mm0 \n\t"
00638 "pcmpeqb %%mm6, %%mm0 \n\t"
00639
00640
00641
00642
00643
00644 "cmpl $0, %%ecx \n\t"
00645 "je mainloop8end \n\t"
00646
00647 "mainloop8: \n\t"
00648 "movq (%%esi), %%mm4 \n\t"
00649 "pand %%mm0, %%mm4 \n\t"
00650 "movq %%mm0, %%mm6 \n\t"
00651 "pandn (%%edi), %%mm6 \n\t"
00652 "por %%mm6, %%mm4 \n\t"
00653 "movq %%mm4, (%%edi) \n\t"
00654 "addl $8, %%esi \n\t"
00655 "addl $8, %%edi \n\t"
00656 "subl $8, %%ecx \n\t"
00657 "ja mainloop8 \n\t"
00658
00659 "mainloop8end: \n\t"
00660
00661 "movl %%eax, %%ecx \n\t"
00662 "cmpl $0, %%ecx \n\t"
00663 "jz end8 \n\t"
00664
00665 "sall $24, %%edx \n\t"
00666
00667 "secondloop8: \n\t"
00668 "sall %%edx \n\t"
00669 "jnc skip8 \n\t"
00670 "movb (%%esi), %%al \n\t"
00671 "movb %%al, (%%edi) \n\t"
00672
00673 "skip8: \n\t"
00674 "incl %%esi \n\t"
00675 "incl %%edi \n\t"
00676 "decl %%ecx \n\t"
00677 "jnz secondloop8 \n\t"
00678
00679 "end8: \n\t"
00680 "EMMS \n\t"
00681
00682 : "=a" (dummy_value_a),
00683 "=d" (dummy_value_d),
00684 "=c" (dummy_value_c),
00685 "=S" (dummy_value_S),
00686 "=D" (dummy_value_D)
00687
00688 : "3" (srcptr),
00689 "4" (dstptr),
00690 "0" (diff),
00691
00692 "2" (len),
00693 "1" (mask)
00694
00695 #if 0
00696 : "%mm0", "%mm4", "%mm6", "%mm7"
00697 #endif
00698 );
00699 }
00700 else
00701 #endif
00702 {
00703 register png_uint_32 i;
00704 png_uint_32 initial_val = png_pass_start[png_ptr->pass];
00705
00706 register int stride = png_pass_inc[png_ptr->pass];
00707
00708 register int rep_bytes = png_pass_width[png_ptr->pass];
00709
00710 png_uint_32 len = png_ptr->width &~7;
00711 int diff = (int) (png_ptr->width & 7);
00712 register png_uint_32 final_val = len;
00713
00714 srcptr = png_ptr->row_buf + 1 + initial_val;
00715 dstptr = row + initial_val;
00716
00717 for (i = initial_val; i < final_val; i += stride)
00718 {
00719 png_memcpy(dstptr, srcptr, rep_bytes);
00720 srcptr += stride;
00721 dstptr += stride;
00722 }
00723 if (diff)
00724 {
00725 final_val+=diff ;
00726 for (; i < final_val; i += stride)
00727 {
00728 if (rep_bytes > (int)(final_val-i))
00729 rep_bytes = (int)(final_val-i);
00730 png_memcpy(dstptr, srcptr, rep_bytes);
00731 srcptr += stride;
00732 dstptr += stride;
00733 }
00734 }
00735
00736 }
00737
00738 break;
00739 }
00740
00741 case 16:
00742 {
00743 png_bytep srcptr;
00744 png_bytep dstptr;
00745
00746 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
00747 #if !defined(PNG_1_0_X)
00748 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
00749 )
00750 #else
00751 if (_mmx_supported)
00752 #endif
00753 {
00754 png_uint_32 len;
00755 int diff;
00756 int dummy_value_a;
00757 int dummy_value_d;
00758 int dummy_value_c;
00759 int dummy_value_S;
00760 int dummy_value_D;
00761 _unmask = ~mask;
00762 srcptr = png_ptr->row_buf + 1;
00763 dstptr = row;
00764 len = png_ptr->width &~7;
00765 diff = (int) (png_ptr->width & 7);
00766
00767 __asm__ __volatile__ (
00768 "movd _unmask, %%mm7 \n\t"
00769 "psubb %%mm6, %%mm6 \n\t"
00770 "punpcklbw %%mm7, %%mm7 \n\t"
00771 "punpcklwd %%mm7, %%mm7 \n\t"
00772 "punpckldq %%mm7, %%mm7 \n\t"
00773
00774 "movq _mask16_0, %%mm0 \n\t"
00775 "movq _mask16_1, %%mm1 \n\t"
00776
00777 "pand %%mm7, %%mm0 \n\t"
00778 "pand %%mm7, %%mm1 \n\t"
00779
00780 "pcmpeqb %%mm6, %%mm0 \n\t"
00781 "pcmpeqb %%mm6, %%mm1 \n\t"
00782
00783
00784
00785
00786
00787 "cmpl $0, %%ecx \n\t"
00788 "jz mainloop16end \n\t"
00789
00790 "mainloop16: \n\t"
00791 "movq (%%esi), %%mm4 \n\t"
00792 "pand %%mm0, %%mm4 \n\t"
00793 "movq %%mm0, %%mm6 \n\t"
00794 "movq (%%edi), %%mm7 \n\t"
00795 "pandn %%mm7, %%mm6 \n\t"
00796 "por %%mm6, %%mm4 \n\t"
00797 "movq %%mm4, (%%edi) \n\t"
00798
00799 "movq 8(%%esi), %%mm5 \n\t"
00800 "pand %%mm1, %%mm5 \n\t"
00801 "movq %%mm1, %%mm7 \n\t"
00802 "movq 8(%%edi), %%mm6 \n\t"
00803 "pandn %%mm6, %%mm7 \n\t"
00804 "por %%mm7, %%mm5 \n\t"
00805 "movq %%mm5, 8(%%edi) \n\t"
00806
00807 "addl $16, %%esi \n\t"
00808 "addl $16, %%edi \n\t"
00809 "subl $8, %%ecx \n\t"
00810 "ja mainloop16 \n\t"
00811
00812 "mainloop16end: \n\t"
00813
00814 "movl %%eax, %%ecx \n\t"
00815 "cmpl $0, %%ecx \n\t"
00816 "jz end16 \n\t"
00817
00818 "sall $24, %%edx \n\t"
00819
00820 "secondloop16: \n\t"
00821 "sall %%edx \n\t"
00822 "jnc skip16 \n\t"
00823 "movw (%%esi), %%ax \n\t"
00824 "movw %%ax, (%%edi) \n\t"
00825
00826 "skip16: \n\t"
00827 "addl $2, %%esi \n\t"
00828 "addl $2, %%edi \n\t"
00829 "decl %%ecx \n\t"
00830 "jnz secondloop16 \n\t"
00831
00832 "end16: \n\t"
00833 "EMMS \n\t"
00834
00835 : "=a" (dummy_value_a),
00836 "=c" (dummy_value_c),
00837 "=d" (dummy_value_d),
00838 "=S" (dummy_value_S),
00839 "=D" (dummy_value_D)
00840
00841 : "0" (diff),
00842
00843 "1" (len),
00844 "2" (mask),
00845 "3" (srcptr),
00846 "4" (dstptr)
00847
00848 #if 0
00849 : "%mm0", "%mm1", "%mm4"
00850 , "%mm5", "%mm6", "%mm7"
00851 #endif
00852 );
00853 }
00854 else
00855 #endif
00856 {
00857 register png_uint_32 i;
00858 png_uint_32 initial_val = BPP2 * png_pass_start[png_ptr->pass];
00859
00860 register int stride = BPP2 * png_pass_inc[png_ptr->pass];
00861
00862 register int rep_bytes = BPP2 * png_pass_width[png_ptr->pass];
00863
00864 png_uint_32 len = png_ptr->width &~7;
00865 int diff = (int) (png_ptr->width & 7);
00866 register png_uint_32 final_val = BPP2 * len;
00867
00868 srcptr = png_ptr->row_buf + 1 + initial_val;
00869 dstptr = row + initial_val;
00870
00871 for (i = initial_val; i < final_val; i += stride)
00872 {
00873 png_memcpy(dstptr, srcptr, rep_bytes);
00874 srcptr += stride;
00875 dstptr += stride;
00876 }
00877 if (diff)
00878 {
00879 final_val+=diff*BPP2;
00880 for (; i < final_val; i += stride)
00881 {
00882 if (rep_bytes > (int)(final_val-i))
00883 rep_bytes = (int)(final_val-i);
00884 png_memcpy(dstptr, srcptr, rep_bytes);
00885 srcptr += stride;
00886 dstptr += stride;
00887 }
00888 }
00889 }
00890
00891 break;
00892 }
00893
00894 case 24:
00895 {
00896 png_bytep srcptr;
00897 png_bytep dstptr;
00898
00899 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
00900 #if !defined(PNG_1_0_X)
00901 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
00902 )
00903 #else
00904 if (_mmx_supported)
00905 #endif
00906 {
00907 png_uint_32 len;
00908 int diff;
00909 int dummy_value_a;
00910 int dummy_value_d;
00911 int dummy_value_c;
00912 int dummy_value_S;
00913 int dummy_value_D;
00914 _unmask = ~mask;
00915 srcptr = png_ptr->row_buf + 1;
00916 dstptr = row;
00917 len = png_ptr->width &~7;
00918 diff = (int) (png_ptr->width & 7);
00919
00920 __asm__ __volatile__ (
00921 "movd _unmask, %%mm7 \n\t"
00922 "psubb %%mm6, %%mm6 \n\t"
00923 "punpcklbw %%mm7, %%mm7 \n\t"
00924 "punpcklwd %%mm7, %%mm7 \n\t"
00925 "punpckldq %%mm7, %%mm7 \n\t"
00926
00927 "movq _mask24_0, %%mm0 \n\t"
00928 "movq _mask24_1, %%mm1 \n\t"
00929 "movq _mask24_2, %%mm2 \n\t"
00930
00931 "pand %%mm7, %%mm0 \n\t"
00932 "pand %%mm7, %%mm1 \n\t"
00933 "pand %%mm7, %%mm2 \n\t"
00934
00935 "pcmpeqb %%mm6, %%mm0 \n\t"
00936 "pcmpeqb %%mm6, %%mm1 \n\t"
00937 "pcmpeqb %%mm6, %%mm2 \n\t"
00938
00939
00940
00941
00942
00943 "cmpl $0, %%ecx \n\t"
00944 "jz mainloop24end \n\t"
00945
00946 "mainloop24: \n\t"
00947 "movq (%%esi), %%mm4 \n\t"
00948 "pand %%mm0, %%mm4 \n\t"
00949 "movq %%mm0, %%mm6 \n\t"
00950 "movq (%%edi), %%mm7 \n\t"
00951 "pandn %%mm7, %%mm6 \n\t"
00952 "por %%mm6, %%mm4 \n\t"
00953 "movq %%mm4, (%%edi) \n\t"
00954
00955 "movq 8(%%esi), %%mm5 \n\t"
00956 "pand %%mm1, %%mm5 \n\t"
00957 "movq %%mm1, %%mm7 \n\t"
00958 "movq 8(%%edi), %%mm6 \n\t"
00959 "pandn %%mm6, %%mm7 \n\t"
00960 "por %%mm7, %%mm5 \n\t"
00961 "movq %%mm5, 8(%%edi) \n\t"
00962
00963 "movq 16(%%esi), %%mm6 \n\t"
00964 "pand %%mm2, %%mm6 \n\t"
00965 "movq %%mm2, %%mm4 \n\t"
00966 "movq 16(%%edi), %%mm7 \n\t"
00967 "pandn %%mm7, %%mm4 \n\t"
00968 "por %%mm4, %%mm6 \n\t"
00969 "movq %%mm6, 16(%%edi) \n\t"
00970
00971 "addl $24, %%esi \n\t"
00972 "addl $24, %%edi \n\t"
00973 "subl $8, %%ecx \n\t"
00974
00975 "ja mainloop24 \n\t"
00976
00977 "mainloop24end: \n\t"
00978
00979 "movl %%eax, %%ecx \n\t"
00980 "cmpl $0, %%ecx \n\t"
00981 "jz end24 \n\t"
00982
00983 "sall $24, %%edx \n\t"
00984
00985 "secondloop24: \n\t"
00986 "sall %%edx \n\t"
00987 "jnc skip24 \n\t"
00988 "movw (%%esi), %%ax \n\t"
00989 "movw %%ax, (%%edi) \n\t"
00990 "xorl %%eax, %%eax \n\t"
00991 "movb 2(%%esi), %%al \n\t"
00992 "movb %%al, 2(%%edi) \n\t"
00993
00994 "skip24: \n\t"
00995 "addl $3, %%esi \n\t"
00996 "addl $3, %%edi \n\t"
00997 "decl %%ecx \n\t"
00998 "jnz secondloop24 \n\t"
00999
01000 "end24: \n\t"
01001 "EMMS \n\t"
01002
01003 : "=a" (dummy_value_a),
01004 "=d" (dummy_value_d),
01005 "=c" (dummy_value_c),
01006 "=S" (dummy_value_S),
01007 "=D" (dummy_value_D)
01008
01009 : "3" (srcptr),
01010 "4" (dstptr),
01011 "0" (diff),
01012
01013 "2" (len),
01014 "1" (mask)
01015
01016 #if 0
01017 : "%mm0", "%mm1", "%mm2"
01018 , "%mm4", "%mm5", "%mm6", "%mm7"
01019 #endif
01020 );
01021 }
01022 else
01023 #endif
01024 {
01025 register png_uint_32 i;
01026 png_uint_32 initial_val = BPP3 * png_pass_start[png_ptr->pass];
01027
01028 register int stride = BPP3 * png_pass_inc[png_ptr->pass];
01029
01030 register int rep_bytes = BPP3 * png_pass_width[png_ptr->pass];
01031
01032 png_uint_32 len = png_ptr->width &~7;
01033 int diff = (int) (png_ptr->width & 7);
01034 register png_uint_32 final_val = BPP3 * len;
01035
01036 srcptr = png_ptr->row_buf + 1 + initial_val;
01037 dstptr = row + initial_val;
01038
01039 for (i = initial_val; i < final_val; i += stride)
01040 {
01041 png_memcpy(dstptr, srcptr, rep_bytes);
01042 srcptr += stride;
01043 dstptr += stride;
01044 }
01045 if (diff)
01046 {
01047 final_val+=diff*BPP3;
01048 for (; i < final_val; i += stride)
01049 {
01050 if (rep_bytes > (int)(final_val-i))
01051 rep_bytes = (int)(final_val-i);
01052 png_memcpy(dstptr, srcptr, rep_bytes);
01053 srcptr += stride;
01054 dstptr += stride;
01055 }
01056 }
01057 }
01058
01059 break;
01060 }
01061
01062 case 32:
01063 {
01064 png_bytep srcptr;
01065 png_bytep dstptr;
01066
01067 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
01068 #if !defined(PNG_1_0_X)
01069 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
01070 )
01071 #else
01072 if (_mmx_supported)
01073 #endif
01074 {
01075 png_uint_32 len;
01076 int diff;
01077 int dummy_value_a;
01078 int dummy_value_d;
01079 int dummy_value_c;
01080 int dummy_value_S;
01081 int dummy_value_D;
01082 _unmask = ~mask;
01083 srcptr = png_ptr->row_buf + 1;
01084 dstptr = row;
01085 len = png_ptr->width &~7;
01086 diff = (int) (png_ptr->width & 7);
01087
01088 __asm__ __volatile__ (
01089 "movd _unmask, %%mm7 \n\t"
01090 "psubb %%mm6, %%mm6 \n\t"
01091 "punpcklbw %%mm7, %%mm7 \n\t"
01092 "punpcklwd %%mm7, %%mm7 \n\t"
01093 "punpckldq %%mm7, %%mm7 \n\t"
01094
01095 "movq _mask32_0, %%mm0 \n\t"
01096 "movq _mask32_1, %%mm1 \n\t"
01097 "movq _mask32_2, %%mm2 \n\t"
01098 "movq _mask32_3, %%mm3 \n\t"
01099
01100 "pand %%mm7, %%mm0 \n\t"
01101 "pand %%mm7, %%mm1 \n\t"
01102 "pand %%mm7, %%mm2 \n\t"
01103 "pand %%mm7, %%mm3 \n\t"
01104
01105 "pcmpeqb %%mm6, %%mm0 \n\t"
01106 "pcmpeqb %%mm6, %%mm1 \n\t"
01107 "pcmpeqb %%mm6, %%mm2 \n\t"
01108 "pcmpeqb %%mm6, %%mm3 \n\t"
01109
01110
01111
01112
01113
01114 "cmpl $0, %%ecx \n\t"
01115 "jz mainloop32end \n\t"
01116
01117 "mainloop32: \n\t"
01118 "movq (%%esi), %%mm4 \n\t"
01119 "pand %%mm0, %%mm4 \n\t"
01120 "movq %%mm0, %%mm6 \n\t"
01121 "movq (%%edi), %%mm7 \n\t"
01122 "pandn %%mm7, %%mm6 \n\t"
01123 "por %%mm6, %%mm4 \n\t"
01124 "movq %%mm4, (%%edi) \n\t"
01125
01126 "movq 8(%%esi), %%mm5 \n\t"
01127 "pand %%mm1, %%mm5 \n\t"
01128 "movq %%mm1, %%mm7 \n\t"
01129 "movq 8(%%edi), %%mm6 \n\t"
01130 "pandn %%mm6, %%mm7 \n\t"
01131 "por %%mm7, %%mm5 \n\t"
01132 "movq %%mm5, 8(%%edi) \n\t"
01133
01134 "movq 16(%%esi), %%mm6 \n\t"
01135 "pand %%mm2, %%mm6 \n\t"
01136 "movq %%mm2, %%mm4 \n\t"
01137 "movq 16(%%edi), %%mm7 \n\t"
01138 "pandn %%mm7, %%mm4 \n\t"
01139 "por %%mm4, %%mm6 \n\t"
01140 "movq %%mm6, 16(%%edi) \n\t"
01141
01142 "movq 24(%%esi), %%mm7 \n\t"
01143 "pand %%mm3, %%mm7 \n\t"
01144 "movq %%mm3, %%mm5 \n\t"
01145 "movq 24(%%edi), %%mm4 \n\t"
01146 "pandn %%mm4, %%mm5 \n\t"
01147 "por %%mm5, %%mm7 \n\t"
01148 "movq %%mm7, 24(%%edi) \n\t"
01149
01150 "addl $32, %%esi \n\t"
01151 "addl $32, %%edi \n\t"
01152 "subl $8, %%ecx \n\t"
01153 "ja mainloop32 \n\t"
01154
01155 "mainloop32end: \n\t"
01156
01157 "movl %%eax, %%ecx \n\t"
01158 "cmpl $0, %%ecx \n\t"
01159 "jz end32 \n\t"
01160
01161 "sall $24, %%edx \n\t"
01162
01163 "secondloop32: \n\t"
01164 "sall %%edx \n\t"
01165 "jnc skip32 \n\t"
01166 "movl (%%esi), %%eax \n\t"
01167 "movl %%eax, (%%edi) \n\t"
01168
01169 "skip32: \n\t"
01170 "addl $4, %%esi \n\t"
01171 "addl $4, %%edi \n\t"
01172 "decl %%ecx \n\t"
01173 "jnz secondloop32 \n\t"
01174
01175 "end32: \n\t"
01176 "EMMS \n\t"
01177
01178 : "=a" (dummy_value_a),
01179 "=d" (dummy_value_d),
01180 "=c" (dummy_value_c),
01181 "=S" (dummy_value_S),
01182 "=D" (dummy_value_D)
01183
01184 : "3" (srcptr),
01185 "4" (dstptr),
01186 "0" (diff),
01187
01188 "2" (len),
01189 "1" (mask)
01190
01191 #if 0
01192 : "%mm0", "%mm1", "%mm2", "%mm3"
01193 , "%mm4", "%mm5", "%mm6", "%mm7"
01194 #endif
01195 );
01196 }
01197 else
01198 #endif
01199 {
01200 register png_uint_32 i;
01201 png_uint_32 initial_val = BPP4 * png_pass_start[png_ptr->pass];
01202
01203 register int stride = BPP4 * png_pass_inc[png_ptr->pass];
01204
01205 register int rep_bytes = BPP4 * png_pass_width[png_ptr->pass];
01206
01207 png_uint_32 len = png_ptr->width &~7;
01208 int diff = (int) (png_ptr->width & 7);
01209 register png_uint_32 final_val = BPP4 * len;
01210
01211 srcptr = png_ptr->row_buf + 1 + initial_val;
01212 dstptr = row + initial_val;
01213
01214 for (i = initial_val; i < final_val; i += stride)
01215 {
01216 png_memcpy(dstptr, srcptr, rep_bytes);
01217 srcptr += stride;
01218 dstptr += stride;
01219 }
01220 if (diff)
01221 {
01222 final_val+=diff*BPP4;
01223 for (; i < final_val; i += stride)
01224 {
01225 if (rep_bytes > (int)(final_val-i))
01226 rep_bytes = (int)(final_val-i);
01227 png_memcpy(dstptr, srcptr, rep_bytes);
01228 srcptr += stride;
01229 dstptr += stride;
01230 }
01231 }
01232 }
01233
01234 break;
01235 }
01236
01237 case 48:
01238 {
01239 png_bytep srcptr;
01240 png_bytep dstptr;
01241
01242 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
01243 #if !defined(PNG_1_0_X)
01244 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
01245 )
01246 #else
01247 if (_mmx_supported)
01248 #endif
01249 {
01250 png_uint_32 len;
01251 int diff;
01252 int dummy_value_a;
01253 int dummy_value_d;
01254 int dummy_value_c;
01255 int dummy_value_S;
01256 int dummy_value_D;
01257 _unmask = ~mask;
01258 srcptr = png_ptr->row_buf + 1;
01259 dstptr = row;
01260 len = png_ptr->width &~7;
01261 diff = (int) (png_ptr->width & 7);
01262
01263 __asm__ __volatile__ (
01264 "movd _unmask, %%mm7 \n\t"
01265 "psubb %%mm6, %%mm6 \n\t"
01266 "punpcklbw %%mm7, %%mm7 \n\t"
01267 "punpcklwd %%mm7, %%mm7 \n\t"
01268 "punpckldq %%mm7, %%mm7 \n\t"
01269
01270 "movq _mask48_0, %%mm0 \n\t"
01271 "movq _mask48_1, %%mm1 \n\t"
01272 "movq _mask48_2, %%mm2 \n\t"
01273 "movq _mask48_3, %%mm3 \n\t"
01274 "movq _mask48_4, %%mm4 \n\t"
01275 "movq _mask48_5, %%mm5 \n\t"
01276
01277 "pand %%mm7, %%mm0 \n\t"
01278 "pand %%mm7, %%mm1 \n\t"
01279 "pand %%mm7, %%mm2 \n\t"
01280 "pand %%mm7, %%mm3 \n\t"
01281 "pand %%mm7, %%mm4 \n\t"
01282 "pand %%mm7, %%mm5 \n\t"
01283
01284 "pcmpeqb %%mm6, %%mm0 \n\t"
01285 "pcmpeqb %%mm6, %%mm1 \n\t"
01286 "pcmpeqb %%mm6, %%mm2 \n\t"
01287 "pcmpeqb %%mm6, %%mm3 \n\t"
01288 "pcmpeqb %%mm6, %%mm4 \n\t"
01289 "pcmpeqb %%mm6, %%mm5 \n\t"
01290
01291
01292
01293
01294
01295 "cmpl $0, %%ecx \n\t"
01296 "jz mainloop48end \n\t"
01297
01298 "mainloop48: \n\t"
01299 "movq (%%esi), %%mm7 \n\t"
01300 "pand %%mm0, %%mm7 \n\t"
01301 "movq %%mm0, %%mm6 \n\t"
01302 "pandn (%%edi), %%mm6 \n\t"
01303 "por %%mm6, %%mm7 \n\t"
01304 "movq %%mm7, (%%edi) \n\t"
01305
01306 "movq 8(%%esi), %%mm6 \n\t"
01307 "pand %%mm1, %%mm6 \n\t"
01308 "movq %%mm1, %%mm7 \n\t"
01309 "pandn 8(%%edi), %%mm7 \n\t"
01310 "por %%mm7, %%mm6 \n\t"
01311 "movq %%mm6, 8(%%edi) \n\t"
01312
01313 "movq 16(%%esi), %%mm6 \n\t"
01314 "pand %%mm2, %%mm6 \n\t"
01315 "movq %%mm2, %%mm7 \n\t"
01316 "pandn 16(%%edi), %%mm7 \n\t"
01317 "por %%mm7, %%mm6 \n\t"
01318 "movq %%mm6, 16(%%edi) \n\t"
01319
01320 "movq 24(%%esi), %%mm7 \n\t"
01321 "pand %%mm3, %%mm7 \n\t"
01322 "movq %%mm3, %%mm6 \n\t"
01323 "pandn 24(%%edi), %%mm6 \n\t"
01324 "por %%mm6, %%mm7 \n\t"
01325 "movq %%mm7, 24(%%edi) \n\t"
01326
01327 "movq 32(%%esi), %%mm6 \n\t"
01328 "pand %%mm4, %%mm6 \n\t"
01329 "movq %%mm4, %%mm7 \n\t"
01330 "pandn 32(%%edi), %%mm7 \n\t"
01331 "por %%mm7, %%mm6 \n\t"
01332 "movq %%mm6, 32(%%edi) \n\t"
01333
01334 "movq 40(%%esi), %%mm7 \n\t"
01335 "pand %%mm5, %%mm7 \n\t"
01336 "movq %%mm5, %%mm6 \n\t"
01337 "pandn 40(%%edi), %%mm6 \n\t"
01338 "por %%mm6, %%mm7 \n\t"
01339 "movq %%mm7, 40(%%edi) \n\t"
01340
01341 "addl $48, %%esi \n\t"
01342 "addl $48, %%edi \n\t"
01343 "subl $8, %%ecx \n\t"
01344
01345 "ja mainloop48 \n\t"
01346
01347 "mainloop48end: \n\t"
01348
01349 "movl %%eax, %%ecx \n\t"
01350 "cmpl $0, %%ecx \n\t"
01351 "jz end48 \n\t"
01352
01353 "sall $24, %%edx \n\t"
01354
01355 "secondloop48: \n\t"
01356 "sall %%edx \n\t"
01357 "jnc skip48 \n\t"
01358 "movl (%%esi), %%eax \n\t"
01359 "movl %%eax, (%%edi) \n\t"
01360
01361 "skip48: \n\t"
01362 "addl $4, %%esi \n\t"
01363 "addl $4, %%edi \n\t"
01364 "decl %%ecx \n\t"
01365 "jnz secondloop48 \n\t"
01366
01367 "end48: \n\t"
01368 "EMMS \n\t"
01369
01370 : "=a" (dummy_value_a),
01371 "=d" (dummy_value_d),
01372 "=c" (dummy_value_c),
01373 "=S" (dummy_value_S),
01374 "=D" (dummy_value_D)
01375
01376 : "3" (srcptr),
01377 "4" (dstptr),
01378 "0" (diff),
01379
01380 "2" (len),
01381 "1" (mask)
01382
01383 #if 0
01384 : "%mm0", "%mm1", "%mm2", "%mm3"
01385 , "%mm4", "%mm5", "%mm6", "%mm7"
01386 #endif
01387 );
01388 }
01389 else
01390 #endif
01391 {
01392 register png_uint_32 i;
01393 png_uint_32 initial_val = BPP6 * png_pass_start[png_ptr->pass];
01394
01395 register int stride = BPP6 * png_pass_inc[png_ptr->pass];
01396
01397 register int rep_bytes = BPP6 * png_pass_width[png_ptr->pass];
01398
01399 png_uint_32 len = png_ptr->width &~7;
01400 int diff = (int) (png_ptr->width & 7);
01401 register png_uint_32 final_val = BPP6 * len;
01402
01403 srcptr = png_ptr->row_buf + 1 + initial_val;
01404 dstptr = row + initial_val;
01405
01406 for (i = initial_val; i < final_val; i += stride)
01407 {
01408 png_memcpy(dstptr, srcptr, rep_bytes);
01409 srcptr += stride;
01410 dstptr += stride;
01411 }
01412 if (diff)
01413 {
01414 final_val+=diff*BPP6;
01415 for (; i < final_val; i += stride)
01416 {
01417 if (rep_bytes > (int)(final_val-i))
01418 rep_bytes = (int)(final_val-i);
01419 png_memcpy(dstptr, srcptr, rep_bytes);
01420 srcptr += stride;
01421 dstptr += stride;
01422 }
01423 }
01424 }
01425
01426 break;
01427 }
01428
01429 case 64:
01430 {
01431 png_bytep srcptr;
01432 png_bytep dstptr;
01433 register png_uint_32 i;
01434 png_uint_32 initial_val = BPP8 * png_pass_start[png_ptr->pass];
01435
01436 register int stride = BPP8 * png_pass_inc[png_ptr->pass];
01437
01438 register int rep_bytes = BPP8 * png_pass_width[png_ptr->pass];
01439
01440 png_uint_32 len = png_ptr->width &~7;
01441 int diff = (int) (png_ptr->width & 7);
01442 register png_uint_32 final_val = BPP8 * len;
01443
01444 srcptr = png_ptr->row_buf + 1 + initial_val;
01445 dstptr = row + initial_val;
01446
01447 for (i = initial_val; i < final_val; i += stride)
01448 {
01449 png_memcpy(dstptr, srcptr, rep_bytes);
01450 srcptr += stride;
01451 dstptr += stride;
01452 }
01453 if (diff)
01454 {
01455 final_val+=diff*BPP8;
01456 for (; i < final_val; i += stride)
01457 {
01458 if (rep_bytes > (int)(final_val-i))
01459 rep_bytes = (int)(final_val-i);
01460 png_memcpy(dstptr, srcptr, rep_bytes);
01461 srcptr += stride;
01462 dstptr += stride;
01463 }
01464 }
01465
01466 break;
01467 }
01468
01469 default:
01470 {
01471
01472 png_warning(png_ptr, "Invalid row_info.pixel_depth in pnggccrd");
01473 break;
01474 }
01475 }
01476
01477 }
01478
01479 }
01480
01481 #endif
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492 #if defined(PNG_READ_INTERLACING_SUPPORTED)
01493 #if defined(PNG_HAVE_ASSEMBLER_READ_INTERLACE)
01494 /* png_do_read_interlace(): widen the row held in png_ptr->row_buf for the */
01495 /* current interlace pass by writing every pixel png_pass_inc[pass] times. */
01496 /* Source and destination are the same buffer, so all loops run from the */
01497 /* last pixel towards the first.  On return row_info->width and */
01498 /* row_info->rowbytes describe the widened row.  Returns nothing. */
01499 void
01500 png_do_read_interlace(png_structp png_ptr)
01501 {
01502 png_row_infop row_info = &(png_ptr->row_info);
01503 png_bytep row = png_ptr->row_buf + 1; /* pixel data is taken to start at row_buf[1] */
01504 int pass = png_ptr->pass;
01505 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
01506 png_uint_32 transformations = png_ptr->transformations;
01507 #endif
01508
01509 png_debug(1, "in png_do_read_interlace (pnggccrd.c)\n");
01510
01511 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
01512 if (_mmx_supported == 2) {
01513 #if !defined(PNG_1_0_X)
01514 /* presumably 2 means MMX detection has not run yet (defined outside this view) */
01515 png_warning(png_ptr, "asm_flags may not have been initialized");
01516 #endif
01517 png_mmx_support();
01518 }
01519 #endif
01520
01521 if (row != NULL && row_info != NULL)
01522 {
01523 png_uint_32 final_width;
01524
01525 final_width = row_info->width * png_pass_inc[pass];
01526
01527 switch (row_info->pixel_depth)
01528 {
01529 case 1:
01530 {
01531 png_bytep sp, dp;
01532 int sshift, dshift;
01533 int s_start, s_end, s_inc;
01534 png_byte v;
01535 png_uint_32 i;
01536 int j;
01537 /* sp and dp start at the bytes holding the last source and last output pixel */
01538 sp = row + (png_size_t)((row_info->width - 1) >> 3);
01539 dp = row + (png_size_t)((final_width - 1) >> 3);
01540 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
01541 if (transformations & PNG_PACKSWAP)
01542 {
01543 sshift = (int)((row_info->width + 7) & 7);
01544 dshift = (int)((final_width + 7) & 7);
01545 s_start = 7;
01546 s_end = 0;
01547 s_inc = -1;
01548 }
01549 else
01550 #endif
01551 {
01552 sshift = 7 - (int)((row_info->width + 7) & 7);
01553 dshift = 7 - (int)((final_width + 7) & 7);
01554 s_start = 0;
01555 s_end = 7;
01556 s_inc = 1;
01557 }
01558 /* walk the source pixels from last to first */
01559 for (i = row_info->width; i; i--)
01560 {
01561 v = (png_byte)((*sp >> sshift) & 0x1);
01562 for (j = 0; j < png_pass_inc[pass]; j++)
01563 {
01564 *dp &= (png_byte)((0x7f7f >> (7 - dshift)) & 0xff); /* clear the bit at dshift */
01565 *dp |= (png_byte)(v << dshift);
01566 if (dshift == s_end)
01567 {
01568 dshift = s_start;
01569 dp--;
01570 }
01571 else
01572 dshift += s_inc;
01573 }
01574 if (sshift == s_end)
01575 {
01576 sshift = s_start;
01577 sp--;
01578 }
01579 else
01580 sshift += s_inc;
01581 }
01582 break;
01583 }
01584
01585 case 2:
01586 {
01587 png_bytep sp, dp;
01588 int sshift, dshift;
01589 int s_start, s_end, s_inc;
01590 png_uint_32 i;
01591 /* same scheme as depth 1, with four 2-bit pixels per byte */
01592 sp = row + (png_size_t)((row_info->width - 1) >> 2);
01593 dp = row + (png_size_t)((final_width - 1) >> 2);
01594 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
01595 if (transformations & PNG_PACKSWAP)
01596 {
01597 sshift = (png_size_t)(((row_info->width + 3) & 3) << 1);
01598 dshift = (png_size_t)(((final_width + 3) & 3) << 1);
01599 s_start = 6;
01600 s_end = 0;
01601 s_inc = -2;
01602 }
01603 else
01604 #endif
01605 {
01606 sshift = (png_size_t)((3 - ((row_info->width + 3) & 3)) << 1);
01607 dshift = (png_size_t)((3 - ((final_width + 3) & 3)) << 1);
01608 s_start = 0;
01609 s_end = 6;
01610 s_inc = 2;
01611 }
01612
01613 for (i = row_info->width; i; i--)
01614 {
01615 png_byte v;
01616 int j;
01617
01618 v = (png_byte)((*sp >> sshift) & 0x3);
01619 for (j = 0; j < png_pass_inc[pass]; j++)
01620 {
01621 *dp &= (png_byte)((0x3f3f >> (6 - dshift)) & 0xff); /* clear the two bits at dshift */
01622 *dp |= (png_byte)(v << dshift);
01623 if (dshift == s_end)
01624 {
01625 dshift = s_start;
01626 dp--;
01627 }
01628 else
01629 dshift += s_inc;
01630 }
01631 if (sshift == s_end)
01632 {
01633 sshift = s_start;
01634 sp--;
01635 }
01636 else
01637 sshift += s_inc;
01638 }
01639 break;
01640 }
01641
01642 case 4:
01643 {
01644 png_bytep sp, dp;
01645 int sshift, dshift;
01646 int s_start, s_end, s_inc;
01647 png_uint_32 i;
01648 /* same scheme again, with two 4-bit pixels per byte */
01649 sp = row + (png_size_t)((row_info->width - 1) >> 1);
01650 dp = row + (png_size_t)((final_width - 1) >> 1);
01651 #if defined(PNG_READ_PACKSWAP_SUPPORTED)
01652 if (transformations & PNG_PACKSWAP)
01653 {
01654 sshift = (png_size_t)(((row_info->width + 1) & 1) << 2);
01655 dshift = (png_size_t)(((final_width + 1) & 1) << 2);
01656 s_start = 4;
01657 s_end = 0;
01658 s_inc = -4;
01659 }
01660 else
01661 #endif
01662 {
01663 sshift = (png_size_t)((1 - ((row_info->width + 1) & 1)) << 2);
01664 dshift = (png_size_t)((1 - ((final_width + 1) & 1)) << 2);
01665 s_start = 0;
01666 s_end = 4;
01667 s_inc = 4;
01668 }
01669
01670 for (i = row_info->width; i; i--)
01671 {
01672 png_byte v;
01673 int j;
01674
01675 v = (png_byte)((*sp >> sshift) & 0xf);
01676 for (j = 0; j < png_pass_inc[pass]; j++)
01677 {
01678 *dp &= (png_byte)((0xf0f >> (4 - dshift)) & 0xff); /* clear the nibble at dshift */
01679 *dp |= (png_byte)(v << dshift);
01680 if (dshift == s_end)
01681 {
01682 dshift = s_start;
01683 dp--;
01684 }
01685 else
01686 dshift += s_inc;
01687 }
01688 if (sshift == s_end)
01689 {
01690 sshift = s_start;
01691 sp--;
01692 }
01693 else
01694 sshift += s_inc;
01695 }
01696 break;
01697 }
01698
01699 /* every other depth is handled as whole-byte pixels of pixel_depth >> 3 bytes */
01700
01701 default:
01702 {
01703 #if 0
01704
01705
01706
01707
01708 #endif
01709 png_bytep sptr, dp;
01710 png_uint_32 i;
01711 png_size_t pixel_bytes;
01712 int width = (int)row_info->width;
01713
01714 pixel_bytes = (row_info->pixel_depth >> 3);
01715
01716 /* sptr: first byte of the last source pixel */
01717 sptr = row + (width - 1) * pixel_bytes;
01718
01719 /* dp: first byte of the last pixel of the widened row */
01720 dp = row + (final_width - 1) * pixel_bytes;
01721
01722 /* MMX path: chosen by asm_flags, or by _mmx_supported when PNG_1_0_X is defined */
01723
01724 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
01725 #if !defined(PNG_1_0_X)
01726 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_INTERLACE)
01727 )
01728 #else
01729 if (_mmx_supported)
01730 #endif
01731 {
01732 /* 3-byte pixels */
01733 if (pixel_bytes == 3)
01734 {
01735 if (((pass == 0) || (pass == 1)) && width)
01736 {
01737 int dummy_value_c;
01738 int dummy_value_S;
01739 int dummy_value_D;
01740
01741 __asm__ __volatile__ (
01742 "subl $21, %%edi \n\t"
01743 /* edi now addresses the last 24 output bytes.  Each iteration loads one */
01744 /* source pixel (4-byte movd, so 1 byte past it) and stores 24 bytes at edi. */
01745 ".loop3_pass0: \n\t"
01746 "movd (%%esi), %%mm0 \n\t"
01747 "pand _const4, %%mm0 \n\t"
01748 "movq %%mm0, %%mm1 \n\t"
01749 "psllq $16, %%mm0 \n\t"
01750 "movq %%mm0, %%mm2 \n\t"
01751 "psllq $24, %%mm0 \n\t"
01752 "psrlq $8, %%mm1 \n\t"
01753 "por %%mm2, %%mm0 \n\t"
01754 "por %%mm1, %%mm0 \n\t"
01755 "movq %%mm0, %%mm3 \n\t"
01756 "psllq $16, %%mm0 \n\t"
01757 "movq %%mm3, %%mm4 \n\t"
01758 "punpckhdq %%mm0, %%mm3 \n\t"
01759 "movq %%mm4, 16(%%edi) \n\t"
01760 "psrlq $32, %%mm0 \n\t"
01761 "movq %%mm3, 8(%%edi) \n\t"
01762 "punpckldq %%mm4, %%mm0 \n\t"
01763 "subl $3, %%esi \n\t"
01764 "movq %%mm0, (%%edi) \n\t"
01765 "subl $24, %%edi \n\t"
01766 "decl %%ecx \n\t"
01767 "jnz .loop3_pass0 \n\t"
01768 "EMMS \n\t"
01769 /* outputs: dummies that mark ecx, esi and edi as modified */
01770 : "=c" (dummy_value_c),
01771 "=S" (dummy_value_S),
01772 "=D" (dummy_value_D)
01773 /* inputs: esi = sptr, edi = dp, ecx = width (matching constraints) */
01774 : "1" (sptr),
01775 "2" (dp),
01776 "0" (width),
01777 "rim" (_const4)
01778 /* _const4 is listed as an operand, but the template names the symbol directly */
01779 #if 0
01780 : "%mm0", "%mm1", "%mm2"
01781 , "%mm3", "%mm4"
01782 #endif
01783 );
01784 }
01785 else if (((pass == 2) || (pass == 3)) && width)
01786 {
01787 int dummy_value_c;
01788 int dummy_value_S;
01789 int dummy_value_D;
01790
01791 __asm__ __volatile__ (
01792 "subl $9, %%edi \n\t"
01793 /* edi now addresses the last 12 output bytes; each iteration loads one */
01794 /* source pixel and stores 12 bytes (movq at edi+4, movd at edi). */
01795 ".loop3_pass2: \n\t"
01796 "movd (%%esi), %%mm0 \n\t"
01797 "pand _const4, %%mm0 \n\t"
01798 "movq %%mm0, %%mm1 \n\t"
01799 "psllq $16, %%mm0 \n\t"
01800 "movq %%mm0, %%mm2 \n\t"
01801 "psllq $24, %%mm0 \n\t"
01802 "psrlq $8, %%mm1 \n\t"
01803 "por %%mm2, %%mm0 \n\t"
01804 "por %%mm1, %%mm0 \n\t"
01805 "movq %%mm0, 4(%%edi) \n\t"
01806 "psrlq $16, %%mm0 \n\t"
01807 "subl $3, %%esi \n\t"
01808 "movd %%mm0, (%%edi) \n\t"
01809 "subl $12, %%edi \n\t"
01810 "decl %%ecx \n\t"
01811 "jnz .loop3_pass2 \n\t"
01812 "EMMS \n\t"
01813
01814 : "=c" (dummy_value_c),
01815 "=S" (dummy_value_S),
01816 "=D" (dummy_value_D)
01817
01818 : "1" (sptr),
01819 "2" (dp),
01820 "0" (width),
01821 "rim" (_const4)
01822
01823 #if 0
01824 : "%mm0", "%mm1", "%mm2"
01825 #endif
01826 );
01827 }
01828 else if (width) /* any other pass */
01829 {
01830 int width_mmx = ((width >> 1) << 1) - 8;
01831 if (width_mmx < 0)
01832 width_mmx = 0;
01833 width -= width_mmx; /* 8 pixels (9 if width is odd) stay with the C loop when MMX runs */
01834 if (width_mmx)
01835 {
01836 /* Two pixels per iteration, 12 bytes stored per iteration. */
01837 /* NOTE(review): this branch is also reached for pass 6, where png_pass_inc */
01838 /* is 1; presumably callers never expand pass 6 rows - confirm. */
01839 int dummy_value_c;
01840 int dummy_value_S;
01841 int dummy_value_D;
01842
01843 __asm__ __volatile__ (
01844 "subl $3, %%esi \n\t"
01845 "subl $9, %%edi \n\t"
01846 /* esi: second-to-last source pixel; edi: last 12 output bytes. */
01847 /* The movq load reads 8 bytes, i.e. 2 bytes past the two pixels it uses. */
01848 ".loop3_pass4: \n\t"
01849 "movq (%%esi), %%mm0 \n\t"
01850 "movq %%mm0, %%mm1 \n\t"
01851 "movq %%mm0, %%mm2 \n\t"
01852 "psllq $24, %%mm0 \n\t"
01853 "pand _const4, %%mm1 \n\t"
01854 "psrlq $24, %%mm2 \n\t"
01855 "por %%mm1, %%mm0 \n\t"
01856 "movq %%mm2, %%mm3 \n\t"
01857 "psllq $8, %%mm2 \n\t"
01858 "movq %%mm0, (%%edi) \n\t"
01859 "psrlq $16, %%mm3 \n\t"
01860 "pand _const6, %%mm3 \n\t"
01861 "por %%mm3, %%mm2 \n\t"
01862 "subl $6, %%esi \n\t"
01863 "movd %%mm2, 8(%%edi) \n\t"
01864 "subl $12, %%edi \n\t"
01865 "subl $2, %%ecx \n\t"
01866 "jnz .loop3_pass4 \n\t"
01867 "EMMS \n\t"
01868
01869 : "=c" (dummy_value_c),
01870 "=S" (dummy_value_S),
01871 "=D" (dummy_value_D)
01872
01873 : "1" (sptr),
01874 "2" (dp),
01875 "0" (width_mmx),
01876 "rim" (_const4),
01877 "rim" (_const6)
01878
01879 #if 0
01880 : "%mm0", "%mm1"
01881 , "%mm2", "%mm3"
01882 #endif
01883 );
01884 }
01885 /* skip what the asm handled, then finish the leftover pixels in C */
01886 sptr -= width_mmx*3;
01887 dp -= width_mmx*6;
01888 for (i = width; i; i--)
01889 {
01890 png_byte v[8];
01891 int j;
01892
01893 png_memcpy(v, sptr, 3);
01894 for (j = 0; j < png_pass_inc[pass]; j++)
01895 {
01896 png_memcpy(dp, v, 3);
01897 dp -= 3;
01898 }
01899 sptr -= 3;
01900 }
01901 }
01902 }
01903
01904 /* 1-byte pixels */
01905 else if (pixel_bytes == 1)
01906 {
01907 if (((pass == 0) || (pass == 1)) && width)
01908 {
01909 int width_mmx = ((width >> 2) << 2); /* MMX handles a multiple of 4 pixels */
01910 width -= width_mmx;
01911 if (width_mmx)
01912 {
01913 int dummy_value_c;
01914 int dummy_value_S;
01915 int dummy_value_D;
01916
01917 __asm__ __volatile__ (
01918 "subl $3, %%esi \n\t"
01919 "subl $31, %%edi \n\t"
01920 /* per iteration: 4 source bytes in, 32 bytes out (each byte 8 times) */
01921 ".loop1_pass0: \n\t"
01922 "movd (%%esi), %%mm0 \n\t"
01923 "movq %%mm0, %%mm1 \n\t"
01924 "punpcklbw %%mm0, %%mm0 \n\t"
01925 "movq %%mm0, %%mm2 \n\t"
01926 "punpcklwd %%mm0, %%mm0 \n\t"
01927 "movq %%mm0, %%mm3 \n\t"
01928 "punpckldq %%mm0, %%mm0 \n\t"
01929 "punpckhdq %%mm3, %%mm3 \n\t"
01930 "movq %%mm0, (%%edi) \n\t"
01931 "punpckhwd %%mm2, %%mm2 \n\t"
01932 "movq %%mm3, 8(%%edi) \n\t"
01933 "movq %%mm2, %%mm4 \n\t"
01934 "punpckldq %%mm2, %%mm2 \n\t"
01935 "punpckhdq %%mm4, %%mm4 \n\t"
01936 "movq %%mm2, 16(%%edi) \n\t"
01937 "subl $4, %%esi \n\t"
01938 "movq %%mm4, 24(%%edi) \n\t"
01939 "subl $32, %%edi \n\t"
01940 "subl $4, %%ecx \n\t"
01941 "jnz .loop1_pass0 \n\t"
01942 "EMMS \n\t"
01943
01944 : "=c" (dummy_value_c),
01945 "=S" (dummy_value_S),
01946 "=D" (dummy_value_D)
01947
01948 : "1" (sptr),
01949 "2" (dp),
01950 "0" (width_mmx)
01951
01952 #if 0
01953 : "%mm0", "%mm1", "%mm2"
01954 , "%mm3", "%mm4"
01955 #endif
01956 );
01957 }
01958 /* skip what the asm handled, then finish the 0 to 3 leftover pixels in C */
01959 sptr -= width_mmx;
01960 dp -= width_mmx*8;
01961 for (i = width; i; i--)
01962 {
01963 int j;
01964
01965
01966
01967
01968
01969
01970
01971
01972
01973
01974
01975
01976
01977
01978
01979
01980
01981
01982
01983 for (j = 0; j < png_pass_inc[pass]; j++)
01984 {
01985 *dp-- = *sptr;
01986 }
01987 --sptr;
01988 }
01989 }
01990 else if (((pass == 2) || (pass == 3)) && width)
01991 {
01992 int width_mmx = ((width >> 2) << 2);
01993 width -= width_mmx;
01994 if (width_mmx)
01995 {
01996 int dummy_value_c;
01997 int dummy_value_S;
01998 int dummy_value_D;
01999
02000 __asm__ __volatile__ (
02001 "subl $3, %%esi \n\t"
02002 "subl $15, %%edi \n\t"
02003 /* per iteration: 4 source bytes in, 16 bytes out (each byte 4 times) */
02004 ".loop1_pass2: \n\t"
02005 "movd (%%esi), %%mm0 \n\t"
02006 "punpcklbw %%mm0, %%mm0 \n\t"
02007 "movq %%mm0, %%mm1 \n\t"
02008 "punpcklwd %%mm0, %%mm0 \n\t"
02009 "punpckhwd %%mm1, %%mm1 \n\t"
02010 "movq %%mm0, (%%edi) \n\t"
02011 "subl $4, %%esi \n\t"
02012 "movq %%mm1, 8(%%edi) \n\t"
02013 "subl $16, %%edi \n\t"
02014 "subl $4, %%ecx \n\t"
02015 "jnz .loop1_pass2 \n\t"
02016 "EMMS \n\t"
02017
02018 : "=c" (dummy_value_c),
02019 "=S" (dummy_value_S),
02020 "=D" (dummy_value_D)
02021
02022 : "1" (sptr),
02023 "2" (dp),
02024 "0" (width_mmx)
02025
02026 #if 0
02027 : "%mm0", "%mm1"
02028 #endif
02029 );
02030 }
02031
02032 sptr -= width_mmx;
02033 dp -= width_mmx*4;
02034 for (i = width; i; i--)
02035 {
02036 int j;
02037
02038 for (j = 0; j < png_pass_inc[pass]; j++)
02039 {
02040 *dp-- = *sptr;
02041 }
02042 --sptr;
02043 }
02044 }
02045 else if (width) /* any other pass */
02046 {
02047 int width_mmx = ((width >> 3) << 3); /* MMX handles a multiple of 8 pixels */
02048 width -= width_mmx;
02049 if (width_mmx)
02050 {
02051 int dummy_value_c;
02052 int dummy_value_S;
02053 int dummy_value_D;
02054
02055 __asm__ __volatile__ (
02056 "subl $7, %%esi \n\t"
02057 "subl $15, %%edi \n\t"
02058 /* per iteration: 8 source bytes in, 16 bytes out (each byte twice) */
02059 ".loop1_pass4: \n\t"
02060 "movq (%%esi), %%mm0 \n\t"
02061 "movq %%mm0, %%mm1 \n\t"
02062 "punpcklbw %%mm0, %%mm0 \n\t"
02063 "punpckhbw %%mm1, %%mm1 \n\t"
02064 "movq %%mm1, 8(%%edi) \n\t"
02065 "subl $8, %%esi \n\t"
02066 "movq %%mm0, (%%edi) \n\t"
02067 "subl $16, %%edi \n\t"
02068 "subl $8, %%ecx \n\t"
02069 "jnz .loop1_pass4 \n\t"
02070 "EMMS \n\t"
02071
02072 : "=c" (dummy_value_c),
02073 "=S" (dummy_value_S),
02074 "=D" (dummy_value_D)
02075
02076 : "1" (sptr),
02077 "2" (dp),
02078 "0" (width_mmx)
02079
02080 #if 0
02081 : "%mm0", "%mm1"
02082 #endif
02083 );
02084 }
02085
02086 sptr -= width_mmx;
02087 dp -= width_mmx*2;
02088 for (i = width; i; i--)
02089 {
02090 int j;
02091
02092 for (j = 0; j < png_pass_inc[pass]; j++)
02093 {
02094 *dp-- = *sptr;
02095 }
02096 --sptr;
02097 }
02098 }
02099 }
02100
02101 /* 2-byte pixels */
02102 else if (pixel_bytes == 2)
02103 {
02104 if (((pass == 0) || (pass == 1)) && width)
02105 {
02106 int width_mmx = ((width >> 1) << 1); /* MMX handles an even number of pixels */
02107 width -= width_mmx;
02108 if (width_mmx)
02109 {
02110 int dummy_value_c;
02111 int dummy_value_S;
02112 int dummy_value_D;
02113
02114 __asm__ __volatile__ (
02115 "subl $2, %%esi \n\t"
02116 "subl $30, %%edi \n\t"
02117 /* per iteration: 2 source pixels in, 32 bytes out (each pixel 8 times) */
02118 ".loop2_pass0: \n\t"
02119 "movd (%%esi), %%mm0 \n\t"
02120 "punpcklwd %%mm0, %%mm0 \n\t"
02121 "movq %%mm0, %%mm1 \n\t"
02122 "punpckldq %%mm0, %%mm0 \n\t"
02123 "punpckhdq %%mm1, %%mm1 \n\t"
02124 "movq %%mm0, (%%edi) \n\t"
02125 "movq %%mm0, 8(%%edi) \n\t"
02126 "movq %%mm1, 16(%%edi) \n\t"
02127 "subl $4, %%esi \n\t"
02128 "movq %%mm1, 24(%%edi) \n\t"
02129 "subl $32, %%edi \n\t"
02130 "subl $2, %%ecx \n\t"
02131 "jnz .loop2_pass0 \n\t"
02132 "EMMS \n\t"
02133
02134 : "=c" (dummy_value_c),
02135 "=S" (dummy_value_S),
02136 "=D" (dummy_value_D)
02137
02138 : "1" (sptr),
02139 "2" (dp),
02140 "0" (width_mmx)
02141
02142 #if 0
02143 : "%mm0", "%mm1"
02144 #endif
02145 );
02146 }
02147 /* leave both pointers one pixel high: the loop below decrements before use */
02148 sptr -= (width_mmx*2 - 2);
02149 dp -= (width_mmx*16 - 2);
02150 for (i = width; i; i--)
02151 {
02152 png_byte v[8];
02153 int j;
02154 sptr -= 2;
02155 png_memcpy(v, sptr, 2);
02156 for (j = 0; j < png_pass_inc[pass]; j++)
02157 {
02158 dp -= 2;
02159 png_memcpy(dp, v, 2);
02160 }
02161 }
02162 }
02163 else if (((pass == 2) || (pass == 3)) && width)
02164 {
02165 int width_mmx = ((width >> 1) << 1) ;
02166 width -= width_mmx;
02167 if (width_mmx)
02168 {
02169 int dummy_value_c;
02170 int dummy_value_S;
02171 int dummy_value_D;
02172
02173 __asm__ __volatile__ (
02174 "subl $2, %%esi \n\t"
02175 "subl $14, %%edi \n\t"
02176 /* per iteration: 2 source pixels in, 16 bytes out (each pixel 4 times) */
02177 ".loop2_pass2: \n\t"
02178 "movd (%%esi), %%mm0 \n\t"
02179 "punpcklwd %%mm0, %%mm0 \n\t"
02180 "movq %%mm0, %%mm1 \n\t"
02181 "punpckldq %%mm0, %%mm0 \n\t"
02182 "punpckhdq %%mm1, %%mm1 \n\t"
02183 "movq %%mm0, (%%edi) \n\t"
02184 "subl $4, %%esi \n\t"
02185 "movq %%mm1, 8(%%edi) \n\t"
02186 "subl $16, %%edi \n\t"
02187 "subl $2, %%ecx \n\t"
02188 "jnz .loop2_pass2 \n\t"
02189 "EMMS \n\t"
02190
02191 : "=c" (dummy_value_c),
02192 "=S" (dummy_value_S),
02193 "=D" (dummy_value_D)
02194
02195 : "1" (sptr),
02196 "2" (dp),
02197 "0" (width_mmx)
02198
02199 #if 0
02200 : "%mm0", "%mm1"
02201 #endif
02202 );
02203 }
02204
02205 sptr -= (width_mmx*2 - 2);
02206 dp -= (width_mmx*8 - 2);
02207 for (i = width; i; i--)
02208 {
02209 png_byte v[8];
02210 int j;
02211 sptr -= 2;
02212 png_memcpy(v, sptr, 2);
02213 for (j = 0; j < png_pass_inc[pass]; j++)
02214 {
02215 dp -= 2;
02216 png_memcpy(dp, v, 2);
02217 }
02218 }
02219 }
02220 else if (width) /* any other pass */
02221 {
02222 int width_mmx = ((width >> 1) << 1) ;
02223 width -= width_mmx;
02224 if (width_mmx)
02225 {
02226 int dummy_value_c;
02227 int dummy_value_S;
02228 int dummy_value_D;
02229
02230 __asm__ __volatile__ (
02231 "subl $2, %%esi \n\t"
02232 "subl $6, %%edi \n\t"
02233 /* per iteration: 2 source pixels in, 8 bytes out (each pixel twice) */
02234 ".loop2_pass4: \n\t"
02235 "movd (%%esi), %%mm0 \n\t"
02236 "punpcklwd %%mm0, %%mm0 \n\t"
02237 "subl $4, %%esi \n\t"
02238 "movq %%mm0, (%%edi) \n\t"
02239 "subl $8, %%edi \n\t"
02240 "subl $2, %%ecx \n\t"
02241 "jnz .loop2_pass4 \n\t"
02242 "EMMS \n\t"
02243
02244 : "=c" (dummy_value_c),
02245 "=S" (dummy_value_S),
02246 "=D" (dummy_value_D)
02247
02248 : "1" (sptr),
02249 "2" (dp),
02250 "0" (width_mmx)
02251
02252 #if 0
02253 : "%mm0"
02254 #endif
02255 );
02256 }
02257
02258 sptr -= (width_mmx*2 - 2);
02259 dp -= (width_mmx*4 - 2);
02260 for (i = width; i; i--)
02261 {
02262 png_byte v[8];
02263 int j;
02264 sptr -= 2;
02265 png_memcpy(v, sptr, 2);
02266 for (j = 0; j < png_pass_inc[pass]; j++)
02267 {
02268 dp -= 2;
02269 png_memcpy(dp, v, 2);
02270 }
02271 }
02272 }
02273 }
02274
02275 /* 4-byte pixels */
02276 else if (pixel_bytes == 4)
02277 {
02278 if (((pass == 0) || (pass == 1)) && width)
02279 {
02280 int width_mmx = ((width >> 1) << 1);
02281 width -= width_mmx;
02282 if (width_mmx)
02283 {
02284 int dummy_value_c;
02285 int dummy_value_S;
02286 int dummy_value_D;
02287
02288 __asm__ __volatile__ (
02289 "subl $4, %%esi \n\t"
02290 "subl $60, %%edi \n\t"
02291 /* per iteration: 2 source pixels in, 64 bytes out (each pixel 8 times) */
02292 ".loop4_pass0: \n\t"
02293 "movq (%%esi), %%mm0 \n\t"
02294 "movq %%mm0, %%mm1 \n\t"
02295 "punpckldq %%mm0, %%mm0 \n\t"
02296 "punpckhdq %%mm1, %%mm1 \n\t"
02297 "movq %%mm0, (%%edi) \n\t"
02298 "movq %%mm0, 8(%%edi) \n\t"
02299 "movq %%mm0, 16(%%edi) \n\t"
02300 "movq %%mm0, 24(%%edi) \n\t"
02301 "movq %%mm1, 32(%%edi) \n\t"
02302 "movq %%mm1, 40(%%edi) \n\t"
02303 "movq %%mm1, 48(%%edi) \n\t"
02304 "subl $8, %%esi \n\t"
02305 "movq %%mm1, 56(%%edi) \n\t"
02306 "subl $64, %%edi \n\t"
02307 "subl $2, %%ecx \n\t"
02308 "jnz .loop4_pass0 \n\t"
02309 "EMMS \n\t"
02310
02311 : "=c" (dummy_value_c),
02312 "=S" (dummy_value_S),
02313 "=D" (dummy_value_D)
02314
02315 : "1" (sptr),
02316 "2" (dp),
02317 "0" (width_mmx)
02318
02319 #if 0
02320 : "%mm0", "%mm1"
02321 #endif
02322 );
02323 }
02324 /* leave both pointers one pixel high: the loop below decrements before use */
02325 sptr -= (width_mmx*4 - 4);
02326 dp -= (width_mmx*32 - 4);
02327 for (i = width; i; i--)
02328 {
02329 png_byte v[8];
02330 int j;
02331 sptr -= 4;
02332 png_memcpy(v, sptr, 4);
02333 for (j = 0; j < png_pass_inc[pass]; j++)
02334 {
02335 dp -= 4;
02336 png_memcpy(dp, v, 4);
02337 }
02338 }
02339 }
02340 else if (((pass == 2) || (pass == 3)) && width)
02341 {
02342 int width_mmx = ((width >> 1) << 1);
02343 width -= width_mmx;
02344 if (width_mmx)
02345 {
02346 int dummy_value_c;
02347 int dummy_value_S;
02348 int dummy_value_D;
02349
02350 __asm__ __volatile__ (
02351 "subl $4, %%esi \n\t"
02352 "subl $28, %%edi \n\t"
02353 /* per iteration: 2 source pixels in, 32 bytes out (each pixel 4 times) */
02354 ".loop4_pass2: \n\t"
02355 "movq (%%esi), %%mm0 \n\t"
02356 "movq %%mm0, %%mm1 \n\t"
02357 "punpckldq %%mm0, %%mm0 \n\t"
02358 "punpckhdq %%mm1, %%mm1 \n\t"
02359 "movq %%mm0, (%%edi) \n\t"
02360 "movq %%mm0, 8(%%edi) \n\t"
02361 "movq %%mm1, 16(%%edi) \n\t"
02362 "movq %%mm1, 24(%%edi) \n\t"
02363 "subl $8, %%esi \n\t"
02364 "subl $32, %%edi \n\t"
02365 "subl $2, %%ecx \n\t"
02366 "jnz .loop4_pass2 \n\t"
02367 "EMMS \n\t"
02368
02369 : "=c" (dummy_value_c),
02370 "=S" (dummy_value_S),
02371 "=D" (dummy_value_D)
02372
02373 : "1" (sptr),
02374 "2" (dp),
02375 "0" (width_mmx)
02376
02377 #if 0
02378 : "%mm0", "%mm1"
02379 #endif
02380 );
02381 }
02382
02383 sptr -= (width_mmx*4 - 4);
02384 dp -= (width_mmx*16 - 4);
02385 for (i = width; i; i--)
02386 {
02387 png_byte v[8];
02388 int j;
02389 sptr -= 4;
02390 png_memcpy(v, sptr, 4);
02391 for (j = 0; j < png_pass_inc[pass]; j++)
02392 {
02393 dp -= 4;
02394 png_memcpy(dp, v, 4);
02395 }
02396 }
02397 }
02398 else if (width) /* any other pass */
02399 {
02400 int width_mmx = ((width >> 1) << 1) ;
02401 width -= width_mmx;
02402 if (width_mmx)
02403 {
02404 int dummy_value_c;
02405 int dummy_value_S;
02406 int dummy_value_D;
02407
02408 __asm__ __volatile__ (
02409 "subl $4, %%esi \n\t"
02410 "subl $12, %%edi \n\t"
02411 /* per iteration: 2 source pixels in, 16 bytes out (each pixel twice) */
02412 ".loop4_pass4: \n\t"
02413 "movq (%%esi), %%mm0 \n\t"
02414 "movq %%mm0, %%mm1 \n\t"
02415 "punpckldq %%mm0, %%mm0 \n\t"
02416 "punpckhdq %%mm1, %%mm1 \n\t"
02417 "movq %%mm0, (%%edi) \n\t"
02418 "subl $8, %%esi \n\t"
02419 "movq %%mm1, 8(%%edi) \n\t"
02420 "subl $16, %%edi \n\t"
02421 "subl $2, %%ecx \n\t"
02422 "jnz .loop4_pass4 \n\t"
02423 "EMMS \n\t"
02424
02425 : "=c" (dummy_value_c),
02426 "=S" (dummy_value_S),
02427 "=D" (dummy_value_D)
02428
02429 : "1" (sptr),
02430 "2" (dp),
02431 "0" (width_mmx)
02432
02433 #if 0
02434 : "%mm0", "%mm1"
02435 #endif
02436 );
02437 }
02438
02439 sptr -= (width_mmx*4 - 4);
02440 dp -= (width_mmx*8 - 4);
02441 for (i = width; i; i--)
02442 {
02443 png_byte v[8];
02444 int j;
02445 sptr -= 4;
02446 png_memcpy(v, sptr, 4);
02447 for (j = 0; j < png_pass_inc[pass]; j++)
02448 {
02449 dp -= 4;
02450 png_memcpy(dp, v, 4);
02451 }
02452 }
02453 }
02454 }
02455
02456 /* 8-byte pixels: one pixel fills an MMX register, so no C remainder loop */
02457 else if (pixel_bytes == 8)
02458 {
02459
02460
02461 if (((pass == 0) || (pass == 1)) && width)
02462 {
02463 int dummy_value_c;
02464 int dummy_value_S;
02465 int dummy_value_D;
02466
02467
02468
02469 __asm__ __volatile__ (
02470 "subl $56, %%edi \n\t"
02471 /* per iteration: 1 source pixel in, 64 bytes out (the pixel 8 times) */
02472 ".loop8_pass0: \n\t"
02473 "movq (%%esi), %%mm0 \n\t"
02474 "movq %%mm0, (%%edi) \n\t"
02475 "movq %%mm0, 8(%%edi) \n\t"
02476 "movq %%mm0, 16(%%edi) \n\t"
02477 "movq %%mm0, 24(%%edi) \n\t"
02478 "movq %%mm0, 32(%%edi) \n\t"
02479 "movq %%mm0, 40(%%edi) \n\t"
02480 "movq %%mm0, 48(%%edi) \n\t"
02481 "subl $8, %%esi \n\t"
02482 "movq %%mm0, 56(%%edi) \n\t"
02483 "subl $64, %%edi \n\t"
02484 "decl %%ecx \n\t"
02485 "jnz .loop8_pass0 \n\t"
02486 "EMMS \n\t"
02487
02488 : "=c" (dummy_value_c),
02489 "=S" (dummy_value_S),
02490 "=D" (dummy_value_D)
02491
02492 : "1" (sptr),
02493 "2" (dp),
02494 "0" (width)
02495
02496 #if 0
02497 : "%mm0"
02498 #endif
02499 );
02500 }
02501 else if (((pass == 2) || (pass == 3)) && width)
02502 {
02503
02504
02505
02506
02507 {
02508 int dummy_value_c;
02509 int dummy_value_S;
02510 int dummy_value_D;
02511
02512 __asm__ __volatile__ (
02513 "subl $24, %%edi \n\t"
02514 /* per iteration: 1 source pixel in, 32 bytes out (the pixel 4 times) */
02515 ".loop8_pass2: \n\t"
02516 "movq (%%esi), %%mm0 \n\t"
02517 "movq %%mm0, (%%edi) \n\t"
02518 "movq %%mm0, 8(%%edi) \n\t"
02519 "movq %%mm0, 16(%%edi) \n\t"
02520 "subl $8, %%esi \n\t"
02521 "movq %%mm0, 24(%%edi) \n\t"
02522 "subl $32, %%edi \n\t"
02523 "decl %%ecx \n\t"
02524 "jnz .loop8_pass2 \n\t"
02525 "EMMS \n\t"
02526
02527 : "=c" (dummy_value_c),
02528 "=S" (dummy_value_S),
02529 "=D" (dummy_value_D)
02530
02531 : "1" (sptr),
02532 "2" (dp),
02533 "0" (width)
02534
02535 #if 0
02536 : "%mm0"
02537 #endif
02538 );
02539 }
02540 }
02541 else if (width) /* any other pass */
02542 {
02543
02544
02545 {
02546 int dummy_value_c;
02547 int dummy_value_S;
02548 int dummy_value_D;
02549
02550 __asm__ __volatile__ (
02551 "subl $8, %%edi \n\t"
02552 /* per iteration: 1 source pixel in, 16 bytes out (the pixel twice) */
02553 ".loop8_pass4: \n\t"
02554 "movq (%%esi), %%mm0 \n\t"
02555 "movq %%mm0, (%%edi) \n\t"
02556 "subl $8, %%esi \n\t"
02557 "movq %%mm0, 8(%%edi) \n\t"
02558 "subl $16, %%edi \n\t"
02559 "decl %%ecx \n\t"
02560 "jnz .loop8_pass4 \n\t"
02561 "EMMS \n\t"
02562
02563 : "=c" (dummy_value_c),
02564 "=S" (dummy_value_S),
02565 "=D" (dummy_value_D)
02566
02567 : "1" (sptr),
02568 "2" (dp),
02569 "0" (width)
02570
02571 #if 0
02572 : "%mm0"
02573 #endif
02574 );
02575 }
02576 }
02577
02578 }
02579
02580 /* 6-byte pixels: plain C even on the MMX path */
02581 else if (pixel_bytes == 6)
02582 {
02583 for (i = width; i; i--)
02584 {
02585 png_byte v[8];
02586 int j;
02587 png_memcpy(v, sptr, 6);
02588 for (j = 0; j < png_pass_inc[pass]; j++)
02589 {
02590 png_memcpy(dp, v, 6);
02591 dp -= 6;
02592 }
02593 sptr -= 6;
02594 }
02595 }
02596
02597 /* any other pixel size: generic C copy through the 8-byte buffer v */
02598 else
02599 {
02600 for (i = width; i; i--)
02601 {
02602 png_byte v[8];
02603 int j;
02604 png_memcpy(v, sptr, pixel_bytes);
02605 for (j = 0; j < png_pass_inc[pass]; j++)
02606 {
02607 png_memcpy(dp, v, pixel_bytes);
02608 dp -= pixel_bytes;
02609 }
02610 sptr-= pixel_bytes;
02611 }
02612 }
02613 }
02614
02615 else
02616
02617 /* MMX not selected (or assembler code not compiled in): the block below */
02618 /* does the same replication in plain C for every pixel size */
02619
02620 #endif
02621 {
02622 if (pixel_bytes == 1)
02623 {
02624 for (i = width; i; i--)
02625 {
02626 int j;
02627 for (j = 0; j < png_pass_inc[pass]; j++)
02628 {
02629 *dp-- = *sptr;
02630 }
02631 --sptr;
02632 }
02633 }
02634 else if (pixel_bytes == 3)
02635 {
02636 for (i = width; i; i--)
02637 {
02638 png_byte v[8];
02639 int j;
02640 png_memcpy(v, sptr, 3);
02641 for (j = 0; j < png_pass_inc[pass]; j++)
02642 {
02643 png_memcpy(dp, v, 3);
02644 dp -= 3;
02645 }
02646 sptr -= 3;
02647 }
02648 }
02649 else if (pixel_bytes == 2)
02650 {
02651 for (i = width; i; i--)
02652 {
02653 png_byte v[8];
02654 int j;
02655 png_memcpy(v, sptr, 2);
02656 for (j = 0; j < png_pass_inc[pass]; j++)
02657 {
02658 png_memcpy(dp, v, 2);
02659 dp -= 2;
02660 }
02661 sptr -= 2;
02662 }
02663 }
02664 else if (pixel_bytes == 4)
02665 {
02666 for (i = width; i; i--)
02667 {
02668 png_byte v[8];
02669 int j;
02670 png_memcpy(v, sptr, 4);
02671 for (j = 0; j < png_pass_inc[pass]; j++)
02672 {
02673 #ifdef PNG_DEBUG
02674 if (dp < row || dp+3 > row+png_ptr->row_buf_size)
02675 {
02676 printf("dp out of bounds: row=%d, dp=%d, rp=%d\n", /* NOTE(review): pointers are passed for the %d conversions */
02677 row, dp, row+png_ptr->row_buf_size);
02678 printf("row_buf=%d\n",png_ptr->row_buf_size);
02679 }
02680 #endif
02681 png_memcpy(dp, v, 4);
02682 dp -= 4;
02683 }
02684 sptr -= 4;
02685 }
02686 }
02687 else if (pixel_bytes == 6)
02688 {
02689 for (i = width; i; i--)
02690 {
02691 png_byte v[8];
02692 int j;
02693 png_memcpy(v, sptr, 6);
02694 for (j = 0; j < png_pass_inc[pass]; j++)
02695 {
02696 png_memcpy(dp, v, 6);
02697 dp -= 6;
02698 }
02699 sptr -= 6;
02700 }
02701 }
02702 else if (pixel_bytes == 8)
02703 {
02704 for (i = width; i; i--)
02705 {
02706 png_byte v[8];
02707 int j;
02708 png_memcpy(v, sptr, 8);
02709 for (j = 0; j < png_pass_inc[pass]; j++)
02710 {
02711 png_memcpy(dp, v, 8);
02712 dp -= 8;
02713 }
02714 sptr -= 8;
02715 }
02716 }
02717 else
02718 {
02719 for (i = width; i; i--)
02720 {
02721 png_byte v[8];
02722 int j;
02723 png_memcpy(v, sptr, pixel_bytes);
02724 for (j = 0; j < png_pass_inc[pass]; j++)
02725 {
02726 png_memcpy(dp, v, pixel_bytes);
02727 dp -= pixel_bytes;
02728 }
02729 sptr -= pixel_bytes;
02730 }
02731 }
02732
02733 }
02734 break;
02735 }
02736 }
02737 /* publish the widened geometry to the rest of the read pipeline */
02738 row_info->width = final_width;
02739
02740 row_info->rowbytes = PNG_ROWBYTES(row_info->pixel_depth,final_width);
02741 }
02742
02743 }
02744
02745 #endif
02746 #endif
02747
02748
02749
02750 #if defined(PNG_HAVE_ASSEMBLER_READ_FILTER_ROW)
02751 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
02752 /* 64-bit masks and scratch values for the MMX row-filter code.  The asm */
02753 /* templates name these objects directly as memory operands, for example */
02754 /* movq _ActiveMask, %%mm7 in png_read_filter_row_mmx_avg, so they are */
02755 /* file-scope objects that are not declared static. */
02756 union uAll {
02757 long long use; /* the member assigned from C code */
02758 double align; /* presumably only here to force 8-byte alignment - confirm */
02759 } _LBCarryMask = {0x0101010101010101LL}, /* 0x01 in every byte: ANDed in to pick out each low bit */
02760 _HBClearMask = {0x7f7f7f7f7f7f7f7fLL}, /* 0x7f in every byte: ANDed in after a 1-bit right shift */
02761 _ActiveMask, _ActiveMask2, _ActiveMaskEnd, _ShiftBpp, _ShiftRem; /* no initializer; _ActiveMask, _ShiftBpp and _ShiftRem are assigned per bpp case before the asm runs */
02762
02763 #ifdef PNG_THREAD_UNSAFE_OK
02764
02765
02766
02767
02768
02769
02770
02771
02772 static void
02773 png_read_filter_row_mmx_avg(png_row_infop row_info, png_bytep row,
02774 png_bytep prev_row)
02775 {
02776 int bpp;
02777 int dummy_value_c;
02778 int dummy_value_S;
02779 int dummy_value_D;
02780
02781 bpp = (row_info->pixel_depth + 7) >> 3;
02782 _FullLength = row_info->rowbytes;
02783
02784 __asm__ __volatile__ (
02785
02786 #ifdef __PIC__
02787 "pushl %%ebx \n\t"
02788 #endif
02789
02790 "xorl %%ebx, %%ebx \n\t"
02791 "movl %%edi, %%edx \n\t"
02792
02793
02794 "subl %%ecx, %%edx \n\t"
02795
02796 "xorl %%eax,%%eax \n\t"
02797
02798
02799
02800 "avg_rlp: \n\t"
02801 "movb (%%esi,%%ebx,),%%al \n\t"
02802 "incl %%ebx \n\t"
02803 "shrb %%al \n\t"
02804 "addb -1(%%edi,%%ebx,),%%al \n\t"
02805
02806 "cmpl %%ecx, %%ebx \n\t"
02807 "movb %%al,-1(%%edi,%%ebx,) \n\t"
02808 "jb avg_rlp \n\t"
02809
02810
02811 "movl %%edi, _dif \n\t"
02812 "addl %%ebx, _dif \n\t"
02813 "addl $0xf, _dif \n\t"
02814 "andl $0xfffffff8, _dif \n\t"
02815 "subl %%edi, _dif \n\t"
02816 "jz avg_go \n\t"
02817
02818
02819
02820
02821 "xorl %%ecx, %%ecx \n\t"
02822
02823 "avg_lp1: \n\t"
02824 "xorl %%eax, %%eax \n\t"
02825 "movb (%%esi,%%ebx,), %%cl \n\t"
02826 "movb (%%edx,%%ebx,), %%al \n\t"
02827 "addw %%cx, %%ax \n\t"
02828 "incl %%ebx \n\t"
02829 "shrw %%ax \n\t"
02830 "addb -1(%%edi,%%ebx,), %%al \n\t"
02831 "cmpl _dif, %%ebx \n\t"
02832 "movb %%al, -1(%%edi,%%ebx,) \n\t"
02833 "jb avg_lp1 \n\t"
02834
02835 "avg_go: \n\t"
02836 "movl _FullLength, %%eax \n\t"
02837 "movl %%eax, %%ecx \n\t"
02838 "subl %%ebx, %%eax \n\t"
02839 "andl $0x00000007, %%eax \n\t"
02840 "subl %%eax, %%ecx \n\t"
02841 "movl %%ecx, _MMXLength \n\t"
02842 #ifdef __PIC__
02843 "popl %%ebx \n\t"
02844 #endif
02845
02846 : "=c" (dummy_value_c),
02847 "=S" (dummy_value_S),
02848 "=D" (dummy_value_D)
02849
02850 : "0" (bpp),
02851 "1" (prev_row),
02852 "2" (row)
02853
02854 : "%eax", "%edx"
02855 #ifndef __PIC__
02856 , "%ebx"
02857 #endif
02858
02859
02860 );
02861
02862
02863 switch (bpp)
02864 {
02865 case 3:
02866 {
02867 _ActiveMask.use = 0x0000000000ffffffLL;
02868 _ShiftBpp.use = 24;
02869 _ShiftRem.use = 40;
02870
02871 __asm__ __volatile__ (
02872
02873 "movq _ActiveMask, %%mm7 \n\t"
02874 "movl _dif, %%ecx \n\t"
02875 "movq _LBCarryMask, %%mm5 \n\t"
02876
02877 "movq _HBClearMask, %%mm4 \n\t"
02878
02879
02880
02881 "movq -8(%%edi,%%ecx,), %%mm2 \n\t"
02882
02883 "avg_3lp: \n\t"
02884 "movq (%%edi,%%ecx,), %%mm0 \n\t"
02885 "movq %%mm5, %%mm3 \n\t"
02886 "psrlq _ShiftRem, %%mm2 \n\t"
02887
02888 "movq (%%esi,%%ecx,), %%mm1 \n\t"
02889 "movq %%mm7, %%mm6 \n\t"
02890 "pand %%mm1, %%mm3 \n\t"
02891 "psrlq $1, %%mm1 \n\t"
02892 "pand %%mm4, %%mm1 \n\t"
02893
02894 "paddb %%mm1, %%mm0 \n\t"
02895
02896
02897 "movq %%mm3, %%mm1 \n\t"
02898
02899 "pand %%mm2, %%mm1 \n\t"
02900
02901
02902 "psrlq $1, %%mm2 \n\t"
02903 "pand %%mm4, %%mm2 \n\t"
02904
02905 "paddb %%mm1, %%mm2 \n\t"
02906
02907 "pand %%mm6, %%mm2 \n\t"
02908
02909 "paddb %%mm2, %%mm0 \n\t"
02910
02911
02912
02913 "psllq _ShiftBpp, %%mm6 \n\t"
02914
02915 "movq %%mm0, %%mm2 \n\t"
02916 "psllq _ShiftBpp, %%mm2 \n\t"
02917 "movq %%mm3, %%mm1 \n\t"
02918
02919 "pand %%mm2, %%mm1 \n\t"
02920
02921
02922 "psrlq $1, %%mm2 \n\t"
02923 "pand %%mm4, %%mm2 \n\t"
02924
02925 "paddb %%mm1, %%mm2 \n\t"
02926
02927 "pand %%mm6, %%mm2 \n\t"
02928
02929 "paddb %%mm2, %%mm0 \n\t"
02930
02931
02932
02933
02934 "psllq _ShiftBpp, %%mm6 \n\t"
02935
02936
02937 "movq %%mm0, %%mm2 \n\t"
02938 "psllq _ShiftBpp, %%mm2 \n\t"
02939
02940
02941 "movq %%mm3, %%mm1 \n\t"
02942
02943 "pand %%mm2, %%mm1 \n\t"
02944
02945
02946 "psrlq $1, %%mm2 \n\t"
02947 "pand %%mm4, %%mm2 \n\t"
02948
02949 "paddb %%mm1, %%mm2 \n\t"
02950
02951 "pand %%mm6, %%mm2 \n\t"
02952
02953 "addl $8, %%ecx \n\t"
02954 "paddb %%mm2, %%mm0 \n\t"
02955
02956
02957
02958 "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
02959
02960 "cmpl _MMXLength, %%ecx \n\t"
02961 "movq %%mm0, %%mm2 \n\t"
02962 "jb avg_3lp \n\t"
02963
02964 : "=S" (dummy_value_S),
02965 "=D" (dummy_value_D)
02966
02967 : "0" (prev_row),
02968 "1" (row)
02969
02970 : "%ecx"
02971 #if 0
02972 , "%mm0", "%mm1", "%mm2", "%mm3"
02973 , "%mm4", "%mm5", "%mm6", "%mm7"
02974 #endif
02975 );
02976 }
02977 break;
02978
02979 case 6:
02980 case 4:
02981
02982
02983 {
02984 _ActiveMask.use = 0xffffffffffffffffLL;
02985
02986 _ShiftBpp.use = bpp << 3;
02987 _ShiftRem.use = 64 - _ShiftBpp.use;
02988
02989 __asm__ __volatile__ (
02990 "movq _HBClearMask, %%mm4 \n\t"
02991
02992
02993 "movl _dif, %%ecx \n\t"
02994
02995
02996
02997 "movq _ActiveMask, %%mm7 \n\t"
02998
02999 "psrlq _ShiftRem, %%mm7 \n\t"
03000
03001 "movq %%mm7, %%mm6 \n\t"
03002 "movq _LBCarryMask, %%mm5 \n\t"
03003 "psllq _ShiftBpp, %%mm6 \n\t"
03004
03005
03006
03007 "movq -8(%%edi,%%ecx,), %%mm2 \n\t"
03008
03009 "avg_4lp: \n\t"
03010 "movq (%%edi,%%ecx,), %%mm0 \n\t"
03011 "psrlq _ShiftRem, %%mm2 \n\t"
03012 "movq (%%esi,%%ecx,), %%mm1 \n\t"
03013
03014 "movq %%mm5, %%mm3 \n\t"
03015 "pand %%mm1, %%mm3 \n\t"
03016 "psrlq $1, %%mm1 \n\t"
03017 "pand %%mm4, %%mm1 \n\t"
03018
03019 "paddb %%mm1, %%mm0 \n\t"
03020
03021
03022 "movq %%mm3, %%mm1 \n\t"
03023
03024 "pand %%mm2, %%mm1 \n\t"
03025
03026
03027 "psrlq $1, %%mm2 \n\t"
03028 "pand %%mm4, %%mm2 \n\t"
03029
03030 "paddb %%mm1, %%mm2 \n\t"
03031
03032 "pand %%mm7, %%mm2 \n\t"
03033
03034 "paddb %%mm2, %%mm0 \n\t"
03035
03036
03037
03038 "movq %%mm0, %%mm2 \n\t"
03039 "psllq _ShiftBpp, %%mm2 \n\t"
03040 "addl $8, %%ecx \n\t"
03041 "movq %%mm3, %%mm1 \n\t"
03042
03043 "pand %%mm2, %%mm1 \n\t"
03044
03045
03046 "psrlq $1, %%mm2 \n\t"
03047 "pand %%mm4, %%mm2 \n\t"
03048
03049 "paddb %%mm1, %%mm2 \n\t"
03050
03051 "pand %%mm6, %%mm2 \n\t"
03052
03053 "paddb %%mm2, %%mm0 \n\t"
03054
03055
03056 "cmpl _MMXLength, %%ecx \n\t"
03057
03058 "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
03059
03060 "movq %%mm0, %%mm2 \n\t"
03061 "jb avg_4lp \n\t"
03062
03063 : "=S" (dummy_value_S),
03064 "=D" (dummy_value_D)
03065
03066 : "0" (prev_row),
03067 "1" (row)
03068
03069 : "%ecx"
03070 #if 0
03071 , "%mm0", "%mm1", "%mm2", "%mm3"
03072 , "%mm4", "%mm5", "%mm6", "%mm7"
03073 #endif
03074 );
03075 }
03076 break;
03077
03078 case 2:
03079 {
03080 _ActiveMask.use = 0x000000000000ffffLL;
03081 _ShiftBpp.use = 16;
03082 _ShiftRem.use = 48;
03083
03084 __asm__ __volatile__ (
03085
03086 "movq _ActiveMask, %%mm7 \n\t"
03087
03088 "movl _dif, %%ecx \n\t"
03089
03090 "movq _LBCarryMask, %%mm5 \n\t"
03091
03092 "movq _HBClearMask, %%mm4 \n\t"
03093
03094
03095
03096 "movq -8(%%edi,%%ecx,), %%mm2 \n\t"
03097
03098 "avg_2lp: \n\t"
03099 "movq (%%edi,%%ecx,), %%mm0 \n\t"
03100 "psrlq _ShiftRem, %%mm2 \n\t"
03101 "movq (%%esi,%%ecx,), %%mm1 \n\t"
03102
03103 "movq %%mm5, %%mm3 \n\t"
03104 "pand %%mm1, %%mm3 \n\t"
03105 "psrlq $1, %%mm1 \n\t"
03106 "pand %%mm4, %%mm1 \n\t"
03107
03108 "movq %%mm7, %%mm6 \n\t"
03109 "paddb %%mm1, %%mm0 \n\t"
03110
03111
03112
03113 "movq %%mm3, %%mm1 \n\t"
03114
03115 "pand %%mm2, %%mm1 \n\t"
03116
03117
03118
03119 "psrlq $1, %%mm2 \n\t"
03120 "pand %%mm4, %%mm2 \n\t"
03121
03122 "paddb %%mm1, %%mm2 \n\t"
03123
03124 "pand %%mm6, %%mm2 \n\t"
03125
03126 "paddb %%mm2, %%mm0 \n\t"
03127
03128
03129
03130 "psllq _ShiftBpp, %%mm6 \n\t"
03131
03132 "movq %%mm0, %%mm2 \n\t"
03133 "psllq _ShiftBpp, %%mm2 \n\t"
03134 "movq %%mm3, %%mm1 \n\t"
03135
03136 "pand %%mm2, %%mm1 \n\t"
03137
03138
03139
03140 "psrlq $1, %%mm2 \n\t"
03141 "pand %%mm4, %%mm2 \n\t"
03142
03143 "paddb %%mm1, %%mm2 \n\t"
03144
03145 "pand %%mm6, %%mm2 \n\t"
03146
03147 "paddb %%mm2, %%mm0 \n\t"
03148
03149
03150
03151 "psllq _ShiftBpp, %%mm6 \n\t"
03152
03153 "movq %%mm0, %%mm2 \n\t"
03154 "psllq _ShiftBpp, %%mm2 \n\t"
03155 "movq %%mm3, %%mm1 \n\t"
03156
03157 "pand %%mm2, %%mm1 \n\t"
03158
03159
03160 "psrlq $1, %%mm2 \n\t"
03161 "pand %%mm4, %%mm2 \n\t"
03162
03163 "paddb %%mm1, %%mm2 \n\t"
03164
03165 "pand %%mm6, %%mm2 \n\t"
03166
03167 "paddb %%mm2, %%mm0 \n\t"
03168
03169
03170
03171 "psllq _ShiftBpp, %%mm6 \n\t"
03172
03173 "movq %%mm0, %%mm2 \n\t"
03174 "psllq _ShiftBpp, %%mm2 \n\t"
03175 "addl $8, %%ecx \n\t"
03176 "movq %%mm3, %%mm1 \n\t"
03177
03178 "pand %%mm2, %%mm1 \n\t"
03179
03180
03181
03182 "psrlq $1, %%mm2 \n\t"
03183 "pand %%mm4, %%mm2 \n\t"
03184
03185 "paddb %%mm1, %%mm2 \n\t"
03186
03187 "pand %%mm6, %%mm2 \n\t"
03188
03189 "paddb %%mm2, %%mm0 \n\t"
03190
03191
03192 "cmpl _MMXLength, %%ecx \n\t"
03193
03194 "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
03195
03196 "movq %%mm0, %%mm2 \n\t"
03197 "jb avg_2lp \n\t"
03198
03199 : "=S" (dummy_value_S),
03200 "=D" (dummy_value_D)
03201
03202 : "0" (prev_row),
03203 "1" (row)
03204
03205 : "%ecx"
03206 #if 0
03207 , "%mm0", "%mm1", "%mm2", "%mm3"
03208 , "%mm4", "%mm5", "%mm6", "%mm7"
03209 #endif
03210 );
03211 }
03212 break;
03213
03214 case 1:
03215 {
03216 __asm__ __volatile__ (
03217
03218 #ifdef __PIC__
03219 "pushl %%ebx \n\t"
03220 #endif
03221 "movl _dif, %%ebx \n\t"
03222
03223
03224 "cmpl _FullLength, %%ebx \n\t"
03225 "jnb avg_1end \n\t"
03226
03227
03228 "movl %%edi, %%edx \n\t"
03229
03230 "subl %%ecx, %%edx \n\t"
03231 "xorl %%ecx, %%ecx \n\t"
03232
03233 "avg_1lp: \n\t"
03234
03235 "xorl %%eax, %%eax \n\t"
03236 "movb (%%esi,%%ebx,), %%cl \n\t"
03237 "movb (%%edx,%%ebx,), %%al \n\t"
03238 "addw %%cx, %%ax \n\t"
03239 "incl %%ebx \n\t"
03240 "shrw %%ax \n\t"
03241 "addb -1(%%edi,%%ebx,), %%al \n\t"
03242
03243 "cmpl _FullLength, %%ebx \n\t"
03244 "movb %%al, -1(%%edi,%%ebx,) \n\t"
03245
03246 "jb avg_1lp \n\t"
03247
03248 "avg_1end: \n\t"
03249 #ifdef __PIC__
03250 "popl %%ebx \n\t"
03251 #endif
03252
03253 : "=c" (dummy_value_c),
03254 "=S" (dummy_value_S),
03255 "=D" (dummy_value_D)
03256
03257 : "0" (bpp),
03258 "1" (prev_row),
03259 "2" (row)
03260
03261 : "%eax", "%edx"
03262 #ifndef __PIC__
03263 , "%ebx"
03264 #endif
03265 );
03266 }
03267 return;
03268
03269 case 8:
03270 {
03271 __asm__ __volatile__ (
03272
03273 "movl _dif, %%ecx \n\t"
03274 "movq _LBCarryMask, %%mm5 \n\t"
03275
03276 "movq _HBClearMask, %%mm4 \n\t"
03277
03278
03279
03280 "movq -8(%%edi,%%ecx,), %%mm2 \n\t"
03281
03282
03283 "avg_8lp: \n\t"
03284 "movq (%%edi,%%ecx,), %%mm0 \n\t"
03285 "movq %%mm5, %%mm3 \n\t"
03286 "movq (%%esi,%%ecx,), %%mm1 \n\t"
03287 "addl $8, %%ecx \n\t"
03288 "pand %%mm1, %%mm3 \n\t"
03289 "psrlq $1, %%mm1 \n\t"
03290 "pand %%mm2, %%mm3 \n\t"
03291
03292 "psrlq $1, %%mm2 \n\t"
03293 "pand %%mm4, %%mm1 \n\t"
03294 "paddb %%mm3, %%mm0 \n\t"
03295 "pand %%mm4, %%mm2 \n\t"
03296 "paddb %%mm1, %%mm0 \n\t"
03297 "paddb %%mm2, %%mm0 \n\t"
03298 "cmpl _MMXLength, %%ecx \n\t"
03299 "movq %%mm0, -8(%%edi,%%ecx,) \n\t"
03300 "movq %%mm0, %%mm2 \n\t"
03301 "jb avg_8lp \n\t"
03302
03303 : "=S" (dummy_value_S),
03304 "=D" (dummy_value_D)
03305
03306 : "0" (prev_row),
03307 "1" (row)
03308
03309 : "%ecx"
03310 #if 0
03311 , "%mm0", "%mm1", "%mm2"
03312 , "%mm3", "%mm4", "%mm5"
03313 #endif
03314 );
03315 }
03316 break;
03317
03318 default:
03319 {
03320
03321 #ifdef PNG_DEBUG
03322
03323 png_debug(1,
03324 "Internal logic error in pnggccrd (png_read_filter_row_mmx_avg())\n");
03325 #endif
03326
03327 #if 0
03328 __asm__ __volatile__ (
03329 "movq _LBCarryMask, %%mm5 \n\t"
03330
03331 "movl _dif, %%ebx \n\t"
03332
03333 "movl row, %%edi \n\t"
03334 "movq _HBClearMask, %%mm4 \n\t"
03335 "movl %%edi, %%edx \n\t"
03336 "movl prev_row, %%esi \n\t"
03337 "subl bpp, %%edx \n\t"
03338 "avg_Alp: \n\t"
03339 "movq (%%edi,%%ebx,), %%mm0 \n\t"
03340 "movq %%mm5, %%mm3 \n\t"
03341 "movq (%%esi,%%ebx,), %%mm1 \n\t"
03342 "pand %%mm1, %%mm3 \n\t"
03343 "movq (%%edx,%%ebx,), %%mm2 \n\t"
03344 "psrlq $1, %%mm1 \n\t"
03345 "pand %%mm2, %%mm3 \n\t"
03346
03347 "psrlq $1, %%mm2 \n\t"
03348 "pand %%mm4, %%mm1 \n\t"
03349
03350 "paddb %%mm3, %%mm0 \n\t"
03351
03352 "pand %%mm4, %%mm2 \n\t"
03353
03354 "paddb %%mm1, %%mm0 \n\t"
03355
03356 "addl $8, %%ebx \n\t"
03357 "paddb %%mm2, %%mm0 \n\t"
03358
03359 "cmpl _MMXLength, %%ebx \n\t"
03360 "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
03361 "jb avg_Alp \n\t"
03362
03363 :
03364
03365 :
03366
03367 : "%ebx", "%edx", "%edi", "%esi"
03368 );
03369 #endif
03370 }
03371 break;
03372
03373 }
03374
03375 __asm__ __volatile__ (
03376
03377
03378 #ifdef __PIC__
03379 "pushl %%ebx \n\t"
03380 #endif
03381 "movl _MMXLength, %%ebx \n\t"
03382
03383 "cmpl _FullLength, %%ebx \n\t"
03384 "jnb avg_end \n\t"
03385
03386
03387
03388 "movl %%edi, %%edx \n\t"
03389
03390 "subl %%ecx, %%edx \n\t"
03391 "xorl %%ecx, %%ecx \n\t"
03392
03393 "avg_lp2: \n\t"
03394
03395 "xorl %%eax, %%eax \n\t"
03396 "movb (%%esi,%%ebx,), %%cl \n\t"
03397 "movb (%%edx,%%ebx,), %%al \n\t"
03398 "addw %%cx, %%ax \n\t"
03399 "incl %%ebx \n\t"
03400 "shrw %%ax \n\t"
03401 "addb -1(%%edi,%%ebx,), %%al \n\t"
03402 "cmpl _FullLength, %%ebx \n\t"
03403 "movb %%al, -1(%%edi,%%ebx,) \n\t"
03404 "jb avg_lp2 \n\t"
03405
03406 "avg_end: \n\t"
03407 "EMMS \n\t"
03408 #ifdef __PIC__
03409 "popl %%ebx \n\t"
03410 #endif
03411
03412 : "=c" (dummy_value_c),
03413 "=S" (dummy_value_S),
03414 "=D" (dummy_value_D)
03415
03416 : "0" (bpp),
03417 "1" (prev_row),
03418 "2" (row)
03419
03420 : "%eax", "%edx"
03421 #ifndef __PIC__
03422 , "%ebx"
03423 #endif
03424 );
03425
03426 }
03427 #endif
03428
03429
03430
03431 #ifdef PNG_THREAD_UNSAFE_OK
03432 /* png_read_filter_row_mmx_paeth: undo the PNG Paeth filter on one row, in place. */
03433 /* row_info : supplies pixel_depth (bpp = (pixel_depth + 7) >> 3) and rowbytes. */
03434 /* row : filtered bytes of the current row; overwritten with the decoded bytes. */
03435 /* prev_row : bytes of the row above; only read. */
03436 /* Scratch state (_dif, _FullLength, _MMXLength, _patemp, _pbtemp, _pctemp, */
03437 /* _ActiveMask*, _ShiftBpp, _ShiftRem) lives in file-scope variables that the asm */
03438 /* reaches by symbol name, so every asm statement carries a "memory" clobber. */
03439 /* NOTE(review): presumably this is why the code sits behind PNG_THREAD_UNSAFE_OK. */
03440 static void
03441 png_read_filter_row_mmx_paeth(png_row_infop row_info, png_bytep row,
03442 png_bytep prev_row)
03443 {
03444 int bpp; /* bytes per pixel, rounded up */
03445 int dummy_value_c; /* dummy outputs: tell the compiler ecx/esi/edi are modified */
03446 int dummy_value_S;
03447 int dummy_value_D;
03448 /* NOTE(review): the loops below run to _dif and then at least one MMX step without checking rowbytes; callers presumably guarantee a minimum row length -- confirm. */
03449 bpp = (row_info->pixel_depth + 7) >> 3;
03450 _FullLength = row_info->rowbytes; /* read by name inside the asm below */
03451 /* Block 1: decode the first pixel, then scalar-decode up to an 8-byte boundary. */
03452 __asm__ __volatile__ (
03453 #ifdef __PIC__
03454 "pushl %%ebx \n\t" /* under PIC ebx is saved by hand instead of being listed as clobbered */
03455 #endif
03456 "xorl %%ebx, %%ebx \n\t" /* ebx = byte offset of the byte being decoded */
03457
03458 "xorl %%edx, %%edx \n\t" /* edx = byte offset of the neighbour one pixel to the left */
03459
03460 "xorl %%eax, %%eax \n\t"
03461
03462 /* First pixel (offsets 0 .. bpp-1): each byte just has the byte above added. */
03463 /* edi = row, esi = prev_row, ecx = bpp on entry. */
03464
03465 "paeth_rlp: \n\t"
03466 "movb (%%edi,%%ebx,), %%al \n\t"
03467 "addb (%%esi,%%ebx,), %%al \n\t" /* al = row[ebx] + prev_row[ebx] */
03468 "incl %%ebx \n\t"
03469
03470 "cmpl %%ecx, %%ebx \n\t"
03471 "movb %%al, -1(%%edi,%%ebx,) \n\t"
03472 "jb paeth_rlp \n\t"
03473 /* _dif = ((row + bpp + 0xf) & ~7) - row: offset of an 8-byte-aligned address, at least bpp + 8 bytes in. */
03474 "movl %%edi, _dif \n\t"
03475 "addl %%ebx, _dif \n\t"
03476 "xorl %%ecx, %%ecx \n\t" /* ecx is reused as a byte-wide scratch below; clear its upper bits */
03477 "addl $0xf, _dif \n\t"
03478
03479 "andl $0xfffffff8, _dif \n\t"
03480 "subl %%edi, _dif \n\t"
03481
03482 "jz paeth_go \n\t"
03483
03484 /* Scalar Paeth loop for offsets bpp .. _dif-1. a = left, b = above, c = above-left. */
03485 "paeth_lp1: \n\t"
03486 "xorl %%eax, %%eax \n\t"
03487
03488 "movb (%%esi,%%ebx,), %%al \n\t" /* eax = b */
03489 "movb (%%esi,%%edx,), %%cl \n\t" /* ecx = c */
03490 "subl %%ecx, %%eax \n\t" /* b - c, i.e. p - a for p = a + b - c */
03491 "movl %%eax, _patemp \n\t"
03492 "xorl %%eax, %%eax \n\t"
03493
03494 "movb (%%edi,%%edx,), %%al \n\t" /* eax = a */
03495 "subl %%ecx, %%eax \n\t" /* a - c, i.e. p - b */
03496 "movl %%eax, %%ecx \n\t"
03497
03498 "addl _patemp, %%eax \n\t" /* (a - c) + (b - c), i.e. p - c */
03499
03500 "testl $0x80000000, %%eax \n\t" /* take absolute values: negate when the sign bit is set */
03501 "jz paeth_pca \n\t"
03502 "negl %%eax \n\t"
03503
03504 "paeth_pca: \n\t"
03505 "movl %%eax, _pctemp \n\t" /* pc = |p - c| */
03506
03507 "testl $0x80000000, %%ecx \n\t"
03508 "jz paeth_pba \n\t"
03509 "negl %%ecx \n\t"
03510
03511 "paeth_pba: \n\t"
03512 "movl %%ecx, _pbtemp \n\t" /* pb = |p - b| (also kept in ecx) */
03513
03514 "movl _patemp, %%eax \n\t"
03515 "testl $0x80000000, %%eax \n\t"
03516 "jz paeth_paa \n\t"
03517 "negl %%eax \n\t"
03518
03519 "paeth_paa: \n\t"
03520 "movl %%eax, _patemp \n\t" /* pa = |p - a| (also kept in eax) */
03521
03522 "cmpl %%ecx, %%eax \n\t"
03523 "jna paeth_abb \n\t" /* pa <= pb */
03524
03525 "cmpl _pctemp, %%ecx \n\t"
03526 "jna paeth_bbc \n\t" /* pa > pb and pb <= pc */
03527
03528 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
03529 "jmp paeth_paeth \n\t"
03530
03531 "paeth_bbc: \n\t"
03532
03533 "movb (%%esi,%%ebx,), %%cl \n\t" /* predictor = b */
03534 "jmp paeth_paeth \n\t"
03535
03536 "paeth_abb: \n\t"
03537
03538 "cmpl _pctemp, %%eax \n\t"
03539 "jna paeth_abc \n\t" /* pa <= pb and pa <= pc */
03540
03541 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
03542 "jmp paeth_paeth \n\t"
03543
03544 "paeth_abc: \n\t"
03545
03546 "movb (%%edi,%%edx,), %%cl \n\t" /* predictor = a */
03547
03548 "paeth_paeth: \n\t"
03549 "incl %%ebx \n\t"
03550 "incl %%edx \n\t"
03551
03552 "addb %%cl, -1(%%edi,%%ebx,) \n\t" /* row[ebx-1] += predictor (mod 256) */
03553 "cmpl _dif, %%ebx \n\t"
03554 "jb paeth_lp1 \n\t"
03555
03556 "paeth_go: \n\t" /* _MMXLength = _FullLength - ((_FullLength - ebx) & 7) */
03557 "movl _FullLength, %%ecx \n\t"
03558 "movl %%ecx, %%eax \n\t"
03559 "subl %%ebx, %%eax \n\t"
03560 "andl $0x00000007, %%eax \n\t" /* bytes left over after whole 8-byte steps */
03561 "subl %%eax, %%ecx \n\t"
03562 "movl %%ecx, _MMXLength \n\t"
03563 #ifdef __PIC__
03564 "popl %%ebx \n\t"
03565 #endif
03566
03567 : "=c" (dummy_value_c),
03568 "=S" (dummy_value_S),
03569 "=D" (dummy_value_D)
03570
03571 : "0" (bpp),
03572 "1" (prev_row),
03573 "2" (row)
03574
03575 : "%eax", "%edx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
03576 #ifndef __PIC__
03577 , "%ebx"
03578 #endif
03579 );
03580
03581 /* Block 2: 8 bytes per iteration from _dif up to _MMXLength, specialised per pixel size. */
03582 switch (bpp)
03583 {
03584 case 3:
03585 {
03586 _ActiveMask.use = 0x0000000000ffffffLL; /* low 3 bytes: one pixel */
03587 _ActiveMaskEnd.use = 0xffff000000000000LL; /* top 2 bytes: remainder of the qword after two pixels */
03588 _ShiftBpp.use = 24; /* bits per pixel */
03589 _ShiftRem.use = 40; /* 64 - 24 */
03590
03591 __asm__ __volatile__ (
03592 "movl _dif, %%ecx \n\t" /* ecx = byte offset; esi = prev_row, edi = row */
03593
03594
03595 "pxor %%mm0, %%mm0 \n\t" /* mm0 = 0, used for byte->word unpacking */
03596
03597 "movq -8(%%edi,%%ecx,), %%mm1 \n\t" /* previous 8 decoded bytes of row */
03598 "paeth_3lp: \n\t"
03599 "psrlq _ShiftRem, %%mm1 \n\t" /* last pixel of the previous qword moves to the low bytes */
03600 /* stage 1: bytes 0..2 of this qword */
03601 "movq (%%esi,%%ecx,), %%mm2 \n\t"
03602 "punpcklbw %%mm0, %%mm1 \n\t" /* mm1 = a as 16-bit words */
03603 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
03604 "punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = b as 16-bit words */
03605 "psrlq _ShiftRem, %%mm3 \n\t"
03606
03607
03608 "movq %%mm2, %%mm4 \n\t"
03609 "punpcklbw %%mm0, %%mm3 \n\t" /* mm3 = c as 16-bit words */
03610
03611 "movq %%mm1, %%mm5 \n\t"
03612 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03613 "pxor %%mm7, %%mm7 \n\t"
03614
03615 "movq %%mm4, %%mm6 \n\t"
03616 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03617
03618 /* Absolute values per word: build a mask of negative lanes, keep x in those */
03619 /* lanes only, then subtract it twice (x - 2x = -x). */
03620
03621 "pcmpgtw %%mm4, %%mm0 \n\t"
03622 "paddw %%mm5, %%mm6 \n\t" /* mm6 = (b - c) + (a - c) */
03623 "pand %%mm4, %%mm0 \n\t"
03624 "pcmpgtw %%mm5, %%mm7 \n\t"
03625 "psubw %%mm0, %%mm4 \n\t"
03626 "pand %%mm5, %%mm7 \n\t"
03627 "psubw %%mm0, %%mm4 \n\t" /* mm4 = pa = |b - c| */
03628 "psubw %%mm7, %%mm5 \n\t"
03629 "pxor %%mm0, %%mm0 \n\t"
03630 "pcmpgtw %%mm6, %%mm0 \n\t"
03631 "pand %%mm6, %%mm0 \n\t"
03632 "psubw %%mm7, %%mm5 \n\t" /* mm5 = pb = |a - c| */
03633 "psubw %%mm0, %%mm6 \n\t"
03634
03635 "movq %%mm4, %%mm7 \n\t"
03636 "psubw %%mm0, %%mm6 \n\t" /* mm6 = pc */
03637 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
03638 "movq %%mm7, %%mm0 \n\t"
03639
03640 "pand %%mm7, %%mm5 \n\t"
03641
03642 "pand %%mm0, %%mm2 \n\t"
03643 "pandn %%mm4, %%mm7 \n\t"
03644 "pandn %%mm1, %%mm0 \n\t"
03645 "paddw %%mm5, %%mm7 \n\t" /* mm7 = pa > pb ? pb : pa */
03646 "paddw %%mm2, %%mm0 \n\t" /* mm0 = pa > pb ? b : a */
03647
03648 "pcmpgtw %%mm6, %%mm7 \n\t" /* mask: that smaller distance > pc */
03649 "pxor %%mm1, %%mm1 \n\t"
03650 "pand %%mm7, %%mm3 \n\t"
03651 "pandn %%mm0, %%mm7 \n\t"
03652 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor: c where the mask is set, else a or b */
03653 "pxor %%mm0, %%mm0 \n\t"
03654 "packuswb %%mm1, %%mm7 \n\t" /* words back to bytes, in the low half */
03655 "movq (%%esi,%%ecx,), %%mm3 \n\t"
03656 "pand _ActiveMask, %%mm7 \n\t" /* keep only the 3 bytes of this pixel */
03657 "movq %%mm3, %%mm2 \n\t"
03658 "paddb (%%edi,%%ecx,), %%mm7 \n\t" /* add predictor to the filtered bytes */
03659 "punpcklbw %%mm0, %%mm3 \n\t" /* c for stage 2 = prev_row bytes 0.. */
03660 "movq %%mm7, (%%edi,%%ecx,) \n\t"
03661 "movq %%mm7, %%mm1 \n\t"
03662
03663 /* stage 2: bytes 3..5 of this qword */
03664 "psrlq _ShiftBpp, %%mm2 \n\t"
03665 "punpcklbw %%mm0, %%mm1 \n\t" /* a = bytes just decoded */
03666 "pxor %%mm7, %%mm7 \n\t"
03667 "punpcklbw %%mm0, %%mm2 \n\t" /* b = prev_row bytes 3.. */
03668
03669 "movq %%mm1, %%mm5 \n\t"
03670
03671 "movq %%mm2, %%mm4 \n\t"
03672 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03673 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03674
03675
03676 "movq %%mm5, %%mm6 \n\t"
03677 "paddw %%mm4, %%mm6 \n\t"
03678
03679
03680 /* absolute values, as in stage 1 */
03681
03682 "pcmpgtw %%mm5, %%mm0 \n\t"
03683 "pcmpgtw %%mm4, %%mm7 \n\t"
03684 "pand %%mm5, %%mm0 \n\t"
03685 "pand %%mm4, %%mm7 \n\t"
03686 "psubw %%mm0, %%mm5 \n\t"
03687 "psubw %%mm7, %%mm4 \n\t"
03688 "psubw %%mm0, %%mm5 \n\t"
03689 "psubw %%mm7, %%mm4 \n\t"
03690 "pxor %%mm0, %%mm0 \n\t"
03691 "pcmpgtw %%mm6, %%mm0 \n\t"
03692 "pand %%mm6, %%mm0 \n\t"
03693 "psubw %%mm0, %%mm6 \n\t"
03694
03695 "movq %%mm4, %%mm7 \n\t"
03696 "psubw %%mm0, %%mm6 \n\t"
03697 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
03698 "movq %%mm7, %%mm0 \n\t"
03699
03700 "pand %%mm7, %%mm5 \n\t"
03701
03702 "pand %%mm0, %%mm2 \n\t"
03703 "pandn %%mm4, %%mm7 \n\t"
03704 "pandn %%mm1, %%mm0 \n\t"
03705 "paddw %%mm5, %%mm7 \n\t"
03706 "paddw %%mm2, %%mm0 \n\t"
03707
03708 "pcmpgtw %%mm6, %%mm7 \n\t"
03709 "movq (%%esi,%%ecx,), %%mm2 \n\t"
03710 "pand %%mm7, %%mm3 \n\t"
03711 "pandn %%mm0, %%mm7 \n\t"
03712 "pxor %%mm1, %%mm1 \n\t"
03713 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
03714 "pxor %%mm0, %%mm0 \n\t"
03715 "packuswb %%mm1, %%mm7 \n\t"
03716 "movq %%mm2, %%mm3 \n\t"
03717 "pand _ActiveMask, %%mm7 \n\t"
03718 "punpckhbw %%mm0, %%mm2 \n\t" /* b for stage 3 = prev_row bytes 4..7 */
03719 "psllq _ShiftBpp, %%mm7 \n\t" /* move the predictor up to bytes 3..5 */
03720
03721
03722 "movq %%mm2, %%mm4 \n\t"
03723 "paddb (%%edi,%%ecx,), %%mm7 \n\t"
03724 "psllq _ShiftBpp, %%mm3 \n\t"
03725 "movq %%mm7, (%%edi,%%ecx,) \n\t"
03726 "movq %%mm7, %%mm1 \n\t"
03727 "punpckhbw %%mm0, %%mm3 \n\t" /* c = prev_row shifted up by one pixel, high half */
03728 "psllq _ShiftBpp, %%mm1 \n\t"
03729
03730 /* stage 3: bytes 6..7 of this qword (upper two word lanes) */
03731 "pxor %%mm7, %%mm7 \n\t"
03732 "punpckhbw %%mm0, %%mm1 \n\t" /* a = decoded bytes shifted up by one pixel, high half */
03733 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03734
03735 "movq %%mm1, %%mm5 \n\t"
03736
03737 "movq %%mm4, %%mm6 \n\t"
03738 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03739 "pxor %%mm0, %%mm0 \n\t"
03740 "paddw %%mm5, %%mm6 \n\t"
03741
03742
03743 /* absolute values, as in stage 1 */
03744
03745 "pcmpgtw %%mm4, %%mm0 \n\t"
03746 "pcmpgtw %%mm5, %%mm7 \n\t"
03747 "pand %%mm4, %%mm0 \n\t"
03748 "pand %%mm5, %%mm7 \n\t"
03749 "psubw %%mm0, %%mm4 \n\t"
03750 "psubw %%mm7, %%mm5 \n\t"
03751 "psubw %%mm0, %%mm4 \n\t"
03752 "psubw %%mm7, %%mm5 \n\t"
03753 "pxor %%mm0, %%mm0 \n\t"
03754 "pcmpgtw %%mm6, %%mm0 \n\t"
03755 "pand %%mm6, %%mm0 \n\t"
03756 "psubw %%mm0, %%mm6 \n\t"
03757
03758 "movq %%mm4, %%mm7 \n\t"
03759 "psubw %%mm0, %%mm6 \n\t"
03760 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
03761 "movq %%mm7, %%mm0 \n\t"
03762
03763 "pand %%mm0, %%mm2 \n\t"
03764
03765 "pand %%mm7, %%mm5 \n\t"
03766 "pandn %%mm1, %%mm0 \n\t"
03767 "pandn %%mm4, %%mm7 \n\t"
03768 "paddw %%mm2, %%mm0 \n\t"
03769 "paddw %%mm5, %%mm7 \n\t"
03770
03771 "pcmpgtw %%mm6, %%mm7 \n\t"
03772 "pand %%mm7, %%mm3 \n\t"
03773 "pandn %%mm0, %%mm7 \n\t"
03774 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
03775 "pxor %%mm1, %%mm1 \n\t"
03776 "packuswb %%mm7, %%mm1 \n\t" /* predictor bytes land in the high half of mm1 */
03777
03778 "addl $8, %%ecx \n\t"
03779 "pand _ActiveMaskEnd, %%mm1 \n\t" /* keep only bytes 6..7 */
03780 "paddb -8(%%edi,%%ecx,), %%mm1 \n\t"
03781
03782
03783 "cmpl _MMXLength, %%ecx \n\t"
03784 "pxor %%mm0, %%mm0 \n\t"
03785 "movq %%mm1, -8(%%edi,%%ecx,) \n\t" /* mm1 now holds the fully decoded qword for the next pass */
03786
03787
03788 "jb paeth_3lp \n\t"
03789
03790 : "=S" (dummy_value_S),
03791 "=D" (dummy_value_D)
03792
03793 : "0" (prev_row),
03794 "1" (row)
03795
03796 : "%ecx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
03797 #if 0
03798 , "%mm0", "%mm1", "%mm2", "%mm3"
03799 , "%mm4", "%mm5", "%mm6", "%mm7"
03800 #endif
03801 );
03802 }
03803 break;
03804
03805 case 6:
03806
03807
03808 {
03809 _ActiveMask.use = 0x00000000ffffffffLL; /* low 4 bytes: first half of the qword */
03810 _ActiveMask2.use = 0xffffffff00000000LL; /* stored here, but not referenced by the asm below */
03811 _ShiftBpp.use = bpp << 3; /* bits per pixel */
03812 _ShiftRem.use = 64 - _ShiftBpp.use;
03813
03814 __asm__ __volatile__ (
03815 "movl _dif, %%ecx \n\t" /* ecx = byte offset; esi = prev_row, edi = row */
03816
03817
03818
03819 "movq -8(%%edi,%%ecx,), %%mm1 \n\t" /* previous 8 decoded bytes of row */
03820 "pxor %%mm0, %%mm0 \n\t"
03821
03822 "paeth_6lp: \n\t"
03823 /* stage 1: bytes 0..3 of this qword */
03824 "psrlq _ShiftRem, %%mm1 \n\t" /* previous pixel moves to the low bytes */
03825
03826 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
03827 "punpcklbw %%mm0, %%mm1 \n\t" /* mm1 = a as words */
03828 "movq (%%esi,%%ecx,), %%mm2 \n\t"
03829 "punpcklbw %%mm0, %%mm2 \n\t" /* mm2 = b as words */
03830
03831 "psrlq _ShiftRem, %%mm3 \n\t"
03832
03833 "movq %%mm2, %%mm4 \n\t"
03834 "punpcklbw %%mm0, %%mm3 \n\t" /* mm3 = c as words */
03835
03836 "movq %%mm1, %%mm5 \n\t"
03837 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03838 "pxor %%mm7, %%mm7 \n\t"
03839
03840 "movq %%mm4, %%mm6 \n\t"
03841 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03842
03843 /* absolute values: subtract the negative lanes twice */
03844
03845 "pcmpgtw %%mm4, %%mm0 \n\t"
03846 "paddw %%mm5, %%mm6 \n\t" /* mm6 = (b - c) + (a - c) */
03847 "pand %%mm4, %%mm0 \n\t"
03848 "pcmpgtw %%mm5, %%mm7 \n\t"
03849 "psubw %%mm0, %%mm4 \n\t"
03850 "pand %%mm5, %%mm7 \n\t"
03851 "psubw %%mm0, %%mm4 \n\t" /* mm4 = pa */
03852 "psubw %%mm7, %%mm5 \n\t"
03853 "pxor %%mm0, %%mm0 \n\t"
03854 "pcmpgtw %%mm6, %%mm0 \n\t"
03855 "pand %%mm6, %%mm0 \n\t"
03856 "psubw %%mm7, %%mm5 \n\t" /* mm5 = pb */
03857 "psubw %%mm0, %%mm6 \n\t"
03858
03859 "movq %%mm4, %%mm7 \n\t"
03860 "psubw %%mm0, %%mm6 \n\t" /* mm6 = pc */
03861 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
03862 "movq %%mm7, %%mm0 \n\t"
03863
03864 "pand %%mm7, %%mm5 \n\t"
03865
03866 "pand %%mm0, %%mm2 \n\t"
03867 "pandn %%mm4, %%mm7 \n\t"
03868 "pandn %%mm1, %%mm0 \n\t"
03869 "paddw %%mm5, %%mm7 \n\t" /* mm7 = pa > pb ? pb : pa */
03870 "paddw %%mm2, %%mm0 \n\t" /* mm0 = pa > pb ? b : a */
03871
03872 "pcmpgtw %%mm6, %%mm7 \n\t" /* mask: that smaller distance > pc */
03873 "pxor %%mm1, %%mm1 \n\t"
03874 "pand %%mm7, %%mm3 \n\t"
03875 "pandn %%mm0, %%mm7 \n\t"
03876 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
03877 "pxor %%mm0, %%mm0 \n\t"
03878 "packuswb %%mm1, %%mm7 \n\t"
03879 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
03880 "pand _ActiveMask, %%mm7 \n\t" /* keep bytes 0..3 */
03881 "psrlq _ShiftRem, %%mm3 \n\t"
03882 "movq (%%esi,%%ecx,), %%mm2 \n\t"
03883 "paddb (%%edi,%%ecx,), %%mm7 \n\t"
03884 "movq %%mm2, %%mm6 \n\t"
03885 "movq %%mm7, (%%edi,%%ecx,) \n\t"
03886 "movq -8(%%edi,%%ecx,), %%mm1 \n\t"
03887 "psllq _ShiftBpp, %%mm6 \n\t"
03888 "movq %%mm7, %%mm5 \n\t"
03889 "psrlq _ShiftRem, %%mm1 \n\t"
03890 "por %%mm6, %%mm3 \n\t" /* c: tail of the previous prev_row qword joined with the head of this one */
03891 "psllq _ShiftBpp, %%mm5 \n\t"
03892 "punpckhbw %%mm0, %%mm3 \n\t"
03893 "por %%mm5, %%mm1 \n\t" /* a: same splice, taken from the decoded row */
03894 /* stage 2: bytes 4..7 of this qword */
03895 "punpckhbw %%mm0, %%mm2 \n\t" /* b = prev_row bytes 4..7 */
03896 "punpckhbw %%mm0, %%mm1 \n\t"
03897
03898 "movq %%mm2, %%mm4 \n\t"
03899
03900 "movq %%mm1, %%mm5 \n\t"
03901 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03902 "pxor %%mm7, %%mm7 \n\t"
03903
03904 "movq %%mm4, %%mm6 \n\t"
03905 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03906
03907 /* absolute values, as in stage 1 */
03908
03909 "pcmpgtw %%mm4, %%mm0 \n\t"
03910 "paddw %%mm5, %%mm6 \n\t"
03911 "pand %%mm4, %%mm0 \n\t"
03912 "pcmpgtw %%mm5, %%mm7 \n\t"
03913 "psubw %%mm0, %%mm4 \n\t"
03914 "pand %%mm5, %%mm7 \n\t"
03915 "psubw %%mm0, %%mm4 \n\t"
03916 "psubw %%mm7, %%mm5 \n\t"
03917 "pxor %%mm0, %%mm0 \n\t"
03918 "pcmpgtw %%mm6, %%mm0 \n\t"
03919 "pand %%mm6, %%mm0 \n\t"
03920 "psubw %%mm7, %%mm5 \n\t"
03921 "psubw %%mm0, %%mm6 \n\t"
03922
03923 "movq %%mm4, %%mm7 \n\t"
03924 "psubw %%mm0, %%mm6 \n\t"
03925 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
03926 "movq %%mm7, %%mm0 \n\t"
03927
03928 "pand %%mm7, %%mm5 \n\t"
03929
03930 "pand %%mm0, %%mm2 \n\t"
03931 "pandn %%mm4, %%mm7 \n\t"
03932 "pandn %%mm1, %%mm0 \n\t"
03933 "paddw %%mm5, %%mm7 \n\t"
03934 "paddw %%mm2, %%mm0 \n\t"
03935
03936 "pcmpgtw %%mm6, %%mm7 \n\t"
03937 "pxor %%mm1, %%mm1 \n\t"
03938 "pand %%mm7, %%mm3 \n\t"
03939 "pandn %%mm0, %%mm7 \n\t"
03940 "pxor %%mm1, %%mm1 \n\t" /* repeats the clear three lines up; kept as-is */
03941 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
03942 "pxor %%mm0, %%mm0 \n\t"
03943
03944 "addl $8, %%ecx \n\t"
03945 "packuswb %%mm7, %%mm1 \n\t" /* predictor bytes in the high half, zeros in the low half */
03946 "paddb -8(%%edi,%%ecx,), %%mm1 \n\t"
03947 "cmpl _MMXLength, %%ecx \n\t"
03948 "movq %%mm1, -8(%%edi,%%ecx,) \n\t" /* mm1 = decoded qword, reused at the loop top */
03949
03950 "jb paeth_6lp \n\t"
03951
03952 : "=S" (dummy_value_S),
03953 "=D" (dummy_value_D)
03954
03955 : "0" (prev_row),
03956 "1" (row)
03957
03958 : "%ecx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
03959 #if 0
03960 , "%mm0", "%mm1", "%mm2", "%mm3"
03961 , "%mm4", "%mm5", "%mm6", "%mm7"
03962 #endif
03963 );
03964 }
03965 break;
03966
03967 case 4:
03968 {
03969 _ActiveMask.use = 0x00000000ffffffffLL; /* low 4 bytes: one pixel */
03970
03971 __asm__ __volatile__ (
03972 "movl _dif, %%ecx \n\t" /* ecx = byte offset; esi = prev_row, edi = row */
03973
03974
03975 "pxor %%mm0, %%mm0 \n\t"
03976
03977 "movq -8(%%edi,%%ecx,), %%mm1 \n\t" /* previous 8 decoded bytes of row */
03978
03979 "paeth_4lp: \n\t"
03980 /* stage 1: first pixel of the qword (bytes 0..3) */
03981 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
03982 "punpckhbw %%mm0, %%mm1 \n\t" /* a = high pixel of the previous row qword */
03983 "movq (%%esi,%%ecx,), %%mm2 \n\t"
03984 "punpcklbw %%mm0, %%mm2 \n\t" /* b = low pixel of this prev_row qword */
03985
03986 "movq %%mm2, %%mm4 \n\t"
03987 "punpckhbw %%mm0, %%mm3 \n\t" /* c = high pixel of the previous prev_row qword */
03988
03989 "movq %%mm1, %%mm5 \n\t"
03990 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
03991 "pxor %%mm7, %%mm7 \n\t"
03992
03993 "movq %%mm4, %%mm6 \n\t"
03994 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
03995
03996 /* absolute values: subtract the negative lanes twice */
03997
03998 "pcmpgtw %%mm4, %%mm0 \n\t"
03999 "paddw %%mm5, %%mm6 \n\t" /* mm6 = (b - c) + (a - c) */
04000 "pand %%mm4, %%mm0 \n\t"
04001 "pcmpgtw %%mm5, %%mm7 \n\t"
04002 "psubw %%mm0, %%mm4 \n\t"
04003 "pand %%mm5, %%mm7 \n\t"
04004 "psubw %%mm0, %%mm4 \n\t" /* mm4 = pa */
04005 "psubw %%mm7, %%mm5 \n\t"
04006 "pxor %%mm0, %%mm0 \n\t"
04007 "pcmpgtw %%mm6, %%mm0 \n\t"
04008 "pand %%mm6, %%mm0 \n\t"
04009 "psubw %%mm7, %%mm5 \n\t" /* mm5 = pb */
04010 "psubw %%mm0, %%mm6 \n\t"
04011
04012 "movq %%mm4, %%mm7 \n\t"
04013 "psubw %%mm0, %%mm6 \n\t" /* mm6 = pc */
04014 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
04015 "movq %%mm7, %%mm0 \n\t"
04016
04017 "pand %%mm7, %%mm5 \n\t"
04018
04019 "pand %%mm0, %%mm2 \n\t"
04020 "pandn %%mm4, %%mm7 \n\t"
04021 "pandn %%mm1, %%mm0 \n\t"
04022 "paddw %%mm5, %%mm7 \n\t" /* mm7 = pa > pb ? pb : pa */
04023 "paddw %%mm2, %%mm0 \n\t" /* mm0 = pa > pb ? b : a */
04024
04025 "pcmpgtw %%mm6, %%mm7 \n\t" /* mask: that smaller distance > pc */
04026 "pxor %%mm1, %%mm1 \n\t"
04027 "pand %%mm7, %%mm3 \n\t"
04028 "pandn %%mm0, %%mm7 \n\t"
04029 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
04030 "pxor %%mm0, %%mm0 \n\t"
04031 "packuswb %%mm1, %%mm7 \n\t"
04032 "movq (%%esi,%%ecx,), %%mm3 \n\t"
04033 "pand _ActiveMask, %%mm7 \n\t" /* keep bytes 0..3 */
04034 "movq %%mm3, %%mm2 \n\t"
04035 "paddb (%%edi,%%ecx,), %%mm7 \n\t"
04036 "punpcklbw %%mm0, %%mm3 \n\t" /* c for stage 2 = low pixel of this prev_row qword */
04037 "movq %%mm7, (%%edi,%%ecx,) \n\t"
04038 "movq %%mm7, %%mm1 \n\t"
04039 /* stage 2: second pixel of the qword (bytes 4..7) */
04040 "punpckhbw %%mm0, %%mm2 \n\t" /* b = high pixel of this prev_row qword */
04041 "punpcklbw %%mm0, %%mm1 \n\t" /* a = pixel just decoded */
04042
04043 "movq %%mm2, %%mm4 \n\t"
04044
04045 "movq %%mm1, %%mm5 \n\t"
04046 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
04047 "pxor %%mm7, %%mm7 \n\t"
04048
04049 "movq %%mm4, %%mm6 \n\t"
04050 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
04051
04052 /* absolute values, as in stage 1 */
04053
04054 "pcmpgtw %%mm4, %%mm0 \n\t"
04055 "paddw %%mm5, %%mm6 \n\t"
04056 "pand %%mm4, %%mm0 \n\t"
04057 "pcmpgtw %%mm5, %%mm7 \n\t"
04058 "psubw %%mm0, %%mm4 \n\t"
04059 "pand %%mm5, %%mm7 \n\t"
04060 "psubw %%mm0, %%mm4 \n\t"
04061 "psubw %%mm7, %%mm5 \n\t"
04062 "pxor %%mm0, %%mm0 \n\t"
04063 "pcmpgtw %%mm6, %%mm0 \n\t"
04064 "pand %%mm6, %%mm0 \n\t"
04065 "psubw %%mm7, %%mm5 \n\t"
04066 "psubw %%mm0, %%mm6 \n\t"
04067
04068 "movq %%mm4, %%mm7 \n\t"
04069 "psubw %%mm0, %%mm6 \n\t"
04070 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
04071 "movq %%mm7, %%mm0 \n\t"
04072
04073 "pand %%mm7, %%mm5 \n\t"
04074
04075 "pand %%mm0, %%mm2 \n\t"
04076 "pandn %%mm4, %%mm7 \n\t"
04077 "pandn %%mm1, %%mm0 \n\t"
04078 "paddw %%mm5, %%mm7 \n\t"
04079 "paddw %%mm2, %%mm0 \n\t"
04080
04081 "pcmpgtw %%mm6, %%mm7 \n\t"
04082 "pxor %%mm1, %%mm1 \n\t"
04083 "pand %%mm7, %%mm3 \n\t"
04084 "pandn %%mm0, %%mm7 \n\t"
04085 "pxor %%mm1, %%mm1 \n\t" /* repeats the clear three lines up; kept as-is */
04086 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
04087 "pxor %%mm0, %%mm0 \n\t"
04088
04089 "addl $8, %%ecx \n\t"
04090 "packuswb %%mm7, %%mm1 \n\t" /* predictor bytes in the high half, zeros in the low half */
04091 "paddb -8(%%edi,%%ecx,), %%mm1 \n\t"
04092 "cmpl _MMXLength, %%ecx \n\t"
04093 "movq %%mm1, -8(%%edi,%%ecx,) \n\t" /* mm1 = decoded qword, reused at the loop top */
04094
04095 "jb paeth_4lp \n\t"
04096
04097 : "=S" (dummy_value_S),
04098 "=D" (dummy_value_D)
04099
04100 : "0" (prev_row),
04101 "1" (row)
04102
04103 : "%ecx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
04104 #if 0
04105 , "%mm0", "%mm1", "%mm2", "%mm3"
04106 , "%mm4", "%mm5", "%mm6", "%mm7"
04107 #endif
04108 );
04109 }
04110 break;
04111
04112 case 8:
04113 {
04114 _ActiveMask.use = 0x00000000ffffffffLL; /* low 4 bytes: first half of the pixel */
04115
04116 __asm__ __volatile__ (
04117 "movl _dif, %%ecx \n\t" /* ecx = byte offset; esi = prev_row, edi = row */
04118
04119
04120 "pxor %%mm0, %%mm0 \n\t"
04121
04122 "movq -8(%%edi,%%ecx,), %%mm1 \n\t" /* previous pixel of row */
04123
04124 "paeth_8lp: \n\t"
04125 /* stage 1: low 4 bytes of the pixel */
04126 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
04127 "punpcklbw %%mm0, %%mm1 \n\t" /* a = low half of the previous row pixel */
04128 "movq (%%esi,%%ecx,), %%mm2 \n\t"
04129 "punpcklbw %%mm0, %%mm2 \n\t" /* b = low half of the prev_row pixel above */
04130
04131 "movq %%mm2, %%mm4 \n\t"
04132 "punpcklbw %%mm0, %%mm3 \n\t" /* c = low half of the prev_row pixel above-left */
04133
04134 "movq %%mm1, %%mm5 \n\t"
04135 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
04136 "pxor %%mm7, %%mm7 \n\t"
04137
04138 "movq %%mm4, %%mm6 \n\t"
04139 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
04140
04141 /* absolute values: subtract the negative lanes twice */
04142
04143 "pcmpgtw %%mm4, %%mm0 \n\t"
04144 "paddw %%mm5, %%mm6 \n\t" /* mm6 = (b - c) + (a - c) */
04145 "pand %%mm4, %%mm0 \n\t"
04146 "pcmpgtw %%mm5, %%mm7 \n\t"
04147 "psubw %%mm0, %%mm4 \n\t"
04148 "pand %%mm5, %%mm7 \n\t"
04149 "psubw %%mm0, %%mm4 \n\t" /* mm4 = pa */
04150 "psubw %%mm7, %%mm5 \n\t"
04151 "pxor %%mm0, %%mm0 \n\t"
04152 "pcmpgtw %%mm6, %%mm0 \n\t"
04153 "pand %%mm6, %%mm0 \n\t"
04154 "psubw %%mm7, %%mm5 \n\t" /* mm5 = pb */
04155 "psubw %%mm0, %%mm6 \n\t"
04156
04157 "movq %%mm4, %%mm7 \n\t"
04158 "psubw %%mm0, %%mm6 \n\t" /* mm6 = pc */
04159 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
04160 "movq %%mm7, %%mm0 \n\t"
04161
04162 "pand %%mm7, %%mm5 \n\t"
04163
04164 "pand %%mm0, %%mm2 \n\t"
04165 "pandn %%mm4, %%mm7 \n\t"
04166 "pandn %%mm1, %%mm0 \n\t"
04167 "paddw %%mm5, %%mm7 \n\t" /* mm7 = pa > pb ? pb : pa */
04168 "paddw %%mm2, %%mm0 \n\t" /* mm0 = pa > pb ? b : a */
04169
04170 "pcmpgtw %%mm6, %%mm7 \n\t" /* mask: that smaller distance > pc */
04171 "pxor %%mm1, %%mm1 \n\t"
04172 "pand %%mm7, %%mm3 \n\t"
04173 "pandn %%mm0, %%mm7 \n\t"
04174 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
04175 "pxor %%mm0, %%mm0 \n\t"
04176 "packuswb %%mm1, %%mm7 \n\t"
04177 "movq -8(%%esi,%%ecx,), %%mm3 \n\t"
04178 "pand _ActiveMask, %%mm7 \n\t" /* keep bytes 0..3 */
04179 "movq (%%esi,%%ecx,), %%mm2 \n\t"
04180 "paddb (%%edi,%%ecx,), %%mm7 \n\t"
04181 "punpckhbw %%mm0, %%mm3 \n\t" /* c = high half of the prev_row pixel above-left */
04182 "movq %%mm7, (%%edi,%%ecx,) \n\t"
04183 "movq -8(%%edi,%%ecx,), %%mm1 \n\t"
04184
04185 /* stage 2: high 4 bytes of the pixel */
04186 "punpckhbw %%mm0, %%mm2 \n\t" /* b = high half of the prev_row pixel above */
04187 "punpckhbw %%mm0, %%mm1 \n\t" /* a = high half of the previous row pixel */
04188
04189 "movq %%mm2, %%mm4 \n\t"
04190
04191 "movq %%mm1, %%mm5 \n\t"
04192 "psubw %%mm3, %%mm4 \n\t" /* mm4 = b - c */
04193 "pxor %%mm7, %%mm7 \n\t"
04194
04195 "movq %%mm4, %%mm6 \n\t"
04196 "psubw %%mm3, %%mm5 \n\t" /* mm5 = a - c */
04197
04198 /* absolute values, as in stage 1 */
04199
04200 "pcmpgtw %%mm4, %%mm0 \n\t"
04201 "paddw %%mm5, %%mm6 \n\t"
04202 "pand %%mm4, %%mm0 \n\t"
04203 "pcmpgtw %%mm5, %%mm7 \n\t"
04204 "psubw %%mm0, %%mm4 \n\t"
04205 "pand %%mm5, %%mm7 \n\t"
04206 "psubw %%mm0, %%mm4 \n\t"
04207 "psubw %%mm7, %%mm5 \n\t"
04208 "pxor %%mm0, %%mm0 \n\t"
04209 "pcmpgtw %%mm6, %%mm0 \n\t"
04210 "pand %%mm6, %%mm0 \n\t"
04211 "psubw %%mm7, %%mm5 \n\t"
04212 "psubw %%mm0, %%mm6 \n\t"
04213
04214 "movq %%mm4, %%mm7 \n\t"
04215 "psubw %%mm0, %%mm6 \n\t"
04216 "pcmpgtw %%mm5, %%mm7 \n\t" /* mask: pa > pb */
04217 "movq %%mm7, %%mm0 \n\t"
04218
04219 "pand %%mm7, %%mm5 \n\t"
04220
04221 "pand %%mm0, %%mm2 \n\t"
04222 "pandn %%mm4, %%mm7 \n\t"
04223 "pandn %%mm1, %%mm0 \n\t"
04224 "paddw %%mm5, %%mm7 \n\t"
04225 "paddw %%mm2, %%mm0 \n\t"
04226
04227 "pcmpgtw %%mm6, %%mm7 \n\t"
04228 "pxor %%mm1, %%mm1 \n\t"
04229 "pand %%mm7, %%mm3 \n\t"
04230 "pandn %%mm0, %%mm7 \n\t"
04231 "pxor %%mm1, %%mm1 \n\t" /* repeats the clear three lines up; kept as-is */
04232 "paddw %%mm3, %%mm7 \n\t" /* mm7 = predictor words */
04233 "pxor %%mm0, %%mm0 \n\t"
04234
04235 "addl $8, %%ecx \n\t"
04236 "packuswb %%mm7, %%mm1 \n\t" /* predictor bytes in the high half, zeros in the low half */
04237 "paddb -8(%%edi,%%ecx,), %%mm1 \n\t"
04238 "cmpl _MMXLength, %%ecx \n\t"
04239 "movq %%mm1, -8(%%edi,%%ecx,) \n\t" /* mm1 = decoded pixel, reused at the loop top */
04240
04241 "jb paeth_8lp \n\t"
04242
04243 : "=S" (dummy_value_S),
04244 "=D" (dummy_value_D)
04245
04246 : "0" (prev_row),
04247 "1" (row)
04248
04249 : "%ecx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
04250 #if 0
04251 , "%mm0", "%mm1", "%mm2", "%mm3"
04252 , "%mm4", "%mm5", "%mm6", "%mm7"
04253 #endif
04254 );
04255 }
04256 break;
04257
04258 case 1:
04259 case 2:
04260 default:
04261 { /* No MMX loop for these sizes: scalar-decode everything from _dif to _FullLength. */
04262 __asm__ __volatile__ (
04263 #ifdef __PIC__
04264 "pushl %%ebx \n\t"
04265 #endif
04266 "movl _dif, %%ebx \n\t" /* ebx = offset of the next byte to decode */
04267 "cmpl _FullLength, %%ebx \n\t"
04268 "jnb paeth_dend \n\t" /* nothing left */
04269
04270
04271 /* edi = row, esi = prev_row, ecx = bpp on entry */
04272
04273 "movl %%ebx, %%edx \n\t"
04274
04275 "subl %%ecx, %%edx \n\t" /* edx = ebx - bpp: offset of the left neighbour */
04276 "xorl %%ecx, %%ecx \n\t"
04277 /* same scalar predictor as paeth_lp1: a = left, b = above, c = above-left */
04278 "paeth_dlp: \n\t"
04279 "xorl %%eax, %%eax \n\t"
04280
04281 "movb (%%esi,%%ebx,), %%al \n\t" /* eax = b */
04282 "movb (%%esi,%%edx,), %%cl \n\t" /* ecx = c */
04283 "subl %%ecx, %%eax \n\t" /* b - c */
04284 "movl %%eax, _patemp \n\t"
04285 "xorl %%eax, %%eax \n\t"
04286
04287 "movb (%%edi,%%edx,), %%al \n\t" /* eax = a */
04288 "subl %%ecx, %%eax \n\t" /* a - c */
04289 "movl %%eax, %%ecx \n\t"
04290
04291 "addl _patemp, %%eax \n\t" /* (a - c) + (b - c) */
04292
04293 "testl $0x80000000, %%eax \n\t"
04294 "jz paeth_dpca \n\t"
04295 "negl %%eax \n\t"
04296
04297 "paeth_dpca: \n\t"
04298 "movl %%eax, _pctemp \n\t" /* pc */
04299
04300 "testl $0x80000000, %%ecx \n\t"
04301 "jz paeth_dpba \n\t"
04302 "negl %%ecx \n\t"
04303
04304 "paeth_dpba: \n\t"
04305 "movl %%ecx, _pbtemp \n\t" /* pb */
04306
04307 "movl _patemp, %%eax \n\t"
04308 "testl $0x80000000, %%eax \n\t"
04309 "jz paeth_dpaa \n\t"
04310 "negl %%eax \n\t"
04311
04312 "paeth_dpaa: \n\t"
04313 "movl %%eax, _patemp \n\t" /* pa */
04314
04315 "cmpl %%ecx, %%eax \n\t"
04316 "jna paeth_dabb \n\t" /* pa <= pb */
04317
04318 "cmpl _pctemp, %%ecx \n\t"
04319 "jna paeth_dbbc \n\t" /* pa > pb and pb <= pc */
04320
04321 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
04322 "jmp paeth_dpaeth \n\t"
04323
04324 "paeth_dbbc: \n\t"
04325
04326 "movb (%%esi,%%ebx,), %%cl \n\t" /* predictor = b */
04327 "jmp paeth_dpaeth \n\t"
04328
04329 "paeth_dabb: \n\t"
04330
04331 "cmpl _pctemp, %%eax \n\t"
04332 "jna paeth_dabc \n\t" /* pa <= pb and pa <= pc */
04333
04334 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
04335 "jmp paeth_dpaeth \n\t"
04336
04337 "paeth_dabc: \n\t"
04338
04339 "movb (%%edi,%%edx,), %%cl \n\t" /* predictor = a */
04340
04341 "paeth_dpaeth: \n\t"
04342 "incl %%ebx \n\t"
04343 "incl %%edx \n\t"
04344
04345 "addb %%cl, -1(%%edi,%%ebx,) \n\t" /* row[ebx-1] += predictor */
04346 "cmpl _FullLength, %%ebx \n\t"
04347 "jb paeth_dlp \n\t"
04348
04349 "paeth_dend: \n\t"
04350 #ifdef __PIC__
04351 "popl %%ebx \n\t"
04352 #endif
04353
04354 : "=c" (dummy_value_c),
04355 "=S" (dummy_value_S),
04356 "=D" (dummy_value_D)
04357
04358 : "0" (bpp),
04359 "1" (prev_row),
04360 "2" (row)
04361
04362 : "%eax", "%edx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
04363 #ifndef __PIC__
04364 , "%ebx"
04365 #endif
04366 );
04367 }
04368 return; /* no MMX instruction ran on this path, so the EMMS epilogue below is skipped */
04369
04370 }
04371 /* Block 3: scalar-decode the bytes from _MMXLength to _FullLength left over by the MMX loop, then EMMS. */
04372 __asm__ __volatile__ (
04373
04374
04375 #ifdef __PIC__
04376 "pushl %%ebx \n\t"
04377 #endif
04378 "movl _MMXLength, %%ebx \n\t" /* ebx = offset of the next byte to decode */
04379 "cmpl _FullLength, %%ebx \n\t"
04380 "jnb paeth_end \n\t" /* nothing left */
04381
04382 /* edi = row, esi = prev_row, ecx = bpp on entry */
04383
04384 "movl %%ebx, %%edx \n\t"
04385
04386 "subl %%ecx, %%edx \n\t" /* edx = ebx - bpp: offset of the left neighbour */
04387 "xorl %%ecx, %%ecx \n\t"
04388 /* same scalar predictor as paeth_lp1: a = left, b = above, c = above-left */
04389 "paeth_lp2: \n\t"
04390 "xorl %%eax, %%eax \n\t"
04391
04392 "movb (%%esi,%%ebx,), %%al \n\t" /* eax = b */
04393 "movb (%%esi,%%edx,), %%cl \n\t" /* ecx = c */
04394 "subl %%ecx, %%eax \n\t" /* b - c */
04395 "movl %%eax, _patemp \n\t"
04396 "xorl %%eax, %%eax \n\t"
04397
04398 "movb (%%edi,%%edx,), %%al \n\t" /* eax = a */
04399 "subl %%ecx, %%eax \n\t" /* a - c */
04400 "movl %%eax, %%ecx \n\t"
04401
04402 "addl _patemp, %%eax \n\t" /* (a - c) + (b - c) */
04403
04404 "testl $0x80000000, %%eax \n\t"
04405 "jz paeth_pca2 \n\t"
04406 "negl %%eax \n\t"
04407
04408 "paeth_pca2: \n\t"
04409 "movl %%eax, _pctemp \n\t" /* pc */
04410
04411 "testl $0x80000000, %%ecx \n\t"
04412 "jz paeth_pba2 \n\t"
04413 "negl %%ecx \n\t"
04414
04415 "paeth_pba2: \n\t"
04416 "movl %%ecx, _pbtemp \n\t" /* pb */
04417
04418 "movl _patemp, %%eax \n\t"
04419 "testl $0x80000000, %%eax \n\t"
04420 "jz paeth_paa2 \n\t"
04421 "negl %%eax \n\t"
04422
04423 "paeth_paa2: \n\t"
04424 "movl %%eax, _patemp \n\t" /* pa */
04425
04426 "cmpl %%ecx, %%eax \n\t"
04427 "jna paeth_abb2 \n\t" /* pa <= pb */
04428
04429 "cmpl _pctemp, %%ecx \n\t"
04430 "jna paeth_bbc2 \n\t" /* pa > pb and pb <= pc */
04431
04432 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
04433 "jmp paeth_paeth2 \n\t"
04434
04435 "paeth_bbc2: \n\t"
04436
04437 "movb (%%esi,%%ebx,), %%cl \n\t" /* predictor = b */
04438 "jmp paeth_paeth2 \n\t"
04439
04440 "paeth_abb2: \n\t"
04441
04442 "cmpl _pctemp, %%eax \n\t"
04443 "jna paeth_abc2 \n\t" /* pa <= pb and pa <= pc */
04444
04445 "movb (%%esi,%%edx,), %%cl \n\t" /* predictor = c */
04446 "jmp paeth_paeth2 \n\t"
04447
04448 "paeth_abc2: \n\t"
04449
04450 "movb (%%edi,%%edx,), %%cl \n\t" /* predictor = a */
04451
04452 "paeth_paeth2: \n\t"
04453 "incl %%ebx \n\t"
04454 "incl %%edx \n\t"
04455
04456 "addb %%cl, -1(%%edi,%%ebx,) \n\t" /* row[ebx-1] += predictor */
04457 "cmpl _FullLength, %%ebx \n\t"
04458 "jb paeth_lp2 \n\t"
04459
04460 "paeth_end: \n\t"
04461 "EMMS \n\t" /* leave MMX state so the FPU is usable again */
04462 #ifdef __PIC__
04463 "popl %%ebx \n\t"
04464 #endif
04465
04466 : "=c" (dummy_value_c),
04467 "=S" (dummy_value_S),
04468 "=D" (dummy_value_D)
04469
04470 : "0" (bpp),
04471 "1" (prev_row),
04472 "2" (row)
04473
04474 : "%eax", "%edx", "memory", "cc" /* asm reads/writes row data and globals, and alters flags */
04475 #ifndef __PIC__
04476 , "%ebx"
04477 #endif
04478 );
04479
04480 }
04481 #endif
04482
04483
04484
04485
04486 #ifdef PNG_THREAD_UNSAFE_OK
04487
04488
04489
04490
04491
04492
04493
04494
04495 static void
04496 png_read_filter_row_mmx_sub(png_row_infop row_info, png_bytep row)
04497 {
04498 int bpp;
04499 int dummy_value_a;
04500 int dummy_value_D;
04501
04502 bpp = (row_info->pixel_depth + 7) >> 3;
04503 _FullLength = row_info->rowbytes - bpp;
04504
04505 __asm__ __volatile__ (
04506
04507 "movl %%edi, %%esi \n\t"
04508
04509 "addl %%eax, %%edi \n\t"
04510
04511
04512 "movl %%edi, _dif \n\t"
04513 "addl $0xf, _dif \n\t"
04514
04515 "xorl %%ecx, %%ecx \n\t"
04516 "andl $0xfffffff8, _dif \n\t"
04517 "subl %%edi, _dif \n\t"
04518 "jz sub_go \n\t"
04519
04520 "sub_lp1: \n\t"
04521 "movb (%%esi,%%ecx,), %%al \n\t"
04522 "addb %%al, (%%edi,%%ecx,) \n\t"
04523 "incl %%ecx \n\t"
04524 "cmpl _dif, %%ecx \n\t"
04525 "jb sub_lp1 \n\t"
04526
04527 "sub_go: \n\t"
04528 "movl _FullLength, %%eax \n\t"
04529 "movl %%eax, %%edx \n\t"
04530 "subl %%ecx, %%edx \n\t"
04531 "andl $0x00000007, %%edx \n\t"
04532 "subl %%edx, %%eax \n\t"
04533 "movl %%eax, _MMXLength \n\t"
04534
04535 : "=a" (dummy_value_a),
04536 "=D" (dummy_value_D)
04537
04538 : "0" (bpp),
04539 "1" (row)
04540
04541 : "%esi", "%ecx", "%edx"
04542
04543 #if 0
04544 , "%mm0", "%mm1", "%mm2", "%mm3"
04545 , "%mm4", "%mm5", "%mm6", "%mm7"
04546 #endif
04547 );
04548
04549
04550 switch (bpp)
04551 {
04552 case 3:
04553 {
04554 _ActiveMask.use = 0x0000ffffff000000LL;
04555 _ShiftBpp.use = 24;
04556 _ShiftRem.use = 40;
04557
04558 __asm__ __volatile__ (
04559
04560 "movq _ActiveMask, %%mm7 \n\t"
04561
04562 "movl %%edi, %%esi \n\t"
04563
04564 "addl %%eax, %%edi \n\t"
04565 "movq %%mm7, %%mm6 \n\t"
04566 "movl _dif, %%edx \n\t"
04567 "psllq _ShiftBpp, %%mm6 \n\t"
04568
04569
04570 "movq -8(%%edi,%%edx,), %%mm1 \n\t"
04571
04572 "sub_3lp: \n\t"
04573 "psrlq _ShiftRem, %%mm1 \n\t"
04574
04575
04576 "movq (%%edi,%%edx,), %%mm0 \n\t"
04577 "paddb %%mm1, %%mm0 \n\t"
04578
04579
04580 "movq %%mm0, %%mm1 \n\t"
04581 "psllq _ShiftBpp, %%mm1 \n\t"
04582 "pand %%mm7, %%mm1 \n\t"
04583 "paddb %%mm1, %%mm0 \n\t"
04584
04585
04586 "movq %%mm0, %%mm1 \n\t"
04587 "psllq _ShiftBpp, %%mm1 \n\t"
04588 "pand %%mm6, %%mm1 \n\t"
04589 "addl $8, %%edx \n\t"
04590 "paddb %%mm1, %%mm0 \n\t"
04591
04592 "cmpl _MMXLength, %%edx \n\t"
04593 "movq %%mm0, -8(%%edi,%%edx,) \n\t"
04594 "movq %%mm0, %%mm1 \n\t"
04595 "jb sub_3lp \n\t"
04596
04597 : "=a" (dummy_value_a),
04598 "=D" (dummy_value_D)
04599
04600 : "0" (bpp),
04601 "1" (row)
04602
04603 : "%edx", "%esi"
04604 #if 0
04605 , "%mm0", "%mm1", "%mm6", "%mm7"
04606 #endif
04607 );
04608 }
04609 break;
04610
04611 case 1:
04612 {
04613 __asm__ __volatile__ (
04614 "movl _dif, %%edx \n\t"
04615
04616 "cmpl _FullLength, %%edx \n\t"
04617 "jnb sub_1end \n\t"
04618 "movl %%edi, %%esi \n\t"
04619 "xorl %%eax, %%eax \n\t"
04620
04621 "addl %%eax, %%edi \n\t"
04622
04623 "sub_1lp: \n\t"
04624 "movb (%%esi,%%edx,), %%al \n\t"
04625 "addb %%al, (%%edi,%%edx,) \n\t"
04626 "incl %%edx \n\t"
04627 "cmpl _FullLength, %%edx \n\t"
04628 "jb sub_1lp \n\t"
04629
04630 "sub_1end: \n\t"
04631
04632 : "=a" (dummy_value_a),
04633 "=D" (dummy_value_D)
04634
04635 : "0" (bpp),
04636 "1" (row)
04637
04638 : "%edx", "%esi"
04639 );
04640 }
04641 return;
04642
04643 case 6:
04644 case 4:
04645
04646
04647 {
04648 _ShiftBpp.use = bpp << 3;
04649 _ShiftRem.use = 64 - _ShiftBpp.use;
04650
04651 __asm__ __volatile__ (
04652
04653 "movl _dif, %%edx \n\t"
04654 "movl %%edi, %%esi \n\t"
04655
04656 "addl %%eax, %%edi \n\t"
04657
04658
04659 "movq -8(%%edi,%%edx,), %%mm1 \n\t"
04660
04661 "sub_4lp: \n\t"
04662 "psrlq _ShiftRem, %%mm1 \n\t"
04663
04664 "movq (%%edi,%%edx,), %%mm0 \n\t"
04665 "paddb %%mm1, %%mm0 \n\t"
04666
04667
04668 "movq %%mm0, %%mm1 \n\t"
04669 "psllq _ShiftBpp, %%mm1 \n\t"
04670 "addl $8, %%edx \n\t"
04671 "paddb %%mm1, %%mm0 \n\t"
04672
04673 "cmpl _MMXLength, %%edx \n\t"
04674 "movq %%mm0, -8(%%edi,%%edx,) \n\t"
04675 "movq %%mm0, %%mm1 \n\t"
04676 "jb sub_4lp \n\t"
04677
04678 : "=a" (dummy_value_a),
04679 "=D" (dummy_value_D)
04680
04681 : "0" (bpp),
04682 "1" (row)
04683
04684 : "%edx", "%esi"
04685 #if 0
04686 , "%mm0", "%mm1"
04687 #endif
04688 );
04689 }
04690 break;
04691
04692 case 2:
04693 {
04694 _ActiveMask.use = 0x00000000ffff0000LL;
04695 _ShiftBpp.use = 16;
04696 _ShiftRem.use = 48;
04697
04698 __asm__ __volatile__ (
04699 "movq _ActiveMask, %%mm7 \n\t"
04700
04701 "movl _dif, %%edx \n\t"
04702 "movq %%mm7, %%mm6 \n\t"
04703
04704 "psllq _ShiftBpp, %%mm6 \n\t"
04705
04706 "movl %%edi, %%esi \n\t"
04707 "movq %%mm6, %%mm5 \n\t"
04708
04709 "addl %%eax, %%edi \n\t"
04710 "psllq _ShiftBpp, %%mm5 \n\t"
04711
04712
04713 "movq -8(%%edi,%%edx,), %%mm1 \n\t"
04714
04715 "sub_2lp: \n\t"
04716 "psrlq _ShiftRem, %%mm1 \n\t"
04717
04718
04719 "movq (%%edi,%%edx,), %%mm0 \n\t"
04720 "paddb %%mm1, %%mm0 \n\t"
04721
04722
04723 "movq %%mm0, %%mm1 \n\t"
04724 "psllq _ShiftBpp, %%mm1 \n\t"
04725 "pand %%mm7, %%mm1 \n\t"
04726 "paddb %%mm1, %%mm0 \n\t"
04727
04728
04729 "movq %%mm0, %%mm1 \n\t"
04730 "psllq _ShiftBpp, %%mm1 \n\t"
04731 "pand %%mm6, %%mm1 \n\t"
04732 "paddb %%mm1, %%mm0 \n\t"
04733
04734
04735 "movq %%mm0, %%mm1 \n\t"
04736 "psllq _ShiftBpp, %%mm1 \n\t"
04737 "pand %%mm5, %%mm1 \n\t"
04738 "addl $8, %%edx \n\t"
04739 "paddb %%mm1, %%mm0 \n\t"
04740 "cmpl _MMXLength, %%edx \n\t"
04741 "movq %%mm0, -8(%%edi,%%edx,) \n\t"
04742 "movq %%mm0, %%mm1 \n\t"
04743 "jb sub_2lp \n\t"
04744
04745 : "=a" (dummy_value_a),
04746 "=D" (dummy_value_D)
04747
04748 : "0" (bpp),
04749 "1" (row)
04750
04751 : "%edx", "%esi"
04752 #if 0
04753 , "%mm0", "%mm1", "%mm5", "%mm6", "%mm7"
04754 #endif
04755 );
04756 }
04757 break;
04758
04759 case 8:
04760 {
04761 __asm__ __volatile__ (
04762
04763 "movl _dif, %%edx \n\t"
04764 "movl %%edi, %%esi \n\t"
04765
04766 "addl %%eax, %%edi \n\t"
04767 "movl _MMXLength, %%ecx \n\t"
04768
04769
04770 "movq -8(%%edi,%%edx,), %%mm7 \n\t"
04771 "andl $0x0000003f, %%ecx \n\t"
04772
04773 "sub_8lp: \n\t"
04774 "movq (%%edi,%%edx,), %%mm0 \n\t"
04775 "paddb %%mm7, %%mm0 \n\t"
04776 "movq 8(%%edi,%%edx,), %%mm1 \n\t"
04777 "movq %%mm0, (%%edi,%%edx,) \n\t"
04778
04779
04780
04781
04782
04783
04784 "paddb %%mm0, %%mm1 \n\t"
04785 "movq 16(%%edi,%%edx,), %%mm2 \n\t"
04786 "movq %%mm1, 8(%%edi,%%edx,) \n\t"
04787 "paddb %%mm1, %%mm2 \n\t"
04788 "movq 24(%%edi,%%edx,), %%mm3 \n\t"
04789 "movq %%mm2, 16(%%edi,%%edx,) \n\t"
04790 "paddb %%mm2, %%mm3 \n\t"
04791 "movq 32(%%edi,%%edx,), %%mm4 \n\t"
04792 "movq %%mm3, 24(%%edi,%%edx,) \n\t"
04793 "paddb %%mm3, %%mm4 \n\t"
04794 "movq 40(%%edi,%%edx,), %%mm5 \n\t"
04795 "movq %%mm4, 32(%%edi,%%edx,) \n\t"
04796 "paddb %%mm4, %%mm5 \n\t"
04797 "movq 48(%%edi,%%edx,), %%mm6 \n\t"
04798 "movq %%mm5, 40(%%edi,%%edx,) \n\t"
04799 "paddb %%mm5, %%mm6 \n\t"
04800 "movq 56(%%edi,%%edx,), %%mm7 \n\t"
04801 "movq %%mm6, 48(%%edi,%%edx,) \n\t"
04802 "addl $64, %%edx \n\t"
04803 "paddb %%mm6, %%mm7 \n\t"
04804 "cmpl %%ecx, %%edx \n\t"
04805 "movq %%mm7, -8(%%edi,%%edx,) \n\t"
04806 "jb sub_8lp \n\t"
04807
04808 "cmpl _MMXLength, %%edx \n\t"
04809 "jnb sub_8lt8 \n\t"
04810
04811 "sub_8lpA: \n\t"
04812 "movq (%%edi,%%edx,), %%mm0 \n\t"
04813 "addl $8, %%edx \n\t"
04814 "paddb %%mm7, %%mm0 \n\t"
04815 "cmpl _MMXLength, %%edx \n\t"
04816 "movq %%mm0, -8(%%edi,%%edx,) \n\t"
04817 "movq %%mm0, %%mm7 \n\t"
04818
04819
04820 "jb sub_8lpA \n\t"
04821
04822 "sub_8lt8: \n\t"
04823
04824 : "=a" (dummy_value_a),
04825 "=D" (dummy_value_D)
04826
04827 : "0" (bpp),
04828 "1" (row)
04829
04830 : "%ecx", "%edx", "%esi"
04831 #if 0
04832 , "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"
04833 #endif
04834 );
04835 }
04836 break;
04837
04838 default:
04839 {
04840 __asm__ __volatile__ (
04841 "movl _dif, %%edx \n\t"
04842
04843 "movl %%edi, %%esi \n\t"
04844
04845 "addl %%eax, %%edi \n\t"
04846
04847 "sub_Alp: \n\t"
04848 "movq (%%edi,%%edx,), %%mm0 \n\t"
04849 "movq (%%esi,%%edx,), %%mm1 \n\t"
04850 "addl $8, %%edx \n\t"
04851 "paddb %%mm1, %%mm0 \n\t"
04852 "cmpl _MMXLength, %%edx \n\t"
04853 "movq %%mm0, -8(%%edi,%%edx,) \n\t"
04854
04855 "jb sub_Alp \n\t"
04856
04857 : "=a" (dummy_value_a),
04858 "=D" (dummy_value_D)
04859
04860 : "0" (bpp),
04861 "1" (row)
04862
04863 : "%edx", "%esi"
04864 #if 0
04865 , "%mm0", "%mm1"
04866 #endif
04867 );
04868 }
04869 break;
04870
04871 }
04872
04873 __asm__ __volatile__ (
04874 "movl _MMXLength, %%edx \n\t"
04875
04876 "cmpl _FullLength, %%edx \n\t"
04877 "jnb sub_end \n\t"
04878
04879 "movl %%edi, %%esi \n\t"
04880
04881 "addl %%eax, %%edi \n\t"
04882 "xorl %%eax, %%eax \n\t"
04883
04884 "sub_lp2: \n\t"
04885 "movb (%%esi,%%edx,), %%al \n\t"
04886 "addb %%al, (%%edi,%%edx,) \n\t"
04887 "incl %%edx \n\t"
04888 "cmpl _FullLength, %%edx \n\t"
04889 "jb sub_lp2 \n\t"
04890
04891 "sub_end: \n\t"
04892 "EMMS \n\t"
04893
04894 : "=a" (dummy_value_a),
04895 "=D" (dummy_value_D)
04896
04897 : "0" (bpp),
04898 "1" (row)
04899
04900 : "%edx", "%esi"
04901 );
04902
04903 }
04904 #endif
04905
04906
04907
04908
04909
04910
04911
04912
04913
04914
04915
04916
04917 static void
04918 png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
04919 png_bytep prev_row)
04920 {
04921 png_uint_32 len;
04922 int dummy_value_d;
04923 int dummy_value_S;
04924 int dummy_value_D;
04925
04926 len = row_info->rowbytes;
04927
04928 __asm__ __volatile__ (
04929
04930
04931 #ifdef __PIC__
04932 "pushl %%ebx \n\t"
04933 #endif
04934 "movl %%edi, %%ecx \n\t"
04935 "xorl %%ebx, %%ebx \n\t"
04936 "addl $0x7, %%ecx \n\t"
04937 "xorl %%eax, %%eax \n\t"
04938 "andl $0xfffffff8, %%ecx \n\t"
04939
04940 "subl %%edi, %%ecx \n\t"
04941 "jz up_go \n\t"
04942
04943 "up_lp1: \n\t"
04944 "movb (%%edi,%%ebx,), %%al \n\t"
04945 "addb (%%esi,%%ebx,), %%al \n\t"
04946 "incl %%ebx \n\t"
04947 "cmpl %%ecx, %%ebx \n\t"
04948 "movb %%al, -1(%%edi,%%ebx,) \n\t"
04949 "jb up_lp1 \n\t"
04950
04951 "up_go: \n\t"
04952
04953 "movl %%edx, %%ecx \n\t"
04954 "subl %%ebx, %%edx \n\t"
04955 "andl $0x0000003f, %%edx \n\t"
04956 "subl %%edx, %%ecx \n\t"
04957
04958
04959
04960 "up_loop: \n\t"
04961 "movq (%%esi,%%ebx,), %%mm1 \n\t"
04962 "movq (%%edi,%%ebx,), %%mm0 \n\t"
04963 "movq 8(%%esi,%%ebx,), %%mm3 \n\t"
04964 "paddb %%mm1, %%mm0 \n\t"
04965 "movq 8(%%edi,%%ebx,), %%mm2 \n\t"
04966 "movq %%mm0, (%%edi,%%ebx,) \n\t"
04967 "paddb %%mm3, %%mm2 \n\t"
04968 "movq 16(%%esi,%%ebx,), %%mm5 \n\t"
04969 "movq %%mm2, 8(%%edi,%%ebx,) \n\t"
04970 "movq 16(%%edi,%%ebx,), %%mm4 \n\t"
04971 "movq 24(%%esi,%%ebx,), %%mm7 \n\t"
04972 "paddb %%mm5, %%mm4 \n\t"
04973 "movq 24(%%edi,%%ebx,), %%mm6 \n\t"
04974 "movq %%mm4, 16(%%edi,%%ebx,) \n\t"
04975 "paddb %%mm7, %%mm6 \n\t"
04976 "movq 32(%%esi,%%ebx,), %%mm1 \n\t"
04977 "movq %%mm6, 24(%%edi,%%ebx,) \n\t"
04978 "movq 32(%%edi,%%ebx,), %%mm0 \n\t"
04979 "movq 40(%%esi,%%ebx,), %%mm3 \n\t"
04980 "paddb %%mm1, %%mm0 \n\t"
04981 "movq 40(%%edi,%%ebx,), %%mm2 \n\t"
04982 "movq %%mm0, 32(%%edi,%%ebx,) \n\t"
04983 "paddb %%mm3, %%mm2 \n\t"
04984 "movq 48(%%esi,%%ebx,), %%mm5 \n\t"
04985 "movq %%mm2, 40(%%edi,%%ebx,) \n\t"
04986 "movq 48(%%edi,%%ebx,), %%mm4 \n\t"
04987 "movq 56(%%esi,%%ebx,), %%mm7 \n\t"
04988 "paddb %%mm5, %%mm4 \n\t"
04989 "movq 56(%%edi,%%ebx,), %%mm6 \n\t"
04990 "movq %%mm4, 48(%%edi,%%ebx,) \n\t"
04991 "addl $64, %%ebx \n\t"
04992 "paddb %%mm7, %%mm6 \n\t"
04993 "cmpl %%ecx, %%ebx \n\t"
04994 "movq %%mm6, -8(%%edi,%%ebx,) \n\t"
04995 "jb up_loop \n\t"
04996
04997 "cmpl $0, %%edx \n\t"
04998 "jz up_end \n\t"
04999
05000 "cmpl $8, %%edx \n\t"
05001 "jb up_lt8 \n\t"
05002
05003 "addl %%edx, %%ecx \n\t"
05004 "andl $0x00000007, %%edx \n\t"
05005 "subl %%edx, %%ecx \n\t"
05006 "jz up_lt8 \n\t"
05007
05008 "up_lpA: \n\t"
05009 "movq (%%esi,%%ebx,), %%mm1 \n\t"
05010 "movq (%%edi,%%ebx,), %%mm0 \n\t"
05011 "addl $8, %%ebx \n\t"
05012 "paddb %%mm1, %%mm0 \n\t"
05013 "cmpl %%ecx, %%ebx \n\t"
05014 "movq %%mm0, -8(%%edi,%%ebx,) \n\t"
05015 "jb up_lpA \n\t"
05016 "cmpl $0, %%edx \n\t"
05017 "jz up_end \n\t"
05018
05019 "up_lt8: \n\t"
05020 "xorl %%eax, %%eax \n\t"
05021 "addl %%edx, %%ecx \n\t"
05022
05023 "up_lp2: \n\t"
05024 "movb (%%edi,%%ebx,), %%al \n\t"
05025 "addb (%%esi,%%ebx,), %%al \n\t"
05026 "incl %%ebx \n\t"
05027 "cmpl %%ecx, %%ebx \n\t"
05028 "movb %%al, -1(%%edi,%%ebx,) \n\t"
05029 "jb up_lp2 \n\t"
05030
05031 "up_end: \n\t"
05032 "EMMS \n\t"
05033 #ifdef __PIC__
05034 "popl %%ebx \n\t"
05035 #endif
05036
05037 : "=d" (dummy_value_d),
05038 "=S" (dummy_value_S),
05039 "=D" (dummy_value_D)
05040
05041 : "0" (len),
05042 "1" (prev_row),
05043 "2" (row)
05044
05045 : "%eax", "%ecx"
05046 #ifndef __PIC__
05047 , "%ebx"
05048 #endif
05049
05050 #if 0
05051 , "%mm0", "%mm1", "%mm2", "%mm3"
05052 , "%mm4", "%mm5", "%mm6", "%mm7"
05053 #endif
05054 );
05055
05056 }
05057
05058 #endif
05059
05060
05061
05062
05063
05064
05065
05066
05067
05068
05069
05070
05071
05072 void
05073 png_read_filter_row(png_structp png_ptr, png_row_infop row_info, png_bytep
05074 row, png_bytep prev_row, int filter)
05075 {
05076 #ifdef PNG_DEBUG
05077 char filnm[10];
05078 #endif
05079
05080 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
05081
05082 #define UseMMX_sub 1 // GRR: converted 20000730
05083 #define UseMMX_up 1 // GRR: converted 20000729
05084 #define UseMMX_avg 1 // GRR: converted 20000828 (+ 16-bit bugfix 20000916)
05085 #define UseMMX_paeth 1 // GRR: converted 20000828
05086
05087 if (_mmx_supported == 2) {
05088
05089 #if !defined(PNG_1_0_X)
05090 png_warning(png_ptr, "asm_flags may not have been initialized");
05091 #endif
05092 png_mmx_support();
05093 }
05094 #endif
05095
05096 #ifdef PNG_DEBUG
05097 png_debug(1, "in png_read_filter_row (pnggccrd.c)\n");
05098 switch (filter)
05099 {
05100 case 0: sprintf(filnm, "none");
05101 break;
05102 case 1: sprintf(filnm, "sub-%s",
05103 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05104 #if !defined(PNG_1_0_X)
05105 (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB)? "MMX" :
05106 #endif
05107 #endif
05108 "x86");
05109 break;
05110 case 2: sprintf(filnm, "up-%s",
05111 #ifdef PNG_ASSEMBLER_CODE_SUPPORTED
05112 #if !defined(PNG_1_0_X)
05113 (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP)? "MMX" :
05114 #endif
05115 #endif
05116 "x86");
05117 break;
05118 case 3: sprintf(filnm, "avg-%s",
05119 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05120 #if !defined(PNG_1_0_X)
05121 (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG)? "MMX" :
05122 #endif
05123 #endif
05124 "x86");
05125 break;
05126 case 4: sprintf(filnm, "Paeth-%s",
05127 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05128 #if !defined(PNG_1_0_X)
05129 (png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH)? "MMX":
05130 #endif
05131 #endif
05132 "x86");
05133 break;
05134 default: sprintf(filnm, "unknw");
05135 break;
05136 }
05137 png_debug2(0, "row_number=%5ld, %5s, ", png_ptr->row_number, filnm);
05138 png_debug1(0, "row=0x%08lx, ", (unsigned long)row);
05139 png_debug2(0, "pixdepth=%2d, bytes=%d, ", (int)row_info->pixel_depth,
05140 (int)((row_info->pixel_depth + 7) >> 3));
05141 png_debug1(0,"rowbytes=%8ld\n", row_info->rowbytes);
05142 #endif
05143
05144 switch (filter)
05145 {
05146 case PNG_FILTER_VALUE_NONE:
05147 break;
05148
05149 case PNG_FILTER_VALUE_SUB:
05150 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05151 #if !defined(PNG_1_0_X)
05152 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_SUB) &&
05153 (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
05154 (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
05155 #else
05156 if (_mmx_supported)
05157 #endif
05158 {
05159 png_read_filter_row_mmx_sub(row_info, row);
05160 }
05161 else
05162 #endif
05163 {
05164 png_uint_32 i;
05165 png_uint_32 istop = row_info->rowbytes;
05166 png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
05167 png_bytep rp = row + bpp;
05168 png_bytep lp = row;
05169
05170 for (i = bpp; i < istop; i++)
05171 {
05172 *rp = (png_byte)(((int)(*rp) + (int)(*lp++)) & 0xff);
05173 rp++;
05174 }
05175 }
05176 break;
05177
05178 case PNG_FILTER_VALUE_UP:
05179 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED)
05180 #if !defined(PNG_1_0_X)
05181 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_UP) &&
05182 (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
05183 (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
05184 #else
05185 if (_mmx_supported)
05186 #endif
05187 {
05188 png_read_filter_row_mmx_up(row_info, row, prev_row);
05189 }
05190 else
05191 #endif
05192 {
05193 png_uint_32 i;
05194 png_uint_32 istop = row_info->rowbytes;
05195 png_bytep rp = row;
05196 png_bytep pp = prev_row;
05197
05198 for (i = 0; i < istop; ++i)
05199 {
05200 *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
05201 rp++;
05202 }
05203 }
05204 break;
05205
05206 case PNG_FILTER_VALUE_AVG:
05207 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05208 #if !defined(PNG_1_0_X)
05209 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_AVG) &&
05210 (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
05211 (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
05212 #else
05213 if (_mmx_supported)
05214 #endif
05215 {
05216 png_read_filter_row_mmx_avg(row_info, row, prev_row);
05217 }
05218 else
05219 #endif
05220 {
05221 png_uint_32 i;
05222 png_bytep rp = row;
05223 png_bytep pp = prev_row;
05224 png_bytep lp = row;
05225 png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
05226 png_uint_32 istop = row_info->rowbytes - bpp;
05227
05228 for (i = 0; i < bpp; i++)
05229 {
05230 *rp = (png_byte)(((int)(*rp) +
05231 ((int)(*pp++) >> 1)) & 0xff);
05232 rp++;
05233 }
05234
05235 for (i = 0; i < istop; i++)
05236 {
05237 *rp = (png_byte)(((int)(*rp) +
05238 ((int)(*pp++ + *lp++) >> 1)) & 0xff);
05239 rp++;
05240 }
05241 }
05242 break;
05243
05244 case PNG_FILTER_VALUE_PAETH:
05245 #if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_THREAD_UNSAFE_OK)
05246 #if !defined(PNG_1_0_X)
05247 if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_FILTER_PAETH) &&
05248 (row_info->pixel_depth >= png_ptr->mmx_bitdepth_threshold) &&
05249 (row_info->rowbytes >= png_ptr->mmx_rowbytes_threshold))
05250 #else
05251 if (_mmx_supported)
05252 #endif
05253 {
05254 png_read_filter_row_mmx_paeth(row_info, row, prev_row);
05255 }
05256 else
05257 #endif
05258 {
05259 png_uint_32 i;
05260 png_bytep rp = row;
05261 png_bytep pp = prev_row;
05262 png_bytep lp = row;
05263 png_bytep cp = prev_row;
05264 png_uint_32 bpp = (row_info->pixel_depth + 7) >> 3;
05265 png_uint_32 istop = row_info->rowbytes - bpp;
05266
05267 for (i = 0; i < bpp; i++)
05268 {
05269 *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
05270 rp++;
05271 }
05272
05273 for (i = 0; i < istop; i++)
05274 {
05275 int a, b, c, pa, pb, pc, p;
05276
05277 a = *lp++;
05278 b = *pp++;
05279 c = *cp++;
05280
05281 p = b - c;
05282 pc = a - c;
05283
05284 #ifdef PNG_USE_ABS
05285 pa = abs(p);
05286 pb = abs(pc);
05287 pc = abs(p + pc);
05288 #else
05289 pa = p < 0 ? -p : p;
05290 pb = pc < 0 ? -pc : pc;
05291 pc = (p + pc) < 0 ? -(p + pc) : p + pc;
05292 #endif
05293
05294
05295
05296
05297
05298
05299
05300
05301
05302
05303 p = (pa <= pb && pa <= pc) ? a : (pb <= pc) ? b : c;
05304
05305 *rp = (png_byte)(((int)(*rp) + p) & 0xff);
05306 rp++;
05307 }
05308 }
05309 break;
05310
05311 default:
05312 png_warning(png_ptr, "Ignoring bad row-filter type");
05313 *row=0;
05314 break;
05315 }
05316 }
05317
05318 #endif
05319
05320
05321
05322
05323
05324
05325
05326
05327
05328
05329
05330
05331
05332
05333
05334
05335
05336
05337
05338 int PNGAPI
05339 png_mmx_support(void)
05340 {
05341 #if defined(PNG_MMX_CODE_SUPPORTED)
05342 __asm__ __volatile__ (
05343 "pushl %%ebx \n\t"
05344 "pushl %%ecx \n\t"
05345 "pushl %%edx \n\t"
05346
05347
05348 "pushfl \n\t"
05349 "popl %%eax \n\t"
05350 "movl %%eax, %%ecx \n\t"
05351 "xorl $0x200000, %%eax \n\t"
05352 "pushl %%eax \n\t"
05353
05354
05355 "popfl \n\t"
05356 "pushfl \n\t"
05357 "popl %%eax \n\t"
05358 "pushl %%ecx \n\t"
05359 "popfl \n\t"
05360 "xorl %%ecx, %%eax \n\t"
05361 "jz 0f \n\t"
05362
05363 "xorl %%eax, %%eax \n\t"
05364
05365 "cpuid \n\t"
05366 "cmpl $1, %%eax \n\t"
05367 "jl 0f \n\t"
05368
05369 "xorl %%eax, %%eax \n\t"
05370 "incl %%eax \n\t"
05371
05372 "cpuid \n\t"
05373 "andl $0x800000, %%edx \n\t"
05374 "cmpl $0, %%edx \n\t"
05375 "jz 0f \n\t"
05376
05377 "movl $1, %%eax \n\t"
05378 "jmp 1f \n\t"
05379
05380 "0: \n\t"
05381 "movl $0, %%eax \n\t"
05382 "1: \n\t"
05383 "movl %%eax, _mmx_supported \n\t"
05384 "popl %%edx \n\t"
05385 "popl %%ecx \n\t"
05386 "popl %%ebx \n\t"
05387
05388
05389
05390
05391 :
05392
05393 :
05394
05395 : "%eax"
05396
05397
05398
05399 );
05400 #else
05401 _mmx_supported = 0;
05402 #endif
05403
05404 return _mmx_supported;
05405 }
05406
05407
05408 #endif