diff --git a/src/3rdparty/libwebp.pri b/src/3rdparty/libwebp.pri index 7c41f8f..c91b2d0 100644 --- a/src/3rdparty/libwebp.pri +++ b/src/3rdparty/libwebp.pri @@ -1,3 +1,5 @@ +CONFIG += compile_included_sources + INCLUDEPATH += \ $$PWD/libwebp \ $$PWD/libwebp/sharpyuv \ @@ -11,6 +13,7 @@ INCLUDEPATH += \ SOURCES += \ $$PWD/libwebp/sharpyuv/sharpyuv.c \ + $$PWD/libwebp/sharpyuv/sharpyuv_cpu.c \ $$PWD/libwebp/sharpyuv/sharpyuv_csp.c \ $$PWD/libwebp/sharpyuv/sharpyuv_dsp.c \ $$PWD/libwebp/sharpyuv/sharpyuv_gamma.c \ diff --git a/src/3rdparty/libwebp/AUTHORS b/src/3rdparty/libwebp/AUTHORS index 3efcbe2..2f0c537 100644 --- a/src/3rdparty/libwebp/AUTHORS +++ b/src/3rdparty/libwebp/AUTHORS @@ -11,11 +11,13 @@ Contributors: - Djordje Pesut (djordje dot pesut at imgtec dot com) - Frank Barchard (fbarchard at google dot com) - Hui Su (huisu at google dot com) +- H. Vetinari (h dot vetinari at gmx dot com) - Ilya Kurdyukov (jpegqs at gmail dot com) - Ingvar Stepanyan (rreverser at google dot com) - James Zern (jzern at google dot com) - Jan Engelhardt (jengelh at medozas dot de) - Jehan (jehan at girinstud dot io) +- Jeremy Maitin-Shepard (jbms at google dot com) - Johann Koenig (johann dot koenig at duck dot com) - Jovan Zelincevic (jovan dot zelincevic at imgtec dot com) - Jyrki Alakuijala (jyrki at google dot com) diff --git a/src/3rdparty/libwebp/ChangeLog b/src/3rdparty/libwebp/ChangeLog index c7655f2..00ef617 100644 --- a/src/3rdparty/libwebp/ChangeLog +++ b/src/3rdparty/libwebp/ChangeLog @@ -1,8 +1,149 @@ +0ba77244 update NEWS +e763eb1e bump version to 1.3.0 +2a8686fc update AUTHORS +106a57c1 Merge "*/Android.mk: add a check for NDK_ROOT" into main +c5e841c4 Merge "extras: WebpToSDL -> WebPToSDL" into main +dbc30715 Merge "xcframeworkbuild.sh: bump MACOSX_CATALYST_MIN_VERSION" into main +6fc1a9f9 */Android.mk: add a check for NDK_ROOT +d3e151fc doc/api.md,webp_js/README.md: Webp -> WebP +ed92a626 extras: WebpToSDL -> WebPToSDL +6eb0189b xcframeworkbuild.sh: bump MACOSX_CATALYST_MIN_VERSION +1d58575b CMake: align .pc variables with autoconf +e5fe2cfc webp-lossless-bitstream-spec,cosmetics: reflow paragraphs +0ceeeab9 webp-lossless-bitstream-spec: add amendment note +607611cd Merge "webp-container-spec: normalize section title case" into main +f853685e lossless: SUBTRACT_GREEN -> SUBTRACT_GREEN_TRANSFORM +786497e4 webp-lossless-bitstream-spec: fix inv color txfm description +c6ac672d webp-lossless-bitstream-spec: fix num_code_lengths check +b5700efb webp-lossless-bitstream-spec,cosmetics: grammar/capitalization +d8ed8c11 webp-container-spec: normalize section title case +52ec0b8f Merge changes Ie975dbb5,Ifc8c93af,I6ca7c5d6,I2e8d66f5,I152477b8 into main +5097ef62 webp-container-spec,cosmetics: grammar/capitalization +e3ba2b1f webp-lossless-bitstream-spec,cosmetics: reflow abstract +1e8e3ded webp-lossless-bitstream-spec: reword abstract re alpha +017cb6fa webp-container-spec,cosmetics: normalize range syntax +f6a4684b webp-lossless-bitstream-spec,cosmetics: normalize range syntax +54ebd5a3 webp-lossless-bitstream-spec: limit dist map lut to 69 cols +44741f9c webp-lossless-bitstream-spec: fix dist mapping example +fad0ece7 pnmdec.c: use snprintf instead of sprintf +3f73e8f7 sharpyuv: add SharpYuvGetVersion() +ce2f2d66 SharpYuvConvert: fix a race on SharpYuvGetCPUInfo +a458e308 sharpyuv_dsp.h: restore sharpyuv_cpu.h include +9ba800a7 Merge changes Id72fbf3b,Ic59d23a2 into main +979c0ebb sharpyuv: add SharpYuvGetCPUInfo +8bab09a4 Merge "*.pc.in: rename lib_prefix to webp_libname_prefix" into main +769387c5 cpu.c,cosmetics: fix a typo +a02978c2 sharpyuv/Makefile.am+cmake: add missing -lm +28aedcb9 *.pc.in: rename lib_prefix to webp_libname_prefix +c42e6d5a configure.ac: export an empty lib_prefix variable +dfc843aa Merge "*.pc.in: add lib prefix to lib names w/MSVC" into main +2498209b *.pc.in: add lib prefix to lib names w/MSVC +ac252b61 Merge "analysis_enc.c: fix a dead store warning" into main +56944762 analysis_enc.c: fix a dead store warning +d34f9b99 Merge "webp-lossless-bitstream-spec: convert BNF to ABNF" into main +dc05b4db Merge changes I96bc063c,I45880467,If9e18e5a,I6ee938e4,I0a410b28, ... into main +83270c7f webp-container-spec: add prose for rendering process +73b19b64 webp-container-spec: note reserved fields MUST be ignored +57101d3f webp-lossless-bitstream-spec: improve 'small' color table stmt +dfd32e45 webp-container-spec: remove redundant sentence +8a6185dd doc/webp-*: fix some punctuation, grammar +72776530 webp-lossless-bitstream-spec: convert BNF to ABNF +d992bb08 cmake: rename cpufeatures target to cpufeatures-webp +3ed2b275 webp-container-spec: clarify background color note +951c292d webp-container-spec: come too late -> out of order +902dd787 webp-container-spec: prefer hex literals +a8f6b5ee webp-container-spec: change SHOULD to MUST w/ANIM chunk +1dc59435 webp-container-spec: add unknown fields MUST be ignored +280a810f webp-container-spec: make padding byte=0 a MUST +41f0bf68 webp-container-spec: update note on trailing data +6bdd36db webp-container-spec: clarify Chunk Size is in bytes +87e36c48 Merge "webp_js/README.md,cosmetics: reflow some lines" into main +5b01f321 Merge "Update Windows makefile to build libsharpyuv library." into main +19b1a71c webp_js/README.md,cosmetics: reflow some lines +780db756 Update Windows makefile to build libsharpyuv library. +e407d4b3 CMakeLists.txt: replace GLUT_glut_LIBRARY w/GLUT::GLUT +abf73d62 Merge "WebPConfig.cmake.in: add find_dependency(Threads)" into main +25807fb4 Merge "cmake: restore compatibility with cmake < 3.12" into main +5dbc4bfa WebPConfig.cmake.in: add find_dependency(Threads) +b2a175dd Merge "Update wasm instructions." into main +cb90f76b Update wasm instructions. +02d15258 cmake: restore compatibility with cmake < 3.12 +5ba046e2 CMake: add_definitions -> add_compile_options +e68765af dsp,neon: use vaddv in a few more places +e8f83de2 Set libsharpyuv include dir to 'webp' subdirectory. +15a91ab1 cmake,cosmetics: apply cmake-format +0dd49d1a CMakeLists.txt: set @ONLY in configure_file() calls +62b1bfe8 Merge changes I2877e7bb,I777cad70,I15af7d1a,I686e6740,If10538a9, ... into main +95c8fe5f Merge changes Iecea3603,I9dc228ab into main +e7c805cf picture_csp_enc.c: remove SafeInitSharpYuv +6af8845a sharpyuv: prefer webp/types.h +639619ce cmake: fix dll exports +782ed48c sharpyuv,SharpYuvInit: add mutex protection when available +cad0d5ad sharyuv_{neon,sse2}.c: merge WEBP_USE_* sections +ef70ee06 add a few missing includes for NULL +f0f9eda4 sharpyuv.h: remove +9b902cba Merge "picture_csp_enc.c,CheckNonOpaque: rm unneeded local" into main +9c1d457c cmake/cpu.cmake: remove unused variable +9ac25bcb CMakeLists.txt,win32: match naming convention used by nmake +76c353ba picture_csp_enc.c,CheckNonOpaque: rm unneeded local +5000de54 Merge "cwebp: fix WebPPictureHasTransparency call" into main +e1729309 Merge "WebPPictureHasTransparency: add missing pointer check" into main +00ff988a vp8l_enc,AddSingleSubGreen: clear int sanitizer warnings +e2fecc22 dsp/lossless_enc.c: clear int sanitizer warnings +129cf9e9 dsp/lossless.c: clear int sanitizer warnings +ad7d1753 dsp/lossless_enc.c: clear int sanitizer warnings +5037220e VP8LSubtractGreenFromBlueAndRed_C: clear int sanitizer warnings +2ee786c7 upsampling_sse2.c: clear int sanitizer warnings +4cc157d4 ParseOptionalChunks: clear int sanitizer warning +892cf033 BuildHuffmanTable: clear int sanitizer warning +3a9a4d45 VP8GetSigned: clear int sanitizer warnings +704a3d0a dsp/lossless.c: quiet int sanitizer warnings +1a6c109c WebPPictureHasTransparency: add missing pointer check +c626e7d5 cwebp: fix WebPPictureHasTransparency call +866e349c Merge tag 'v1.2.4' +c170df38 Merge "Create libsharpyuv.a in makefile.unix." into main +9d7ff74a Create libsharpyuv.a in makefile.unix. +0d1f1254 update ChangeLog (tag: v1.2.4, origin/1.2.4) +fcbc2d78 Merge "doc/*.txt: restrict code to 69 columns" into main +4ad0e189 Merge "webp-container-spec.txt: normalize fourcc spelling" into main 980d2488 update NEWS 9fde8127 bump version to 1.2.4 +7a0a9935 doc/*.txt: restrict code to 69 columns +c040a615 webp-container-spec.txt: normalize fourcc spelling +aff1c546 dsp,x86: normalize types w/_mm_cvtsi128_si32 calls +ab540ae0 dsp,x86: normalize types w/_mm_cvtsi32_si128 calls +8980362e dsp,x86: normalize types w/_mm_set* calls (2) e626925c lossless: fix crunch mode w/WEBP_REDUCE_SIZE +83539239 dsp,x86: normalize types w/_mm_set* calls +8a4576ce webp-container-spec.txt: replace & with & +db870881 Merge "webp-container-spec.txt: make reserved 0 values a MUST" into main +01d7d378 webp-lossless-bitstream-spec: number all sections +337cf69f webp-lossless-bitstream-spec: mv Nomenclature after Intro +79be856e Merge changes I7111d1f7,I872cd62c into main +5b87983a webp-container-spec.txt: make reserved 0 values a MUST +bd939123 Merge changes I7a25b1a6,I51b2c2a0,I87d0cbcf,I6ec60af6,I0a3fe9dc into main +04764b56 libwebp.pc: add libsharpyuv to requires +7deee810 libsharpyuv: add pkg-config file +1a64a7e6 webp-container-spec.txt: clarify some SHOULDs +bec2c88a webp-container-spec.txt: move ChunkHeader to terminology +c9359332 webp-container-spec.txt: clarify 'VP8 '/'XMP ' fourccs +70fe3063 webp-container-spec.txt: rightsize table entries +ddbf3f3f webp-container-spec.txt: update 'key words' text +c151e95b utils.h,WEBP_ALIGN: make bitmask unsigned +748e92bb add WebPInt32ToMem +3fe15b67 Merge "Build libsharpyuv as a full installable library." into main +4f402f34 add WebPMemToInt32 +a3b68c19 Build libsharpyuv as a full installable library. +b4994eaa CMake: set rpath for shared objects +94cd7117 Merge "CMake: fix dylib versioning" into main +e91451b6 Fix the lossless specs a bit more. +231bdfb7 CMake: fix dylib versioning bfad7ab5 CMakeLists.txt: correct libwebpmux name in WebPConfig.cmake c2e3fd30 Revert "cmake: fix webpmux lib name for cmake linking" +7366f7f3 Merge "lossless: fix crunch mode w/WEBP_REDUCE_SIZE" into main +84163d9d lossless: fix crunch mode w/WEBP_REDUCE_SIZE +d01c1eb3 webp-lossless-bitstream-spec,cosmetics: normalize capitalization +8813ca8e Merge tag 'v1.2.3' 3c4a0fbf update ChangeLog (tag: v1.2.3) 56a480e8 dsp/cpu.h: add missing extern "C" 62b45bdd update ChangeLog (tag: v1.2.3-rc1) diff --git a/src/3rdparty/libwebp/NEWS b/src/3rdparty/libwebp/NEWS index c2bf389..c4f8ef7 100644 --- a/src/3rdparty/libwebp/NEWS +++ b/src/3rdparty/libwebp/NEWS @@ -1,3 +1,11 @@ +- 12/16/2022: version 1.3.0 + This is a binary compatible release. + * add libsharpyuv, which exposes -sharp_yuv/config.use_sharp_yuv + functionality to other libraries; libwebp now depends on this library + * major updates to the container and lossless bitstream docs (#448, #546, + #551) + * miscellaneous warning, bug & build fixes (#576, #583, #584) + - 8/4/2022: version 1.2.4 This is a binary compatible release. * restore CMake libwebpmux target name for compatibility with 1.2.2 (#575) diff --git a/src/3rdparty/libwebp/qt_attribution.json b/src/3rdparty/libwebp/qt_attribution.json index 75ba376..258f7ff 100644 --- a/src/3rdparty/libwebp/qt_attribution.json +++ b/src/3rdparty/libwebp/qt_attribution.json @@ -6,7 +6,7 @@ "Description": "WebP is a new image format that provides lossless and lossy compression for images on the web.", "Homepage": "https://developers.google.com/speed/webp/", - "Version": "1.2.4", + "Version": "1.3.0", "License": "BSD 3-clause \"New\" or \"Revised\" License", "LicenseId": "BSD-3-Clause", "LicenseFile": "COPYING", diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv.c index 8b3ab72..7de34fb 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv.c @@ -15,15 +15,21 @@ #include #include -#include +#include #include #include #include "src/webp/types.h" -#include "src/dsp/cpu.h" +#include "sharpyuv/sharpyuv_cpu.h" #include "sharpyuv/sharpyuv_dsp.h" #include "sharpyuv/sharpyuv_gamma.h" +//------------------------------------------------------------------------------ + +int SharpYuvGetVersion(void) { + return SHARPYUV_VERSION; +} + //------------------------------------------------------------------------------ // Sharp RGB->YUV conversion @@ -414,24 +420,45 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, } #undef SAFE_ALLOC +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include // NOLINT + +#define LOCK_ACCESS \ + static pthread_mutex_t sharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; \ + if (pthread_mutex_lock(&sharpyuv_lock)) return +#define UNLOCK_ACCESS_AND_RETURN \ + do { \ + (void)pthread_mutex_unlock(&sharpyuv_lock); \ + return; \ + } while (0) +#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) +#define LOCK_ACCESS do {} while (0) +#define UNLOCK_ACCESS_AND_RETURN return +#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) + // Hidden exported init function. -// By default SharpYuvConvert calls it with NULL. If needed, users can declare -// it as extern and call it with a VP8CPUInfo function. -extern void SharpYuvInit(VP8CPUInfo cpu_info_func); +// By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, +// users can declare it as extern and call it with an alternate VP8CPUInfo +// function. +SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); void SharpYuvInit(VP8CPUInfo cpu_info_func) { static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; - const int initialized = - (sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used); - if (cpu_info_func == NULL && initialized) return; - if (sharpyuv_last_cpuinfo_used == cpu_info_func) return; - - SharpYuvInitDsp(cpu_info_func); - if (!initialized) { - SharpYuvInitGammaTables(); + LOCK_ACCESS; + // Only update SharpYuvGetCPUInfo when called from external code to avoid a + // race on reading the value in SharpYuvConvert(). + if (cpu_info_func != (VP8CPUInfo)&SharpYuvGetCPUInfo) { + SharpYuvGetCPUInfo = cpu_info_func; + } + if (sharpyuv_last_cpuinfo_used == SharpYuvGetCPUInfo) { + UNLOCK_ACCESS_AND_RETURN; } - sharpyuv_last_cpuinfo_used = cpu_info_func; + SharpYuvInitDsp(); + SharpYuvInitGammaTables(); + + sharpyuv_last_cpuinfo_used = SharpYuvGetCPUInfo; + UNLOCK_ACCESS_AND_RETURN; } int SharpYuvConvert(const void* r_ptr, const void* g_ptr, @@ -467,7 +494,8 @@ int SharpYuvConvert(const void* r_ptr, const void* g_ptr, // Stride should be even for uint16_t buffers. return 0; } - SharpYuvInit(NULL); + // The address of the function pointer is used to avoid a read race. + SharpYuvInit((VP8CPUInfo)&SharpYuvGetCPUInfo); // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the // rgb->yuv conversion matrix. diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv.h index 9386ea2..181b20a 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv.h +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv.h @@ -12,15 +12,31 @@ #ifndef WEBP_SHARPYUV_SHARPYUV_H_ #define WEBP_SHARPYUV_SHARPYUV_H_ -#include - #ifdef __cplusplus extern "C" { #endif +#ifndef SHARPYUV_EXTERN +#ifdef WEBP_EXTERN +#define SHARPYUV_EXTERN WEBP_EXTERN +#else +// This explicitly marks library functions and allows for changing the +// signature for e.g., Windows DLL builds. +#if defined(__GNUC__) && __GNUC__ >= 4 +#define SHARPYUV_EXTERN extern __attribute__((visibility("default"))) +#else +#if defined(_MSC_VER) && defined(WEBP_DLL) +#define SHARPYUV_EXTERN __declspec(dllexport) +#else +#define SHARPYUV_EXTERN extern +#endif /* _MSC_VER && WEBP_DLL */ +#endif /* __GNUC__ >= 4 */ +#endif /* WEBP_EXTERN */ +#endif /* SHARPYUV_EXTERN */ + // SharpYUV API version following the convention from semver.org #define SHARPYUV_VERSION_MAJOR 0 -#define SHARPYUV_VERSION_MINOR 1 +#define SHARPYUV_VERSION_MINOR 2 #define SHARPYUV_VERSION_PATCH 0 // Version as a uint32_t. The major number is the high 8 bits. // The minor number is the middle 8 bits. The patch number is the low 16 bits. @@ -30,6 +46,10 @@ extern "C" { SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \ SHARPYUV_VERSION_PATCH) +// Returns the library's version number, packed in hexadecimal. See +// SHARPYUV_VERSION. +SHARPYUV_EXTERN int SharpYuvGetVersion(void); + // RGB to YUV conversion matrix, in 16 bit fixed point. // y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3] // u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3] @@ -65,11 +85,13 @@ typedef struct { // adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they // should be multiples of 2. // width, height: width and height of the image in pixels -int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, - int rgb_step, int rgb_stride, int rgb_bit_depth, - void* y_ptr, int y_stride, void* u_ptr, int u_stride, - void* v_ptr, int v_stride, int yuv_bit_depth, int width, - int height, const SharpYuvConversionMatrix* yuv_matrix); +SHARPYUV_EXTERN int SharpYuvConvert(const void* r_ptr, const void* g_ptr, + const void* b_ptr, int rgb_step, + int rgb_stride, int rgb_bit_depth, + void* y_ptr, int y_stride, void* u_ptr, + int u_stride, void* v_ptr, int v_stride, + int yuv_bit_depth, int width, int height, + const SharpYuvConversionMatrix* yuv_matrix); // TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422 // support (it's rarely used in practice, especially for images). diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c new file mode 100644 index 0000000..29425a0 --- /dev/null +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c @@ -0,0 +1,14 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +#include "sharpyuv/sharpyuv_cpu.h" + +// Include src/dsp/cpu.c to create SharpYuvGetCPUInfo from VP8GetCPUInfo. The +// function pointer is renamed in sharpyuv_cpu.h. +#include "src/dsp/cpu.c" diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h new file mode 100644 index 0000000..176ca3e --- /dev/null +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_cpu.h @@ -0,0 +1,22 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +#ifndef WEBP_SHARPYUV_SHARPYUV_CPU_H_ +#define WEBP_SHARPYUV_SHARPYUV_CPU_H_ + +#include "sharpyuv/sharpyuv.h" + +// Avoid exporting SharpYuvGetCPUInfo in shared object / DLL builds. +// SharpYuvInit() replaces the use of the function pointer. +#undef WEBP_EXTERN +#define WEBP_EXTERN extern +#define VP8GetCPUInfo SharpYuvGetCPUInfo +#include "src/dsp/cpu.h" + +#endif // WEBP_SHARPYUV_SHARPYUV_CPU_H_ diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c index 5334fa6..0ad22be 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.c @@ -13,7 +13,7 @@ #include #include -#include +#include static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); } diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h index 63c99ef..3214e3a 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_csp.h @@ -35,8 +35,9 @@ typedef struct { } SharpYuvColorSpace; // Fills in 'matrix' for the given YUVColorSpace. -void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, - SharpYuvConversionMatrix* matrix); +SHARPYUV_EXTERN void SharpYuvComputeConversionMatrix( + const SharpYuvColorSpace* yuv_color_space, + SharpYuvConversionMatrix* matrix); // Enums for precomputed conversion matrices. typedef enum { @@ -49,7 +50,7 @@ typedef enum { } SharpYuvMatrixType; // Returns a pointer to a matrix for one of the predefined colorspaces. -const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( +SHARPYUV_EXTERN const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( SharpYuvMatrixType matrix_type); #ifdef __cplusplus diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c index 956fa7c..31c272c 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c @@ -16,7 +16,7 @@ #include #include -#include "src/dsp/cpu.h" +#include "sharpyuv/sharpyuv_cpu.h" //----------------------------------------------------------------------------- @@ -75,23 +75,24 @@ void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, extern void InitSharpYuvSSE2(void); extern void InitSharpYuvNEON(void); -void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) { - (void)cpu_info_func; - +void SharpYuvInitDsp(void) { #if !WEBP_NEON_OMIT_C_CODE SharpYuvUpdateY = SharpYuvUpdateY_C; SharpYuvUpdateRGB = SharpYuvUpdateRGB_C; SharpYuvFilterRow = SharpYuvFilterRow_C; #endif + if (SharpYuvGetCPUInfo != NULL) { #if defined(WEBP_HAVE_SSE2) - if (cpu_info_func == NULL || cpu_info_func(kSSE2)) { - InitSharpYuvSSE2(); - } + if (SharpYuvGetCPUInfo(kSSE2)) { + InitSharpYuvSSE2(); + } #endif // WEBP_HAVE_SSE2 + } #if defined(WEBP_HAVE_NEON) - if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) { + if (WEBP_NEON_OMIT_C_CODE || + (SharpYuvGetCPUInfo != NULL && SharpYuvGetCPUInfo(kNEON))) { InitSharpYuvNEON(); } #endif // WEBP_HAVE_NEON diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h index e561d8d..805fbad 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.h @@ -12,9 +12,8 @@ #ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_ #define WEBP_SHARPYUV_SHARPYUV_DSP_H_ -#include - -#include "src/dsp/cpu.h" +#include "sharpyuv/sharpyuv_cpu.h" +#include "src/webp/types.h" extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, uint16_t* dst, int len, int bit_depth); @@ -24,6 +23,6 @@ extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, const uint16_t* best_y, uint16_t* out, int bit_depth); -void SharpYuvInitDsp(VP8CPUInfo cpu_info_func); +void SharpYuvInitDsp(void); #endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_ diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c index 05b5436..20ab2da 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c @@ -13,7 +13,6 @@ #include #include -#include #include "src/webp/types.h" diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h index 2f1a3ff..d13aff5 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_gamma.h @@ -12,7 +12,7 @@ #ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ #define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ -#include +#include "src/webp/types.h" #ifdef __cplusplus extern "C" { diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c index 5cf6aaf..5840914 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_neon.c @@ -17,11 +17,6 @@ #include #include #include -#endif - -extern void InitSharpYuvNEON(void); - -#if defined(WEBP_USE_NEON) static uint16_t clip_NEON(int v, int max) { return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; @@ -169,6 +164,8 @@ static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, //------------------------------------------------------------------------------ +extern void InitSharpYuvNEON(void); + WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) { SharpYuvUpdateY = SharpYuvUpdateY_NEON; SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON; @@ -177,6 +174,8 @@ WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) { #else // !WEBP_USE_NEON +extern void InitSharpYuvNEON(void); + void InitSharpYuvNEON(void) {} #endif // WEBP_USE_NEON diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c index 1943873..9744d1b 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_sse2.c @@ -16,11 +16,6 @@ #if defined(WEBP_USE_SSE2) #include #include -#endif - -extern void InitSharpYuvSSE2(void); - -#if defined(WEBP_USE_SSE2) static uint16_t clip_SSE2(int v, int max) { return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; @@ -199,6 +194,8 @@ WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) { } #else // !WEBP_USE_SSE2 +extern void InitSharpYuvSSE2(void); + void InitSharpYuvSSE2(void) {} #endif // WEBP_USE_SSE2 diff --git a/src/3rdparty/libwebp/src/dec/vp8i_dec.h b/src/3rdparty/libwebp/src/dec/vp8i_dec.h index 30c1bd3..83791ec 100644 --- a/src/3rdparty/libwebp/src/dec/vp8i_dec.h +++ b/src/3rdparty/libwebp/src/dec/vp8i_dec.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 -#define DEC_MIN_VERSION 2 -#define DEC_REV_VERSION 4 +#define DEC_MIN_VERSION 3 +#define DEC_REV_VERSION 0 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/src/3rdparty/libwebp/src/dec/vp8l_dec.c b/src/3rdparty/libwebp/src/dec/vp8l_dec.c index 1348055..c0ea018 100644 --- a/src/3rdparty/libwebp/src/dec/vp8l_dec.c +++ b/src/3rdparty/libwebp/src/dec/vp8l_dec.c @@ -1336,7 +1336,7 @@ static int ReadTransform(int* const xsize, int const* ysize, ok = ok && ExpandColorMap(num_colors, transform); break; } - case SUBTRACT_GREEN: + case SUBTRACT_GREEN_TRANSFORM: break; default: assert(0); // can't happen diff --git a/src/3rdparty/libwebp/src/dec/webp_dec.c b/src/3rdparty/libwebp/src/dec/webp_dec.c index 77a54c5..3f4f7bb 100644 --- a/src/3rdparty/libwebp/src/dec/webp_dec.c +++ b/src/3rdparty/libwebp/src/dec/webp_dec.c @@ -179,7 +179,7 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data, return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size. } // For odd-sized chunk-payload, there's one byte padding at the end. - disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1; + disk_chunk_size = (CHUNK_HEADER_SIZE + chunk_size + 1) & ~1u; total_size += disk_chunk_size; // Check that total bytes skipped so far does not exceed riff_size. diff --git a/src/3rdparty/libwebp/src/demux/demux.c b/src/3rdparty/libwebp/src/demux/demux.c index 41387ec..324e5eb 100644 --- a/src/3rdparty/libwebp/src/demux/demux.c +++ b/src/3rdparty/libwebp/src/demux/demux.c @@ -24,8 +24,8 @@ #include "src/webp/format_constants.h" #define DMUX_MAJ_VERSION 1 -#define DMUX_MIN_VERSION 2 -#define DMUX_REV_VERSION 4 +#define DMUX_MIN_VERSION 3 +#define DMUX_REV_VERSION 0 typedef struct { size_t start_; // start location of the data diff --git a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c index a5f8c9f..f0843d0 100644 --- a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse2.c @@ -26,8 +26,8 @@ static int DispatchAlpha_SSE2(const uint8_t* WEBP_RESTRICT alpha, uint32_t alpha_and = 0xff; int i, j; const __m128i zero = _mm_setzero_si128(); - const __m128i rgb_mask = _mm_set1_epi32(0xffffff00u); // to preserve RGB - const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); + const __m128i rgb_mask = _mm_set1_epi32((int)0xffffff00); // to preserve RGB + const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0); __m128i all_alphas = all_0xff; // We must be able to access 3 extra bytes after the last written byte @@ -106,8 +106,8 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride, // value is not 0xff if any of the alpha[] is not equal to 0xff. uint32_t alpha_and = 0xff; int i, j; - const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha - const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); + const __m128i a_mask = _mm_set1_epi32(0xff); // to preserve alpha + const __m128i all_0xff = _mm_set_epi32(0, 0, ~0, ~0); __m128i all_alphas = all_0xff; // We must be able to access 3 extra bytes after the last written byte @@ -178,7 +178,7 @@ static int ExtractAlpha_SSE2(const uint8_t* WEBP_RESTRICT argb, int argb_stride, static void ApplyAlphaMultiply_SSE2(uint8_t* rgba, int alpha_first, int w, int h, int stride) { const __m128i zero = _mm_setzero_si128(); - const __m128i kMult = _mm_set1_epi16(0x8081u); + const __m128i kMult = _mm_set1_epi16((short)0x8081); const __m128i kMask = _mm_set_epi16(0, 0xff, 0xff, 0, 0, 0xff, 0xff, 0); const int kSpan = 4; while (h-- > 0) { @@ -267,7 +267,7 @@ static int HasAlpha32b_SSE2(const uint8_t* src, int length) { } static void AlphaReplace_SSE2(uint32_t* src, int length, uint32_t color) { - const __m128i m_color = _mm_set1_epi32(color); + const __m128i m_color = _mm_set1_epi32((int)color); const __m128i zero = _mm_setzero_si128(); int i = 0; for (; i + 8 <= length; i += 8) { diff --git a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c index cdf877c..1156ac3 100644 --- a/src/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c +++ b/src/3rdparty/libwebp/src/dsp/alpha_processing_sse41.c @@ -26,7 +26,7 @@ static int ExtractAlpha_SSE41(const uint8_t* WEBP_RESTRICT argb, // value is not 0xff if any of the alpha[] is not equal to 0xff. uint32_t alpha_and = 0xff; int i, j; - const __m128i all_0xff = _mm_set1_epi32(~0u); + const __m128i all_0xff = _mm_set1_epi32(~0); __m128i all_alphas = all_0xff; // We must be able to access 3 extra bytes after the last written byte diff --git a/src/3rdparty/libwebp/src/dsp/cpu.c b/src/3rdparty/libwebp/src/dsp/cpu.c index a4ba7f2..62de73f 100644 --- a/src/3rdparty/libwebp/src/dsp/cpu.c +++ b/src/3rdparty/libwebp/src/dsp/cpu.c @@ -212,7 +212,7 @@ VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; #elif defined(WEBP_HAVE_NEON) // In most cases this function doesn't check for NEON support (it's assumed by // the configuration), but enables turning off NEON at runtime, for testing -// purposes, by setting VP8DecGetCPUInfo = NULL. +// purposes, by setting VP8GetCPUInfo = NULL. static int armCPUInfo(CPUFeature feature) { if (feature != kNEON) return 0; #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) diff --git a/src/3rdparty/libwebp/src/dsp/cpu.h b/src/3rdparty/libwebp/src/dsp/cpu.h index 8cf3e92..de32a39 100644 --- a/src/3rdparty/libwebp/src/dsp/cpu.h +++ b/src/3rdparty/libwebp/src/dsp/cpu.h @@ -14,6 +14,8 @@ #ifndef WEBP_DSP_CPU_H_ #define WEBP_DSP_CPU_H_ +#include + #include #ifdef HAVE_CONFIG_H diff --git a/src/3rdparty/libwebp/src/dsp/dec_sse2.c b/src/3rdparty/libwebp/src/dsp/dec_sse2.c index 873aa59..01e6bcb 100644 --- a/src/3rdparty/libwebp/src/dsp/dec_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/dec_sse2.c @@ -158,10 +158,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) { dst3 = _mm_loadl_epi64((__m128i*)(dst + 3 * BPS)); } else { // Load four bytes/pixels per line. - dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS)); - dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS)); - dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS)); - dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS)); + dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS)); + dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS)); + dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS)); + dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS)); } // Convert to 16b. dst0 = _mm_unpacklo_epi8(dst0, zero); @@ -187,10 +187,10 @@ static void Transform_SSE2(const int16_t* in, uint8_t* dst, int do_two) { _mm_storel_epi64((__m128i*)(dst + 3 * BPS), dst3); } else { // Store four bytes/pixels per line. - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); } } } @@ -213,10 +213,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { const __m128i m3 = _mm_subs_epi16(B, d4); const __m128i zero = _mm_setzero_si128(); // Load the source pixels. - __m128i dst0 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 0 * BPS)); - __m128i dst1 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 1 * BPS)); - __m128i dst2 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 2 * BPS)); - __m128i dst3 = _mm_cvtsi32_si128(WebPMemToUint32(dst + 3 * BPS)); + __m128i dst0 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 0 * BPS)); + __m128i dst1 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 1 * BPS)); + __m128i dst2 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 2 * BPS)); + __m128i dst3 = _mm_cvtsi32_si128(WebPMemToInt32(dst + 3 * BPS)); // Convert to 16b. dst0 = _mm_unpacklo_epi8(dst0, zero); dst1 = _mm_unpacklo_epi8(dst1, zero); @@ -233,10 +233,10 @@ static void TransformAC3(const int16_t* in, uint8_t* dst) { dst2 = _mm_packus_epi16(dst2, dst2); dst3 = _mm_packus_epi16(dst3, dst3); // Store the results. - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(dst0)); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(dst1)); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(dst2)); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(dst3)); } #undef MUL #endif // USE_TRANSFORM_AC3 @@ -477,11 +477,11 @@ static WEBP_INLINE void Load8x4_SSE2(const uint8_t* const b, int stride, // A0 = 63 62 61 60 23 22 21 20 43 42 41 40 03 02 01 00 // A1 = 73 72 71 70 33 32 31 30 53 52 51 50 13 12 11 10 const __m128i A0 = _mm_set_epi32( - WebPMemToUint32(&b[6 * stride]), WebPMemToUint32(&b[2 * stride]), - WebPMemToUint32(&b[4 * stride]), WebPMemToUint32(&b[0 * stride])); + WebPMemToInt32(&b[6 * stride]), WebPMemToInt32(&b[2 * stride]), + WebPMemToInt32(&b[4 * stride]), WebPMemToInt32(&b[0 * stride])); const __m128i A1 = _mm_set_epi32( - WebPMemToUint32(&b[7 * stride]), WebPMemToUint32(&b[3 * stride]), - WebPMemToUint32(&b[5 * stride]), WebPMemToUint32(&b[1 * stride])); + WebPMemToInt32(&b[7 * stride]), WebPMemToInt32(&b[3 * stride]), + WebPMemToInt32(&b[5 * stride]), WebPMemToInt32(&b[1 * stride])); // B0 = 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 // B1 = 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 @@ -540,7 +540,7 @@ static WEBP_INLINE void Store4x4_SSE2(__m128i* const x, uint8_t* dst, int stride) { int i; for (i = 0; i < 4; ++i, dst += stride) { - WebPUint32ToMem(dst, _mm_cvtsi128_si32(*x)); + WebPInt32ToMem(dst, _mm_cvtsi128_si32(*x)); *x = _mm_srli_si128(*x, 4); } } @@ -908,10 +908,10 @@ static void VE4_SSE2(uint8_t* dst) { // vertical const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); const __m128i b = _mm_subs_epu8(a, lsb); const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); - const uint32_t vals = _mm_cvtsi128_si32(avg); + const int vals = _mm_cvtsi128_si32(avg); int i; for (i = 0; i < 4; ++i) { - WebPUint32ToMem(dst + i * BPS, vals); + WebPInt32ToMem(dst + i * BPS, vals); } } @@ -925,10 +925,10 @@ static void LD4_SSE2(uint8_t* dst) { // Down-Left const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); } static void VR4_SSE2(uint8_t* dst) { // Vertical-Right @@ -946,10 +946,10 @@ static void VR4_SSE2(uint8_t* dst) { // Vertical-Right const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i efgh = _mm_avg_epu8(avg2, XABCD); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); // these two are hard to implement in SSE2, so we keep the C-version: DST(0, 2) = AVG3(J, I, X); @@ -970,11 +970,12 @@ static void VL4_SSE2(uint8_t* dst) { // Vertical-Left const __m128i abbc = _mm_or_si128(ab, bc); const __m128i lsb2 = _mm_and_si128(abbc, lsb1); const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); - const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); + const uint32_t extra_out = + (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); // these two are hard to get and irregular DST(3, 2) = (extra_out >> 0) & 0xff; @@ -990,7 +991,7 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right const uint32_t K = dst[-1 + 2 * BPS]; const uint32_t L = dst[-1 + 3 * BPS]; const __m128i LKJI_____ = - _mm_cvtsi32_si128(L | (K << 8) | (J << 16) | (I << 24)); + _mm_cvtsi32_si128((int)(L | (K << 8) | (J << 16) | (I << 24))); const __m128i LKJIXABCD = _mm_or_si128(LKJI_____, ____XABCD); const __m128i KJIXABCD_ = _mm_srli_si128(LKJIXABCD, 1); const __m128i JIXABCD__ = _mm_srli_si128(LKJIXABCD, 2); @@ -998,10 +999,10 @@ static void RD4_SSE2(uint8_t* dst) { // Down-right const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); } #undef DST @@ -1015,13 +1016,13 @@ static WEBP_INLINE void TrueMotion_SSE2(uint8_t* dst, int size) { const __m128i zero = _mm_setzero_si128(); int y; if (size == 4) { - const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top)); + const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top)); const __m128i top_base = _mm_unpacklo_epi8(top_values, zero); for (y = 0; y < 4; ++y, dst += BPS) { const int val = dst[-1] - top[-1]; const __m128i base = _mm_set1_epi16(val); const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero); - WebPUint32ToMem(dst, _mm_cvtsi128_si32(out)); + WebPInt32ToMem(dst, _mm_cvtsi128_si32(out)); } } else if (size == 8) { const __m128i top_values = _mm_loadl_epi64((const __m128i*)top); @@ -1062,7 +1063,7 @@ static void VE16_SSE2(uint8_t* dst) { static void HE16_SSE2(uint8_t* dst) { // horizontal int j; for (j = 16; j > 0; --j) { - const __m128i values = _mm_set1_epi8(dst[-1]); + const __m128i values = _mm_set1_epi8((char)dst[-1]); _mm_storeu_si128((__m128i*)dst, values); dst += BPS; } @@ -1070,7 +1071,7 @@ static void HE16_SSE2(uint8_t* dst) { // horizontal static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) { int j; - const __m128i values = _mm_set1_epi8(v); + const __m128i values = _mm_set1_epi8((char)v); for (j = 0; j < 16; ++j) { _mm_storeu_si128((__m128i*)(dst + j * BPS), values); } @@ -1130,7 +1131,7 @@ static void VE8uv_SSE2(uint8_t* dst) { // vertical // helper for chroma-DC predictions static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { int j; - const __m128i values = _mm_set1_epi8(v); + const __m128i values = _mm_set1_epi8((char)v); for (j = 0; j < 8; ++j) { _mm_storel_epi64((__m128i*)(dst + j * BPS), values); } diff --git a/src/3rdparty/libwebp/src/dsp/dec_sse41.c b/src/3rdparty/libwebp/src/dsp/dec_sse41.c index 8f18506..08a3630 100644 --- a/src/3rdparty/libwebp/src/dsp/dec_sse41.c +++ b/src/3rdparty/libwebp/src/dsp/dec_sse41.c @@ -23,7 +23,7 @@ static void HE16_SSE41(uint8_t* dst) { // horizontal int j; const __m128i kShuffle3 = _mm_set1_epi8(3); for (j = 16; j > 0; --j) { - const __m128i in = _mm_cvtsi32_si128(WebPMemToUint32(dst - 4)); + const __m128i in = _mm_cvtsi32_si128(WebPMemToInt32(dst - 4)); const __m128i values = _mm_shuffle_epi8(in, kShuffle3); _mm_storeu_si128((__m128i*)dst, values); dst += BPS; diff --git a/src/3rdparty/libwebp/src/dsp/enc_neon.c b/src/3rdparty/libwebp/src/dsp/enc_neon.c index 601962b..3a04111 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_neon.c +++ b/src/3rdparty/libwebp/src/dsp/enc_neon.c @@ -764,9 +764,14 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a, // Horizontal sum of all four uint32_t values in 'sum'. static int SumToInt_NEON(uint32x4_t sum) { +#if defined(__aarch64__) + return (int)vaddvq_u32(sum); +#else const uint64x2_t sum2 = vpaddlq_u32(sum); - const uint64_t sum3 = vgetq_lane_u64(sum2, 0) + vgetq_lane_u64(sum2, 1); - return (int)sum3; + const uint32x2_t sum3 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum2)), + vreinterpret_u32_u64(vget_high_u64(sum2))); + return (int)vget_lane_u32(sum3, 0); +#endif } static int SSE16x16_NEON(const uint8_t* a, const uint8_t* b) { diff --git a/src/3rdparty/libwebp/src/dsp/enc_sse2.c b/src/3rdparty/libwebp/src/dsp/enc_sse2.c index b2e78ed..1d10556 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/enc_sse2.c @@ -156,10 +156,10 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); } else { // Load four bytes/pixels per line. - ref0 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[0 * BPS])); - ref1 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[1 * BPS])); - ref2 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[2 * BPS])); - ref3 = _mm_cvtsi32_si128(WebPMemToUint32(&ref[3 * BPS])); + ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); + ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); + ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); + ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); } // Convert to 16b. ref0 = _mm_unpacklo_epi8(ref0, zero); @@ -185,10 +185,10 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); } else { // Store four bytes/pixels per line. - WebPUint32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0)); - WebPUint32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1)); - WebPUint32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2)); - WebPUint32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3)); + WebPInt32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0)); + WebPInt32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1)); + WebPInt32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2)); + WebPInt32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3)); } } } @@ -481,7 +481,7 @@ static void CollectHistogram_SSE2(const uint8_t* ref, const uint8_t* pred, // helper for chroma-DC predictions static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { int j; - const __m128i values = _mm_set1_epi8(v); + const __m128i values = _mm_set1_epi8((char)v); for (j = 0; j < 8; ++j) { _mm_storel_epi64((__m128i*)(dst + j * BPS), values); } @@ -489,7 +489,7 @@ static WEBP_INLINE void Put8x8uv_SSE2(uint8_t v, uint8_t* dst) { static WEBP_INLINE void Put16_SSE2(uint8_t v, uint8_t* dst) { int j; - const __m128i values = _mm_set1_epi8(v); + const __m128i values = _mm_set1_epi8((char)v); for (j = 0; j < 16; ++j) { _mm_store_si128((__m128i*)(dst + j * BPS), values); } @@ -540,7 +540,7 @@ static WEBP_INLINE void VerticalPred_SSE2(uint8_t* dst, static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) { int j; for (j = 0; j < 8; ++j) { - const __m128i values = _mm_set1_epi8(left[j]); + const __m128i values = _mm_set1_epi8((char)left[j]); _mm_storel_epi64((__m128i*)dst, values); dst += BPS; } @@ -549,7 +549,7 @@ static WEBP_INLINE void HE8uv_SSE2(uint8_t* dst, const uint8_t* left) { static WEBP_INLINE void HE16_SSE2(uint8_t* dst, const uint8_t* left) { int j; for (j = 0; j < 16; ++j) { - const __m128i values = _mm_set1_epi8(left[j]); + const __m128i values = _mm_set1_epi8((char)left[j]); _mm_store_si128((__m128i*)dst, values); dst += BPS; } @@ -722,10 +722,10 @@ static WEBP_INLINE void VE4_SSE2(uint8_t* dst, const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGH00), one); const __m128i b = _mm_subs_epu8(a, lsb); const __m128i avg = _mm_avg_epu8(b, BCDEFGH0); - const uint32_t vals = _mm_cvtsi128_si32(avg); + const int vals = _mm_cvtsi128_si32(avg); int i; for (i = 0; i < 4; ++i) { - WebPUint32ToMem(dst + i * BPS, vals); + WebPInt32ToMem(dst + i * BPS, vals); } } @@ -760,10 +760,10 @@ static WEBP_INLINE void LD4_SSE2(uint8_t* dst, const __m128i lsb = _mm_and_si128(_mm_xor_si128(ABCDEFGH, CDEFGHH0), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i abcdefg = _mm_avg_epu8(avg2, BCDEFGH0); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcdefg )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); } static WEBP_INLINE void VR4_SSE2(uint8_t* dst, @@ -782,10 +782,10 @@ static WEBP_INLINE void VR4_SSE2(uint8_t* dst, const __m128i lsb = _mm_and_si128(_mm_xor_si128(IXABCD, ABCD0), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i efgh = _mm_avg_epu8(avg2, XABCD); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( abcd )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( efgh )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(abcd, 1))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_slli_si128(efgh, 1))); // these two are hard to implement in SSE2, so we keep the C-version: DST(0, 2) = AVG3(J, I, X); @@ -807,11 +807,12 @@ static WEBP_INLINE void VL4_SSE2(uint8_t* dst, const __m128i abbc = _mm_or_si128(ab, bc); const __m128i lsb2 = _mm_and_si128(abbc, lsb1); const __m128i avg4 = _mm_subs_epu8(avg3, lsb2); - const uint32_t extra_out = _mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); + const uint32_t extra_out = + (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(avg4, 4)); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32( avg1 )); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32( avg4 )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg1, 1))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(avg4, 1))); // these two are hard to get and irregular DST(3, 2) = (extra_out >> 0) & 0xff; @@ -829,10 +830,10 @@ static WEBP_INLINE void RD4_SSE2(uint8_t* dst, const __m128i lsb = _mm_and_si128(_mm_xor_si128(JIXABCD__, LKJIXABCD), one); const __m128i avg2 = _mm_subs_epu8(avg1, lsb); const __m128i abcdefg = _mm_avg_epu8(avg2, KJIXABCD_); - WebPUint32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); - WebPUint32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); - WebPUint32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); - WebPUint32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); + WebPInt32ToMem(dst + 3 * BPS, _mm_cvtsi128_si32( abcdefg )); + WebPInt32ToMem(dst + 2 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 1))); + WebPInt32ToMem(dst + 1 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 2))); + WebPInt32ToMem(dst + 0 * BPS, _mm_cvtsi128_si32(_mm_srli_si128(abcdefg, 3))); } static WEBP_INLINE void HU4_SSE2(uint8_t* dst, const uint8_t* top) { @@ -875,14 +876,14 @@ static WEBP_INLINE void HD4_SSE2(uint8_t* dst, const uint8_t* top) { static WEBP_INLINE void TM4_SSE2(uint8_t* dst, const uint8_t* top) { const __m128i zero = _mm_setzero_si128(); - const __m128i top_values = _mm_cvtsi32_si128(WebPMemToUint32(top)); + const __m128i top_values = _mm_cvtsi32_si128(WebPMemToInt32(top)); const __m128i top_base = _mm_unpacklo_epi8(top_values, zero); int y; for (y = 0; y < 4; ++y, dst += BPS) { const int val = top[-2 - y] - top[-1]; const __m128i base = _mm_set1_epi16(val); const __m128i out = _mm_packus_epi16(_mm_add_epi16(base, top_base), zero); - WebPUint32ToMem(dst, _mm_cvtsi128_si32(out)); + WebPInt32ToMem(dst, _mm_cvtsi128_si32(out)); } } diff --git a/src/3rdparty/libwebp/src/dsp/lossless.c b/src/3rdparty/libwebp/src/dsp/lossless.c index 84a5429..fb86e58 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless.c +++ b/src/3rdparty/libwebp/src/dsp/lossless.c @@ -49,7 +49,7 @@ static WEBP_INLINE uint32_t Clip255(uint32_t a) { } static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { - return Clip255(a + b - c); + return Clip255((uint32_t)(a + b - c)); } static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, @@ -66,7 +66,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, } static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { - return Clip255(a + (a - b) / 2); + return Clip255((uint32_t)(a + (a - b) / 2)); } static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, @@ -293,10 +293,10 @@ void VP8LTransformColorInverse_C(const VP8LMultipliers* const m, const uint32_t red = argb >> 16; int new_red = red & 0xff; int new_blue = argb & 0xff; - new_red += ColorTransformDelta(m->green_to_red_, green); + new_red += ColorTransformDelta((int8_t)m->green_to_red_, green); new_red &= 0xff; - new_blue += ColorTransformDelta(m->green_to_blue_, green); - new_blue += ColorTransformDelta(m->red_to_blue_, (int8_t)new_red); + new_blue += ColorTransformDelta((int8_t)m->green_to_blue_, green); + new_blue += ColorTransformDelta((int8_t)m->red_to_blue_, (int8_t)new_red); new_blue &= 0xff; dst[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); } @@ -395,7 +395,7 @@ void VP8LInverseTransform(const VP8LTransform* const transform, assert(row_start < row_end); assert(row_end <= transform->ysize_); switch (transform->type_) { - case SUBTRACT_GREEN: + case SUBTRACT_GREEN_TRANSFORM: VP8LAddGreenToBlueAndRed(in, (row_end - row_start) * width, out); break; case PREDICTOR_TRANSFORM: diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc.c b/src/3rdparty/libwebp/src/dsp/lossless_enc.c index de6c4ac..b1f9f26 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_enc.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_enc.c @@ -522,11 +522,11 @@ static void GetCombinedEntropyUnrefined_C(const uint32_t X[], void VP8LSubtractGreenFromBlueAndRed_C(uint32_t* argb_data, int num_pixels) { int i; for (i = 0; i < num_pixels; ++i) { - const int argb = argb_data[i]; + const int argb = (int)argb_data[i]; const int green = (argb >> 8) & 0xff; const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; const uint32_t new_b = (((argb >> 0) & 0xff) - green) & 0xff; - argb_data[i] = (argb & 0xff00ff00u) | (new_r << 16) | new_b; + argb_data[i] = ((uint32_t)argb & 0xff00ff00u) | (new_r << 16) | new_b; } } @@ -547,10 +547,10 @@ void VP8LTransformColor_C(const VP8LMultipliers* const m, uint32_t* data, const int8_t red = U32ToS8(argb >> 16); int new_red = red & 0xff; int new_blue = argb & 0xff; - new_red -= ColorTransformDelta(m->green_to_red_, green); + new_red -= ColorTransformDelta((int8_t)m->green_to_red_, green); new_red &= 0xff; - new_blue -= ColorTransformDelta(m->green_to_blue_, green); - new_blue -= ColorTransformDelta(m->red_to_blue_, red); + new_blue -= ColorTransformDelta((int8_t)m->green_to_blue_, green); + new_blue -= ColorTransformDelta((int8_t)m->red_to_blue_, red); new_blue &= 0xff; data[i] = (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); } @@ -560,7 +560,7 @@ static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red, uint32_t argb) { const int8_t green = U32ToS8(argb >> 8); int new_red = argb >> 16; - new_red -= ColorTransformDelta(green_to_red, green); + new_red -= ColorTransformDelta((int8_t)green_to_red, green); return (new_red & 0xff); } @@ -569,9 +569,9 @@ static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue, uint32_t argb) { const int8_t green = U32ToS8(argb >> 8); const int8_t red = U32ToS8(argb >> 16); - uint8_t new_blue = argb & 0xff; - new_blue -= ColorTransformDelta(green_to_blue, green); - new_blue -= ColorTransformDelta(red_to_blue, red); + int new_blue = argb & 0xff; + new_blue -= ColorTransformDelta((int8_t)green_to_blue, green); + new_blue -= ColorTransformDelta((int8_t)red_to_blue, red); return (new_blue & 0xff); } diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c index 948001a..66cbaab 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -54,8 +54,8 @@ static void TransformColor_SSE2(const VP8LMultipliers* const m, const __m128i mults_rb = MK_CST_16(CST_5b(m->green_to_red_), CST_5b(m->green_to_blue_)); const __m128i mults_b2 = MK_CST_16(CST_5b(m->red_to_blue_), 0); - const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks - const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks + const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks + const __m128i mask_rb = _mm_set1_epi32(0x00ff00ff); // red-blue masks int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]); // argb @@ -376,7 +376,7 @@ static void BundleColorMap_SSE2(const uint8_t* const row, int width, int xbits, break; } case 2: { - const __m128i mask_or = _mm_set1_epi32(0xff000000); + const __m128i mask_or = _mm_set1_epi32((int)0xff000000); const __m128i mul_cst = _mm_set1_epi16(0x0104); const __m128i mask_mul = _mm_set1_epi16(0x0f00); for (x = 0; x + 16 <= width; x += 16, dst += 4) { @@ -427,7 +427,7 @@ static WEBP_INLINE void Average2_m128i(const __m128i* const a0, static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; - const __m128i black = _mm_set1_epi32(ARGB_BLACK); + const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); const __m128i res = _mm_sub_epi8(src, black); diff --git a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c index 396cb0b..4b6a532 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_sse2.c @@ -27,23 +27,22 @@ static WEBP_INLINE uint32_t ClampedAddSubtractFull_SSE2(uint32_t c0, uint32_t c1, uint32_t c2) { const __m128i zero = _mm_setzero_si128(); - const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); - const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); - const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); + const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero); + const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero); + const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero); const __m128i V1 = _mm_add_epi16(C0, C1); const __m128i V2 = _mm_sub_epi16(V1, C2); const __m128i b = _mm_packus_epi16(V2, V2); - const uint32_t output = _mm_cvtsi128_si32(b); - return output; + return (uint32_t)_mm_cvtsi128_si32(b); } static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0, uint32_t c1, uint32_t c2) { const __m128i zero = _mm_setzero_si128(); - const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero); - const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero); - const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero); + const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c0), zero); + const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c1), zero); + const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)c2), zero); const __m128i avg = _mm_add_epi16(C1, C0); const __m128i A0 = _mm_srli_epi16(avg, 1); const __m128i A1 = _mm_sub_epi16(A0, B0); @@ -52,16 +51,15 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf_SSE2(uint32_t c0, const __m128i A3 = _mm_srai_epi16(A2, 1); const __m128i A4 = _mm_add_epi16(A0, A3); const __m128i A5 = _mm_packus_epi16(A4, A4); - const uint32_t output = _mm_cvtsi128_si32(A5); - return output; + return (uint32_t)_mm_cvtsi128_si32(A5); } static WEBP_INLINE uint32_t Select_SSE2(uint32_t a, uint32_t b, uint32_t c) { int pa_minus_pb; const __m128i zero = _mm_setzero_si128(); - const __m128i A0 = _mm_cvtsi32_si128(a); - const __m128i B0 = _mm_cvtsi32_si128(b); - const __m128i C0 = _mm_cvtsi32_si128(c); + const __m128i A0 = _mm_cvtsi32_si128((int)a); + const __m128i B0 = _mm_cvtsi32_si128((int)b); + const __m128i C0 = _mm_cvtsi32_si128((int)c); const __m128i AC0 = _mm_subs_epu8(A0, C0); const __m128i CA0 = _mm_subs_epu8(C0, A0); const __m128i BC0 = _mm_subs_epu8(B0, C0); @@ -94,8 +92,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0, __m128i* const avg) { // (a + b) >> 1 = ((a + b + 1) >> 1) - ((a ^ b) & 1) const __m128i ones = _mm_set1_epi8(1); - const __m128i A0 = _mm_cvtsi32_si128(a0); - const __m128i A1 = _mm_cvtsi32_si128(a1); + const __m128i A0 = _mm_cvtsi32_si128((int)a0); + const __m128i A1 = _mm_cvtsi32_si128((int)a1); const __m128i avg1 = _mm_avg_epu8(A0, A1); const __m128i one = _mm_and_si128(_mm_xor_si128(A0, A1), ones); *avg = _mm_sub_epi8(avg1, one); @@ -103,8 +101,8 @@ static WEBP_INLINE void Average2_uint32_SSE2(const uint32_t a0, static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) { const __m128i zero = _mm_setzero_si128(); - const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a0), zero); - const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); + const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a0), zero); + const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero); const __m128i sum = _mm_add_epi16(A1, A0); return _mm_srli_epi16(sum, 1); } @@ -112,19 +110,18 @@ static WEBP_INLINE __m128i Average2_uint32_16_SSE2(uint32_t a0, uint32_t a1) { static WEBP_INLINE uint32_t Average2_SSE2(uint32_t a0, uint32_t a1) { __m128i output; Average2_uint32_SSE2(a0, a1, &output); - return _mm_cvtsi128_si32(output); + return (uint32_t)_mm_cvtsi128_si32(output); } static WEBP_INLINE uint32_t Average3_SSE2(uint32_t a0, uint32_t a1, uint32_t a2) { const __m128i zero = _mm_setzero_si128(); const __m128i avg1 = Average2_uint32_16_SSE2(a0, a2); - const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(a1), zero); + const __m128i A1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128((int)a1), zero); const __m128i sum = _mm_add_epi16(avg1, A1); const __m128i avg2 = _mm_srli_epi16(sum, 1); const __m128i A2 = _mm_packus_epi16(avg2, avg2); - const uint32_t output = _mm_cvtsi128_si32(A2); - return output; + return (uint32_t)_mm_cvtsi128_si32(A2); } static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, @@ -134,8 +131,7 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1, const __m128i sum = _mm_add_epi16(avg2, avg1); const __m128i avg3 = _mm_srli_epi16(sum, 1); const __m128i A0 = _mm_packus_epi16(avg3, avg3); - const uint32_t output = _mm_cvtsi128_si32(A0); - return output; + return (uint32_t)_mm_cvtsi128_si32(A0); } static uint32_t Predictor5_SSE2(const uint32_t* const left, @@ -192,7 +188,7 @@ static uint32_t Predictor13_SSE2(const uint32_t* const left, static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; - const __m128i black = _mm_set1_epi32(ARGB_BLACK); + const __m128i black = _mm_set1_epi32((int)ARGB_BLACK); for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); const __m128i res = _mm_add_epi8(src, black); @@ -208,7 +204,7 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper, static void PredictorAdd1_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; - __m128i prev = _mm_set1_epi32(out[-1]); + __m128i prev = _mm_set1_epi32((int)out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { // a | b | c | d const __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); @@ -285,12 +281,12 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) #undef GENERATE_PREDICTOR_2 // Predictor10: average of (average of (L,TL), average of (T, TR)). -#define DO_PRED10(OUT) do { \ - __m128i avgLTL, avg; \ - Average2_m128i(&L, &TL, &avgLTL); \ - Average2_m128i(&avgTTR, &avgLTL, &avg); \ - L = _mm_add_epi8(avg, src); \ - out[i + (OUT)] = _mm_cvtsi128_si32(L); \ +#define DO_PRED10(OUT) do { \ + __m128i avgLTL, avg; \ + Average2_m128i(&L, &TL, &avgLTL); \ + Average2_m128i(&avgTTR, &avgLTL, &avg); \ + L = _mm_add_epi8(avg, src); \ + out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \ } while (0) #define DO_PRED10_SHIFT do { \ @@ -303,7 +299,7 @@ GENERATE_PREDICTOR_2(9, upper[i + 1]) static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; - __m128i L = _mm_cvtsi32_si128(out[-1]); + __m128i L = _mm_cvtsi32_si128((int)out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { __m128i src = _mm_loadu_si128((const __m128i*)&in[i]); __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); @@ -336,7 +332,7 @@ static void PredictorAdd10_SSE2(const uint32_t* in, const uint32_t* upper, const __m128i B = _mm_andnot_si128(mask, T); \ const __m128i pred = _mm_or_si128(A, B); /* pred = (pa > b)? L : T*/ \ L = _mm_add_epi8(src, pred); \ - out[i + (OUT)] = _mm_cvtsi128_si32(L); \ + out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(L); \ } while (0) #define DO_PRED11_SHIFT do { \ @@ -351,7 +347,7 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; __m128i pa; - __m128i L = _mm_cvtsi32_si128(out[-1]); + __m128i L = _mm_cvtsi32_si128((int)out[-1]); for (i = 0; i + 4 <= num_pixels; i += 4) { __m128i T = _mm_loadu_si128((const __m128i*)&upper[i]); __m128i TL = _mm_loadu_si128((const __m128i*)&upper[i - 1]); @@ -384,12 +380,12 @@ static void PredictorAdd11_SSE2(const uint32_t* in, const uint32_t* upper, #undef DO_PRED11_SHIFT // Predictor12: ClampedAddSubtractFull. -#define DO_PRED12(DIFF, LANE, OUT) do { \ - const __m128i all = _mm_add_epi16(L, (DIFF)); \ - const __m128i alls = _mm_packus_epi16(all, all); \ - const __m128i res = _mm_add_epi8(src, alls); \ - out[i + (OUT)] = _mm_cvtsi128_si32(res); \ - L = _mm_unpacklo_epi8(res, zero); \ +#define DO_PRED12(DIFF, LANE, OUT) do { \ + const __m128i all = _mm_add_epi16(L, (DIFF)); \ + const __m128i alls = _mm_packus_epi16(all, all); \ + const __m128i res = _mm_add_epi8(src, alls); \ + out[i + (OUT)] = (uint32_t)_mm_cvtsi128_si32(res); \ + L = _mm_unpacklo_epi8(res, zero); \ } while (0) #define DO_PRED12_SHIFT(DIFF, LANE) do { \ @@ -402,7 +398,7 @@ static void PredictorAdd12_SSE2(const uint32_t* in, const uint32_t* upper, int num_pixels, uint32_t* out) { int i; const __m128i zero = _mm_setzero_si128(); - const __m128i L8 = _mm_cvtsi32_si128(out[-1]); + const __m128i L8 = _mm_cvtsi32_si128((int)out[-1]); __m128i L = _mm_unpacklo_epi8(L8, zero); for (i = 0; i + 4 <= num_pixels; i += 4) { // Load 4 pixels at a time. @@ -468,7 +464,7 @@ static void TransformColorInverse_SSE2(const VP8LMultipliers* const m, const __m128i mults_b2 = MK_CST_16(CST(red_to_blue_), 0); #undef MK_CST_16 #undef CST - const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); // alpha-green masks + const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); // alpha-green masks int i; for (i = 0; i + 4 <= num_pixels; i += 4) { const __m128i in = _mm_loadu_si128((const __m128i*)&src[i]); // argb @@ -532,7 +528,7 @@ static void ConvertBGRAToRGB_SSE2(const uint32_t* src, int num_pixels, static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, int num_pixels, uint8_t* dst) { - const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ffu); + const __m128i red_blue_mask = _mm_set1_epi32(0x00ff00ff); const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; while (num_pixels >= 8) { @@ -561,7 +557,7 @@ static void ConvertBGRAToRGBA_SSE2(const uint32_t* src, static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, int num_pixels, uint8_t* dst) { const __m128i mask_0x0f = _mm_set1_epi8(0x0f); - const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); + const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; while (num_pixels >= 8) { @@ -596,8 +592,8 @@ static void ConvertBGRAToRGBA4444_SSE2(const uint32_t* src, static void ConvertBGRAToRGB565_SSE2(const uint32_t* src, int num_pixels, uint8_t* dst) { - const __m128i mask_0xe0 = _mm_set1_epi8(0xe0); - const __m128i mask_0xf8 = _mm_set1_epi8(0xf8); + const __m128i mask_0xe0 = _mm_set1_epi8((char)0xe0); + const __m128i mask_0xf8 = _mm_set1_epi8((char)0xf8); const __m128i mask_0x07 = _mm_set1_epi8(0x07); const __m128i* in = (const __m128i*)src; __m128i* out = (__m128i*)dst; diff --git a/src/3rdparty/libwebp/src/dsp/lossless_sse41.c b/src/3rdparty/libwebp/src/dsp/lossless_sse41.c index b0d6daa..bb7ce76 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_sse41.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_sse41.c @@ -25,11 +25,12 @@ static void TransformColorInverse_SSE41(const VP8LMultipliers* const m, int num_pixels, uint32_t* dst) { // sign-extended multiplying constants, pre-shifted by 5. #define CST(X) (((int16_t)(m->X << 8)) >> 5) // sign-extend - const __m128i mults_rb = _mm_set1_epi32((uint32_t)CST(green_to_red_) << 16 | - (CST(green_to_blue_) & 0xffff)); + const __m128i mults_rb = + _mm_set1_epi32((int)((uint32_t)CST(green_to_red_) << 16 | + (CST(green_to_blue_) & 0xffff))); const __m128i mults_b2 = _mm_set1_epi32(CST(red_to_blue_)); #undef CST - const __m128i mask_ag = _mm_set1_epi32(0xff00ff00); + const __m128i mask_ag = _mm_set1_epi32((int)0xff00ff00); const __m128i perm1 = _mm_setr_epi8(-1, 1, -1, 1, -1, 5, -1, 5, -1, 9, -1, 9, -1, 13, -1, 13); const __m128i perm2 = _mm_setr_epi8(-1, 2, -1, -1, -1, 6, -1, -1, diff --git a/src/3rdparty/libwebp/src/dsp/quant.h b/src/3rdparty/libwebp/src/dsp/quant.h index 5e8dba8..fc099bf 100644 --- a/src/3rdparty/libwebp/src/dsp/quant.h +++ b/src/3rdparty/libwebp/src/dsp/quant.h @@ -21,10 +21,15 @@ #define IsFlat IsFlat_NEON -static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { +static uint32_t horizontal_add_uint32x4(const uint32x4_t a) { +#if defined(__aarch64__) + return vaddvq_u32(a); +#else const uint64x2_t b = vpaddlq_u32(a); - return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), - vreinterpret_u32_u64(vget_high_u64(b))); + const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), + vreinterpret_u32_u64(vget_high_u64(b))); + return vget_lane_u32(c, 0); +#endif } static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks, @@ -45,7 +50,7 @@ static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks, levels += 16; } - return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0); + return thresh >= (int)horizontal_add_uint32x4(sum); } #else diff --git a/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c b/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c index d7effea..3f18e94 100644 --- a/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/rescaler_sse2.c @@ -85,7 +85,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, const __m128i mult = _mm_cvtsi32_si128(((x_add - accum) << 16) | accum); const __m128i out = _mm_madd_epi16(cur_pixels, mult); assert(sizeof(*frow) == sizeof(uint32_t)); - WebPUint32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out)); + WebPInt32ToMem((uint8_t*)frow, _mm_cvtsi128_si32(out)); frow += 1; if (frow >= frow_end) break; accum -= wrk->x_sub; @@ -132,7 +132,7 @@ static void RescalerImportRowShrink_SSE2(WebPRescaler* const wrk, __m128i base = zero; accum += wrk->x_add; while (accum > 0) { - const __m128i A = _mm_cvtsi32_si128(WebPMemToUint32(src)); + const __m128i A = _mm_cvtsi32_si128(WebPMemToInt32(src)); src += 4; base = _mm_unpacklo_epi8(A, zero); // To avoid overflow, we need: base * x_add / x_sub < 32768 @@ -198,7 +198,7 @@ static WEBP_INLINE void ProcessRow_SSE2(const __m128i* const A0, const __m128i* const mult, uint8_t* const dst) { const __m128i rounder = _mm_set_epi32(0, ROUNDER, 0, ROUNDER); - const __m128i mask = _mm_set_epi32(0xffffffffu, 0, 0xffffffffu, 0); + const __m128i mask = _mm_set_epi32(~0, 0, ~0, 0); const __m128i B0 = _mm_mul_epu32(*A0, *mult); const __m128i B1 = _mm_mul_epu32(*A1, *mult); const __m128i B2 = _mm_mul_epu32(*A2, *mult); diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c b/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c index 340f1e2..08b6d0b 100644 --- a/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/upsampling_sse2.c @@ -121,7 +121,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \ int uv_pos, pos; \ /* 16byte-aligned array to cache reconstructed u and v */ \ uint8_t uv_buf[14 * 32 + 15] = { 0 }; \ - uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \ + uint8_t* const r_u = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~(uintptr_t)15); \ uint8_t* const r_v = r_u + 32; \ \ assert(top_y != NULL); \ diff --git a/src/3rdparty/libwebp/src/dsp/yuv_sse2.c b/src/3rdparty/libwebp/src/dsp/yuv_sse2.c index 970bbb7..01a48f9 100644 --- a/src/3rdparty/libwebp/src/dsp/yuv_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/yuv_sse2.c @@ -15,10 +15,12 @@ #if defined(WEBP_USE_SSE2) -#include "src/dsp/common_sse2.h" #include #include +#include "src/dsp/common_sse2.h" +#include "src/utils/utils.h" + //----------------------------------------------------------------------------- // Convert spans of 32 pixels to various RGB formats for the fancy upsampler. @@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE2(const uint8_t* src) { // Load and replicate the U/V samples static WEBP_INLINE __m128i Load_UV_HI_8_SSE2(const uint8_t* src) { const __m128i zero = _mm_setzero_si128(); - const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); + const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src)); const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples } @@ -130,7 +132,7 @@ static WEBP_INLINE void PackAndStore4444_SSE2(const __m128i* const R, const __m128i rg0 = _mm_packus_epi16(*B, *A); const __m128i ba0 = _mm_packus_epi16(*R, *G); #endif - const __m128i mask_0xf0 = _mm_set1_epi8(0xf0); + const __m128i mask_0xf0 = _mm_set1_epi8((char)0xf0); const __m128i rb1 = _mm_unpacklo_epi8(rg0, ba0); // rbrbrbrbrb... const __m128i ga1 = _mm_unpackhi_epi8(rg0, ba0); // gagagagaga... const __m128i rb2 = _mm_and_si128(rb1, mask_0xf0); @@ -147,9 +149,10 @@ static WEBP_INLINE void PackAndStore565_SSE2(const __m128i* const R, const __m128i r0 = _mm_packus_epi16(*R, *R); const __m128i g0 = _mm_packus_epi16(*G, *G); const __m128i b0 = _mm_packus_epi16(*B, *B); - const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8(0xf8)); + const __m128i r1 = _mm_and_si128(r0, _mm_set1_epi8((char)0xf8)); const __m128i b1 = _mm_and_si128(_mm_srli_epi16(b0, 3), _mm_set1_epi8(0x1f)); - const __m128i g1 = _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0xe0)), 5); + const __m128i g1 = + _mm_srli_epi16(_mm_and_si128(g0, _mm_set1_epi8((char)0xe0)), 5); const __m128i g2 = _mm_slli_epi16(_mm_and_si128(g0, _mm_set1_epi8(0x1c)), 3); const __m128i rg = _mm_or_si128(r1, g1); const __m128i gb = _mm_or_si128(g2, b1); diff --git a/src/3rdparty/libwebp/src/dsp/yuv_sse41.c b/src/3rdparty/libwebp/src/dsp/yuv_sse41.c index 579d1f7..f79b802 100644 --- a/src/3rdparty/libwebp/src/dsp/yuv_sse41.c +++ b/src/3rdparty/libwebp/src/dsp/yuv_sse41.c @@ -15,10 +15,12 @@ #if defined(WEBP_USE_SSE41) -#include "src/dsp/common_sse41.h" #include #include +#include "src/dsp/common_sse41.h" +#include "src/utils/utils.h" + //----------------------------------------------------------------------------- // Convert spans of 32 pixels to various RGB formats for the fancy upsampler. @@ -74,7 +76,7 @@ static WEBP_INLINE __m128i Load_HI_16_SSE41(const uint8_t* src) { // Load and replicate the U/V samples static WEBP_INLINE __m128i Load_UV_HI_8_SSE41(const uint8_t* src) { const __m128i zero = _mm_setzero_si128(); - const __m128i tmp0 = _mm_cvtsi32_si128(*(const uint32_t*)src); + const __m128i tmp0 = _mm_cvtsi32_si128(WebPMemToInt32(src)); const __m128i tmp1 = _mm_unpacklo_epi8(zero, tmp0); return _mm_unpacklo_epi16(tmp1, tmp1); // replicate samples } diff --git a/src/3rdparty/libwebp/src/enc/analysis_enc.c b/src/3rdparty/libwebp/src/enc/analysis_enc.c index ebb7842..a0001ac 100644 --- a/src/3rdparty/libwebp/src/enc/analysis_enc.c +++ b/src/3rdparty/libwebp/src/enc/analysis_enc.c @@ -391,12 +391,14 @@ static int DoSegmentsJob(void* arg1, void* arg2) { return ok; } +#ifdef WEBP_USE_THREAD static void MergeJobs(const SegmentJob* const src, SegmentJob* const dst) { int i; for (i = 0; i <= MAX_ALPHA; ++i) dst->alphas[i] += src->alphas[i]; dst->alpha += src->alpha; dst->uv_alpha += src->uv_alpha; } +#endif // initialize the job struct with some tasks to perform static void InitSegmentJob(VP8Encoder* const enc, SegmentJob* const job, @@ -425,10 +427,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) { (enc->method_ <= 1); // for method 0 - 1, we need preds_[] to be filled. if (do_segments) { const int last_row = enc->mb_h_; - // We give a little more than a half work to the main thread. - const int split_row = (9 * last_row + 15) >> 4; const int total_mb = last_row * enc->mb_w_; #ifdef WEBP_USE_THREAD + // We give a little more than a half work to the main thread. + const int split_row = (9 * last_row + 15) >> 4; const int kMinSplitRow = 2; // minimal rows needed for mt to be worth it const int do_mt = (enc->thread_level_ > 0) && (split_row >= kMinSplitRow); #else @@ -438,6 +440,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) { WebPGetWorkerInterface(); SegmentJob main_job; if (do_mt) { +#ifdef WEBP_USE_THREAD SegmentJob side_job; // Note the use of '&' instead of '&&' because we must call the functions // no matter what. @@ -455,6 +458,7 @@ int VP8EncAnalyze(VP8Encoder* const enc) { } worker_interface->End(&side_job.worker); if (ok) MergeJobs(&side_job, &main_job); // merge results together +#endif // WEBP_USE_THREAD } else { // Even for single-thread case, we use the generic Worker tools. InitSegmentJob(enc, &main_job, 0, last_row); diff --git a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c index fabebcf..78c8ca4 100644 --- a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c @@ -69,10 +69,12 @@ static int CheckNonOpaque(const uint8_t* alpha, int width, int height, int WebPPictureHasTransparency(const WebPPicture* picture) { if (picture == NULL) return 0; if (picture->use_argb) { - const int alpha_offset = ALPHA_OFFSET; - return CheckNonOpaque((const uint8_t*)picture->argb + alpha_offset, - picture->width, picture->height, - 4, picture->argb_stride * sizeof(*picture->argb)); + if (picture->argb != NULL) { + return CheckNonOpaque((const uint8_t*)picture->argb + ALPHA_OFFSET, + picture->width, picture->height, + 4, picture->argb_stride * sizeof(*picture->argb)); + } + return 0; } return CheckNonOpaque(picture->a, picture->width, picture->height, 1, picture->a_stride); @@ -170,21 +172,6 @@ static const int kMinDimensionIterativeConversion = 4; //------------------------------------------------------------------------------ // Main function -extern void SharpYuvInit(VP8CPUInfo cpu_info_func); - -static void SafeInitSharpYuv(void) { -#if defined(WEBP_USE_THREAD) && !defined(_WIN32) - static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; - if (pthread_mutex_lock(&initsharpyuv_lock)) return; -#endif - - SharpYuvInit(VP8GetCPUInfo); - -#if defined(WEBP_USE_THREAD) && !defined(_WIN32) - (void)pthread_mutex_unlock(&initsharpyuv_lock); -#endif -} - static int PreprocessARGB(const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr, @@ -481,6 +468,8 @@ static WEBP_INLINE void ConvertRowsToUV(const uint16_t* rgb, } } +extern void SharpYuvInit(VP8CPUInfo cpu_info_func); + static int ImportYUVAFromRGBA(const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr, @@ -516,7 +505,7 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, } if (use_iterative_conversion) { - SafeInitSharpYuv(); + SharpYuvInit(VP8GetCPUInfo); if (!PreprocessARGB(r_ptr, g_ptr, b_ptr, step, rgb_stride, picture)) { return 0; } diff --git a/src/3rdparty/libwebp/src/enc/vp8i_enc.h b/src/3rdparty/libwebp/src/enc/vp8i_enc.h index 71f7670..c9927c4 100644 --- a/src/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/src/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -31,8 +31,8 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 -#define ENC_MIN_VERSION 2 -#define ENC_REV_VERSION 4 +#define ENC_MIN_VERSION 3 +#define ENC_REV_VERSION 0 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost diff --git a/src/3rdparty/libwebp/src/enc/vp8l_enc.c b/src/3rdparty/libwebp/src/enc/vp8l_enc.c index 2b345df..0b07e52 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l_enc.c +++ b/src/3rdparty/libwebp/src/enc/vp8l_enc.c @@ -361,10 +361,11 @@ typedef enum { kHistoTotal // Must be last. } HistoIx; -static void AddSingleSubGreen(int p, uint32_t* const r, uint32_t* const b) { - const int green = p >> 8; // The upper bits are masked away later. - ++r[((p >> 16) - green) & 0xff]; - ++b[((p >> 0) - green) & 0xff]; +static void AddSingleSubGreen(uint32_t p, + uint32_t* const r, uint32_t* const b) { + const int green = (int)p >> 8; // The upper bits are masked away later. + ++r[(((int)p >> 16) - green) & 0xff]; + ++b[(((int)p >> 0) - green) & 0xff]; } static void AddSingle(uint32_t p, @@ -1354,7 +1355,7 @@ static int EncodeImageInternal( static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, VP8LBitWriter* const bw) { VP8LPutBits(bw, TRANSFORM_PRESENT, 1); - VP8LPutBits(bw, SUBTRACT_GREEN, 2); + VP8LPutBits(bw, SUBTRACT_GREEN_TRANSFORM, 2); VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } diff --git a/src/3rdparty/libwebp/src/mux/muxi.h b/src/3rdparty/libwebp/src/mux/muxi.h index 0f4af17..7929138 100644 --- a/src/3rdparty/libwebp/src/mux/muxi.h +++ b/src/3rdparty/libwebp/src/mux/muxi.h @@ -28,8 +28,8 @@ extern "C" { // Defines and constants. #define MUX_MAJ_VERSION 1 -#define MUX_MIN_VERSION 2 -#define MUX_REV_VERSION 4 +#define MUX_MIN_VERSION 3 +#define MUX_REV_VERSION 0 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/src/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h b/src/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h index 404b9a6..24f3af7 100644 --- a/src/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h +++ b/src/3rdparty/libwebp/src/utils/bit_reader_inl_utils.h @@ -148,9 +148,9 @@ int VP8GetSigned(VP8BitReader* WEBP_RESTRICT const br, int v, const range_t value = (range_t)(br->value_ >> pos); const int32_t mask = (int32_t)(split - value) >> 31; // -1 or 0 br->bits_ -= 1; - br->range_ += mask; + br->range_ += (range_t)mask; br->range_ |= 1; - br->value_ -= (bit_t)((split + 1) & mask) << pos; + br->value_ -= (bit_t)((split + 1) & (uint32_t)mask) << pos; BT_TRACK(br); return (v ^ mask) - mask; } diff --git a/src/3rdparty/libwebp/src/utils/huffman_utils.c b/src/3rdparty/libwebp/src/utils/huffman_utils.c index 0cba0fb..90c2fbf 100644 --- a/src/3rdparty/libwebp/src/utils/huffman_utils.c +++ b/src/3rdparty/libwebp/src/utils/huffman_utils.c @@ -142,7 +142,7 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, { int step; // step size to replicate values in current table - uint32_t low = -1; // low bits for current root entry + uint32_t low = 0xffffffffu; // low bits for current root entry uint32_t mask = total_size - 1; // mask for low bits uint32_t key = 0; // reversed prefix code int num_nodes = 1; // number of Huffman tree nodes diff --git a/src/3rdparty/libwebp/src/utils/utils.h b/src/3rdparty/libwebp/src/utils/utils.h index ef04f10..c5ee873 100644 --- a/src/3rdparty/libwebp/src/utils/utils.h +++ b/src/3rdparty/libwebp/src/utils/utils.h @@ -64,7 +64,8 @@ WEBP_EXTERN void WebPSafeFree(void* const ptr); // Alignment #define WEBP_ALIGN_CST 31 -#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & ~WEBP_ALIGN_CST) +#define WEBP_ALIGN(PTR) (((uintptr_t)(PTR) + WEBP_ALIGN_CST) & \ + ~(uintptr_t)WEBP_ALIGN_CST) #include // memcpy() is the safe way of moving potentially unaligned 32b memory. @@ -73,10 +74,19 @@ static WEBP_INLINE uint32_t WebPMemToUint32(const uint8_t* const ptr) { memcpy(&A, ptr, sizeof(A)); return A; } + +static WEBP_INLINE int32_t WebPMemToInt32(const uint8_t* const ptr) { + return (int32_t)WebPMemToUint32(ptr); +} + static WEBP_INLINE void WebPUint32ToMem(uint8_t* const ptr, uint32_t val) { memcpy(ptr, &val, sizeof(val)); } +static WEBP_INLINE void WebPInt32ToMem(uint8_t* const ptr, int val) { + WebPUint32ToMem(ptr, (uint32_t)val); +} + //------------------------------------------------------------------------------ // Reading/writing data. diff --git a/src/3rdparty/libwebp/src/webp/format_constants.h b/src/3rdparty/libwebp/src/webp/format_constants.h index eca6981..999035c 100644 --- a/src/3rdparty/libwebp/src/webp/format_constants.h +++ b/src/3rdparty/libwebp/src/webp/format_constants.h @@ -55,7 +55,7 @@ typedef enum { PREDICTOR_TRANSFORM = 0, CROSS_COLOR_TRANSFORM = 1, - SUBTRACT_GREEN = 2, + SUBTRACT_GREEN_TRANSFORM = 2, COLOR_INDEXING_TRANSFORM = 3 } VP8LImageTransformType; diff --git a/src/3rdparty/libwebp/src/webp/types.h b/src/3rdparty/libwebp/src/webp/types.h index 47f7f2b..f255432 100644 --- a/src/3rdparty/libwebp/src/webp/types.h +++ b/src/3rdparty/libwebp/src/webp/types.h @@ -42,7 +42,11 @@ typedef long long int int64_t; # if defined(__GNUC__) && __GNUC__ >= 4 # define WEBP_EXTERN extern __attribute__ ((visibility ("default"))) # else -# define WEBP_EXTERN extern +# if defined(_MSC_VER) && defined(WEBP_DLL) +# define WEBP_EXTERN __declspec(dllexport) +# else +# define WEBP_EXTERN extern +# endif # endif /* __GNUC__ >= 4 */ #endif /* WEBP_EXTERN */ diff --git a/src/plugins/imageformats/webp/CMakeLists.txt b/src/plugins/imageformats/webp/CMakeLists.txt index 25aa0c9..fbbcc1c 100644 --- a/src/plugins/imageformats/webp/CMakeLists.txt +++ b/src/plugins/imageformats/webp/CMakeLists.txt @@ -30,6 +30,7 @@ qt_internal_extend_target(QWebpPlugin CONDITION QT_FEATURE_system_webp qt_internal_extend_target(QWebpPlugin CONDITION NOT QT_FEATURE_system_webp SOURCES ../../../3rdparty/libwebp/sharpyuv/sharpyuv.c + ../../../3rdparty/libwebp/sharpyuv/sharpyuv_cpu.c ../../../3rdparty/libwebp/sharpyuv/sharpyuv_csp.c ../../../3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c ../../../3rdparty/libwebp/sharpyuv/sharpyuv_gamma.c diff --git a/src/3rdparty/libwebp/AUTHORS b/src/3rdparty/libwebp/AUTHORS index 2f0c537..8359b20 100644 --- a/src/3rdparty/libwebp/AUTHORS +++ b/src/3rdparty/libwebp/AUTHORS @@ -32,6 +32,7 @@ Contributors: - Mislav Bradac (mislavm at google dot com) - Nico Weber (thakis at chromium dot org) - Noel Chromium (noel at chromium dot org) +- Nozomi Isozaki (nontan at pixiv dot co dot jp) - Oliver Wolff (oliver dot wolff at qt dot io) - Owen Rodley (orodley at google dot com) - Parag Salasakar (img dot mips1 at gmail dot com) @@ -47,6 +48,7 @@ Contributors: - Somnath Banerjee (somnath dot banerjee at gmail dot com) - Sriraman Tallam (tmsriram at google dot com) - Tamar Levy (tamar dot levy at intel dot com) +- Thiago Perrotta (tperrotta at google dot com) - Timothy Gu (timothygu99 at gmail dot com) - Urvang Joshi (urvang at google dot com) - Vikas Arora (vikasa at google dot com) diff --git a/src/3rdparty/libwebp/ChangeLog b/src/3rdparty/libwebp/ChangeLog index 00ef617..5e85875 100644 --- a/src/3rdparty/libwebp/ChangeLog +++ b/src/3rdparty/libwebp/ChangeLog @@ -1,3 +1,78 @@ +e1adea50 update NEWS +43393320 enc/*: normalize WebPEncodingSetError() calls +287fdefe enc/*: add missing WebPEncodingSetError() calls +c3bd7cff EncodeAlphaInternal: add missing error check +d49cfbb3 vp8l_enc,WriteImage: add missing error check +2e5a9ec3 muxread,MuxImageParse: add missing error checks +ebb6f949 cmake,emscripten: explicitly set stack size +59a2b1f9 WebPDecodeYUV: check u/v/stride/uv_stride ptrs +8e965ccb Call png_get_channels() to see if image has alpha +7f0a3419 update ChangeLog (tag: v1.3.1-rc1) +bab7efbe update NEWS +7138bf8f bump version to 1.3.1 +435b4ded update AUTHORS +47351229 update .mailmap +ff6c7f4e CONTRIBUTING.md: add C style / cmake-format notes +dd530437 add .cmake-format.py +adbe2cb1 cmake,cosmetics: apply cmake-format +15b36508 doc/webp-container-spec: rm future codec comment +c369c4bf doc/webp-lossless-bitstream-spec: improve link text +1de35f47 doc/webp-container-spec: don't use 'currently' +bb06a16e doc/webp-container-spec: prefer present tense +9f38b71e doc/webp-lossless-bitstream-spec: prefer present tense +7acb6b82 doc/webp-container-spec: avoid i.e. & e.g. +4967e7cd doc/webp-lossless-bitstream-spec: avoid i.e. & e.g. +e3366659 Merge "Do not find_package image libraries if not needed." into main +428588ef clarify single leaf node trees and use of canonical prefix coding +709ec152 Do not find_package image libraries if not needed. +8dd80ef8 fuzz_utils.h: lower kFuzzPxLimit w/ASan +8f187b9f Clean message calls in CMake +cba30078 WebPConfig.cmake.in: use calculated include path +6cf9a76a Merge "webp-lossless-bitstream-spec: remove use of 'dynamics'" into main +740943b2 Merge "Specialize and optimize ITransform_SSE2 using do_two" into main +2d547e24 Compare kFuzzPxLimit to max_num_operations +ac42dde1 Specialize and optimize ITransform_SSE2 using do_two +17e0ef1d webp-lossless-bitstream-spec: remove use of 'dynamics' +ed274371 neon.h,cosmetics: clear a couple lint warnings +3fb82947 cpu.h,cosmetics: segment defines +0c496a4f cpu.h: add WEBP_AARCH64 +8151f388 move VP8GetCPUInfo declaration to cpu.c +916548c2 Make kFuzzPxLimit sanitizer dependent +4070b271 advanced_api_fuzzer: reduce scaling limit +761f49c3 Merge "webp-lossless-bitstream-spec: add missing bits to ABNF" into main +84d04c48 webp-lossless-bitstream-spec: add missing bits to ABNF +0696e1a7 advanced_api_fuzzer: reduce scaling limit +93d88aa2 Merge "deps.cmake: remove unneeded header checks" into main +118e0035 deps.cmake: remove unneeded header checks +4c3d7018 webp-lossless-bitstream-spec: condense normal-prefix-code +a6a09b32 webp-lossless-bitstream-spec: fix 2 code typos +50ac4f7c Merge "cpu.h: enable NEON w/_M_ARM64EC" into main +4b7d7b4f Add contribution instructions +0afbd97b cpu.h: enable NEON w/_M_ARM64EC +349f4353 Merge changes Ibd89e56b,Ic57e7f84,I89096614 into main +8f7513b7 upsampling_neon.c: fix WEBP_SWAP_16BIT_CSP check +cbf624b5 advanced_api_fuzzer: reduce scaling limit +89edfdd1 Skip slow scaling in libwebp advanced_api_fuzzer +859f19f7 Reduce libwebp advanced_api_fuzzer threshold +a4f04835 Merge changes Ic389aaa2,I329ccd79 into main +1275fac8 Makefile.vc: fix img2webp link w/dynamic cfg +2fe27bb9 img2webp: normalize help output +24bed3d9 cwebp: reflow -near_lossless help text +0825faa4 img2webp: add -sharp_yuv/-near_lossless +d64e6d7d Merge "PaletteSortModifiedZeng: fix leak on error" into main +0e12a22d Merge "EncodeAlphaInternal: clear result->bw on error" into main +0edbb6ea PaletteSortModifiedZeng: fix leak on error +41ffe04e Merge "Update yapf style from "chromium" to "yapf"" into main +2d9d9265 Update yapf style from "chromium" to "yapf" +a486d800 EncodeAlphaInternal: clear result->bw on error +1347a32d Skip big scaled advanced_api_fuzzer +52b6f067 Fix scaling limit in advanced_api_fuzzer.c +73618428 Limit scaling in libwebp advanced_api_fuzzer.c +b54d21a0 Merge "CMakeLists.txt: allow CMAKE_INSTALL_RPATH to be set empty" into main +31c28db5 libwebp{,demux,mux}.pc.in: Requires -> Requires.private +d9a505ff CMakeLists.txt: allow CMAKE_INSTALL_RPATH to be set empty +bdf33d03 Merge tag 'v1.3.0' +b5577769 update ChangeLog (tag: v1.3.0-rc1, tag: v1.3.0) 0ba77244 update NEWS e763eb1e bump version to 1.3.0 2a8686fc update AUTHORS @@ -103,7 +178,7 @@ c626e7d5 cwebp: fix WebPPictureHasTransparency call 866e349c Merge tag 'v1.2.4' c170df38 Merge "Create libsharpyuv.a in makefile.unix." into main 9d7ff74a Create libsharpyuv.a in makefile.unix. -0d1f1254 update ChangeLog (tag: v1.2.4, origin/1.2.4) +0d1f1254 update ChangeLog (tag: v1.2.4) fcbc2d78 Merge "doc/*.txt: restrict code to 69 columns" into main 4ad0e189 Merge "webp-container-spec.txt: normalize fourcc spelling" into main 980d2488 update NEWS diff --git a/src/3rdparty/libwebp/NEWS b/src/3rdparty/libwebp/NEWS index c4f8ef7..2111d33 100644 --- a/src/3rdparty/libwebp/NEWS +++ b/src/3rdparty/libwebp/NEWS @@ -1,3 +1,18 @@ +- 6/23/2023: version 1.3.1 + This is a binary compatible release. + * security fixes for lossless encoder (#603, chromium: #1420107, #1455619, + CVE-2023-1999) + * improve error reporting through WebPPicture error codes + * fix upsampling for RGB565 and RGBA4444 in NEON builds + * img2webp: add -sharp_yuv & -near_lossless + * Windows builds: + - fix compatibility with clang-cl (#607) + - improve Arm64 performance with cl.exe + - add Arm64EC support + * fix webp_js with emcc >= 3.1.27 (stack size change, #614) + * CMake fixes (#592, #610, #612) + * further updates to the container and lossless bitstream docs (#581, #611) + - 12/16/2022: version 1.3.0 This is a binary compatible release. * add libsharpyuv, which exposes -sharp_yuv/config.use_sharp_yuv diff --git a/src/3rdparty/libwebp/patches/0001-Fix-Windows-build-for-clang-and-neon.patch b/src/3rdparty/libwebp/patches/0001-Fix-Windows-build-for-clang-and-neon.patch index 2b46f5b..d721476 100644 --- a/src/3rdparty/libwebp/patches/0001-Fix-Windows-build-for-clang-and-neon.patch +++ b/src/3rdparty/libwebp/patches/0001-Fix-Windows-build-for-clang-and-neon.patch @@ -1,5 +1,5 @@ diff --git a/src/3rdparty/libwebp/src/dsp/cpu.h b/src/3rdparty/libwebp/src/dsp/cpu.h -index 57a40d8..8cf3e92 100644 +index c86540f..581ecbd 100644 --- a/src/3rdparty/libwebp/src/dsp/cpu.h +++ b/src/3rdparty/libwebp/src/dsp/cpu.h @@ -14,6 +14,8 @@ @@ -8,10 +8,10 @@ index 57a40d8..8cf3e92 100644 +#include + + #include + #ifdef HAVE_CONFIG_H - #include "src/webp/config.h" - #endif -@@ -43,12 +45,12 @@ +@@ -48,12 +50,12 @@ #if !defined(HAVE_CONFIG_H) #if defined(_MSC_VER) && _MSC_VER > 1310 && \ @@ -26,13 +26,13 @@ index 57a40d8..8cf3e92 100644 #define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets #endif #endif -@@ -97,7 +99,8 @@ - // arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with +@@ -106,7 +108,8 @@ // vtbl4_u8(); a fix was made in 16.6. - #if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ -- (_MSC_VER >= 1926 && defined(_M_ARM64))) -+ (_MSC_VER >= 1926 && defined(_M_ARM64))) && \ -+ !defined(__clang__) && (QT_CONFIG_neon == 1) + #if defined(_MSC_VER) && \ + ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ +- (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) ++ (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) && \ ++ !defined(__clang__) && (QT_CONFIG_neon == 1) #define WEBP_USE_NEON #define WEBP_USE_INTRINSICS #endif diff --git a/src/3rdparty/libwebp/qt_attribution.json b/src/3rdparty/libwebp/qt_attribution.json index 258f7ff..1b71a55 100644 --- a/src/3rdparty/libwebp/qt_attribution.json +++ b/src/3rdparty/libwebp/qt_attribution.json @@ -6,7 +6,8 @@ "Description": "WebP is a new image format that provides lossless and lossy compression for images on the web.", "Homepage": "https://developers.google.com/speed/webp/", - "Version": "1.3.0", + "Version": "1.3.1", + "DownloadLocation": "https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-1.3.1.tar.gz", "License": "BSD 3-clause \"New\" or \"Revised\" License", "LicenseId": "BSD-3-Clause", "LicenseFile": "COPYING", diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv.c index 7de34fb..a074564 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv.c @@ -440,6 +440,7 @@ static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, // By default SharpYuvConvert calls it with SharpYuvGetCPUInfo. If needed, // users can declare it as extern and call it with an alternate VP8CPUInfo // function. +extern VP8CPUInfo SharpYuvGetCPUInfo; SHARPYUV_EXTERN void SharpYuvInit(VP8CPUInfo cpu_info_func); void SharpYuvInit(VP8CPUInfo cpu_info_func) { static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv.h b/src/3rdparty/libwebp/sharpyuv/sharpyuv.h index 181b20a..7b9904d 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv.h +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv.h @@ -37,7 +37,7 @@ extern "C" { // SharpYUV API version following the convention from semver.org #define SHARPYUV_VERSION_MAJOR 0 #define SHARPYUV_VERSION_MINOR 2 -#define SHARPYUV_VERSION_PATCH 0 +#define SHARPYUV_VERSION_PATCH 1 // Version as a uint32_t. The major number is the high 8 bits. // The minor number is the middle 8 bits. The patch number is the low 16 bits. #define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \ diff --git a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c index 31c272c..0da3efc 100644 --- a/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c +++ b/src/3rdparty/libwebp/sharpyuv/sharpyuv_dsp.c @@ -72,6 +72,7 @@ void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, const uint16_t* best_y, uint16_t* out, int bit_depth); +extern VP8CPUInfo SharpYuvGetCPUInfo; extern void InitSharpYuvSSE2(void); extern void InitSharpYuvNEON(void); diff --git a/src/3rdparty/libwebp/src/dec/tree_dec.c b/src/3rdparty/libwebp/src/dec/tree_dec.c index 1c6fdea..2434605 100644 --- a/src/3rdparty/libwebp/src/dec/tree_dec.c +++ b/src/3rdparty/libwebp/src/dec/tree_dec.c @@ -12,10 +12,11 @@ // Author: Skal (pascal.massimino@gmail.com) #include "src/dec/vp8i_dec.h" +#include "src/dsp/cpu.h" #include "src/utils/bit_reader_inl_utils.h" #if !defined(USE_GENERIC_TREE) -#if !defined(__arm__) && !defined(_M_ARM) && !defined(__aarch64__) +#if !defined(__arm__) && !defined(_M_ARM) && !WEBP_AARCH64 // using a table is ~1-2% slower on ARM. Prefer the coded-tree approach then. #define USE_GENERIC_TREE 1 // ALTERNATE_CODE #else diff --git a/src/3rdparty/libwebp/src/dec/vp8_dec.c b/src/3rdparty/libwebp/src/dec/vp8_dec.c index 2003935..20b92e8 100644 --- a/src/3rdparty/libwebp/src/dec/vp8_dec.c +++ b/src/3rdparty/libwebp/src/dec/vp8_dec.c @@ -494,6 +494,8 @@ static int GetCoeffsAlt(VP8BitReader* const br, return 16; } +extern VP8CPUInfo VP8GetCPUInfo; + WEBP_DSP_INIT_FUNC(InitGetCoeffs) { if (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kSlowSSSE3)) { GetCoeffs = GetCoeffsAlt; diff --git a/src/3rdparty/libwebp/src/dec/vp8i_dec.h b/src/3rdparty/libwebp/src/dec/vp8i_dec.h index 83791ec..1ae4ff6 100644 --- a/src/3rdparty/libwebp/src/dec/vp8i_dec.h +++ b/src/3rdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 3 -#define DEC_REV_VERSION 0 +#define DEC_REV_VERSION 1 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/src/3rdparty/libwebp/src/dec/webp_dec.c b/src/3rdparty/libwebp/src/dec/webp_dec.c index 3f4f7bb..f557868 100644 --- a/src/3rdparty/libwebp/src/dec/webp_dec.c +++ b/src/3rdparty/libwebp/src/dec/webp_dec.c @@ -658,19 +658,26 @@ uint8_t* WebPDecodeBGRA(const uint8_t* data, size_t data_size, uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, uint8_t** u, uint8_t** v, int* stride, int* uv_stride) { - WebPDecBuffer output; // only to preserve the side-infos - uint8_t* const out = Decode(MODE_YUV, data, data_size, - width, height, &output); - - if (out != NULL) { - const WebPYUVABuffer* const buf = &output.u.YUVA; - *u = buf->u; - *v = buf->v; - *stride = buf->y_stride; - *uv_stride = buf->u_stride; - assert(buf->u_stride == buf->v_stride); - } - return out; + // data, width and height are checked by Decode(). + if (u == NULL || v == NULL || stride == NULL || uv_stride == NULL) { + return NULL; + } + + { + WebPDecBuffer output; // only to preserve the side-infos + uint8_t* const out = Decode(MODE_YUV, data, data_size, + width, height, &output); + + if (out != NULL) { + const WebPYUVABuffer* const buf = &output.u.YUVA; + *u = buf->u; + *v = buf->v; + *stride = buf->y_stride; + *uv_stride = buf->u_stride; + assert(buf->u_stride == buf->v_stride); + } + return out; + } } static void DefaultFeatures(WebPBitstreamFeatures* const features) { diff --git a/src/3rdparty/libwebp/src/demux/demux.c b/src/3rdparty/libwebp/src/demux/demux.c index 324e5eb..fd45a25 100644 --- a/src/3rdparty/libwebp/src/demux/demux.c +++ b/src/3rdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 3 -#define DMUX_REV_VERSION 0 +#define DMUX_REV_VERSION 1 typedef struct { size_t start_; // start location of the data diff --git a/src/3rdparty/libwebp/src/dsp/alpha_processing.c b/src/3rdparty/libwebp/src/dsp/alpha_processing.c index 1892929..1d152f2 100644 --- a/src/3rdparty/libwebp/src/dsp/alpha_processing.c +++ b/src/3rdparty/libwebp/src/dsp/alpha_processing.c @@ -425,6 +425,7 @@ void (*WebPAlphaReplace)(uint32_t* src, int length, uint32_t color); //------------------------------------------------------------------------------ // Init function +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitAlphaProcessingMIPSdspR2(void); extern void WebPInitAlphaProcessingSSE2(void); extern void WebPInitAlphaProcessingSSE41(void); diff --git a/src/3rdparty/libwebp/src/dsp/cost.c b/src/3rdparty/libwebp/src/dsp/cost.c index 460ec4f..73d2140 100644 --- a/src/3rdparty/libwebp/src/dsp/cost.c +++ b/src/3rdparty/libwebp/src/dsp/cost.c @@ -374,6 +374,7 @@ static void SetResidualCoeffs_C(const int16_t* const coeffs, VP8GetResidualCostFunc VP8GetResidualCost; VP8SetResidualCoeffsFunc VP8SetResidualCoeffs; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8EncDspCostInitMIPS32(void); extern void VP8EncDspCostInitMIPSdspR2(void); extern void VP8EncDspCostInitSSE2(void); diff --git a/src/3rdparty/libwebp/src/dsp/cost_neon.c b/src/3rdparty/libwebp/src/dsp/cost_neon.c index 8cc8ce5..6582669 100644 --- a/src/3rdparty/libwebp/src/dsp/cost_neon.c +++ b/src/3rdparty/libwebp/src/dsp/cost_neon.c @@ -29,7 +29,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, const uint8x16_t eob = vcombine_u8(vqmovn_u16(eob_0), vqmovn_u16(eob_1)); const uint8x16_t masked = vandq_u8(eob, vld1q_u8(position)); -#ifdef __aarch64__ +#if WEBP_AARCH64 res->last = vmaxvq_u8(masked) - 1; #else const uint8x8_t eob_8x8 = vmax_u8(vget_low_u8(masked), vget_high_u8(masked)); @@ -43,7 +43,7 @@ static void SetResidualCoeffs_NEON(const int16_t* const coeffs, vst1_lane_s32(&res->last, vreinterpret_s32_u32(eob_32x2), 0); --res->last; -#endif // __aarch64__ +#endif // WEBP_AARCH64 res->coeffs = coeffs; } diff --git a/src/3rdparty/libwebp/src/dsp/cpu.c b/src/3rdparty/libwebp/src/dsp/cpu.c index 62de73f..2234c77 100644 --- a/src/3rdparty/libwebp/src/dsp/cpu.c +++ b/src/3rdparty/libwebp/src/dsp/cpu.c @@ -173,6 +173,7 @@ static int x86CPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. static int AndroidCPUInfo(CPUFeature feature) { @@ -184,6 +185,7 @@ static int AndroidCPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test // Use compile flags as an indicator of SIMD support instead of a runtime check. @@ -208,6 +210,7 @@ static int wasmCPUInfo(CPUFeature feature) { } return 0; } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo; #elif defined(WEBP_HAVE_NEON) // In most cases this function doesn't check for NEON support (it's assumed by @@ -236,6 +239,7 @@ static int armCPUInfo(CPUFeature feature) { return 1; #endif } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = armCPUInfo; #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ defined(WEBP_USE_MSA) @@ -247,7 +251,9 @@ static int mipsCPUInfo(CPUFeature feature) { } } +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; #else +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; VP8CPUInfo VP8GetCPUInfo = NULL; #endif diff --git a/src/3rdparty/libwebp/src/dsp/cpu.h b/src/3rdparty/libwebp/src/dsp/cpu.h index de32a39..581ecbd 100644 --- a/src/3rdparty/libwebp/src/dsp/cpu.h +++ b/src/3rdparty/libwebp/src/dsp/cpu.h @@ -14,10 +14,10 @@ #ifndef WEBP_DSP_CPU_H_ #define WEBP_DSP_CPU_H_ -#include - #include +#include + #ifdef HAVE_CONFIG_H #include "src/webp/config.h" #endif @@ -45,6 +45,9 @@ #define __has_builtin(x) 0 #endif +//------------------------------------------------------------------------------ +// x86 defines. + #if !defined(HAVE_CONFIG_H) #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) && !defined(__clang__) @@ -82,6 +85,9 @@ #undef WEBP_MSC_SSE41 #undef WEBP_MSC_SSE2 +//------------------------------------------------------------------------------ +// Arm defines. + // The intrinsics currently cause compiler errors with arm-nacl-gcc and the // inline assembly would need to be modified for use with Native Client. #if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ @@ -100,17 +106,27 @@ // inclusion of arm64_neon.h; Visual Studio 2019 includes this file in // arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with // vtbl4_u8(); a fix was made in 16.6. -#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ - (_MSC_VER >= 1926 && defined(_M_ARM64))) && \ - !defined(__clang__) && (QT_CONFIG_neon == 1) +#if defined(_MSC_VER) && \ + ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1926 && (defined(_M_ARM64) || defined(_M_ARM64EC)))) && \ + !defined(__clang__) && (QT_CONFIG_neon == 1) #define WEBP_USE_NEON #define WEBP_USE_INTRINSICS #endif +#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define WEBP_AARCH64 1 +#else +#define WEBP_AARCH64 0 +#endif + #if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) #define WEBP_HAVE_NEON #endif +//------------------------------------------------------------------------------ +// MIPS defines. + #if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 @@ -126,6 +142,8 @@ #define WEBP_USE_MSA #endif +//------------------------------------------------------------------------------ + #ifndef WEBP_DSP_OMIT_C_CODE #define WEBP_DSP_OMIT_C_CODE 1 #endif @@ -136,13 +154,14 @@ #define WEBP_NEON_OMIT_C_CODE 0 #endif -#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \ - defined(__aarch64__)) +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) #define WEBP_NEON_WORK_AROUND_GCC 1 #else #define WEBP_NEON_WORK_AROUND_GCC 0 #endif +//------------------------------------------------------------------------------ + // This macro prevents thread_sanitizer from reporting known concurrent writes. #define WEBP_TSAN_IGNORE_FUNCTION #if defined(__has_feature) @@ -244,16 +263,7 @@ typedef enum { kMSA } CPUFeature; -#ifdef __cplusplus -extern "C" { -#endif - // returns true if the CPU supports the feature. typedef int (*VP8CPUInfo)(CPUFeature feature); -WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; - -#ifdef __cplusplus -} // extern "C" -#endif #endif // WEBP_DSP_CPU_H_ diff --git a/src/3rdparty/libwebp/src/dsp/dec.c b/src/3rdparty/libwebp/src/dsp/dec.c index 537c701..33d8df8 100644 --- a/src/3rdparty/libwebp/src/dsp/dec.c +++ b/src/3rdparty/libwebp/src/dsp/dec.c @@ -734,6 +734,7 @@ VP8SimpleFilterFunc VP8SimpleHFilter16i; void (*VP8DitherCombine8x8)(const uint8_t* dither, uint8_t* dst, int dst_stride); +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8DspInitSSE2(void); extern void VP8DspInitSSE41(void); extern void VP8DspInitNEON(void); diff --git a/src/3rdparty/libwebp/src/dsp/dec_neon.c b/src/3rdparty/libwebp/src/dsp/dec_neon.c index fa85170..22784cf 100644 --- a/src/3rdparty/libwebp/src/dsp/dec_neon.c +++ b/src/3rdparty/libwebp/src/dsp/dec_neon.c @@ -1428,7 +1428,7 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x8_t A = vld1_u8(dst - BPS); // top row -#if defined(__aarch64__) +#if WEBP_AARCH64 const uint16_t p2 = vaddlv_u8(A); sum_top = vdupq_n_u16(p2); #else @@ -1511,7 +1511,7 @@ static WEBP_INLINE void DC16_NEON(uint8_t* dst, int do_top, int do_left) { if (do_top) { const uint8x16_t A = vld1q_u8(dst - BPS); // top row -#if defined(__aarch64__) +#if WEBP_AARCH64 const uint16_t p3 = vaddlvq_u8(A); sum_top = vdupq_n_u16(p3); #else diff --git a/src/3rdparty/libwebp/src/dsp/enc.c b/src/3rdparty/libwebp/src/dsp/enc.c index ea47a3f..2ba97ba 100644 --- a/src/3rdparty/libwebp/src/dsp/enc.c +++ b/src/3rdparty/libwebp/src/dsp/enc.c @@ -732,6 +732,7 @@ VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT; VP8BlockCopy VP8Copy4x4; VP8BlockCopy VP8Copy16x8; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8EncDspInitSSE2(void); extern void VP8EncDspInitSSE41(void); extern void VP8EncDspInitNEON(void); diff --git a/src/3rdparty/libwebp/src/dsp/enc_neon.c b/src/3rdparty/libwebp/src/dsp/enc_neon.c index 3a04111..7148003 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_neon.c +++ b/src/3rdparty/libwebp/src/dsp/enc_neon.c @@ -764,7 +764,7 @@ static WEBP_INLINE void AccumulateSSE16_NEON(const uint8_t* const a, // Horizontal sum of all four uint32_t values in 'sum'. static int SumToInt_NEON(uint32x4_t sum) { -#if defined(__aarch64__) +#if WEBP_AARCH64 return (int)vaddvq_u32(sum); #else const uint64x2_t sum2 = vpaddlq_u32(sum); @@ -865,7 +865,7 @@ static int QuantizeBlock_NEON(int16_t in[16], int16_t out[16], uint8x8x4_t shuffles; // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) uint8x16x2_t all_out; INIT_VECTOR2(all_out, vreinterpretq_u8_s16(out0), vreinterpretq_u8_s16(out1)); diff --git a/src/3rdparty/libwebp/src/dsp/enc_sse2.c b/src/3rdparty/libwebp/src/dsp/enc_sse2.c index 1d10556..010624a 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_sse2.c +++ b/src/3rdparty/libwebp/src/dsp/enc_sse2.c @@ -25,9 +25,160 @@ //------------------------------------------------------------------------------ // Transforms (Paragraph 14.4) -// Does one or two inverse transforms. -static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, - int do_two) { +// Does one inverse transform. +static void ITransform_One_SSE2(const uint8_t* ref, const int16_t* in, + uint8_t* dst) { + // This implementation makes use of 16-bit fixed point versions of two + // multiply constants: + // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 + // K2 = sqrt(2) * sin (pi/8) ~= 35468 / 2^16 + // + // To be able to use signed 16-bit integers, we use the following trick to + // have constants within range: + // - Associated constants are obtained by subtracting the 16-bit fixed point + // version of one: + // k = K - (1 << 16) => K = k + (1 << 16) + // K1 = 85267 => k1 = 20091 + // K2 = 35468 => k2 = -30068 + // - The multiplication of a variable by a constant become the sum of the + // variable and the multiplication of that variable by the associated + // constant: + // (x * K) >> 16 = (x * (k + (1 << 16))) >> 16 = ((x * k ) >> 16) + x + const __m128i k1k2 = _mm_set_epi16(-30068, -30068, -30068, -30068, + 20091, 20091, 20091, 20091); + const __m128i k2k1 = _mm_set_epi16(20091, 20091, 20091, 20091, + -30068, -30068, -30068, -30068); + const __m128i zero = _mm_setzero_si128(); + const __m128i zero_four = _mm_set_epi16(0, 0, 0, 0, 4, 4, 4, 4); + __m128i T01, T23; + + // Load and concatenate the transform coefficients. + const __m128i in01 = _mm_loadu_si128((const __m128i*)&in[0]); + const __m128i in23 = _mm_loadu_si128((const __m128i*)&in[8]); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + + // Vertical pass and subsequent transpose. + { + const __m128i in1 = _mm_unpackhi_epi64(in01, in01); + const __m128i in3 = _mm_unpackhi_epi64(in23, in23); + + // First pass, c and d calculations are longer because of the "trick" + // multiplications. + // c = MUL(in1, K2) - MUL(in3, K1) = MUL(in1, k2) - MUL(in3, k1) + in1 - in3 + // d = MUL(in1, K1) + MUL(in3, K2) = MUL(in1, k1) + MUL(in3, k2) + in1 + in3 + const __m128i a_d3 = _mm_add_epi16(in01, in23); + const __m128i b_c3 = _mm_sub_epi16(in01, in23); + const __m128i c1d1 = _mm_mulhi_epi16(in1, k2k1); + const __m128i c2d2 = _mm_mulhi_epi16(in3, k1k2); + const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); + const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); + const __m128i c = _mm_add_epi16(c3, c4); + const __m128i d4u = _mm_add_epi16(c1d1, c2d2); + const __m128i du = _mm_add_epi16(a_d3, d4u); + const __m128i d = _mm_unpackhi_epi64(du, du); + + // Second pass. + const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); + const __m128i comb_dc = _mm_unpacklo_epi64(d, c); + + const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); + const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); + const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); + + const __m128i transpose_0 = _mm_unpacklo_epi16(tmp01, tmp23); + const __m128i transpose_1 = _mm_unpackhi_epi16(tmp01, tmp23); + // a00 a20 a01 a21 a02 a22 a03 a23 + // a10 a30 a11 a31 a12 a32 a13 a33 + + T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); + T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + } + + // Horizontal pass and subsequent transpose. + { + const __m128i T1 = _mm_unpackhi_epi64(T01, T01); + const __m128i T3 = _mm_unpackhi_epi64(T23, T23); + + // First pass, c and d calculations are longer because of the "trick" + // multiplications. + const __m128i dc = _mm_add_epi16(T01, zero_four); + + // c = MUL(T1, K2) - MUL(T3, K1) = MUL(T1, k2) - MUL(T3, k1) + T1 - T3 + // d = MUL(T1, K1) + MUL(T3, K2) = MUL(T1, k1) + MUL(T3, k2) + T1 + T3 + const __m128i a_d3 = _mm_add_epi16(dc, T23); + const __m128i b_c3 = _mm_sub_epi16(dc, T23); + const __m128i c1d1 = _mm_mulhi_epi16(T1, k2k1); + const __m128i c2d2 = _mm_mulhi_epi16(T3, k1k2); + const __m128i c3 = _mm_unpackhi_epi64(b_c3, b_c3); + const __m128i c4 = _mm_sub_epi16(c1d1, c2d2); + const __m128i c = _mm_add_epi16(c3, c4); + const __m128i d4u = _mm_add_epi16(c1d1, c2d2); + const __m128i du = _mm_add_epi16(a_d3, d4u); + const __m128i d = _mm_unpackhi_epi64(du, du); + + // Second pass. + const __m128i comb_ab = _mm_unpacklo_epi64(a_d3, b_c3); + const __m128i comb_dc = _mm_unpacklo_epi64(d, c); + + const __m128i tmp01 = _mm_add_epi16(comb_ab, comb_dc); + const __m128i tmp32 = _mm_sub_epi16(comb_ab, comb_dc); + const __m128i tmp23 = _mm_shuffle_epi32(tmp32, _MM_SHUFFLE(1, 0, 3, 2)); + + const __m128i shifted01 = _mm_srai_epi16(tmp01, 3); + const __m128i shifted23 = _mm_srai_epi16(tmp23, 3); + // a00 a01 a02 a03 a10 a11 a12 a13 + // a20 a21 a22 a23 a30 a31 a32 a33 + + const __m128i transpose_0 = _mm_unpacklo_epi16(shifted01, shifted23); + const __m128i transpose_1 = _mm_unpackhi_epi16(shifted01, shifted23); + // a00 a20 a01 a21 a02 a22 a03 a23 + // a10 a30 a11 a31 a12 a32 a13 a33 + + T01 = _mm_unpacklo_epi16(transpose_0, transpose_1); + T23 = _mm_unpackhi_epi16(transpose_0, transpose_1); + // a00 a10 a20 a30 a01 a11 a21 a31 + // a02 a12 a22 a32 a03 a13 a23 a33 + } + + // Add inverse transform to 'ref' and store. + { + // Load the reference(s). + __m128i ref01, ref23, ref0123; + int32_t buf[4]; + + // Load four bytes/pixels per line. + const __m128i ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); + const __m128i ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); + const __m128i ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); + const __m128i ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); + ref01 = _mm_unpacklo_epi32(ref0, ref1); + ref23 = _mm_unpacklo_epi32(ref2, ref3); + + // Convert to 16b. + ref01 = _mm_unpacklo_epi8(ref01, zero); + ref23 = _mm_unpacklo_epi8(ref23, zero); + // Add the inverse transform(s). + ref01 = _mm_add_epi16(ref01, T01); + ref23 = _mm_add_epi16(ref23, T23); + // Unsigned saturate to 8b. + ref0123 = _mm_packus_epi16(ref01, ref23); + + _mm_storeu_si128((__m128i *)buf, ref0123); + + // Store four bytes/pixels per line. + WebPInt32ToMem(&dst[0 * BPS], buf[0]); + WebPInt32ToMem(&dst[1 * BPS], buf[1]); + WebPInt32ToMem(&dst[2 * BPS], buf[2]); + WebPInt32ToMem(&dst[3 * BPS], buf[3]); + } +} + +// Does two inverse transforms. +static void ITransform_Two_SSE2(const uint8_t* ref, const int16_t* in, + uint8_t* dst) { // This implementation makes use of 16-bit fixed point versions of two // multiply constants: // K1 = sqrt(2) * cos (pi/8) ~= 85627 / 2^16 @@ -49,33 +200,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, __m128i T0, T1, T2, T3; // Load and concatenate the transform coefficients (we'll do two inverse - // transforms in parallel). In the case of only one inverse transform, the - // second half of the vectors will just contain random value we'll never - // use nor store. + // transforms in parallel). __m128i in0, in1, in2, in3; { - in0 = _mm_loadl_epi64((const __m128i*)&in[0]); - in1 = _mm_loadl_epi64((const __m128i*)&in[4]); - in2 = _mm_loadl_epi64((const __m128i*)&in[8]); - in3 = _mm_loadl_epi64((const __m128i*)&in[12]); - // a00 a10 a20 a30 x x x x - // a01 a11 a21 a31 x x x x - // a02 a12 a22 a32 x x x x - // a03 a13 a23 a33 x x x x - if (do_two) { - const __m128i inB0 = _mm_loadl_epi64((const __m128i*)&in[16]); - const __m128i inB1 = _mm_loadl_epi64((const __m128i*)&in[20]); - const __m128i inB2 = _mm_loadl_epi64((const __m128i*)&in[24]); - const __m128i inB3 = _mm_loadl_epi64((const __m128i*)&in[28]); - in0 = _mm_unpacklo_epi64(in0, inB0); - in1 = _mm_unpacklo_epi64(in1, inB1); - in2 = _mm_unpacklo_epi64(in2, inB2); - in3 = _mm_unpacklo_epi64(in3, inB3); - // a00 a10 a20 a30 b00 b10 b20 b30 - // a01 a11 a21 a31 b01 b11 b21 b31 - // a02 a12 a22 a32 b02 b12 b22 b32 - // a03 a13 a23 a33 b03 b13 b23 b33 - } + const __m128i tmp0 = _mm_loadu_si128((const __m128i*)&in[0]); + const __m128i tmp1 = _mm_loadu_si128((const __m128i*)&in[8]); + const __m128i tmp2 = _mm_loadu_si128((const __m128i*)&in[16]); + const __m128i tmp3 = _mm_loadu_si128((const __m128i*)&in[24]); + in0 = _mm_unpacklo_epi64(tmp0, tmp2); + in1 = _mm_unpackhi_epi64(tmp0, tmp2); + in2 = _mm_unpacklo_epi64(tmp1, tmp3); + in3 = _mm_unpackhi_epi64(tmp1, tmp3); + // a00 a10 a20 a30 b00 b10 b20 b30 + // a01 a11 a21 a31 b01 b11 b21 b31 + // a02 a12 a22 a32 b02 b12 b22 b32 + // a03 a13 a23 a33 b03 b13 b23 b33 } // Vertical pass and subsequent transpose. @@ -148,19 +287,11 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, const __m128i zero = _mm_setzero_si128(); // Load the reference(s). __m128i ref0, ref1, ref2, ref3; - if (do_two) { - // Load eight bytes/pixels per line. - ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); - ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); - ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); - ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); - } else { - // Load four bytes/pixels per line. - ref0 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[0 * BPS])); - ref1 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[1 * BPS])); - ref2 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[2 * BPS])); - ref3 = _mm_cvtsi32_si128(WebPMemToInt32(&ref[3 * BPS])); - } + // Load eight bytes/pixels per line. + ref0 = _mm_loadl_epi64((const __m128i*)&ref[0 * BPS]); + ref1 = _mm_loadl_epi64((const __m128i*)&ref[1 * BPS]); + ref2 = _mm_loadl_epi64((const __m128i*)&ref[2 * BPS]); + ref3 = _mm_loadl_epi64((const __m128i*)&ref[3 * BPS]); // Convert to 16b. ref0 = _mm_unpacklo_epi8(ref0, zero); ref1 = _mm_unpacklo_epi8(ref1, zero); @@ -176,20 +307,21 @@ static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, ref1 = _mm_packus_epi16(ref1, ref1); ref2 = _mm_packus_epi16(ref2, ref2); ref3 = _mm_packus_epi16(ref3, ref3); - // Store the results. - if (do_two) { - // Store eight bytes/pixels per line. - _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); - _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); - _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); - _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); - } else { - // Store four bytes/pixels per line. - WebPInt32ToMem(&dst[0 * BPS], _mm_cvtsi128_si32(ref0)); - WebPInt32ToMem(&dst[1 * BPS], _mm_cvtsi128_si32(ref1)); - WebPInt32ToMem(&dst[2 * BPS], _mm_cvtsi128_si32(ref2)); - WebPInt32ToMem(&dst[3 * BPS], _mm_cvtsi128_si32(ref3)); - } + // Store eight bytes/pixels per line. + _mm_storel_epi64((__m128i*)&dst[0 * BPS], ref0); + _mm_storel_epi64((__m128i*)&dst[1 * BPS], ref1); + _mm_storel_epi64((__m128i*)&dst[2 * BPS], ref2); + _mm_storel_epi64((__m128i*)&dst[3 * BPS], ref3); + } +} + +// Does one or two inverse transforms. +static void ITransform_SSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst, + int do_two) { + if (do_two) { + ITransform_Two_SSE2(ref, in, dst); + } else { + ITransform_One_SSE2(ref, in, dst); } } diff --git a/src/3rdparty/libwebp/src/dsp/filters.c b/src/3rdparty/libwebp/src/dsp/filters.c index 4506567..85eee50 100644 --- a/src/3rdparty/libwebp/src/dsp/filters.c +++ b/src/3rdparty/libwebp/src/dsp/filters.c @@ -233,6 +233,7 @@ static void GradientUnfilter_C(const uint8_t* prev, const uint8_t* in, WebPFilterFunc WebPFilters[WEBP_FILTER_LAST]; WebPUnfilterFunc WebPUnfilters[WEBP_FILTER_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8FiltersInitMIPSdspR2(void); extern void VP8FiltersInitMSA(void); extern void VP8FiltersInitNEON(void); diff --git a/src/3rdparty/libwebp/src/dsp/lossless.c b/src/3rdparty/libwebp/src/dsp/lossless.c index fb86e58..9f81209 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless.c +++ b/src/3rdparty/libwebp/src/dsp/lossless.c @@ -588,6 +588,7 @@ VP8LConvertFunc VP8LConvertBGRAToBGR; VP8LMapARGBFunc VP8LMapColor32b; VP8LMapAlphaFunc VP8LMapColor8b; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8LDspInitSSE2(void); extern void VP8LDspInitSSE41(void); extern void VP8LDspInitNEON(void); diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc.c b/src/3rdparty/libwebp/src/dsp/lossless_enc.c index b1f9f26..cde1280 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_enc.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_enc.c @@ -791,6 +791,7 @@ VP8LBundleColorMapFunc VP8LBundleColorMap; VP8LPredictorAddSubFunc VP8LPredictorsSub[16]; VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8LEncDspInitSSE2(void); extern void VP8LEncDspInitSSE41(void); extern void VP8LEncDspInitNEON(void); diff --git a/src/3rdparty/libwebp/src/dsp/lossless_enc_neon.c b/src/3rdparty/libwebp/src/dsp/lossless_enc_neon.c index 7c7b73f..e32c796 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_enc_neon.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_enc_neon.c @@ -25,7 +25,7 @@ // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) #define USE_VTBLQ #endif diff --git a/src/3rdparty/libwebp/src/dsp/lossless_neon.c b/src/3rdparty/libwebp/src/dsp/lossless_neon.c index 89e3e01..ddc9b61 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_neon.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_neon.c @@ -498,7 +498,7 @@ static void PredictorAdd13_NEON(const uint32_t* in, const uint32_t* upper, // vtbl?_u8 are marked unavailable for iOS arm64 with Xcode < 6.3, use // non-standard versions there. -#if defined(__APPLE__) && defined(__aarch64__) && \ +#if defined(__APPLE__) && WEBP_AARCH64 && \ defined(__apple_build_version__) && (__apple_build_version__< 6020037) #define USE_VTBLQ #endif diff --git a/src/3rdparty/libwebp/src/dsp/neon.h b/src/3rdparty/libwebp/src/dsp/neon.h index c591f9b..14acb40 100644 --- a/src/3rdparty/libwebp/src/dsp/neon.h +++ b/src/3rdparty/libwebp/src/dsp/neon.h @@ -21,7 +21,7 @@ // Right now, some intrinsics functions seem slower, so we disable them // everywhere except newer clang/gcc or aarch64 where the inline assembly is // incompatible. -#if LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,9) || defined(__aarch64__) +#if LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 9) || WEBP_AARCH64 #define WEBP_USE_INTRINSICS // use intrinsics when possible #endif @@ -46,7 +46,7 @@ // if using intrinsics, this flag avoids some functions that make gcc-4.6.3 // crash ("internal compiler error: in immed_double_const, at emit-rtl."). // (probably similar to gcc.gnu.org/bugzilla/show_bug.cgi?id=48183) -#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || WEBP_AARCH64) #define WORK_AROUND_GCC #endif diff --git a/src/3rdparty/libwebp/src/dsp/quant.h b/src/3rdparty/libwebp/src/dsp/quant.h index fc099bf..bf7734c 100644 --- a/src/3rdparty/libwebp/src/dsp/quant.h +++ b/src/3rdparty/libwebp/src/dsp/quant.h @@ -22,7 +22,7 @@ #define IsFlat IsFlat_NEON static uint32_t horizontal_add_uint32x4(const uint32x4_t a) { -#if defined(__aarch64__) +#if WEBP_AARCH64 return vaddvq_u32(a); #else const uint64x2_t b = vpaddlq_u32(a); diff --git a/src/3rdparty/libwebp/src/dsp/rescaler.c b/src/3rdparty/libwebp/src/dsp/rescaler.c index 14620ce..325d8be 100644 --- a/src/3rdparty/libwebp/src/dsp/rescaler.c +++ b/src/3rdparty/libwebp/src/dsp/rescaler.c @@ -197,6 +197,7 @@ WebPRescalerImportRowFunc WebPRescalerImportRowShrink; WebPRescalerExportRowFunc WebPRescalerExportRowExpand; WebPRescalerExportRowFunc WebPRescalerExportRowShrink; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPRescalerDspInitSSE2(void); extern void WebPRescalerDspInitMIPS32(void); extern void WebPRescalerDspInitMIPSdspR2(void); diff --git a/src/3rdparty/libwebp/src/dsp/ssim.c b/src/3rdparty/libwebp/src/dsp/ssim.c index f85c2e6..9a1341e 100644 --- a/src/3rdparty/libwebp/src/dsp/ssim.c +++ b/src/3rdparty/libwebp/src/dsp/ssim.c @@ -137,6 +137,7 @@ VP8SSIMGetClippedFunc VP8SSIMGetClipped; VP8AccumulateSSEFunc VP8AccumulateSSE; #endif +extern VP8CPUInfo VP8GetCPUInfo; extern void VP8SSIMDspInitSSE2(void); WEBP_DSP_INIT_FUNC(VP8SSIMDspInit) { diff --git a/src/3rdparty/libwebp/src/dsp/upsampling.c b/src/3rdparty/libwebp/src/dsp/upsampling.c index 87f771f..983b9c4 100644 --- a/src/3rdparty/libwebp/src/dsp/upsampling.c +++ b/src/3rdparty/libwebp/src/dsp/upsampling.c @@ -215,6 +215,7 @@ static void EmptyYuv444Func(const uint8_t* y, WebPYUV444Converter WebPYUV444Converters[MODE_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitYUV444ConvertersMIPSdspR2(void); extern void WebPInitYUV444ConvertersSSE2(void); extern void WebPInitYUV444ConvertersSSE41(void); diff --git a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c index 6ba71a7..bbc000c 100644 --- a/src/3rdparty/libwebp/src/dsp/upsampling_neon.c +++ b/src/3rdparty/libwebp/src/dsp/upsampling_neon.c @@ -111,7 +111,7 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 }; vst4_u8(out, v255_r_g_b); \ } while (0) -#if !defined(WEBP_SWAP_16BIT_CSP) +#if (WEBP_SWAP_16BIT_CSP == 0) #define ZIP_U8(lo, hi) vzip_u8((lo), (hi)) #else #define ZIP_U8(lo, hi) vzip_u8((hi), (lo)) diff --git a/src/3rdparty/libwebp/src/dsp/yuv.c b/src/3rdparty/libwebp/src/dsp/yuv.c index d16c13d..8a04b85 100644 --- a/src/3rdparty/libwebp/src/dsp/yuv.c +++ b/src/3rdparty/libwebp/src/dsp/yuv.c @@ -70,6 +70,7 @@ void WebPSamplerProcessPlane(const uint8_t* y, int y_stride, WebPSamplerRowFunc WebPSamplers[MODE_LAST]; +extern VP8CPUInfo VP8GetCPUInfo; extern void WebPInitSamplersSSE2(void); extern void WebPInitSamplersSSE41(void); extern void WebPInitSamplersMIPS32(void); diff --git a/src/3rdparty/libwebp/src/enc/alpha_enc.c b/src/3rdparty/libwebp/src/enc/alpha_enc.c index f7c0269..26f0034 100644 --- a/src/3rdparty/libwebp/src/enc/alpha_enc.c +++ b/src/3rdparty/libwebp/src/enc/alpha_enc.c @@ -13,6 +13,7 @@ #include #include +#include #include "src/enc/vp8i_enc.h" #include "src/dsp/dsp.h" @@ -140,6 +141,11 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, !reduce_levels, &tmp_bw, &result->stats); if (ok) { output = VP8LBitWriterFinish(&tmp_bw); + if (tmp_bw.error_) { + VP8LBitWriterWipeOut(&tmp_bw); + memset(&result->bw, 0, sizeof(result->bw)); + return 0; + } output_size = VP8LBitWriterNumBytes(&tmp_bw); if (output_size > data_size) { // compressed size is larger than source! Revert to uncompressed mode. @@ -148,6 +154,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, } } else { VP8LBitWriterWipeOut(&tmp_bw); + memset(&result->bw, 0, sizeof(result->bw)); return 0; } } @@ -162,7 +169,7 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height, header = method | (filter << 2); if (reduce_levels) header |= ALPHA_PREPROCESSED_LEVELS << 4; - VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size); + if (!VP8BitWriterInit(&result->bw, ALPHA_HEADER_LEN + output_size)) ok = 0; ok = ok && VP8BitWriterAppend(&result->bw, &header, ALPHA_HEADER_LEN); ok = ok && VP8BitWriterAppend(&result->bw, output, output_size); @@ -312,11 +319,11 @@ static int EncodeAlpha(VP8Encoder* const enc, assert(filter >= WEBP_FILTER_NONE && filter <= WEBP_FILTER_FAST); if (quality < 0 || quality > 100) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } if (method < ALPHA_NO_COMPRESSION || method > ALPHA_LOSSLESS_COMPRESSION) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } if (method == ALPHA_NO_COMPRESSION) { @@ -326,7 +333,7 @@ static int EncodeAlpha(VP8Encoder* const enc, quant_alpha = (uint8_t*)WebPSafeMalloc(1ULL, data_size); if (quant_alpha == NULL) { - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Extract alpha data (width x height) from raw_data (stride x height). @@ -346,6 +353,9 @@ static int EncodeAlpha(VP8Encoder* const enc, ok = ApplyFiltersAndEncode(quant_alpha, width, height, data_size, method, filter, reduce_levels, effort_level, output, output_size, pic->stats); + if (!ok) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise + } #if !defined(WEBP_DISABLE_STATS) if (pic->stats != NULL) { // need stats? pic->stats->coded_size += (int)(*output_size); @@ -405,7 +415,7 @@ int VP8EncStartAlpha(VP8Encoder* const enc) { WebPWorker* const worker = &enc->alpha_worker_; // Makes sure worker is good to go. if (!WebPGetWorkerInterface()->Reset(worker)) { - return 0; + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } WebPGetWorkerInterface()->Launch(worker); return 1; diff --git a/src/3rdparty/libwebp/src/enc/analysis_enc.c b/src/3rdparty/libwebp/src/enc/analysis_enc.c index a0001ac..962eaa9 100644 --- a/src/3rdparty/libwebp/src/enc/analysis_enc.c +++ b/src/3rdparty/libwebp/src/enc/analysis_enc.c @@ -474,6 +474,10 @@ int VP8EncAnalyze(VP8Encoder* const enc) { } else { // Use only one default segment. ResetAllMBInfo(enc); } + if (!ok) { + return WebPEncodingSetError(enc->pic_, + VP8_ENC_ERROR_OUT_OF_MEMORY); // imprecise + } return ok; } diff --git a/src/3rdparty/libwebp/src/enc/backward_references_enc.c b/src/3rdparty/libwebp/src/enc/backward_references_enc.c index 49a0fac..dc98bf1 100644 --- a/src/3rdparty/libwebp/src/enc/backward_references_enc.c +++ b/src/3rdparty/libwebp/src/enc/backward_references_enc.c @@ -283,8 +283,7 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index = (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); if (hash_to_first_index == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } percent_range = remaining_percent / 2; @@ -1050,8 +1049,7 @@ int VP8LGetBackwardReferences( refs_best = GetBackwardReferencesLowEffort( width, height, argb, cache_bits_best, hash_chain, refs); if (refs_best == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Set it in first position. BackwardRefsSwap(refs_best, &refs[0]); @@ -1059,8 +1057,7 @@ int VP8LGetBackwardReferences( if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, cache_bits_max, do_no_cache, hash_chain, refs, cache_bits_best)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } } diff --git a/src/3rdparty/libwebp/src/enc/frame_enc.c b/src/3rdparty/libwebp/src/enc/frame_enc.c index b93d9e5..9a98dc1 100644 --- a/src/3rdparty/libwebp/src/enc/frame_enc.c +++ b/src/3rdparty/libwebp/src/enc/frame_enc.c @@ -689,7 +689,7 @@ static int PreLoopInitialize(VP8Encoder* const enc) { } if (!ok) { VP8EncFreeBitWriters(enc); // malloc error occurred - WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } return ok; } @@ -719,6 +719,7 @@ static int PostLoopFinalize(VP8EncIterator* const it, int ok) { } else { // Something bad happened -> need to do some memory cleanup. VP8EncFreeBitWriters(enc); + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } return ok; } @@ -754,6 +755,11 @@ int VP8EncLoop(VP8Encoder* const enc) { // *then* decide how to code the skip decision if there's one. if (!VP8Decimate(&it, &info, rd_opt) || dont_use_skip) { CodeResiduals(it.bw_, &it, &info); + if (it.bw_->error_) { + // enc->pic_->error_code is set in PostLoopFinalize(). + ok = 0; + break; + } } else { // reset predictors after a skip ResetAfterSkip(&it); } diff --git a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c index 78c8ca4..a9280e6 100644 --- a/src/3rdparty/libwebp/src/enc/picture_csp_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_csp_enc.c @@ -98,6 +98,7 @@ static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; static void InitGammaTables(void); +extern VP8CPUInfo VP8GetCPUInfo; WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { @@ -534,7 +535,9 @@ static int ImportYUVAFromRGBA(const uint8_t* r_ptr, WebPInitConvertARGBToYUV(); InitGammaTables(); - if (tmp_rgb == NULL) return 0; // malloc error + if (tmp_rgb == NULL) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + } // Downsample Y/U/V planes, two rows at a time for (y = 0; y < (height >> 1); ++y) { diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c b/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c index 839f91c..ea90d82 100644 --- a/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/src/3rdparty/libwebp/src/enc/picture_rescale_enc.c @@ -137,7 +137,9 @@ int WebPPictureCrop(WebPPicture* pic, PictureGrabSpecs(pic, &tmp); tmp.width = width; tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; + if (!WebPPictureAlloc(&tmp)) { + return WebPEncodingSetError(pic, tmp.error_code); + } if (!pic->use_argb) { const int y_offset = top * pic->y_stride + left; @@ -212,26 +214,28 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { prev_height = picture->height; if (!WebPRescalerGetScaledDimensions( prev_width, prev_height, &width, &height)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } PictureGrabSpecs(picture, &tmp); tmp.width = width; tmp.height = height; - if (!WebPPictureAlloc(&tmp)) return 0; + if (!WebPPictureAlloc(&tmp)) { + return WebPEncodingSetError(picture, tmp.error_code); + } if (!picture->use_argb) { work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // If present, we need to rescale alpha first (for AlphaMultiplyY). if (picture->a != NULL) { WebPInitAlphaProcessing(); if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride, tmp.a, width, height, tmp.a_stride, work, 1)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } } @@ -246,14 +250,14 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height), picture->uv_stride, tmp.v, HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } AlphaMultiplyY(&tmp, 1); } else { work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // In order to correctly interpolate colors, we need to apply the alpha // weighting first (black-matting), scale the RGB values, and remove @@ -263,7 +267,7 @@ int WebPPictureRescale(WebPPicture* picture, int width, int height) { if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height, picture->argb_stride * 4, (uint8_t*)tmp.argb, width, height, tmp.argb_stride * 4, work, 4)) { - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); } AlphaMultiplyARGB(&tmp, 1); } diff --git a/src/3rdparty/libwebp/src/enc/syntax_enc.c b/src/3rdparty/libwebp/src/enc/syntax_enc.c index e18cf65..9b8f524 100644 --- a/src/3rdparty/libwebp/src/enc/syntax_enc.c +++ b/src/3rdparty/libwebp/src/enc/syntax_enc.c @@ -258,7 +258,10 @@ static int EmitPartitionsSize(const VP8Encoder* const enc, buf[3 * p + 1] = (part_size >> 8) & 0xff; buf[3 * p + 2] = (part_size >> 16) & 0xff; } - return p ? pic->writer(buf, 3 * p, pic) : 1; + if (p && !pic->writer(buf, 3 * p, pic)) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + } + return 1; } //------------------------------------------------------------------------------ @@ -381,6 +384,7 @@ int VP8EncWrite(VP8Encoder* const enc) { enc->coded_size_ = (int)(CHUNK_HEADER_SIZE + riff_size); ok = ok && WebPReportProgress(pic, final_percent, &enc->percent_); + if (!ok) WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); return ok; } diff --git a/src/3rdparty/libwebp/src/enc/vp8i_enc.h b/src/3rdparty/libwebp/src/enc/vp8i_enc.h index c9927c4..19d9a6e 100644 --- a/src/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/src/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 3 -#define ENC_REV_VERSION 0 +#define ENC_REV_VERSION 1 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost diff --git a/src/3rdparty/libwebp/src/enc/vp8l_enc.c b/src/3rdparty/libwebp/src/enc/vp8l_enc.c index 0b07e52..3a8ec3d 100644 --- a/src/3rdparty/libwebp/src/enc/vp8l_enc.c +++ b/src/3rdparty/libwebp/src/enc/vp8l_enc.c @@ -196,8 +196,7 @@ static int CoOccurrenceBuild(const WebPPicture* const pic, uint32_t palette_sorted[MAX_PALETTE_SIZE]; lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); if (lines == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } line_top = &lines[0]; line_current = &lines[pic->width]; @@ -255,10 +254,10 @@ static int PaletteSortModifiedZeng( cooccurrence = (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); if (cooccurrence == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } if (!CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence)) { + WebPSafeFree(cooccurrence); return 0; } @@ -1012,8 +1011,7 @@ static int StoreImageToBitMask( VP8LRefsCursorNext(&c); } if (bw->error_) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } return 1; } @@ -1297,7 +1295,10 @@ static int EncodeImageInternal( } } tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); - if (tokens == NULL) goto Error; + if (tokens == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } for (i = 0; i < 5 * histogram_image_size; ++i) { HuffmanTreeCode* const codes = &huffman_codes[i]; StoreHuffmanCode(bw, huff_tree, tokens, codes); @@ -1448,18 +1449,21 @@ static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; const size_t pad = vp8l_size & 1; const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad; + *coded_size = 0; + + if (bw->error_) { + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + } if (!WriteRiffHeader(pic, riff_size, vp8l_size) || !pic->writer(webpll_data, webpll_size, pic)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); } if (pad) { const uint8_t pad_byte[1] = { 0 }; if (!pic->writer(pad_byte, 1, pic)) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); } } *coded_size = CHUNK_HEADER_SIZE + riff_size; @@ -1504,8 +1508,7 @@ static int AllocateTransformBuffer(VP8LEncoder* const enc, int width, ClearTransformBuffer(enc); mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { - WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); } enc->transform_mem_ = mem; enc->transform_mem_size_ = (size_t)mem_size; @@ -1613,8 +1616,7 @@ static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst, int x, y; if (tmp_row == NULL) { - WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); } if (palette_size < APPLY_PALETTE_GREEDY_MAX) { @@ -1968,9 +1970,8 @@ int VP8LEncodeStream(const WebPConfig* const config, int ok_main; if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) { - WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); VP8LEncoderDelete(enc_main); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } // Avoid "garbage value" error from Clang's static analysis tool. @@ -2117,8 +2118,7 @@ int VP8LEncodeImage(const WebPConfig* const config, if (picture == NULL) return 0; if (config == NULL || picture->argb == NULL) { - WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); - return 0; + return WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); } width = picture->width; diff --git a/src/3rdparty/libwebp/src/enc/webp_enc.c b/src/3rdparty/libwebp/src/enc/webp_enc.c index 9620e05..583fe6a 100644 --- a/src/3rdparty/libwebp/src/enc/webp_enc.c +++ b/src/3rdparty/libwebp/src/enc/webp_enc.c @@ -307,7 +307,10 @@ int WebPEncodingSetError(const WebPPicture* const pic, WebPEncodingError error) { assert((int)error < VP8_ENC_ERROR_LAST); assert((int)error >= VP8_ENC_OK); - ((WebPPicture*)pic)->error_code = error; + // The oldest error reported takes precedence over the new one. + if (pic->error_code == VP8_ENC_OK) { + ((WebPPicture*)pic)->error_code = error; + } return 0; } @@ -317,8 +320,7 @@ int WebPReportProgress(const WebPPicture* const pic, *percent_store = percent; if (pic->progress_hook && !pic->progress_hook(percent, pic)) { // user abort requested - WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); - return 0; + return WebPEncodingSetError(pic, VP8_ENC_ERROR_USER_ABORT); } } return 1; // ok @@ -329,7 +331,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { int ok = 0; if (pic == NULL) return 0; - WebPEncodingSetError(pic, VP8_ENC_OK); // all ok so far + pic->error_code = VP8_ENC_OK; // all ok so far if (config == NULL) { // bad params return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER); } diff --git a/src/3rdparty/libwebp/src/mux/muxi.h b/src/3rdparty/libwebp/src/mux/muxi.h index 7929138..fc44d6f 100644 --- a/src/3rdparty/libwebp/src/mux/muxi.h +++ b/src/3rdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 3 -#define MUX_REV_VERSION 0 +#define MUX_REV_VERSION 1 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/src/3rdparty/libwebp/src/mux/muxread.c b/src/3rdparty/libwebp/src/mux/muxread.c index 8005039..9862ec6 100644 --- a/src/3rdparty/libwebp/src/mux/muxread.c +++ b/src/3rdparty/libwebp/src/mux/muxread.c @@ -116,9 +116,12 @@ static int MuxImageParse(const WebPChunk* const chunk, int copy_data, // Each of ANMF chunk contain a header at the beginning. So, its size should // be at least 'hdr_size'. if (size < hdr_size) goto Fail; - ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_); + if (ChunkAssignData(&subchunk, &temp, copy_data, + chunk->tag_) != WEBP_MUX_OK) { + goto Fail; + } } - ChunkSetHead(&subchunk, &wpi->header_); + if (ChunkSetHead(&subchunk, &wpi->header_) != WEBP_MUX_OK) goto Fail; wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks. // Rest of the chunks. diff --git a/src/3rdparty/libwebp/src/utils/bit_reader_utils.c b/src/3rdparty/libwebp/src/utils/bit_reader_utils.c index 857cd60..a26557a 100644 --- a/src/3rdparty/libwebp/src/utils/bit_reader_utils.c +++ b/src/3rdparty/libwebp/src/utils/bit_reader_utils.c @@ -15,6 +15,7 @@ #include "src/webp/config.h" #endif +#include "src/dsp/cpu.h" #include "src/utils/bit_reader_inl_utils.h" #include "src/utils/utils.h" @@ -121,7 +122,7 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits, #define VP8L_LOG8_WBITS 4 // Number of bytes needed to store VP8L_WBITS bits. -#if defined(__arm__) || defined(_M_ARM) || defined(__aarch64__) || \ +#if defined(__arm__) || defined(_M_ARM) || WEBP_AARCH64 || \ defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64__) || defined(_M_X64) #define VP8L_USE_FAST_LOAD diff --git a/src/3rdparty/libwebp/src/utils/bit_reader_utils.h b/src/3rdparty/libwebp/src/utils/bit_reader_utils.h index e64156e..25ff31e 100644 --- a/src/3rdparty/libwebp/src/utils/bit_reader_utils.h +++ b/src/3rdparty/libwebp/src/utils/bit_reader_utils.h @@ -19,6 +19,7 @@ #ifdef _MSC_VER #include // _byteswap_ulong #endif +#include "src/dsp/cpu.h" #include "src/webp/types.h" // Warning! This macro triggers quite some MACRO wizardry around func signature! @@ -64,7 +65,7 @@ extern "C" { #define BITS 56 #elif defined(__arm__) || defined(_M_ARM) // ARM #define BITS 24 -#elif defined(__aarch64__) // ARM 64bit +#elif WEBP_AARCH64 // ARM 64bit #define BITS 56 #elif defined(__mips__) // MIPS #define BITS 24 diff --git a/src/3rdparty/libwebp/src/webp/decode.h b/src/3rdparty/libwebp/src/webp/decode.h index d982475..0177b12 100644 --- a/src/3rdparty/libwebp/src/webp/decode.h +++ b/src/3rdparty/libwebp/src/webp/decode.h @@ -81,10 +81,11 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size, // returned is the Y samples buffer. Upon return, *u and *v will point to // the U and V chroma data. These U and V buffers need NOT be passed to // WebPFree(), unlike the returned Y luma one. The dimension of the U and V -// planes are both (*width + 1) / 2 and (*height + 1)/ 2. +// planes are both (*width + 1) / 2 and (*height + 1) / 2. // Upon return, the Y buffer has a stride returned as '*stride', while U and V // have a common stride returned as '*uv_stride'. -// Return NULL in case of error. +// 'width' and 'height' may be NULL, the other pointers must not be. +// Returns NULL in case of error. // (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size, int* width, int* height, diff --git a/src/3rdparty/libwebp/ChangeLog b/src/3rdparty/libwebp/ChangeLog index 5e85875..33ec486 100644 --- a/src/3rdparty/libwebp/ChangeLog +++ b/src/3rdparty/libwebp/ChangeLog @@ -1,3 +1,7 @@ +1ace578c update NEWS +63234c42 bump version to 1.3.2 +2af26267 Fix OOB write in BuildHuffmanTable. +fd7bb21c update ChangeLog (tag: v1.3.1-rc2, tag: v1.3.1) e1adea50 update NEWS 43393320 enc/*: normalize WebPEncodingSetError() calls 287fdefe enc/*: add missing WebPEncodingSetError() calls diff --git a/src/3rdparty/libwebp/NEWS b/src/3rdparty/libwebp/NEWS index 2111d33..47f8451 100644 --- a/src/3rdparty/libwebp/NEWS +++ b/src/3rdparty/libwebp/NEWS @@ -1,3 +1,7 @@ +- 9/13/2023: version 1.3.2 + This is a binary compatible release. + * security fix for lossless decoder (chromium: #1479274, CVE-2023-4863) + - 6/23/2023: version 1.3.1 This is a binary compatible release. * security fixes for lossless encoder (#603, chromium: #1420107, #1455619, diff --git a/src/3rdparty/libwebp/qt_attribution.json b/src/3rdparty/libwebp/qt_attribution.json index 1b71a55..f1ddc6e 100644 --- a/src/3rdparty/libwebp/qt_attribution.json +++ b/src/3rdparty/libwebp/qt_attribution.json @@ -6,8 +6,8 @@ "Description": "WebP is a new image format that provides lossless and lossy compression for images on the web.", "Homepage": "https://developers.google.com/speed/webp/", - "Version": "1.3.1", - "DownloadLocation": "https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-1.3.1.tar.gz", + "Version": "1.3.2", + "DownloadLocation": "https://storage.googleapis.com/downloads.webmproject.org/releases/webp/libwebp-1.3.2.tar.gz", "License": "BSD 3-clause \"New\" or \"Revised\" License", "LicenseId": "BSD-3-Clause", "LicenseFile": "COPYING", diff --git a/src/3rdparty/libwebp/src/dec/vp8i_dec.h b/src/3rdparty/libwebp/src/dec/vp8i_dec.h index 1ae4ff6..7929fd7 100644 --- a/src/3rdparty/libwebp/src/dec/vp8i_dec.h +++ b/src/3rdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 3 -#define DEC_REV_VERSION 1 +#define DEC_REV_VERSION 2 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/src/3rdparty/libwebp/src/dec/vp8l_dec.c b/src/3rdparty/libwebp/src/dec/vp8l_dec.c index c0ea018..7995313 100644 --- a/src/3rdparty/libwebp/src/dec/vp8l_dec.c +++ b/src/3rdparty/libwebp/src/dec/vp8l_dec.c @@ -253,11 +253,11 @@ static int ReadHuffmanCodeLengths( int symbol; int max_symbol; int prev_code_len = DEFAULT_CODE_LENGTH; - HuffmanCode table[1 << LENGTHS_TABLE_BITS]; + HuffmanTables tables; - if (!VP8LBuildHuffmanTable(table, LENGTHS_TABLE_BITS, - code_length_code_lengths, - NUM_CODE_LENGTH_CODES)) { + if (!VP8LHuffmanTablesAllocate(1 << LENGTHS_TABLE_BITS, &tables) || + !VP8LBuildHuffmanTable(&tables, LENGTHS_TABLE_BITS, + code_length_code_lengths, NUM_CODE_LENGTH_CODES)) { goto End; } @@ -277,7 +277,7 @@ static int ReadHuffmanCodeLengths( int code_len; if (max_symbol-- == 0) break; VP8LFillBitWindow(br); - p = &table[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; + p = &tables.curr_segment->start[VP8LPrefetchBits(br) & LENGTHS_TABLE_MASK]; VP8LSetBitPos(br, br->bit_pos_ + p->bits); code_len = p->value; if (code_len < kCodeLengthLiterals) { @@ -300,6 +300,7 @@ static int ReadHuffmanCodeLengths( ok = 1; End: + VP8LHuffmanTablesDeallocate(&tables); if (!ok) dec->status_ = VP8_STATUS_BITSTREAM_ERROR; return ok; } @@ -307,7 +308,8 @@ static int ReadHuffmanCodeLengths( // 'code_lengths' is pre-allocated temporary buffer, used for creating Huffman // tree. static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, - int* const code_lengths, HuffmanCode* const table) { + int* const code_lengths, + HuffmanTables* const table) { int ok = 0; int size = 0; VP8LBitReader* const br = &dec->br_; @@ -362,8 +364,7 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, VP8LMetadata* const hdr = &dec->hdr_; uint32_t* huffman_image = NULL; HTreeGroup* htree_groups = NULL; - HuffmanCode* huffman_tables = NULL; - HuffmanCode* huffman_table = NULL; + HuffmanTables* huffman_tables = &hdr->huffman_tables_; int num_htree_groups = 1; int num_htree_groups_max = 1; int max_alphabet_size = 0; @@ -372,6 +373,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int* mapping = NULL; int ok = 0; + // Check the table has been 0 initialized (through InitMetadata). + assert(huffman_tables->root.start == NULL); + assert(huffman_tables->curr_segment == NULL); + if (allow_recursion && VP8LReadBits(br, 1)) { // use meta Huffman codes. const int huffman_precision = VP8LReadBits(br, 3) + 2; @@ -434,16 +439,15 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, code_lengths = (int*)WebPSafeCalloc((uint64_t)max_alphabet_size, sizeof(*code_lengths)); - huffman_tables = (HuffmanCode*)WebPSafeMalloc(num_htree_groups * table_size, - sizeof(*huffman_tables)); htree_groups = VP8LHtreeGroupsNew(num_htree_groups); - if (htree_groups == NULL || code_lengths == NULL || huffman_tables == NULL) { + if (htree_groups == NULL || code_lengths == NULL || + !VP8LHuffmanTablesAllocate(num_htree_groups * table_size, + huffman_tables)) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; goto Error; } - huffman_table = huffman_tables; for (i = 0; i < num_htree_groups_max; ++i) { // If the index "i" is unused in the Huffman image, just make sure the // coefficients are valid but do not store them. @@ -468,19 +472,20 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, int max_bits = 0; for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) { int alphabet_size = kAlphabetSize[j]; - htrees[j] = huffman_table; if (j == 0 && color_cache_bits > 0) { alphabet_size += (1 << color_cache_bits); } - size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table); + size = + ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_tables); + htrees[j] = huffman_tables->curr_segment->curr_table; if (size == 0) { goto Error; } if (is_trivial_literal && kLiteralMap[j] == 1) { - is_trivial_literal = (huffman_table->bits == 0); + is_trivial_literal = (htrees[j]->bits == 0); } - total_size += huffman_table->bits; - huffman_table += size; + total_size += htrees[j]->bits; + huffman_tables->curr_segment->curr_table += size; if (j <= ALPHA) { int local_max_bits = code_lengths[0]; int k; @@ -515,14 +520,13 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize, hdr->huffman_image_ = huffman_image; hdr->num_htree_groups_ = num_htree_groups; hdr->htree_groups_ = htree_groups; - hdr->huffman_tables_ = huffman_tables; Error: WebPSafeFree(code_lengths); WebPSafeFree(mapping); if (!ok) { WebPSafeFree(huffman_image); - WebPSafeFree(huffman_tables); + VP8LHuffmanTablesDeallocate(huffman_tables); VP8LHtreeGroupsFree(htree_groups); } return ok; @@ -1358,7 +1362,7 @@ static void ClearMetadata(VP8LMetadata* const hdr) { assert(hdr != NULL); WebPSafeFree(hdr->huffman_image_); - WebPSafeFree(hdr->huffman_tables_); + VP8LHuffmanTablesDeallocate(&hdr->huffman_tables_); VP8LHtreeGroupsFree(hdr->htree_groups_); VP8LColorCacheClear(&hdr->color_cache_); VP8LColorCacheClear(&hdr->saved_color_cache_); @@ -1673,7 +1677,7 @@ int VP8LDecodeImage(VP8LDecoder* const dec) { if (dec == NULL) return 0; - assert(dec->hdr_.huffman_tables_ != NULL); + assert(dec->hdr_.huffman_tables_.root.start != NULL); assert(dec->hdr_.htree_groups_ != NULL); assert(dec->hdr_.num_htree_groups_ > 0); diff --git a/src/3rdparty/libwebp/src/dec/vp8li_dec.h b/src/3rdparty/libwebp/src/dec/vp8li_dec.h index 72b2e86..32540a4 100644 --- a/src/3rdparty/libwebp/src/dec/vp8li_dec.h +++ b/src/3rdparty/libwebp/src/dec/vp8li_dec.h @@ -51,7 +51,7 @@ typedef struct { uint32_t* huffman_image_; int num_htree_groups_; HTreeGroup* htree_groups_; - HuffmanCode* huffman_tables_; + HuffmanTables huffman_tables_; } VP8LMetadata; typedef struct VP8LDecoder VP8LDecoder; diff --git a/src/3rdparty/libwebp/src/demux/demux.c b/src/3rdparty/libwebp/src/demux/demux.c index fd45a25..4b0d3f5 100644 --- a/src/3rdparty/libwebp/src/demux/demux.c +++ b/src/3rdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 3 -#define DMUX_REV_VERSION 1 +#define DMUX_REV_VERSION 2 typedef struct { size_t start_; // start location of the data diff --git a/src/3rdparty/libwebp/src/enc/vp8i_enc.h b/src/3rdparty/libwebp/src/enc/vp8i_enc.h index 19d9a6e..0864fbf 100644 --- a/src/3rdparty/libwebp/src/enc/vp8i_enc.h +++ b/src/3rdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 3 -#define ENC_REV_VERSION 1 +#define ENC_REV_VERSION 2 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost diff --git a/src/3rdparty/libwebp/src/mux/muxi.h b/src/3rdparty/libwebp/src/mux/muxi.h index fc44d6f..afc5954 100644 --- a/src/3rdparty/libwebp/src/mux/muxi.h +++ b/src/3rdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 3 -#define MUX_REV_VERSION 1 +#define MUX_REV_VERSION 2 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/src/3rdparty/libwebp/src/utils/huffman_utils.c b/src/3rdparty/libwebp/src/utils/huffman_utils.c index 90c2fbf..cf73abd 100644 --- a/src/3rdparty/libwebp/src/utils/huffman_utils.c +++ b/src/3rdparty/libwebp/src/utils/huffman_utils.c @@ -177,21 +177,24 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, if (num_open < 0) { return 0; } - if (root_table == NULL) continue; for (; count[len] > 0; --count[len]) { HuffmanCode code; if ((key & mask) != low) { - table += table_size; + if (root_table != NULL) table += table_size; table_bits = NextTableBitSize(count, len, root_bits); table_size = 1 << table_bits; total_size += table_size; low = key & mask; - root_table[low].bits = (uint8_t)(table_bits + root_bits); - root_table[low].value = (uint16_t)((table - root_table) - low); + if (root_table != NULL) { + root_table[low].bits = (uint8_t)(table_bits + root_bits); + root_table[low].value = (uint16_t)((table - root_table) - low); + } + } + if (root_table != NULL) { + code.bits = (uint8_t)(len - root_bits); + code.value = (uint16_t)sorted[symbol++]; + ReplicateValue(&table[key >> root_bits], step, table_size, code); } - code.bits = (uint8_t)(len - root_bits); - code.value = (uint16_t)sorted[symbol++]; - ReplicateValue(&table[key >> root_bits], step, table_size, code); key = GetNextKey(key, len); } } @@ -211,25 +214,83 @@ static int BuildHuffmanTable(HuffmanCode* const root_table, int root_bits, ((1 << MAX_CACHE_BITS) + NUM_LITERAL_CODES + NUM_LENGTH_CODES) // Cut-off value for switching between heap and stack allocation. #define SORTED_SIZE_CUTOFF 512 -int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, +int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits, const int code_lengths[], int code_lengths_size) { - int total_size; + const int total_size = + BuildHuffmanTable(NULL, root_bits, code_lengths, code_lengths_size, NULL); assert(code_lengths_size <= MAX_CODE_LENGTHS_SIZE); - if (root_table == NULL) { - total_size = BuildHuffmanTable(NULL, root_bits, - code_lengths, code_lengths_size, NULL); - } else if (code_lengths_size <= SORTED_SIZE_CUTOFF) { + if (total_size == 0 || root_table == NULL) return total_size; + + if (root_table->curr_segment->curr_table + total_size >= + root_table->curr_segment->start + root_table->curr_segment->size) { + // If 'root_table' does not have enough memory, allocate a new segment. + // The available part of root_table->curr_segment is left unused because we + // need a contiguous buffer. + const int segment_size = root_table->curr_segment->size; + struct HuffmanTablesSegment* next = + (HuffmanTablesSegment*)WebPSafeMalloc(1, sizeof(*next)); + if (next == NULL) return 0; + // Fill the new segment. + // We need at least 'total_size' but if that value is small, it is better to + // allocate a big chunk to prevent more allocations later. 'segment_size' is + // therefore chosen (any other arbitrary value could be chosen). + next->size = total_size > segment_size ? total_size : segment_size; + next->start = + (HuffmanCode*)WebPSafeMalloc(next->size, sizeof(*next->start)); + if (next->start == NULL) { + WebPSafeFree(next); + return 0; + } + next->curr_table = next->start; + next->next = NULL; + // Point to the new segment. + root_table->curr_segment->next = next; + root_table->curr_segment = next; + } + if (code_lengths_size <= SORTED_SIZE_CUTOFF) { // use local stack-allocated array. uint16_t sorted[SORTED_SIZE_CUTOFF]; - total_size = BuildHuffmanTable(root_table, root_bits, - code_lengths, code_lengths_size, sorted); - } else { // rare case. Use heap allocation. + BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits, + code_lengths, code_lengths_size, sorted); + } else { // rare case. Use heap allocation. uint16_t* const sorted = (uint16_t*)WebPSafeMalloc(code_lengths_size, sizeof(*sorted)); if (sorted == NULL) return 0; - total_size = BuildHuffmanTable(root_table, root_bits, - code_lengths, code_lengths_size, sorted); + BuildHuffmanTable(root_table->curr_segment->curr_table, root_bits, + code_lengths, code_lengths_size, sorted); WebPSafeFree(sorted); } return total_size; } + +int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables) { + // Have 'segment' point to the first segment for now, 'root'. + HuffmanTablesSegment* const root = &huffman_tables->root; + huffman_tables->curr_segment = root; + // Allocate root. + root->start = (HuffmanCode*)WebPSafeMalloc(size, sizeof(*root->start)); + if (root->start == NULL) return 0; + root->curr_table = root->start; + root->next = NULL; + root->size = size; + return 1; +} + +void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables) { + HuffmanTablesSegment *current, *next; + if (huffman_tables == NULL) return; + // Free the root node. + current = &huffman_tables->root; + next = current->next; + WebPSafeFree(current->start); + current->start = NULL; + current->next = NULL; + current = next; + // Free the following nodes. + while (current != NULL) { + next = current->next; + WebPSafeFree(current->start); + WebPSafeFree(current); + current = next; + } +} diff --git a/src/3rdparty/libwebp/src/utils/huffman_utils.h b/src/3rdparty/libwebp/src/utils/huffman_utils.h index 13b7ad1..98415c5 100644 --- a/src/3rdparty/libwebp/src/utils/huffman_utils.h +++ b/src/3rdparty/libwebp/src/utils/huffman_utils.h @@ -43,6 +43,29 @@ typedef struct { // or non-literal symbol otherwise } HuffmanCode32; +// Contiguous memory segment of HuffmanCodes. +typedef struct HuffmanTablesSegment { + HuffmanCode* start; + // Pointer to where we are writing into the segment. Starts at 'start' and + // cannot go beyond 'start' + 'size'. + HuffmanCode* curr_table; + // Pointer to the next segment in the chain. + struct HuffmanTablesSegment* next; + int size; +} HuffmanTablesSegment; + +// Chained memory segments of HuffmanCodes. +typedef struct HuffmanTables { + HuffmanTablesSegment root; + // Currently processed segment. At first, this is 'root'. + HuffmanTablesSegment* curr_segment; +} HuffmanTables; + +// Allocates a HuffmanTables with 'size' contiguous HuffmanCodes. Returns 0 on +// memory allocation error, 1 otherwise. +int VP8LHuffmanTablesAllocate(int size, HuffmanTables* huffman_tables); +void VP8LHuffmanTablesDeallocate(HuffmanTables* const huffman_tables); + #define HUFFMAN_PACKED_BITS 6 #define HUFFMAN_PACKED_TABLE_SIZE (1u << HUFFMAN_PACKED_BITS) @@ -78,9 +101,7 @@ void VP8LHtreeGroupsFree(HTreeGroup* const htree_groups); // the huffman table. // Returns built table size or 0 in case of error (invalid tree or // memory error). -// If root_table is NULL, it returns 0 if a lookup cannot be built, something -// > 0 otherwise (but not the table size). -int VP8LBuildHuffmanTable(HuffmanCode* const root_table, int root_bits, +int VP8LBuildHuffmanTable(HuffmanTables* const root_table, int root_bits, const int code_lengths[], int code_lengths_size); #ifdef __cplusplus -- 2.27.0.windows.1