diff --git a/CMakeLists.txt b/CMakeLists.txt index 78dd1109..bed820ed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,7 +62,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_SSE=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_SSE=0") endif() - + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -static-libgcc -static-libstdc++ -static") else() if (SSE) @@ -72,7 +72,7 @@ if (NOT MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBASISU_SUPPORT_SSE=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBASISU_SUPPORT_SSE=0") endif() - + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GCC_LINK_FLAGS} -Wl,-rpath .") endif() @@ -93,7 +93,7 @@ else() endif() endif() -set(BASISU_SRC_LIST ${COMMON_SRC_LIST} +set(BASISU_SRC_LIST ${COMMON_SRC_LIST} basisu_tool.cpp encoder/basisu_backend.cpp encoder/basisu_basis_file.cpp @@ -143,7 +143,7 @@ endif() if (NOT EMSCRIPTEN) install(TARGETS basisu DESTINATION bin) - + if (UNIX) if (CMAKE_BUILD_TYPE STREQUAL Release) if (APPLE) diff --git a/LICENSES/Apache-2.0.txt b/LICENSES/Apache-2.0.txt index 4ed90b95..527a83a2 100644 --- a/LICENSES/Apache-2.0.txt +++ b/LICENSES/Apache-2.0.txt @@ -7,17 +7,17 @@ AND DISTRIBUTION 1. Definitions. - + "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. - + "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. - + "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. @@ -26,31 +26,31 @@ or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 
- + "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. - + "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. - + "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. - + "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). - + "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, @@ -59,7 +59,7 @@ original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. - + "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative @@ -74,7 +74,7 @@ for the purpose of discussing and improving the Work, but excluding communicatio that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
- + "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated diff --git a/README.md b/README.md index 17a76cc5..f64467ab 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # basis_universal Basis Universal Supercompressed GPU Texture Codec -Basis Universal is a ["supercompressed"](http://gamma.cs.unc.edu/GST/gst.pdf) GPU texture data interchange system that supports two highly compressed intermediate file formats (.basis or the [.KTX2 open standard from the Khronos Group](https://github.khronos.org/KTX-Specification/)) that can be quickly transcoded to a [very wide variety](https://github.com/BinomialLLC/basis_universal/wiki/OpenGL-texture-format-enums-table) of GPU compressed and uncompressed pixel formats: ASTC 4x4 L/LA/RGB/RGBA, PVRTC1 4bpp RGB/RGBA, PVRTC2 RGB/RGBA, BC7 mode 6 RGB, BC7 mode 5 RGB/RGBA, BC1-5 RGB/RGBA/X/XY, ETC1 RGB, ETC2 RGBA, ATC RGB/RGBA, ETC2 EAC R11 and RG11, FXT1 RGB, and uncompressed raster image formats 8888/565/4444. +Basis Universal is a ["supercompressed"](http://gamma.cs.unc.edu/GST/gst.pdf) GPU texture data interchange system that supports two highly compressed intermediate file formats (.basis or the [.KTX2 open standard from the Khronos Group](https://github.khronos.org/KTX-Specification/)) that can be quickly transcoded to a [very wide variety](https://github.com/BinomialLLC/basis_universal/wiki/OpenGL-texture-format-enums-table) of GPU compressed and uncompressed pixel formats: ASTC 4x4 L/LA/RGB/RGBA, PVRTC1 4bpp RGB/RGBA, PVRTC2 RGB/RGBA, BC7 mode 6 RGB, BC7 mode 5 RGB/RGBA, BC1-5 RGB/RGBA/X/XY, ETC1 RGB, ETC2 RGBA, ATC RGB/RGBA, ETC2 EAC R11 and RG11, FXT1 RGB, and uncompressed raster image formats 8888/565/4444. 
The system now supports two modes: a high quality mode which is internally based off the [UASTC compressed texture format](https://richg42.blogspot.com/2020/01/uastc-block-format-encoding.html), and the original lower quality mode which is based off a subset of ETC1 called "ETC1S". UASTC is for extremely high quality (similar to BC7 quality) textures, and ETC1S is for very small files. The ETC1S system includes built-in data compression, while the UASTC system includes an optional Rate Distortion Optimization (RDO) post-process stage that conditions the encoded UASTC texture data in the .basis file so it can be more effectively LZ compressed by the end user. More technical details about UASTC integration are [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-implementation-details). @@ -11,7 +11,7 @@ The system's bitrate depends on the quality setting and image content, but commo The .basis and .KTX2 transcoders have been fuzz tested using [zzuf](https://www.linux.com/news/fuzz-testing-zzuf). -So far, we've compiled the code using MSVC 2019, under Ubuntu 18.04 and 20 x64 using cmake with either clang 3.8 or gcc 5.4, and emscripten 1.35 to asm.js. (Be sure to use this version or later of emcc, as earlier versions fail with internal errors/exceptions during compilation.) +So far, we've compiled the code using MSVC 2019, under Ubuntu 18.04 and 20 x64 using cmake with either clang 3.8 or gcc 5.4, and emscripten 1.35 to asm.js. (Be sure to use this version or later of emcc, as earlier versions fail with internal errors/exceptions during compilation.) Basis Universal supports "skip blocks" in ETC1S compressed texture arrays, which makes it useful for basic [compressed texture video](http://gamma.cs.unc.edu/MPTC/) applications. Note that Basis Universal is still at heart a GPU texture compression system, not a dedicated video codec, so bitrates will be larger than even MPEG1. 
1/10/21 release notes: @@ -22,11 +22,11 @@ For v1.13, we've added numerous ETC1S encoder optimizations designed to greatly ### Quick Introduction -Probably the most important concept to understand about Basis Universal before using it: The system supports **two** very different universal texture modes: The original "ETC1S" mode is low/medium quality, but the resulting file sizes are very small because the system has built-in compression for ETC1S texture format files. This is the command line encoding tool's default mode. ETC1S textures work best on images, photos, map data, or albedo/specular/etc. textures, but don't work as well on normal maps. +Probably the most important concept to understand about Basis Universal before using it: The system supports **two** very different universal texture modes: The original "ETC1S" mode is low/medium quality, but the resulting file sizes are very small because the system has built-in compression for ETC1S texture format files. This is the command line encoding tool's default mode. ETC1S textures work best on images, photos, map data, or albedo/specular/etc. textures, but don't work as well on normal maps. There's the second "UASTC" mode, which is significantly higher quality (comparable to BC7 and highest quality LDR ASTC 4x4), and is usable on all texture types including complex normal maps. UASTC mode purposely does not have built-in file compression like ETC1S mode does, so the resulting files are quite large (8-bits/texel - same as BC7) compared to ETC1S mode. The UASTC encoder has an optional Rate Distortion Optimization (RDO) encoding mode (implemented as a post-process over the encoded UASTC texture data), which conditions the output texture data in a way that results in better lossless compression when UASTC .basis files are compressed with Deflate/Zstd, etc. In UASTC mode, you must losslessly compress .basis files yourself. 
.KTX2 files have built-in lossless compression support using [Zstandard](https://facebook.github.io/zstd/), which is used by default on UASTC textures. -Basis Universal is not an image compression codec, but a GPU texture compression codec. It can be used just like an image compression codec, but that's not the only use case. Here's a [good intro](http://renderingpipeline.com/2012/07/texture-compression/) to GPU texture compression. If you're looking to primarily use the system as an image compression codec on sRGB photographic content, use the default ETC1S mode, because it has built-in compression. +Basis Universal is not an image compression codec, but a GPU texture compression codec. It can be used just like an image compression codec, but that's not the only use case. Here's a [good intro](http://renderingpipeline.com/2012/07/texture-compression/) to GPU texture compression. If you're looking to primarily use the system as an image compression codec on sRGB photographic content, use the default ETC1S mode, because it has built-in compression. **The "-q X" option controls the output quality in ETC1S mode.** The default is quality level 128. "-q 255" will increase quality quite a bit. If you want even higher quality, try "-max_selectors 16128 -max_endpoints 16128" instead of -q. -q internally tries to set the codebook sizes (or the # of quantization intervals for endpoints/selectors) for you. You need to experiment with the quality level on your content. @@ -48,7 +48,7 @@ The encoder optionally uses Zstandard's single source file compressor (in zstd/z ### Command Line Compression Tool -The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. +The command line tool used to create, validate, and transcode/unpack .basis/.KTX2 files is named "basisu". Run basisu without any parameters for help. 
To build basisu (without SSE 4.1 support - the default): @@ -137,7 +137,7 @@ The mipmapped or cubemap .KTX files will be in a wide variety of compressed GPU After compression, the compressor transcodes all slices in the output .basis file to validate that the file decompresses correctly. It also validates all header, compressed data, and slice data CRC16's. -For best quality, you must **supply basisu with original uncompressed source images**. Any other type of lossy compression applied before basisu (including ETC1/BC1-5, BC7, JPEG, etc.) will cause multi-generational artifacts to appear in the final output textures. +For best quality, you must **supply basisu with original uncompressed source images**. Any other type of lossy compression applied before basisu (including ETC1/BC1-5, BC7, JPEG, etc.) will cause multi-generational artifacts to appear in the final output textures. For the maximum possible achievable ETC1S mode quality with the current format and encoder (completely ignoring encoding speed!), use: @@ -154,7 +154,7 @@ To compress small video sequences, say using tools like ffmpeg and VirtualDub: For video, the more cores your machine has, the better. Basis is intended for smaller videos of a few dozen seconds or so. If you are very patient and have a Threadripper or Xeon workstation, you should be able to encode up to a few thousand 720P frames. The "webgl_videotest" directory contains a very simple video viewer. For texture video, use -comp_level 2 or 3. The default is 1, which isn't quite good enough for texture video. Higher comp_level's result in reduced ETC1S artifacts. -The .basis file will contain multiple images (all using the same global codebooks), which you can retrieve using the transcoder's image API. The system now supports [conditional replenisment](https://en.wikipedia.org/wiki/MPEG-1) (CR, or "skip blocks"). CR can reduce the bitrate of some videos (highly dependent on how dynamic the content is) by over 50%. 
For videos using CR, the images must be requested from the transcoder in sequence from first to last, and random access is only allowed to I-Frames. +The .basis file will contain multiple images (all using the same global codebooks), which you can retrieve using the transcoder's image API. The system now supports [conditional replenisment](https://en.wikipedia.org/wiki/MPEG-1) (CR, or "skip blocks"). CR can reduce the bitrate of some videos (highly dependent on how dynamic the content is) by over 50%. For videos using CR, the images must be requested from the transcoder in sequence from first to last, and random access is only allowed to I-Frames. If you are doing rate distortion comparisons vs. other similar systems, be sure to experiment with increasing the endpoint RDO threshold (-endpoint_rdo_thresh X). This setting controls how aggressively the compressor's backend will combine together nearby blocks so they use the same block endpoint codebook vectors, for better coding efficiency. X defaults to a modest 1.5, which means the backend is allowed to increase the overall color distance by 1.5x while searching for merge candidates. The higher this setting, the better the compression, with the tradeoff of more block artifacts. Settings up to ~2.25 can work well, and make the codec more competitive. "-endpoint_rdo_thresh 1.75" is a good setting on many textures. @@ -200,7 +200,7 @@ Compress a 20 sRGB source image video sequence (x01.png, x02.png, x03.png, etc.) `basisu -comp_level 2 -q 255 -file x.png -mipmap -y_flip`\ Compress a mipmapped x.basis file from an sRGB image named x.png, Y flip each source image, set encoder to level 2 for slightly higher quality (but slower encoding). -### WebGL test +### WebGL test The "WebGL" directory contains three simple WebGL demos that use the transcoder and compressor compiled to wasm with [emscripten](https://emscripten.org/). See more details [here](webgl/README.md). 
@@ -212,7 +212,7 @@ The "WebGL" directory contains three simple WebGL demos that use the transcoder Both the transcoder and now the compressor (as of 12/17/2020) may be compiled using emscripten to WebAssembly and used on the web. Currently, multithreading is not supported by the compressor when compiled with emscripten. A simple Web compression demo is in webgl/encode_test. All compressor features, including texture video, are supported and fully exposed. -To enable compression support compile the JavaScript wrappers in `webgl/transcoding/basis_wrappers.cpp` with `BASISU_SUPPORT_ENCODING` set to 1. See the webgl/encoding directory. +To enable compression support compile the JavaScript wrappers in `webgl/transcoding/basis_wrappers.cpp` with `BASISU_SUPPORT_ENCODING` set to 1. See the webgl/encoding directory. ### Repository Licensing with REUSE @@ -229,7 +229,7 @@ a clean, checked-out repository periodically, or run it during CI tests before any build artifacts have been created. ### Special Thanks -A huge thanks to Google for partnering with us and enabling this system to be open sourced. +A huge thanks to Google for partnering with us and enabling this system to be open sourced. Thank you to [Esri - Environmental Systems Research Institute](https://www.esri.com/) for sponsoring the encoder optimization work in the v1.13 release, and the KTX2 work in the v1.15 release. @@ -245,7 +245,7 @@ Thanks to Mike Dussault (SpaceX) and Elon Musk for supporting Binomial in the ea Thanks to Graeme Devine at Magic Leap. -Thanks to Matt Pritchard, formerly of Valve Software and Microsoft, for helping me with the computer hardware I used while building this system and its predecessor. +Thanks to Matt Pritchard, formerly of Valve Software and Microsoft, for helping me with the computer hardware I used while building this system and its predecessor. Thanks to John Brooks at Blue Shift, Inc. 
for inspiring this work by showing me his Dreamcast texture compression system around 2002, and for releasing etc2comp. I first saw the subblock flip estimation approach (used in basisu_etc.cpp) in etc2comp. @@ -258,7 +258,7 @@ Thanks to Mark Callow at Edgewise Consulting for his work on glTF and the KTX2 f I first saw using precomputed tables for quickly computing optimal encodings of solid color blocks in ryg_dxt. The method that limits the canonical Huffman codelengths to a maximum codesize was used in Yoshizaki's lharc. The canonical Huffman codelength compression system is similar to Katz's Deflate method. ### Possible improvements -The codebook generation process is basically a high quality, but slow and brute force reference. It's possible to massively speed up codebook gen in several ways. One way is to not throw away the tree structures constructed during the creation of the initial codebooks. +The codebook generation process is basically a high quality, but slow and brute force reference. It's possible to massively speed up codebook gen in several ways. One way is to not throw away the tree structures constructed during the creation of the initial codebooks. The way the -q (quality) option is converted to codebook sizes is very simple (fixed formulas), and could be improved. It has a tendency to plateau on some files. diff --git a/appveyor.yml b/appveyor.yml index c12d6645..1ab386a3 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,6 @@ --- -image: +image: - macos - Ubuntu2004 - Visual Studio 2019 diff --git a/basisu_tool.cpp b/basisu_tool.cpp index 2ab01151..e3bfe1b4 100644 --- a/basisu_tool.cpp +++ b/basisu_tool.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#if _MSC_VER -// For sprintf(), strcpy() +// For sprintf(), strcpy() #define _CRT_SECURE_NO_WARNINGS (1) #endif @@ -58,7 +58,7 @@ enum tool_mode static void print_usage() { printf("\nUsage: basisu filename [filename ...] \n"); - + puts("\n" "The default mode is compression of one or more PNG/BMP/TGA/JPG files to a .basis file. Alternate modes:\n" " -unpack: Use transcoder to unpack .basis file to one or more .ktx/.png files\n" @@ -225,7 +225,7 @@ static bool load_listing_file(const std::string &f, basisu::vector { if (read_filename[0] == ' ') read_filename.erase(0, 1); - else + else break; } @@ -234,7 +234,7 @@ static bool load_listing_file(const std::string &f, basisu::vector const char c = read_filename.back(); if ((c == ' ') || (c == '\n') || (c == '\r')) read_filename.erase(read_filename.size() - 1, 1); - else + else break; } @@ -383,13 +383,13 @@ class command_line_params int uastc_level = atoi(arg_v[arg_index + 1]); uastc_level = clamp(uastc_level, 0, TOTAL_PACK_UASTC_LEVELS - 1); - + static_assert(TOTAL_PACK_UASTC_LEVELS == 5, "TOTAL_PACK_UASTC_LEVELS==5"); static const uint32_t s_level_flags[TOTAL_PACK_UASTC_LEVELS] = { cPackUASTCLevelFastest, cPackUASTCLevelFaster, cPackUASTCLevelDefault, cPackUASTCLevelSlower, cPackUASTCLevelVerySlow }; - + m_comp_params.m_pack_uastc_flags &= ~cPackUASTCLevelMask; m_comp_params.m_pack_uastc_flags |= s_level_flags[uastc_level]; - + arg_count++; } else if (strcasecmp(pArg, "-resample_factor") == 0) @@ -567,7 +567,7 @@ class command_line_params { REMAINING_ARGS_CHECK(1); m_comp_params.m_mip_filter = arg_v[arg_index + 1]; - // TODO: Check filter + // TODO: Check filter arg_count++; } else if (strcasecmp(pArg, "-mip_renorm") == 0) @@ -700,7 +700,7 @@ class command_line_params arg_index += arg_count; } - + if (m_comp_params.m_quality_level != -1) { m_comp_params.m_max_endpoint_clusters = 0; @@ -722,7 +722,7 @@ class command_line_params else m_comp_params.m_mip_srgb = false; } - + return true; } @@ -753,12 +753,12 @@ class 
command_line_params new_input_alpha_filenames.push_back(m_input_alpha_filenames[i]); } new_input_alpha_filenames.swap(m_input_alpha_filenames); - + return true; } basis_compressor_params m_comp_params; - + tool_mode m_mode; bool m_ktx2_mode; @@ -767,7 +767,7 @@ class command_line_params uint32_t m_ktx2_animdata_duration; uint32_t m_ktx2_animdata_timescale; uint32_t m_ktx2_animdata_loopcount; - + basisu::vector m_input_filenames; basisu::vector m_input_alpha_filenames; @@ -793,13 +793,13 @@ static bool expand_multifile(command_line_params &opts) { if (!opts.m_multifile_printf.size()) return true; - + if (!opts.m_multifile_num) { error_printf("-multifile_printf specified, but not -multifile_num\n"); return false; } - + std::string fmt(opts.m_multifile_printf); // Workaround for MSVC debugger issues. Questionable to leave in here. size_t x = fmt.find_first_of('!'); @@ -811,15 +811,15 @@ static bool expand_multifile(command_line_params &opts) error_printf("Must include C-style printf() format character '%%' in -multifile_printf string\n"); return false; } - + for (uint32_t i = opts.m_multifile_first; i < opts.m_multifile_first + opts.m_multifile_num; i++) { char buf[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), fmt.c_str(), i); #else snprintf(buf, sizeof(buf), fmt.c_str(), i); -#endif +#endif if (buf[0]) opts.m_input_filenames.push_back(buf); @@ -830,8 +830,8 @@ static bool expand_multifile(command_line_params &opts) struct basis_data { - basis_data(basist::etc1_global_selector_codebook& sel_codebook) : - m_transcoder(&sel_codebook) + basis_data(basist::etc1_global_selector_codebook& sel_codebook) : + m_transcoder(&sel_codebook) { } uint8_vec m_file_data; @@ -894,7 +894,7 @@ static bool compress_mode(command_line_params &opts) job_pool jpool(num_threads); opts.m_comp_params.m_pJob_pool = &jpool; - + if (!expand_multifile(opts)) { error_printf("-multifile expansion failed!\n"); @@ -906,7 +906,7 @@ static bool compress_mode(command_line_params &opts) 
error_printf("No input files to process!\n"); return false; } - + basis_data* pGlobal_codebook_data = nullptr; if (opts.m_etc1s_use_global_codebooks_file.size()) { @@ -950,7 +950,7 @@ static bool compress_mode(command_line_params &opts) pGlobal_codebook_data2 = nullptr; #endif } - + basis_compressor_params ¶ms = opts.m_comp_params; if (opts.m_ktx2_mode) @@ -960,7 +960,7 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_uastc_supercompression = basist::KTX2_SS_ZSTANDARD; else params.m_ktx2_uastc_supercompression = basist::KTX2_SS_NONE; - + params.m_ktx2_srgb_transfer_func = opts.m_comp_params.m_perceptual; if (params.m_tex_type == basist::basis_texture_type::cBASISTexTypeVideoFrames) @@ -972,7 +972,7 @@ static bool compress_mode(command_line_params &opts) const char* pAD = "KTXanimData"; kv.m_key.resize(strlen(pAD) + 1); strcpy((char*)kv.m_key.data(), pAD); - + basist::ktx2_animdata ad; ad.m_duration = opts.m_ktx2_animdata_duration; ad.m_timescale = opts.m_ktx2_animdata_timescale; @@ -983,7 +983,7 @@ static bool compress_mode(command_line_params &opts) params.m_ktx2_key_values.push_back(kv); } - + // TODO- expose this to command line. params.m_ktx2_zstd_supercompression_level = opts.m_ktx2_zstandard_level; } @@ -991,7 +991,7 @@ static bool compress_mode(command_line_params &opts) params.m_read_source_images = true; params.m_write_output_basis_files = true; params.m_pSel_codebook = &sel_codebook; - params.m_pGlobal_codebooks = pGlobal_codebook_data ? &pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; + params.m_pGlobal_codebooks = pGlobal_codebook_data ? 
&pGlobal_codebook_data->m_transcoder.get_lowlevel_etc1s_decoder() : nullptr; FILE *pCSV_file = nullptr; if (opts.m_csv_file.size()) { @@ -1007,7 +1007,7 @@ static bool compress_mode(command_line_params &opts) } printf("Processing %u total file(s)\n", (uint32_t)opts.m_input_filenames.size()); - + interval_timer all_tm; all_tm.start(); @@ -1019,17 +1019,17 @@ static bool compress_mode(command_line_params &opts) params.m_source_filenames.resize(1); params.m_source_filenames[0] = opts.m_input_filenames[file_index]; - if (file_index < opts.m_input_alpha_filenames.size()) + if (file_index < opts.m_input_alpha_filenames.size()) { params.m_source_alpha_filenames.resize(1); params.m_source_alpha_filenames[0] = opts.m_input_alpha_filenames[file_index]; - + printf("Processing source file \"%s\", alpha file \"%s\"\n", params.m_source_filenames[0].c_str(), params.m_source_alpha_filenames[0].c_str()); } else { params.m_source_alpha_filenames.resize(0); - + printf("Processing source file \"%s\"\n", params.m_source_filenames[0].c_str()); } } @@ -1038,16 +1038,16 @@ static bool compress_mode(command_line_params &opts) params.m_source_filenames = opts.m_input_filenames; params.m_source_alpha_filenames = opts.m_input_alpha_filenames; } - + if ((opts.m_output_filename.size()) && (!opts.m_individual)) params.m_out_filename = opts.m_output_filename; - else + else { std::string filename; - + string_get_filename(opts.m_input_filenames[file_index].c_str(), filename); string_remove_extension(filename); - + if (opts.m_ktx2_mode) filename += ".ktx2"; else @@ -1055,10 +1055,10 @@ static bool compress_mode(command_line_params &opts) if (opts.m_output_path.size()) string_combine_path(filename, opts.m_output_path.c_str(), filename.c_str()); - + params.m_out_filename = filename; } - + basis_compressor c; if (!c.init(opts.m_comp_params)) @@ -1084,8 +1084,8 @@ static bool compress_mode(command_line_params &opts) if (ec == basis_compressor::cECSuccess) { - printf("Compression succeeded to file \"%s\" 
size %u bytes in %3.3f secs\n", params.m_out_filename.c_str(), - opts.m_ktx2_mode ? c.get_output_ktx2_file().size() : c.get_output_basis_file().size(), + printf("Compression succeeded to file \"%s\" size %u bytes in %3.3f secs\n", params.m_out_filename.c_str(), + opts.m_ktx2_mode ? c.get_output_ktx2_file().size() : c.get_output_basis_file().size(), tm.get_elapsed_secs()); } else @@ -1097,7 +1097,7 @@ static bool compress_mode(command_line_params &opts) case basis_compressor::cECFailedReadingSourceImages: { error_printf("Compressor failed reading a source image!\n"); - + if (opts.m_individual) exit_flag = false; @@ -1134,7 +1134,7 @@ static bool compress_mode(command_line_params &opts) error_printf("basis_compress::process() failed!\n"); break; } - + if (exit_flag) { if (pCSV_file) @@ -1187,7 +1187,7 @@ static bool compress_mode(command_line_params &opts) rgb_avg_psnr_avg /= c.get_stats().size(); a_avg_psnr_avg /= c.get_stats().size(); luma_709_psnr_avg /= c.get_stats().size(); - + fprintf(pCSV_file, "\"%s\", %u, %u, %u, %u, %u, %f, %f, %f, %f, %f, %u, %u, %f, %f, %f, %f, %f, %f, %f\n", params.m_out_filename.c_str(), c.get_basis_file_size(), @@ -1206,7 +1206,7 @@ static bool compress_mode(command_line_params &opts) } #endif } - + if (opts.m_individual) printf("\n"); @@ -1222,9 +1222,9 @@ static bool compress_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; - + return true; } @@ -1258,16 +1258,16 @@ static bool unpack_and_validate_ktx2_file( error_printf("ktx2_transcoder::start_transcoding() failed! 
File either uses an unsupported feature, is invalid, was corrupted, or this is a bug.\n"); return false; } - + printf("Resolution: %ux%u\n", dec.get_width(), dec.get_height()); printf("Mipmap Levels: %u\n", dec.get_levels()); printf("Texture Array Size (layers): %u\n", dec.get_layers()); printf("Total Faces: %u (%s)\n", dec.get_faces(), (dec.get_faces() == 6) ? "CUBEMAP" : "2D"); printf("Is Texture Video: %u\n", dec.is_video()); - + const bool is_etc1s = dec.get_format() == basist::basis_tex_format::cETC1S; printf("Supercompression Format: %s\n", is_etc1s ? "ETC1S" : "UASTC"); - + printf("Supercompression Scheme: "); switch (dec.get_header().m_supercompression_scheme) { @@ -1297,7 +1297,7 @@ static bool unpack_and_validate_ktx2_file( } else printf("DFD chan0: %s\n", basist::ktx2_get_uastc_df_channel_id_str(dec.get_dfd_channel_id0())); - + printf("DFD hex values:\n"); for (uint32_t i = 0; i < dec.get_dfd().size(); i++) { @@ -1315,13 +1315,13 @@ static bool unpack_and_validate_ktx2_file( if (dec.get_key_values()[i].m_value.size() > 256) continue; - + bool is_ascii = true; for (uint32_t j = 0; j < dec.get_key_values()[i].m_value.size(); j++) { uint8_t c = dec.get_key_values()[i].m_value[j]; - if (!( - ((c >= ' ') && (c < 0x80)) || + if (!( + ((c >= ' ') && (c < 0x80)) || ((j == dec.get_key_values()[i].m_value.size() - 1) && (!c)) )) { @@ -1637,7 +1637,7 @@ static bool unpack_and_validate_basis_file( uint32_t file_index, const std::string &base_filename, uint8_vec &basis_file_data, - command_line_params& opts, + command_line_params& opts, FILE *pCSV_file, basis_data* pGlobal_codebook_data, basist::etc1_global_selector_codebook &sel_codebook, @@ -1661,7 +1661,7 @@ static bool unpack_and_validate_basis_file( if (!dec.validate_file_checksums(&basis_file_data[0], (uint32_t)basis_file_data.size(), true)) { error_printf("File version is unsupported, or file failed one or more CRC checks!\n"); - + return false; } } @@ -2335,7 +2335,7 @@ static bool 
unpack_and_validate_mode(command_line_params &opts) delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; } - + bool is_ktx2 = false; if (file_data.size() >= sizeof(basist::g_ktx2_file_identifier)) { @@ -2374,10 +2374,10 @@ static bool unpack_and_validate_mode(command_line_params &opts) if (!status) { - if (pCSV_file) + if (pCSV_file) fclose(pCSV_file); - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return false; @@ -2398,7 +2398,7 @@ static bool unpack_and_validate_mode(command_line_params &opts) fclose(pCSV_file); pCSV_file = nullptr; } - delete pGlobal_codebook_data; + delete pGlobal_codebook_data; pGlobal_codebook_data = nullptr; return true; @@ -2466,7 +2466,7 @@ static bool compare_mode(command_line_params &opts) im.calc(a, b, 0, 0, true, true); im.print("Y 601 " ); - + if (opts.m_compare_ssim) { vec4F s_rgb(compute_ssim(a, b, false, false)); @@ -2509,10 +2509,10 @@ static bool compare_mode(command_line_params &opts) save_png("delta_img_rgb.png", delta_img, cImageSaveIgnoreAlpha); printf("Wrote delta_img_rgb.png\n"); - + save_png("delta_img_a.png", delta_img, cImageSaveGrayscale, 3); printf("Wrote delta_img_a.png\n"); - + return true; } @@ -3079,7 +3079,7 @@ static bool bench_mode(command_line_params& opts) // HACK HACK const uint32_t max_rdo_jobs = 4; - + char rdo_fname[256]; FILE* pFile = nullptr; for (uint32_t try_index = 0; try_index < 100; try_index++) @@ -3091,7 +3091,7 @@ static bool bench_mode(command_line_params& opts) fclose(pFile); continue; } - + pFile = fopen(rdo_fname, "w"); if (!pFile) printf("Cannot open CSV file %s\n", rdo_fname); @@ -3110,7 +3110,7 @@ static bool bench_mode(command_line_params& opts) p.m_lambda = q; p.m_max_allowed_rms_increase_ratio = 10.0f; p.m_skip_block_rms_thresh = 8.0f; - + bool rdo_status = uastc_rdo((uint32_t)ublocks.size(), &ublocks[0], &orig_block_pixels[0], p, flags, &jpool, max_rdo_jobs); if (!rdo_status) { @@ -3174,7 +3174,7 @@ static bool 
bench_mode(command_line_params& opts) } if (pFile) fclose(pFile); - + { size_t comp_size = 0; void* pComp_data = tdefl_compress_mem_to_heap(&ublocks[0], ublocks.size() * 16, &comp_size, TDEFL_MAX_PROBES_MASK);// TDEFL_DEFAULT_MAX_PROBES); @@ -3200,7 +3200,7 @@ static bool bench_mode(command_line_params& opts) total_rdo_raw_size += ublocks.size() * 16; total_comp_blocks += ublocks.size(); } - + printf("Total blocks: %u\n", total_blocks); printf("Total BC1 hint 0's: %u %3.1f%%\n", total_bc1_hint0s, total_bc1_hint0s * 100.0f / total_blocks); printf("Total BC1 hint 1's: %u %3.1f%%\n", total_bc1_hint1s, total_bc1_hint1s * 100.0f / total_blocks); @@ -3245,7 +3245,7 @@ static bool bench_mode(command_line_params& opts) c[i] = (uint8_t)v; } - + } #endif @@ -3366,7 +3366,7 @@ static bool bench_mode(command_line_params& opts) em.print("RDOUASTC RGBA "); total_rdo_uastc_rgba_psnr += basisu::minimum(99.0f, em.m_psnr); - // UASTC2 + // UASTC2 em.calc(img, uastc2_img, 0, 3); em.print("UASTC2 RGB "); total_uastc2_psnr += basisu::minimum(99.0f, em.m_psnr); @@ -3459,7 +3459,7 @@ static bool bench_mode(command_line_params& opts) total_obc1_psnr += basisu::minimum(99.0f, em.m_psnr); total_obc1_psnr_sq += basisu::minimum(99.0f, em.m_psnr) * basisu::minimum(99.0f, em.m_psnr); #endif - + em.calc(img, opt_bc1_2_img, 0, 3); em.print("OBC1 2 RGB "); total_obc1_2_psnr += basisu::minimum(99.0f, em.m_psnr); @@ -3593,7 +3593,7 @@ static bool bench_mode(command_line_params& opts) } // image_index printf("Total time: %f secs\n", otm.get_elapsed_secs()); - + printf("Total Non-RDO UASTC size: %llu, compressed size: %llu, %3.2f bits/texel\n", (unsigned long long)total_raw_size, (unsigned long long)total_comp_size, @@ -3706,7 +3706,7 @@ static int main_internal(int argc, const char **argv) //tm.start(); basisu_encoder_init(); - + //printf("Encoder and transcoder libraries initialized in %3.3f ms\n", tm.get_elapsed_ms()); #if defined(DEBUG) || defined(_DEBUG) diff --git 
a/contrib/previewers/lib/basisu_transcoder.cpp b/contrib/previewers/lib/basisu_transcoder.cpp index c5652e02..fa21c2bc 100644 --- a/contrib/previewers/lib/basisu_transcoder.cpp +++ b/contrib/previewers/lib/basisu_transcoder.cpp @@ -3,7 +3,7 @@ * \code * ./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -k basisu_transcoder.h -o basisu_transcoder.cpp basisu_transcoder-in.cpp * \endcode - * + * * \note The script above excludes the BC7 mode 6 tables, a choice reflected in * the build options. */ @@ -12,7 +12,7 @@ * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC; * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's * options; other platforms build all except BC7 mode 6 and FXT1). - * + * * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size */ #ifdef __APPLE__ @@ -175,7 +175,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) { -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -608,14 +608,14 @@ namespace basist //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; struct decoder_etc_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -883,7 +883,7 @@ namespace basist { return (m_bytes[3] & 2) != 0; } - + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -898,7 +898,7 @@ namespace basist const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } - + void 
get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const { color32 b; @@ -1016,7 +1016,7 @@ namespace basist g = c.g; b = c.b; } - + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) { result = unpack_color5(packed_color5, scaled, 255); @@ -1145,7 +1145,7 @@ namespace basist static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) { assert(index < 4); - + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1321,7 +1321,7 @@ namespace basist { 1, 2, 2, 2 }, { 1, 2, 3, 3 }, }; - + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; @@ -2703,9 +2703,9 @@ namespace basist return best_err; } #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES - + static -#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES const #endif etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = @@ -3193,7 +3193,7 @@ namespace basist #if BASISD_SUPPORT_UASTC void uastc_init(); #endif - + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. 
void basisu_transcoder_init() @@ -3201,11 +3201,11 @@ namespace basist static bool s_initialized; if (s_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; } - - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); @@ -3214,7 +3214,7 @@ namespace basist #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif - + #if BASISD_WRITE_NEW_ASTC_TABLES create_etc1_to_astc_conversion_table_0_47(); create_etc1_to_astc_conversion_table_0_255(); @@ -3470,7 +3470,7 @@ namespace basist std::swap(l, h); pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; } - + pDst_block->set_low_color(static_cast(l)); pDst_block->set_high_color(static_cast(h)); @@ -3630,7 +3630,7 @@ namespace basist fxt1_block* pBlock = static_cast(pDst); // CC_MIXED is basically DXT1 with different encoding tricks. - // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. + // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) 
dxt1_block blk; convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); @@ -3643,7 +3643,7 @@ namespace basist uint32_t g0 = color0.g & 1; uint32_t g1 = color1.g & 1; - + color0.g >>= 1; color1.g >>= 1; @@ -3651,7 +3651,7 @@ namespace basist blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); - + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) { std::swap(color0, color1); @@ -3665,7 +3665,7 @@ namespace basist if (fxt1_subblock == 0) { - pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_mode = 1; pBlock->m_hi.m_alpha = 0; pBlock->m_hi.m_glsb = g1 | (g1 << 1); pBlock->m_hi.m_r0 = color0.r; @@ -3986,7 +3986,7 @@ namespace basist { uint32_t r; decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); - + pDst_block->set_low_alpha(r); pDst_block->set_high_alpha(r); pDst_block->m_selectors[0] = 0; @@ -4069,7 +4069,7 @@ namespace basist static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; - + static const uint8_t g_pvrtc_5_floor[256] = { 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, @@ -4093,7 +4093,7 @@ namespace basist 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 }; - + static const uint8_t g_pvrtc_4_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -4117,7 +4117,7 @@ namespace basist 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 }; - + static const uint8_t g_pvrtc_3_floor[256] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -4141,7 +4141,7 @@ namespace basist 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - + static const uint8_t g_pvrtc_alpha_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -4248,10 +4248,10 @@ namespace basist } assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - + return color32(r, g, b, a); } - + inline color32 get_endpoint_8888(uint32_t endpoint_index) const { assert(endpoint_index < 2); @@ -4298,7 +4298,7 @@ namespace basist a = g_pvrtc_alpha[a]; } - + return color32(r, g, b, a); } @@ -4307,7 +4307,7 @@ namespace basist color32 c(get_endpoint_8888(endpoint_index)); return c.r + c.g + c.b + c.a; } - + inline uint32_t get_opaque_endpoint_l0() const { uint32_t packed = m_endpoints & 0xFFFE; @@ -4422,7 +4422,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + // opaque endpoints: 554 or 555 // transparent endpoints: 3443 or 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) @@ -4475,7 +4475,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) { assert(endpoint_index < 2); @@ -4700,7 +4700,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -4708,7 +4708,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -4857,8 +4857,8 @@ namespace basist } static void fixup_pvrtc1_4_modulation_rgba( - const decoder_etc_block* pETC_Blocks, - const uint32_t* pPVRTC_endpoints, + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t 
num_blocks_y, void *pAlpha_blocks, const endpoint* pEndpoints, const selector* pSelectors) { @@ -4881,7 +4881,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -4889,7 +4889,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -4903,13 +4903,13 @@ namespace basist for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) { const decoder_etc_block& src_block = pETC_Blocks[block_index]; - + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; - + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); - + uint32_t swizzled = x_swizzle | y_swizzle; if (num_blocks_x != num_blocks_y) { @@ -5052,7 +5052,7 @@ namespace basist const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; - + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = { @@ -5074,7 +5074,7 @@ namespace basist uint8_t m_hi; uint16_t m_err; }; - + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { /**** start inlining basisu_transcoder_tables_bc7_m5_color.inc ****/ 
{0,7,18},{0,5,2},{0,4,1},{0,3,8},{0,4,35},{0,3,24},{0,3,12},{0,2,29},{0,2,36},{0,2,30},{0,7,18},{0,5,2},{0,4,1},{0,3,8},{2,0,35},{0,3,24},{0,3,12},{0,2,29},{4,0,35},{0,2,29},{0,3,0},{0,3,0},{0,3,0},{0,1,1},{0,1,2},{0,1,2},{0,1,2},{0,1,1},{1,0,3},{0,1,2},{0,3,0}, @@ -5560,7 +5560,7 @@ namespace basist {5,127,1413}, /**** ended inlining basisu_transcoder_tables_bc7_m5_color.inc ****/ }; - + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = { { 0, 3 }, @@ -5635,7 +5635,7 @@ namespace basist {208,5,2}, /**** ended inlining basisu_transcoder_tables_bc7_m5_alpha.inc ****/ }; - + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t num_bits, uint32_t cur_ofs) { assert(num_bits < 32); @@ -5782,7 +5782,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -5861,7 +5861,7 @@ namespace basist int mapping_err = block_colors[s].g - colors[k]; mapping_err *= mapping_err; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) mapping_err *= 5; @@ -5872,7 +5872,7 @@ namespace basist best_k = k; } } // k - + total_err += best_mapping_err; output_selectors |= (best_k << (s * 2)); } // s @@ -5887,7 +5887,7 @@ namespace basist } // lo } // hi - + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); n++; if ((n & 31) == 31) @@ -5926,7 +5926,7 @@ namespace basist {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} }; - + static void transcoder_init_bc7_mode5() { #if 0 @@ -5954,9 +5954,9 @@ namespace basist } } // hi - + } // lo - + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); if ((i & 15) == 15) printf("\n"); } @@ -5980,7 +5980,7 @@ namespace basist static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { bc7_mode_5* pDst_block = static_cast(pDst); - + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; @@ -6106,7 +6106,7 @@ namespace basist pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; - + s_inv = 3; } else @@ -6127,7 +6127,7 @@ namespace basist for (uint32_t x = 0; x < 4; x++) { const uint32_t s = pSelector->get_selector(x, y); - + const uint32_t os = pSelectors_xlat[s] ^ s_inv; output_bits |= (os << output_bit_ofs); @@ -6157,7 +6157,7 @@ namespace basist pDst_block->m_lo.m_a0 = r; pDst_block->m_lo.m_a1_0 = r & 63; pDst_block->m_hi.m_a1_1 = r >> 6; - + return; } else if (pSelector->m_num_unique_selectors == 2) @@ -6207,7 +6207,7 @@ namespace basist } const uint32_t 
selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; - + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; pDst_block->m_lo.m_a0 = pTable->m_lo; @@ -7159,7 +7159,7 @@ namespace basist // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { /**** start inlining basisu_transcoder_tables_astc_0_255.inc ****/ @@ -7706,7 +7706,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 8; @@ -7727,7 +7727,7 @@ namespace basist mapping_best_high[m] = best_hi; mapping_best_err[m] = best_err; highest_best_err = basisu::maximum(highest_best_err, best_err); - + } // m for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) @@ -7803,7 +7803,7 @@ namespace basist { int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. int err_scale = 1; if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) @@ -7832,9 +7832,9 @@ namespace basist uint64_t err = mapping_best_err[m]; err = basisu::minimum(err, 0xFFFF); - + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); - + n++; if ((n & 31) == 31) fprintf(pFile, "\n"); @@ -7917,14 +7917,14 @@ namespace basist struct astc_block_params { // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; + uint8_t m_endpoints[10]; uint8_t m_weights[32]; }; - - // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). + + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. - // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization // 32 total weights, stored as 16 CA CA, each ranging from 0-3. 
@@ -7946,7 +7946,7 @@ namespace basist astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. - + for (uint32_t i = 0; i < 32; i++) { static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; @@ -7955,7 +7955,7 @@ namespace basist } } - // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) { @@ -7993,7 +7993,7 @@ namespace basist // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; - + pOutput[2] = 0; pOutput[3] = 0; @@ -8019,7 +8019,7 @@ namespace basist // Write constant block mode, color component selector, number of partitions, color endpoint mode // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; - + pOutput[1] = 0; pOutput[2] = 0; pOutput[3] = 0; @@ -8047,7 +8047,7 @@ namespace basist { uint8_t m_lo, m_hi; } g_astc_single_color_encoding_1[256]; - + static void transcoder_init_astc() { for (uint32_t base_color = 0; base_color < 32; base_color++) @@ -8125,7 +8125,7 @@ namespace basist g_ise_to_unquant[bit | (trit << 4)] = unq; } } - + // Compute table used for optimal single color encoding. 
for (int i = 0; i < 256; i++) { @@ -8140,9 +8140,9 @@ namespace basist int l = lo_v | (lo_v << 8); int h = hi_v | (hi_v << 8); - + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; - + int e = abs(v - i); if (e < lowest_e) @@ -8164,7 +8164,7 @@ namespace basist for (int lo = 0; lo < 48; lo++) { const int lo_v = g_ise_to_unquant[lo]; - + int e = abs(lo_v - i); if (e < lowest_e) @@ -8179,7 +8179,7 @@ namespace basist // Converts opaque or color+alpha ETC1S block to ASTC 4x4. // This function tries to use the best ASTC mode given the block's actual contents. - static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) { astc_block_params blk; @@ -8223,7 +8223,7 @@ namespace basist // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks uint32_t r, g, b; decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); - + uint32_t* pOutput = static_cast(pDst_block); uint8_t* pBytes = reinterpret_cast(pDst_block); @@ -8243,7 +8243,7 @@ namespace basist } else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) { - // Both color and alpha use <= 2 unique selectors each. + // Both color and alpha use <= 2 unique selectors each. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). color32 block_colors[4]; decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); @@ -8290,7 +8290,7 @@ namespace basist { uint32_t s = alpha_selectors.get_selector(x, y); s = (s == alpha_high_selector) ? 
1 : 0; - + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); } // x } // y @@ -8323,12 +8323,12 @@ namespace basist return; } - + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. - + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) { @@ -8362,7 +8362,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -8370,7 +8370,7 @@ namespace basist blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; - + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; for (uint32_t y = 0; y < 4; y++) @@ -8414,10 +8414,10 @@ namespace basist { // Convert ETC1S alpha const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; - + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; @@ -8559,7 +8559,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = 
g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -8603,7 +8603,7 @@ namespace basist const uint32_t r = block_colors[low_selector].r; const uint32_t g = block_colors[low_selector].g; const uint32_t b = block_colors[low_selector].b; - + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; @@ -8705,7 +8705,7 @@ namespace basist blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; - + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; bool invert = false; @@ -10798,8 +10798,8 @@ namespace basist static void transcoder_init_atc() { prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); - prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); - prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); @@ -10853,7 +10853,7 @@ namespace basist pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); - + pBlock->m_sels[0] = 0x55; pBlock->m_sels[1] = 0x55; pBlock->m_sels[2] = 0x55; @@ -10988,7 +10988,7 
@@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11062,7 +11062,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11092,7 +11092,7 @@ namespace basist } // inten fclose(pFile); - + // PVRTC2 45 fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); @@ -11137,7 +11137,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11214,7 +11214,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11291,7 +11291,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11368,7 +11368,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -11496,12 +11496,12 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_trans_match44[256]; - + static struct { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33[256]; - + static struct { uint8_t m_l, m_h; @@ -11511,7 +11511,7 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33_3[256]; - + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { @@ -11625,7 +11625,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -11643,9 +11643,9 @@ namespace basist } static inline int sq(int x) { return x * x; } - - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. - // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. 
I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) { @@ -11700,13 +11700,13 @@ namespace basist const uint32_t high_selector = pSelector->m_hi_selector; const int num_unique_color_selectors = pSelector->m_num_unique_selectors; - + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. const int br = (base_color.r << 3) | (base_color.r >> 2); const int bg = (base_color.g << 3) | (base_color.g >> 2); const int bb = (base_color.b << 3) | (base_color.b >> 2); - + color32 block_cols[4]; for (uint32_t i = 0; i < 4; i++) { @@ -11735,14 +11735,14 @@ namespace basist decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); // Mod 0 - uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); uint32_t ca0 = (la0 << 1); - + cr0 = (cr0 << 3) | (cr0 >> 2); cg0 = (cg0 << 3) | (cg0 >> 2); cb0 = (cb0 << 3) | (cb0 >> 2); @@ -11771,14 +11771,14 @@ namespace basist uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); uint32_t ca3 = (la3 << 1) | 1; - + cr3 = (cr3 << 3) | (cr3 >> 2); cg3 = (cg3 << 3) | (cg3 >> 2); cb3 = (cb3 << 3) | (cb3 >> 2); ca3 = (ca3 << 4) | ca3; uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; - + // Mod 1 uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; uint32_t 
hr1 = g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; @@ -11853,7 +11853,7 @@ namespace basist // It's a solid color block. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; - + const float S = 1.0f / 255.0f; vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); @@ -11865,7 +11865,7 @@ namespace basist vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); } - // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). // To keep quality up we need to use full 4D PCA in this case. 
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || @@ -11916,7 +11916,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -12016,10 +12016,10 @@ namespace basist // 4433 4443 color32 trialMinColor, trialMaxColor; - + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); - + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); @@ -12092,7 +12092,7 @@ namespace basist } } } - + static void transcoder_init_pvrtc2() { for (uint32_t v = 0; v < 256; v++) @@ -12198,7 +12198,7 @@ namespace basist g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; } - + for (uint32_t v = 0; v < 256; v++) { int best_l = 0, best_h = 0, lowest_err = INT_MAX; @@ -12321,7 +12321,7 @@ namespace basist sym_codec.stop(); m_selectors.resize(num_selectors); - + if (!sym_codec.init(pSelectors_data, selectors_data_size)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); @@ -12633,7 +12633,7 @@ namespace basist if (!output_rows_in_pixels) output_rows_in_pixels = slice_desc.m_orig_height; } - + std::vector* pPrev_frame_indices = nullptr; if (is_video) { @@ -12903,7 +12903,7 @@ namespace basist case block_format::cETC1: { decoder_etc_block* pDst_block = reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); - + 
block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -12954,7 +12954,7 @@ namespace basist const uint32_t low_selector = pSelector->m_lo_selector; const uint32_t high_selector = pSelector->m_hi_selector; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 block_colors[2]; decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); @@ -12970,7 +12970,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -12978,7 +12978,7 @@ namespace basist { #if BASISD_SUPPORT_PVRTC1 assert(pAlpha_blocks); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -12986,7 +12986,7 @@ namespace basist ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; - // Get block's RGBA bounding box + // Get block's RGBA bounding box const color32& base_color = pEndpoints->m_color5; const uint32_t inten_table = pEndpoints->m_inten5; const uint32_t low_selector = pSelector->m_lo_selector; @@ -13021,7 +13021,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -13105,7 +13105,7 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &m_endpoints[0], &m_selectors[0]); #endif break; @@ -13121,10 +13121,10 @@ namespace basist { assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * 
output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - + int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -13138,7 +13138,7 @@ namespace basist pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); - + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); } } @@ -13167,7 +13167,7 @@ namespace basist color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); - + for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; @@ -13288,7 +13288,7 @@ namespace basist cur = byteswap_uint16(cur); cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; - + if (IS_BIG_ENDIAN) cur = byteswap_uint16(cur); @@ -13406,7 +13406,7 @@ namespace basist return true; } - + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() { } @@ -13465,7 +13465,7 @@ namespace basist for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; - + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) { switch (fmt) @@ -13495,7 +13495,7 @@ namespace basist } case block_format::cBC4: { - if (channel0 < 0) + if (channel0 < 0) channel0 = 0; status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); break; @@ -13660,7 +13660,7 @@ namespace basist return false; #endif } - + basisu_transcoder::basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : 
m_lowlevel_etc1s_decoder(pGlobal_sel_codebook), m_ready_to_transcode(false) @@ -13689,7 +13689,7 @@ namespace basist return false; } } -#endif +#endif return true; } @@ -13776,7 +13776,7 @@ namespace basist return false; } } - + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { @@ -13792,7 +13792,7 @@ namespace basist return false; } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -13908,12 +13908,12 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - + image_info.m_alpha_flag = false; // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; @@ -14036,13 +14036,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; - + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; @@ -14082,7 +14082,7 @@ namespace basist file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); - + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; @@ -14147,7 +14147,7 @@ namespace basist return true; } - + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { if (!validate_header_quick(pData, data_size)) @@ -14217,7 +14217,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); } } - + m_ready_to_transcode = true; return true; @@ -14228,7 +14228,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); m_ready_to_transcode = false; - + return true; } @@ -14267,7 +14267,7 @@ namespace basist const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - + if (basis_block_format_is_uncompressed(fmt)) { // Assume the output buffer is orig_width by orig_height @@ -14330,7 +14330,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, @@ -14419,7 +14419,7 @@ namespace basist if 
(!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; - + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) { #if BASISD_SUPPORT_ETC2_EAC_A8 @@ -14505,7 +14505,7 @@ namespace basist if (slice_index < 0) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + // Unable to find the requested image/level return false; } @@ -14514,7 +14514,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) @@ -14551,7 +14551,7 @@ namespace basist } } } - + bool status = false; const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; @@ -14559,11 +14559,11 @@ namespace basist if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { // UASTC4x4 @@ -14619,8 +14619,8 @@ namespace basist } case transcoder_texture_format::cTFBC5_RG: { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - nullptr, 0, + status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC5, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + nullptr, 0, 0, 3); if (!status) { @@ -14688,7 +14688,7 @@ namespace basist } case transcoder_texture_format::cTFETC2_EAC_R11: { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_R11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, 0, ((basis_file_has_alpha_slices) && (transcode_alpha_data_to_opaque_formats)) ? 
3 : 0); if (!status) @@ -14699,8 +14699,8 @@ namespace basist } case transcoder_texture_format::cTFETC2_EAC_RG11: { - status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, - nullptr, 0, + status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_RG11, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState, + nullptr, 0, 0, 3); if (!status) { @@ -14752,7 +14752,7 @@ namespace basist } } } - else + else { // ETC1S @@ -14874,7 +14874,7 @@ namespace basist // Now transcode the alpha slice. The cBC7_M5_ALPHA transcoder will now change the opaque mode 5 blocks to blocks with alpha. status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC7_M5_ALPHA, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); } - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to BC7 failed (0)\n"); @@ -14893,7 +14893,7 @@ namespace basist if (basis_file_has_alpha_slices) { - // First decode the alpha data + // First decode the alpha data status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); } else @@ -14929,7 +14929,7 @@ namespace basist #endif assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -15012,7 +15012,7 @@ 
namespace basist status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cASTC_4x4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); if (!status) - { + { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: transcode_slice() to ASTC failed (0)\n"); } @@ -15048,7 +15048,7 @@ namespace basist #endif assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -15107,7 +15107,7 @@ namespace basist } else { - // Now decode the color data and transcode to PVRTC2 RGBA. + // Now decode the color data and transcode to PVRTC2 RGBA. status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); } } @@ -15125,7 +15125,7 @@ namespace basist { // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); else @@ -15168,7 +15168,7 @@ namespace basist { // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); else @@ -15269,14 +15269,14 @@ namespace basist } } } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); } else { - //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); } return status; @@ -15460,13 +15460,13 @@ namespace basist } return false; } - + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) { switch (fmt) { case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); + return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: @@ -15476,7 +15476,7 @@ namespace basist } return 0; } - + uint32_t basis_get_block_width(transcoder_texture_format tex_type) { switch (tex_type) @@ -15494,7 +15494,7 @@ namespace basist BASISU_NOTE_UNUSED(tex_type); return 4; } - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { if (fmt == basis_tex_format::cUASTC4x4) @@ -15552,7 +15552,7 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return true; #endif -#if BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC case transcoder_texture_format::cTFASTC_4x4_RGBA: return true; #endif @@ -15583,9 +15583,9 @@ namespace basist return false; } - // ------------------------------------------------------------------------------------------------------ + // 
------------------------------------------------------------------------------------------------------ // UASTC - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_UASTC const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = @@ -16310,7 +16310,7 @@ namespace basist if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -16600,7 +16600,7 @@ namespace basist bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) { //memset(&unpacked, 0, sizeof(unpacked)); - + #if 0 uint8_t table[128]; memset(table, 0xFF, sizeof(table)); @@ -16655,7 +16655,7 @@ namespace basist return true; } - + if (read_hints) { if (g_uastc_mode_has_bc1_hint0[mode]) @@ -16688,7 +16688,7 @@ namespace basist } else bit_ofs += g_uastc_mode_total_hint_bits[mode]; - + uint32_t subsets = 1; switch (mode) { @@ -16901,38 +16901,38 @@ namespace basist { // All other modes have <= 64 weight bits. 
uint64_t bits; - + // Read the weight bits if ((IS_BIG_ENDIAN) || (!USE_UNALIGNED_WORD_READS)) bits = read_bits64(blk.m_bytes, bit_ofs, std::min(64, 128 - (int)bit_ofs)); else { bits = blk.m_qwords[1]; - + if (bit_ofs >= 64U) bits >>= (bit_ofs - 64U); else { assert(bit_ofs >= 56U); - + uint32_t bits_needed = 64U - bit_ofs; bits <<= bits_needed; bits |= (blk.m_bytes[7] >> (8U - bits_needed)); } } - + bit_ofs = 0; const uint32_t mask = (1U << weight_bits) - 1U; const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; - + if (total_planes == 2) { // Dual plane modes always have a single subset, and the first 2 weights are anchors. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); - + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); @@ -16950,7 +16950,7 @@ namespace basist if (weight_bits == 4) { assert(bit_ofs == 0); - + // Specialize the most common case: 4-bit weights. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); @@ -17496,7 +17496,7 @@ namespace basist } case 2: { - // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 dst_blk.m_mode = 1; dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; @@ -18435,7 +18435,7 @@ namespace basist bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); // non-flipped: | | - // vs. + // vs. 
// flipped: -- // -- @@ -19046,7 +19046,7 @@ namespace basist static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; - + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) { uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; @@ -19134,7 +19134,7 @@ namespace basist a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); } - + { const int v0 = pPixels[8 * stride] * 14 + bias; const int v1 = pPixels[9 * stride] * 14 + bias; @@ -19158,7 +19158,7 @@ namespace basist } const uint64_t f = a0 | a1 | a2 | a3; - + pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -19181,7 +19181,7 @@ namespace basist int dots[4]; for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; ar *= 2; ag *= 2; ab *= 2; @@ -19190,7 +19190,7 @@ namespace basist { const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - + // Rounding matters here! // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; @@ -19233,11 +19233,11 @@ namespace basist } struct vec3F { float c[3]; }; - + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) { // Derived from bc7enc16's LS function. 
- // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; @@ -19311,7 +19311,7 @@ namespace basist return true; } - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) { dxt1_block* pDst_block = static_cast(pDst); @@ -19363,19 +19363,19 @@ namespace basist { const color32* pSrc_pixels = (const color32*)pPixels; dxt1_block* pDst_block = static_cast(pDst); - + int avg_r = -1, avg_g = 0, avg_b = 0; int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; uint8_t sels[16]; - + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; if (use_sels) { // Caller is jamming in their own selectors for us to try. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); - + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; - + for (uint32_t i = 0; i < 16; i++) sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; } @@ -19387,13 +19387,13 @@ namespace basist for (j = 1; j < 16; j++) if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; - + if (j == 16) { encode_bc1_solid_block(pDst, fr, fg, fb); return; } - + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) int total_r = fr, total_g = fg, total_b = fb; int max_r = fr, max_g = fg, max_b = fb; @@ -19425,7 +19425,7 @@ namespace basist float cov[6]; for (uint32_t i = 0; i < 6; i++) cov[i] = static_cast(icov[i])* (1.0f / 255.0f); - + #if 0 // Seems silly to use full PCA to choose 2 colors. The diff in avg. 
PSNR between using PCA vs. not is small (~.025 difference). // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta @@ -19457,7 +19457,7 @@ namespace basist saxis_b = (int)(xb * m); } #endif - + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; for (uint32_t i = 0; i < 16; i++) { @@ -19481,7 +19481,7 @@ namespace basist hr = to_5(pSrc_pixels[high_c].r); hg = to_6(pSrc_pixels[high_c].g); hb = to_5(pSrc_pixels[high_c].b); - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } // if (use_sels) @@ -19526,13 +19526,13 @@ namespace basist hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); - + // Always forbid 3 color blocks if (lc16 == hc16) { @@ -19584,7 +19584,7 @@ namespace basist pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } - + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) { const color32* pSrc_pixels = (const color32*)pPixels; @@ -19633,8 +19633,8 @@ namespace basist min_r = std::min(min_r, r); min_g = std::min(min_g, g); min_b = std::min(min_b, b); total_r += r; total_g += g; total_b += b; } - - if (grayscale_flag) + + if (grayscale_flag) { // Grayscale blocks are a common enough case to specialize. 
if ((max_r - min_r) < 2) @@ -19938,7 +19938,7 @@ namespace basist // Always forbid 3 color blocks uint16_t lc16 = (uint16_t)b.get_low_color(); uint16_t hc16 = (uint16_t)b.get_high_color(); - + uint8_t mask = 0; // Make l > h @@ -20168,7 +20168,7 @@ namespace basist blk.m_base = static_cast(a); blk.m_table = 13; blk.m_multiplier = 0; - + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; @@ -20858,7 +20858,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); if (from_alpha) @@ -20917,7 +20917,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGBA bounding box + // Get block's RGBA bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); for (uint32_t i = 0; i < 16; i++) @@ -21044,7 +21044,7 @@ namespace basist /** * Collection of unused functions and const variables to work around \c * -Wunused-function and \c -Wunused-const-variable warnings. - * + * * \todo LTO does its thing so any unused are removed but is there a better way? */ void _basisu_translib_dummy() { diff --git a/contrib/previewers/lib/basisu_transcoder.h b/contrib/previewers/lib/basisu_transcoder.h index d1d893b5..28b3a857 100644 --- a/contrib/previewers/lib/basisu_transcoder.h +++ b/contrib/previewers/lib/basisu_transcoder.h @@ -171,7 +171,7 @@ namespace basisu void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); - + template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } template inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; } @@ -179,7 +179,7 @@ namespace basisu template inline S maximum(S a, S b) { return (a > b) ? 
a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - + template inline S minimum(S a, S b) { return (a < b) ? a : b; } template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } @@ -203,7 +203,7 @@ namespace basisu inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } inline uint64_t iabs64(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } @@ -216,8 +216,8 @@ namespace basisu template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } - template inline void append_vector(T &vec, const R *pObjs, size_t n) - { + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { if (n) { const size_t cur_s = vec.size(); @@ -263,7 +263,7 @@ namespace basisu for (size_t i = 0; i < vec.size(); i++) vec[i] = obj; } - + inline uint64_t read_be64(const void *p) { uint64_t val = 0; @@ -311,7 +311,7 @@ namespace basisu if ((ha <= lb) || (la >= hb)) return false; return true; } - + // Always little endian 2-4 byte unsigned int template struct packed_uint @@ -339,13 +339,13 @@ namespace basisu enum eZero { cZero }; enum eNoClamp { cNoClamp }; - + // Rice/Huffman entropy coding - + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. 
enum { - cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, cHuffmanFastLookupBits = 10, cHuffmanFastLookupSize = 1 << cHuffmanFastLookupBits, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, @@ -372,13 +372,13 @@ namespace basisu enum class texture_format { cInvalidTextureFormat = -1, - + // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block - cETC2_ALPHA, // ETC2 EAC alpha block + cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A @@ -393,8 +393,8 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, - cUASTC4x4, - + cUASTC4x4, + // Uncompressed/raw pixels cRGBA32, cRGB565, @@ -450,7 +450,7 @@ namespace basisu BASISU_NOTE_UNUSED(fmt); return 4; } - + } // namespace basisu /**** ended inlining basisu.h ****/ @@ -468,9 +468,9 @@ namespace basist // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. enum class block_format { - cETC1, // ETC1S RGB + cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block - cBC1, // DXT1 RGB + cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks @@ -480,9 +480,9 @@ namespace basist cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) - cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). 
Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. - + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size @@ -492,21 +492,21 @@ namespace basist cETC2_EAC_R11, cETC2_EAC_RG11, - + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, - + cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, - + cTotalBlockFormats }; @@ -527,9 +527,9 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - + uint16_t crc16(const void *r, size_t size, uint16_t crc); - + class huffman_decoding_table { friend class bitwise_decoder; @@ -645,7 +645,7 @@ namespace basist return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); - + if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; @@ -812,14 +812,14 @@ namespace basist for (;;) { uint32_t k = peek_bits(16); - + uint32_t l = 0; while (k & 1) { l++; k >>= 1; } - + q += l; remove_bits(l); @@ -837,7 +837,7 @@ namespace basist const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -849,7 +849,7 @@ namespace basist if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -863,7 
+863,7 @@ namespace basist inline uint32_t decode_huffman(const huffman_decoding_table &ct) { assert(ct.m_code_sizes.size()); - + while (m_bit_buf_size < 16) { uint32_t c = 0; @@ -874,7 +874,7 @@ namespace basist m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } - + int code_len; int sym; @@ -1059,7 +1059,7 @@ namespace basist }; struct decoder_etc_block; - + inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); @@ -1083,7 +1083,7 @@ namespace basist }; uint8_t c[4]; - + uint32_t m; }; @@ -1189,7 +1189,7 @@ namespace basist }; bool basis_block_format_is_uncompressed(block_format tex_type); - + } // namespace basist @@ -1202,8 +1202,8 @@ namespace basist namespace basist { struct color_quad_u8 - { - uint8_t m_c[4]; + { + uint8_t m_c[4]; }; const uint32_t TOTAL_UASTC_MODES = 19; @@ -1298,9 +1298,9 @@ namespace basist int m_ccs; // color component selector (dual plane only) bool m_dual_plane; // true if dual plane - // Weight and endpoint BISE values. + // Weight and endpoint BISE values. // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. - uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. 
order }; @@ -1395,7 +1395,7 @@ namespace basist #ifdef _DEBUG int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); #endif - + struct uastc_block { union @@ -1432,10 +1432,10 @@ namespace basist }; color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); - + struct decoder_etc_block; struct eac_block; - + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); @@ -1457,7 +1457,7 @@ namespace basist // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); - + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); enum @@ -1467,7 +1467,7 @@ namespace basist cEncodeBC1UseSelectors = 4, }; void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); - + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); @@ -1484,7 +1484,7 @@ namespace basist bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); - + // uastc_init() MUST be called before using this module. void uastc_init(); @@ -2202,7 +2202,7 @@ namespace basist basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. 
- basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. basisu::packed_uint<4> m_file_ofs; // Offset from the header to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes @@ -2219,7 +2219,7 @@ namespace basist }; // The image type field attempts to describe how to interpret the image data in a Basis file. - // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. // We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) enum basis_texture_type { @@ -2262,7 +2262,7 @@ namespace basist basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha basis files) basisu::packed_uint<3> m_total_images; // The total # of images - + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type @@ -2272,11 +2272,11 @@ namespace basist basisu::packed_uint<4> m_userdata0; // For client use basisu::packed_uint<4> m_userdata1; // For client use - basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the header basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size 
in bytes - basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the header basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes @@ -2284,7 +2284,7 @@ namespace basist basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header - + basisu::packed_uint<4> m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use }; @@ -2298,7 +2298,7 @@ namespace basist // High-level composite texture formats supported by the transcoder. // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. // Notes: - // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a // fully opaque (255) alpha channel. // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. 
@@ -2317,7 +2317,7 @@ namespace basist cTFBC4_R = 4, // Red only, alpha slice is transcoded to output if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified cTFBC5_RG = 5, // XY: Two BC4 blocks, X=R and Y=Alpha, .basis file should have alpha data (if not Y will be all 255's) cTFBC7_RGBA = 6, // RGB or RGBA, mode 5 for ETC1S, modes (1,2,3,5,6,7) for UASTC - + // PVRTC1 4bpp (mobile, PowerVR devices) cTFPVRTC1_4_RGB = 8, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified, nearly lowest quality of any texture format. cTFPVRTC1_4_RGBA = 9, // Opaque+alpha, most useful for simple opacity maps. If .basis file doesn't have alpha cTFPVRTC1_4_RGB will be used instead. Lowest quality of any supported texture format. @@ -2327,7 +2327,7 @@ namespace basist // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) - cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). @@ -2339,7 +2339,7 @@ namespace basist cTFETC2_EAC_R11 = 20, // R only (ETC2 EAC R11 unsigned) cTFETC2_EAC_RG11 = 21, // RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps - + // Uncompressed (raw pixel) formats cTFRGBA32 = 13, // 32bpp RGBA image stored in raster (not block) order in memory, R is first byte, A is last byte. 
cTFRGB565 = 14, // 166pp RGB image stored in raster (not block) order in memory, R at bit position 11 @@ -2355,7 +2355,7 @@ namespace basist cTFBC3 = cTFBC3_RGBA, cTFBC4 = cTFBC4_R, cTFBC5 = cTFBC5_RG, - + // Previously, the caller had some control over which BC7 mode the transcoder output. We've simplified this due to UASTC, which supports numerous modes. cTFBC7_M6_RGB = cTFBC7_RGBA, // Opaque only, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. Highest quality of all the non-ETC1 formats. cTFBC7_M5_RGBA = cTFBC7_RGBA, // Opaque+alpha, alpha channel will be opaque for opaque .basis files @@ -2364,7 +2364,7 @@ namespace basist cTFBC7_ALT = 7, cTFASTC_4x4 = cTFASTC_4x4_RGBA, - + cTFATC_RGBA_INTERPOLATED_ALPHA = cTFATC_RGBA, }; @@ -2383,22 +2383,22 @@ namespace basist // Returns the texture type's name in ASCII. const char* basis_get_texture_type_name(basis_texture_type tex_type); - + // Returns true if the transcoder texture type is an uncompressed (raw pixel) format. bool basis_transcoder_format_is_uncompressed(transcoder_texture_format tex_type); // Returns the # of bytes per pixel for uncompressed formats, or 0 for block texture formats. uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt); - + // Returns the block width for the specified texture format, which is currently either 4 or 8 for FXT1. uint32_t basis_get_block_width(transcoder_texture_format tex_type); - + // Returns the block height for the specified texture format, which is currently always 4. uint32_t basis_get_block_height(transcoder_texture_format tex_type); // Returns true if the specified format was enabled at compile time. bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S); - + class basisu_transcoder; // This struct holds all state used during transcoding. For video, it needs to persist between image transcodes (it holds the previous frame). 
@@ -2412,16 +2412,16 @@ namespace basist }; std::vector m_block_endpoint_preds[2]; - + enum { cMaxPrevFrameLevels = 16 }; - std::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + std::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] }; - + // Low-level helper class that does the actual transcoding. class basisu_lowlevel_etc1s_transcoder { friend class basisu_transcoder; - + public: basisu_lowlevel_etc1s_transcoder(const basist::etc1_global_selector_codebook *pGlobal_sel_codebook); @@ -2431,7 +2431,7 @@ namespace basist bool decode_tables(const uint8_t *pTable_data, uint32_t table_data_size); - bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, + bool transcode_slice(void *pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t *pImage_data, uint32_t image_data_size, block_format fmt, uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header &header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state *pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0); @@ -2458,7 +2458,7 @@ namespace basist huffman_decoding_table m_endpoint_pred_model, m_delta_endpoint_model, m_selector_model, m_selector_history_buf_rle_model; uint32_t m_selector_history_buf_size; - + basisu_transcoder_state m_def_state; }; @@ -2475,7 +2475,7 @@ namespace basist // This flag is used internally when decoding to BC3. cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - // The output buffer contains alpha endpoint/selector indices. + // The output buffer contains alpha endpoint/selector indices. // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. 
cDecodeFlagsOutputHasAlphaIndices = 16, @@ -2511,9 +2511,9 @@ namespace basist uint32_t m_slice_index; // the slice index in the .basis file uint32_t m_image_index; // the source image index originally provided to the encoder uint32_t m_level_index; // the mipmap level within this image - + uint32_t m_unpacked_slice_crc16; - + bool m_alpha_flag; // true if the slice has alpha data bool m_iframe_flag; // true if the slice is an I-Frame }; @@ -2523,7 +2523,7 @@ namespace basist struct basisu_image_info { uint32_t m_image_index; - uint32_t m_total_levels; + uint32_t m_total_levels; uint32_t m_orig_width; uint32_t m_orig_height; @@ -2535,8 +2535,8 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -2556,8 +2556,8 @@ namespace basist uint32_t m_num_blocks_y; uint32_t m_total_blocks; - uint32_t m_first_slice_index; - + uint32_t m_first_slice_index; + bool m_alpha_flag; // true if the image has alpha data bool m_iframe_flag; // true if the image is an I-Frame }; @@ -2574,7 +2574,7 @@ namespace basist uint32_t m_endpoint_codebook_size; uint32_t m_tables_size; - uint32_t m_slices_size; + uint32_t m_slices_size; basis_texture_type m_tex_type; uint32_t m_us_per_frame; @@ -2589,7 +2589,7 @@ namespace basist uint32_t m_userdata1; basis_tex_format m_tex_format; // ETC1S, UASTC, etc. 
- + bool m_y_flipped; // true if the image was Y flipped bool m_etc1s; // true if the file is ETC1S bool m_has_alpha_slices; // true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha) @@ -2613,7 +2613,7 @@ namespace basist basis_texture_type get_texture_type(const void *pData, uint32_t data_size) const; bool get_userdata(const void *pData, uint32_t data_size, uint32_t &userdata0, uint32_t &userdata1) const; - + // Returns the total number of images in the basis file (always 1 or more). // Note that the number of mipmap levels for each image may differ, and that images may have different resolutions. uint32_t get_total_images(const void *pData, uint32_t data_size) const; @@ -2622,7 +2622,7 @@ namespace basist // Returns the number of mipmap levels in an image. uint32_t get_total_image_levels(const void *pData, uint32_t data_size, uint32_t image_index) const; - + // Returns basic information about an image. Note that orig_width/orig_height may not be a multiple of 4. bool get_image_level_desc(const void *pData, uint32_t data_size, uint32_t image_index, uint32_t level_index, uint32_t &orig_width, uint32_t &orig_height, uint32_t &total_blocks) const; @@ -2631,33 +2631,33 @@ namespace basist // Returns information about the specified image's mipmap level. bool get_image_level_info(const void *pData, uint32_t data_size, basisu_image_level_info &level_info, uint32_t image_index, uint32_t level_index) const; - + // Get a description of the basis file and low-level information about each slice. bool get_file_info(const void *pData, uint32_t data_size, basisu_file_info &file_info) const; - + // start_transcoding() must be called before calling transcode_slice() or transcode_image_level(). // For ETC1S files, this call decompresses the selector/endpoint codebooks, so ideally you would only call this once per .basis file (not each image/mipmap level). 
bool start_transcoding(const void *pData, uint32_t data_size); - + bool stop_transcoding(); - + // Returns true if start_transcoding() has been called. bool get_ready_to_transcode() const { return m_ready_to_transcode; } - + // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). - // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). - // Notes: + // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. 
bool transcode_image_level( - const void *pData, uint32_t data_size, - uint32_t image_index, uint32_t level_index, + const void *pData, uint32_t data_size, + uint32_t image_index, uint32_t level_index, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, transcoder_texture_format fmt, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state *pState = nullptr, uint32_t output_rows_in_pixels = 0) const; @@ -2674,9 +2674,9 @@ namespace basist // output_rows_in_pixels: Ignored unless fmt is cRGBA32. The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. - bool transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, + bool transcode_slice(const void *pData, uint32_t data_size, uint32_t slice_index, void *pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, - block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state * pState = nullptr, void* pAlpha_blocks = nullptr, + block_format fmt, uint32_t output_block_stride_in_bytes, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, basisu_transcoder_state * pState = nullptr, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1) const; private: @@ -2686,7 +2686,7 @@ namespace basist bool m_ready_to_transcode; int find_first_slice_index(const void* pData, uint32_t data_size, uint32_t image_index, uint32_t level_index) const; - + bool validate_header_quick(const void* pData, uint32_t data_size) const; }; diff --git a/contrib/previewers/win/basisthumbprovider.h b/contrib/previewers/win/basisthumbprovider.h index e73747cb..cca93a8f 100644 --- 
a/contrib/previewers/win/basisthumbprovider.h +++ b/contrib/previewers/win/basisthumbprovider.h @@ -4,7 +4,7 @@ #include /** - * + * */ class BasisThumbProvider : public IInitializeWithStream, public IThumbnailProvider { @@ -16,23 +16,23 @@ class BasisThumbProvider : public IInitializeWithStream, public IThumbnailProvid IFACEMETHODIMP_(ULONG) AddRef() override; // IUnknown::Release() IFACEMETHODIMP_(ULONG) Release() override; - + // IInitializeWithStream::Initialize() IFACEMETHODIMP Initialize(IStream *pStream, DWORD grfMode) override; - + // IThumbnailProvider::GetThumbnail() IFACEMETHODIMP GetThumbnail(UINT cx, HBITMAP *phbmp, WTS_ALPHATYPE *pdwAlpha) override; protected: virtual ~BasisThumbProvider(); - + private: LONG count; IStream* stream; }; /** - * + * */ class BasisThumbProviderFactory : public IClassFactory { @@ -44,15 +44,15 @@ class BasisThumbProviderFactory : public IClassFactory IFACEMETHODIMP_(ULONG) AddRef() override; // IUnknown::Release() IFACEMETHODIMP_(ULONG) Release() override; - + // IClassFactory::CreateInstance() IFACEMETHODIMP CreateInstance(IUnknown *pUnkOuter, REFIID riid, void **ppv) override; // IClassFactory::LockServer() IFACEMETHODIMP LockServer(BOOL fLock) override; - + protected: virtual ~BasisThumbProviderFactory(); - + private: LONG count; }; diff --git a/contrib/previewers/win/helpers.cpp b/contrib/previewers/win/helpers.cpp index d4fab038..82100940 100644 --- a/contrib/previewers/win/helpers.cpp +++ b/contrib/previewers/win/helpers.cpp @@ -22,7 +22,7 @@ HBITMAP rgbToBitmap(const uint32_t* src, uint32_t const imgW, uint32_t const img * Creates a bitmap (a DIB) for the passed-in pixel size. Note that * negation of the height means top-down, origin upper-left, which is the * regular case. - * + * * TODO: 16-bit variant instead? 
*/ assert(src && imgW && imgH); @@ -38,7 +38,7 @@ HBITMAP rgbToBitmap(const uint32_t* src, uint32_t const imgW, uint32_t const img HBITMAP hbmp = CreateDIBSection(NULL, &bmi, DIB_RGB_COLORS, &pixels, NULL, 0); /* * RGBA to BGRA conversion. - * + * * Note: we keep the alpha. */ if (hbmp && pixels) { diff --git a/contrib/previewers/win/helpers.h b/contrib/previewers/win/helpers.h index 691d5610..4fd23722 100644 --- a/contrib/previewers/win/helpers.h +++ b/contrib/previewers/win/helpers.h @@ -6,15 +6,15 @@ /** * Write a formatted string to the connected debugger (e.g. DebugView). - * + * * \param[in] fmt content to write in \c printf format (followed by optional arguments) */ void dprintf(char* const fmt, ...); /** * Converts raw RGBA data to a Windows BGRA bitmap. - * - * \param[in] src raw RGBA data + * + * \param[in] src raw RGBA data * \param[in] imgW width of the decoded image * \param[in] imgH height of the decoded image * \return handle to a bitmap (ownership passed to the caller) diff --git a/contrib/single_file_transcoder/README.md b/contrib/single_file_transcoder/README.md index d37788eb..1fd97492 100644 --- a/contrib/single_file_transcoder/README.md +++ b/contrib/single_file_transcoder/README.md @@ -20,7 +20,7 @@ The combiner script can also generate separate amalgamated header and source fil ``` ./combine.sh -r ../../transcoder -o basisu_transcoder.h -p ../../transcoder/basisu_transcoder.h -./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -k basisu_transcoder.h -o basisu_transcoder.cpp basisu_transcoder-in.cpp +./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -k basisu_transcoder.h -o basisu_transcoder.cpp basisu_transcoder-in.cpp ``` @@ -29,6 +29,6 @@ Note: the amalgamation script will run on pretty much anything but is _extremely Why? 
---- -Because all it now takes to support Basis Universal is the addition of a single file, two if using the header, with no configuration or further build steps (the out-of-the-box defaults tailor the included formats for various platforms). +Because all it now takes to support Basis Universal is the addition of a single file, two if using the header, with no configuration or further build steps (the out-of-the-box defaults tailor the included formats for various platforms). The library is small, adding, for example, around 250kB to an Emscripten compiled WebAssembly project (with transcoding disabled for BC7 and ATC; disabling ASTC will remove a further 64kB, and `gzip` will approximately half the `wasm` file). diff --git a/contrib/single_file_transcoder/basisu_transcoder-in.cpp b/contrib/single_file_transcoder/basisu_transcoder-in.cpp index 19d35895..4ce8bf4d 100644 --- a/contrib/single_file_transcoder/basisu_transcoder-in.cpp +++ b/contrib/single_file_transcoder/basisu_transcoder-in.cpp @@ -3,7 +3,7 @@ * \code * ./combine.sh -r ../../transcoder -x basisu_transcoder_tables_bc7_m6.inc -o basisu_transcoder.cpp basisu_transcoder-in.cpp * \endcode - * + * * \note The script above excludes the BC7 mode 6 tables, a choice reflected in * the build options. */ @@ -12,7 +12,7 @@ * Transcoder build options for known platforms (iOS has ETC, ASTC and PVRTC; * Emscripten adds DXT to iOS's options; Android adds PVRTC2 to Emscripten's * options; other platforms build all except BC7 mode 6 and FXT1). - * + * * See https://github.com/BinomialLLC/basis_universal#shrinking-the-transcoders-compiled-size */ #ifdef __APPLE__ @@ -38,7 +38,7 @@ /** * Collection of unused functions and const variables to work around \c * -Wunused-function and \c -Wunused-const-variable warnings. - * + * * \todo LTO does its thing so any unused are removed but is there a better way? 
*/ void _basisu_translib_dummy() { diff --git a/contrib/single_file_transcoder/combine.sh b/contrib/single_file_transcoder/combine.sh index aedae922..2b9ab2c5 100755 --- a/contrib/single_file_transcoder/combine.sh +++ b/contrib/single_file_transcoder/combine.sh @@ -1,13 +1,13 @@ #!/bin/sh -e # Tool to bundle multiple C/C++ source files, inlining any includes. -# +# # Note: this POSIX-compliant script is many times slower than the original bash # implementation (due to the grep calls) but it runs and works everywhere. -# +# # TODO: ROOTS, FOUND, etc., as arrays (since they fail on paths with spaces) # TODO: revert to Bash-only regex (the grep ones being too slow) -# +# # Script released under a CC0 license. # Common file roots diff --git a/contrib/single_file_transcoder/examples/emscripten.cpp b/contrib/single_file_transcoder/examples/emscripten.cpp index 740bd268..49d210fa 100644 --- a/contrib/single_file_transcoder/examples/emscripten.cpp +++ b/contrib/single_file_transcoder/examples/emscripten.cpp @@ -131,7 +131,7 @@ static GLchar const fragShader2D[] = /** * Helper to compile a shader. - * + * * \param type shader type * \param text shader source * \return the shader ID (or zero if compilation failed) @@ -199,7 +199,7 @@ struct posTex2d { /* * Possibly missing GL enums. - * + * * Note: GL_COMPRESSED_RGB_ETC1_WEBGL is the same as GL_ETC1_RGB8_OES */ #ifndef GL_ETC1_RGB8_OES @@ -233,7 +233,7 @@ static etc1_global_selector_codebook* globalCodebook = NULL; /** * Returns a supported compressed texture format for a given context. - * + * * \param[in] ctx WebGL context * \param[in] alpha \c true if the texture has an alpha channel * \return corresponding Basis format @@ -243,7 +243,7 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const /* * Test for both prefixed and non-prefixed versions. This should grab iOS * and other ImgTec GPUs first as a preference. 
- * + * * TODO: do older iOS expose ASTC to the browser and does it transcode to RGBA? */ static bool const pvr = GL_HAS_EXT(ctx, "WEBKIT_WEBGL_compressed_texture_pvrtc") @@ -291,7 +291,7 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const #endif /* * Finally ETC1, falling back on RGBA. - * + * * TODO: we might just prefer to transcode to dithered 565 once available */ static bool const etc1 = GL_HAS_EXT(ctx, "WEBGL_compressed_texture_etc1"); @@ -306,10 +306,10 @@ static transcoder_texture_format supports(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const /** * Returns the equivalent GL type given a BasisU type. - * + * * \note This relies on \c #supports() returning the supported formats, and so * only converts to the GL equivalents (without further testing for support). - * + * * \param[in] type BasisU transcode target * \return equivalent GL type */ @@ -340,13 +340,13 @@ static GLenum toGlType(transcoder_texture_format const type) { /** * Uploads the texture. - * + * * \param[in] ctx ctx WebGL context * \param[in] name texture \e name * \param[in] data \c .basis file content * \param[in] size number of bytes in \a data * \return \c true if the texture was decoded and created - * + * * \todo reuse the decode buffer (the first mips level should be able to contain the rest) */ bool upload(EMSCRIPTEN_WEBGL_CONTEXT_HANDLE const ctx, GLuint const name, const uint8_t* const data, size_t const size) { @@ -477,7 +477,7 @@ static EM_BOOL initContext() { static void tick() { glClearColor(1.0f, 0.0f, 1.0f, 1.0f); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - + if (uRotId >= 0) { glUniform1f(uRotId, rotDeg); rotDeg += 0.1f; @@ -486,7 +486,7 @@ static void tick() { } glBindTexture(GL_TEXTURE_2D, txName[(lround(rotDeg / 45) & 1) != 0]); } - + glDrawElements(GL_TRIANGLES, 6, GL_UNSIGNED_SHORT, 0); glFlush(); } @@ -501,10 +501,10 @@ int main() { if ((progId = glCreateProgram())) { vertId = compileShader(GL_VERTEX_SHADER, vertShader2D); fragId = 
compileShader(GL_FRAGMENT_SHADER, fragShader2D); - + glBindAttribLocation(progId, GL_VERT_POSXY_ID, "aPos"); glBindAttribLocation(progId, GL_VERT_TXUV0_ID, "aUV0"); - + glAttachShader(progId, vertId); glAttachShader(progId, fragId); glLinkProgram (progId); @@ -514,7 +514,7 @@ int main() { if (uTx0Id >= 0) { glUniform1i(uTx0Id, 0); } - + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glEnable(GL_BLEND); glDisable(GL_DITHER); @@ -522,7 +522,7 @@ int main() { glCullFace(GL_BACK); glEnable(GL_CULL_FACE); } - + GLuint vertsBuf = 0; GLuint indexBuf = 0; // Create the textured quad (vert positions then UVs) @@ -551,14 +551,14 @@ int main() { sizeof(index2d), index2d, GL_STATIC_DRAW); glEnableVertexAttribArray(GL_VERT_POSXY_ID); glEnableVertexAttribArray(GL_VERT_TXUV0_ID); - + glGenTextures(2, txName); if (upload(glCtx, txName[0], srcRgb, sizeof srcRgb) && upload(glCtx, txName[1], srcRgba, sizeof srcRgba)) { printf("Decoded!\n"); } - + emscripten_set_main_loop(tick, 0, EM_FALSE); emscripten_exit_with_live_runtime(); } else { diff --git a/contrib/single_file_transcoder/examples/simple.cpp b/contrib/single_file_transcoder/examples/simple.cpp index 9f61c26b..66e5b41d 100644 --- a/contrib/single_file_transcoder/examples/simple.cpp +++ b/contrib/single_file_transcoder/examples/simple.cpp @@ -8,7 +8,7 @@ * \code * cc -std=c++11 -lstdc++ simple.cpp * \endcode - * + * * Example code released under a CC0 license. */ #include "../basisu_transcoder.cpp" diff --git a/encoder/apg_bmp.c b/encoder/apg_bmp.c index ef3d015e..cca1d80c 100644 --- a/encoder/apg_bmp.c +++ b/encoder/apg_bmp.c @@ -89,7 +89,7 @@ static bool _read_entire_file( const char* filename, _entire_file_t* record ) { fseek( fp, 0L, SEEK_END ); record->sz = (size_t)ftell( fp ); - // Immediately bail on anything larger than _BMP_MAX_IMAGE_FILE_SIZE. + // Immediately bail on anything larger than _BMP_MAX_IMAGE_FILE_SIZE. 
if (record->sz > _BMP_MAX_IMAGE_FILE_SIZE) { fclose( fp ); return false; diff --git a/encoder/apg_bmp.h b/encoder/apg_bmp.h index 8cd73b62..97a4f55f 100644 --- a/encoder/apg_bmp.h +++ b/encoder/apg_bmp.h @@ -32,7 +32,7 @@ Current Limitations: To Do: - FUZZING - create a unique fuzz test set for (8,4,1 BPP). -- (maybe) FEATURE Flipping the image based on negative width and height in header, and/or function arguments. +- (maybe) FEATURE Flipping the image based on negative width and height in header, and/or function arguments. - (maybe) PERF ifdef intrinsics/asm for bitscan. Platform-specific code so won't include unless necessary. - (maybe) FEATURE Add parameter for padding output memory to eg 4-byte alignment or n channels. - (maybe) FEATURE Improved apps support in alpha channel writing (using v5 header). diff --git a/encoder/basisu_astc_decomp.cpp b/encoder/basisu_astc_decomp.cpp index 53bccfc5..f97987fb 100644 --- a/encoder/basisu_astc_decomp.cpp +++ b/encoder/basisu_astc_decomp.cpp @@ -23,11 +23,11 @@ * we don't need it, minor fix to decompress() so it converts non-sRGB * output to 8-bits correctly. I've compared this decoder's output * vs. astc-codec with random inputs on 4x4 blocks, and after fixing a few obvious - * bugs in astc-codec where it didn't correctly follow the spec they match so + * bugs in astc-codec where it didn't correctly follow the spec they match so * I'm assuming they are both correct for 4x4 now. - * HDR support should be easily added back in, but as we don't need it + * HDR support should be easily added back in, but as we don't need it * I'm leaving this for someone else. - * + * *//*! * \file * \brief ASTC Utilities. 
@@ -1387,7 +1387,7 @@ DecompressResult setTexelColors (void* dst, ColorEndpointPair* colorEndpoints, T for (int i = 0; i < numPartitions; i++) { isHDREndpoint[i] = isColorEndpointModeHDR(colorEndpointModes[i]); - + // rg - REMOVING HDR SUPPORT FOR NOW if (isHDREndpoint[i]) return DECOMPRESS_RESULT_ERROR; @@ -1527,14 +1527,14 @@ bool decompress(uint8_t *pDst, const uint8_t * data, bool isSRGB, int blockWidth // rg - We only support LDR here, although adding back in HDR would be easy. const bool isLDR = true; DE_ASSERT(isLDR || !isSRGB); - + float linear[MAX_BLOCK_WIDTH * MAX_BLOCK_HEIGHT * 4]; const Block128 blockData(data); if (decompressBlock(isSRGB ? (void*)pDst : (void*)& linear[0], blockData, blockWidth, blockHeight, isSRGB, isLDR) != DECOMPRESS_RESULT_VALID_BLOCK) return false; - + if (!isSRGB) { int pix = 0; diff --git a/encoder/basisu_astc_decomp.h b/encoder/basisu_astc_decomp.h index 9ec2e460..35f0e80e 100644 --- a/encoder/basisu_astc_decomp.h +++ b/encoder/basisu_astc_decomp.h @@ -33,7 +33,7 @@ namespace astc { // Unpacks a single ASTC block to pDst -// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently, +// If isSRGB is true, the spec requires the decoder to scale the LDR 8-bit endpoints to 16-bit before interpolation slightly differently, // which will lead to different outputs. So be sure to set it correctly (ideally it should match whatever the encoder did). bool decompress(uint8_t* pDst, const uint8_t* data, bool isSRGB, int blockWidth, int blockHeight); diff --git a/encoder/basisu_backend.cpp b/encoder/basisu_backend.cpp index 19911fcb..bbcc8d55 100644 --- a/encoder/basisu_backend.cpp +++ b/encoder/basisu_backend.cpp @@ -247,7 +247,7 @@ namespace basisu m_endpoint_remap_table_old_to_new = reorderer.get_remap_table(); } - // For endpoints, old_to_new[] may not be bijective! + // For endpoints, old_to_new[] may not be bijective! 
// Some "old" entries may be unused and don't get remapped into the "new" array. m_old_endpoint_was_used.clear(); @@ -271,13 +271,13 @@ namespace basisu } // slice_index debug_printf("basisu_backend::reoptimize_and_sort_endpoints_codebook: First old entry index: %u\n", first_old_entry_index); - + m_new_endpoint_was_used.clear(); m_new_endpoint_was_used.resize(r.get_total_endpoint_clusters()); m_endpoint_remap_table_new_to_old.clear(); m_endpoint_remap_table_new_to_old.resize(r.get_total_endpoint_clusters()); - + // Set unused entries in the new array to point to the first used entry in the old array. m_endpoint_remap_table_new_to_old.set_all(first_old_entry_index); @@ -286,7 +286,7 @@ namespace basisu if (m_old_endpoint_was_used[old_index]) { const uint32_t new_index = m_endpoint_remap_table_old_to_new[old_index]; - + m_new_endpoint_was_used[new_index] = true; m_endpoint_remap_table_new_to_old[new_index] = old_index; @@ -696,11 +696,11 @@ namespace basisu gi.unpack(gi_unpacked); char buf[256]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); #else snprintf(buf, sizeof(buf), "basisu_backend_slice_%u.png", slice_index); -#endif +#endif save_png(buf, gi_unpacked); } @@ -964,7 +964,7 @@ namespace basisu for (uint32_t p = 0; p < 16; p++) cur_err += color_distance(false, src_pixels.get_ptr()[p], etc_blk_unpacked[p], false); } - + uint64_t best_trial_err = UINT64_MAX; int best_trial_idx = 0; uint32_t best_trial_history_buf_idx = 0; @@ -973,7 +973,7 @@ namespace basisu const bool use_strict_search = (m_params.m_compression_level == 0) && (selector_remap_thresh == 1.0f); const uint64_t limit_err = (uint64_t)ceilf(cur_err * selector_remap_thresh); - + for (uint32_t j = 0; j < selector_history_buf.size(); j++) { const int trial_idx = selector_history_buf[j]; @@ -997,7 +997,7 @@ namespace basisu etc_blk.get_block_colors(block_colors, 0); const uint8_t* pSelectors = 
&m_selector_palette[m_selector_remap_table_new_to_old[trial_idx]](0, 0); - + if (r.get_params().m_perceptual) { for (uint32_t p = 0; p < 16; p++) @@ -1022,7 +1022,7 @@ namespace basisu if ((trial_err < best_trial_err) && (trial_err <= thresh_err)) { assert(trial_err <= limit_err); - + best_trial_err = trial_err; best_trial_idx = trial_idx; best_trial_history_buf_idx = j; @@ -1415,7 +1415,7 @@ namespace basisu if (old_endpoint_was_used[old_endpoint_index]) { const uint32_t new_endpoint_index = m_endpoint_remap_table_old_to_new[old_endpoint_index]; - + new_endpoint_was_used[new_endpoint_index] = true; endpoint_remap_table_new_to_old[new_endpoint_index] = old_endpoint_index; @@ -1762,7 +1762,7 @@ namespace basisu m_output.m_selector_palette = coder.get_bytes(); } - } // if (m_params.m_use_global_sel_codebook) + } // if (m_params.m_use_global_sel_codebook) debug_printf("Selector codebook bits: %u bytes: %u, Bits per entry: %3.1f, Avg bits/texel: %3.3f\n", (int)m_output.m_selector_palette.size() * 8, (int)m_output.m_selector_palette.size(), diff --git a/encoder/basisu_backend.h b/encoder/basisu_backend.h index 393dccd2..bddc4f92 100644 --- a/encoder/basisu_backend.h +++ b/encoder/basisu_backend.h @@ -28,8 +28,8 @@ namespace basisu { clear(); } - - uint32_t m_endpoint_predictor; + + uint32_t m_endpoint_predictor; int m_endpoint_index; int m_selector_index; @@ -40,10 +40,10 @@ namespace basisu void clear() { m_endpoint_predictor = 0; - + m_endpoint_index = 0; m_selector_index = 0; - + m_selector_history_buf_index = 0; m_is_cr_target = false; } @@ -62,7 +62,7 @@ namespace basisu color_rgba m_color5; uint32_t m_inten5; bool m_color5_valid; - + void clear() { clear_obj(*this); @@ -264,12 +264,12 @@ namespace basisu return slice.m_first_block_index + block_y * slice.m_num_blocks_x + block_x; } - + uint32_t get_total_blocks(uint32_t slice_index) const { return m_slices[slice_index].m_num_blocks_x * m_slices[slice_index].m_num_blocks_y; } - + uint32_t get_total_blocks() const { 
uint32_t total_blocks = 0; diff --git a/encoder/basisu_basis_file.cpp b/encoder/basisu_basis_file.cpp index f4c77bef..3db73af8 100644 --- a/encoder/basisu_basis_file.cpp +++ b/encoder/basisu_basis_file.cpp @@ -27,14 +27,14 @@ namespace basisu m_header.m_data_size = m_total_file_size - sizeof(basist::basis_file_header); m_header.m_total_slices = (uint32_t)encoder_output.m_slice_desc.size(); - + m_header.m_total_images = 0; for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) m_header.m_total_images = maximum(m_header.m_total_images, encoder_output.m_slice_desc[i].m_source_file_index + 1); - + m_header.m_tex_format = (int)encoder_output.m_tex_format; m_header.m_flags = 0; - + if (encoder_output.m_etc1s) { assert(encoder_output.m_tex_format == basist::basis_tex_format::cETC1S); @@ -51,7 +51,7 @@ namespace basisu m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagUsesGlobalCodebook; if (encoder_output.m_srgb) m_header.m_flags = m_header.m_flags | basist::cBASISHeaderFlagSRGB; - + for (uint32_t i = 0; i < encoder_output.m_slice_desc.size(); i++) { if (encoder_output.m_slice_desc[i].m_alpha) @@ -108,7 +108,7 @@ namespace basisu m_images_descs[i].m_image_index = slice_descs[i].m_source_file_index; m_images_descs[i].m_level_index = slice_descs[i].m_mip_index; - + if (slice_descs[i].m_alpha) m_images_descs[i].m_flags = m_images_descs[i].m_flags | basist::cSliceDescFlagsHasAlpha; if (slice_descs[i].m_iframe) @@ -186,7 +186,7 @@ namespace basisu pHeader->m_data_size = m_total_file_size - sizeof(basist::basis_file_header); pHeader->m_data_crc16 = basist::crc16(&m_comp_data[0] + sizeof(basist::basis_file_header), m_total_file_size - sizeof(basist::basis_file_header), 0); - + pHeader->m_header_crc16 = basist::crc16(&pHeader->m_data_size, sizeof(basist::basis_file_header) - BASISU_OFFSETOF(basist::basis_file_header, m_data_size), 0); pHeader->m_sig = basist::basis_file_header::cBASISSigValue; @@ -242,7 +242,7 @@ namespace basisu m_tables_file_ofs = 0; 
m_first_image_file_ofs = m_slice_descs_file_ofs + sizeof(basist::basis_slice_desc) * (uint32_t)slice_descs.size(); } - + uint64_t total_file_size = m_first_image_file_ofs; for (uint32_t i = 0; i < encoder_output.m_slice_image_data.size(); i++) total_file_size += encoder_output.m_slice_image_data[i].size(); diff --git a/encoder/basisu_bc7enc.cpp b/encoder/basisu_bc7enc.cpp index 06aa7eb8..39837029 100644 --- a/encoder/basisu_bc7enc.cpp +++ b/encoder/basisu_bc7enc.cpp @@ -100,24 +100,24 @@ static void astc_init() { if (!astc_is_valid_endpoint_range(range)) continue; - + const uint32_t levels = astc_get_levels(range); uint32_t vals[256]; // TODO for (uint32_t i = 0; i < levels; i++) vals[i] = (unquant_astc_endpoint_val(i, range) << 8) | i; - + std::sort(vals, vals + levels); for (uint32_t i = 0; i < levels; i++) { uint32_t order = vals[i] & 0xFF; uint32_t unq = vals[i] >> 8; - + g_astc_sorted_order_unquant[range][i].m_unquant = (uint8_t)unq; g_astc_sorted_order_unquant[range][i].m_index = (uint8_t)order; - + } // i #if 0 @@ -187,7 +187,7 @@ static inline uint32_t astc_interpolate(uint32_t l, uint32_t h, uint32_t w) void bc7enc_compress_block_init() { astc_init(); - + // BC7 666.1 for (int c = 0; c < 256; c++) { @@ -225,11 +225,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 16; l++) { uint32_t low = (l << 4) | l; - + for (uint32_t h = 0; h < 16; h++) { uint32_t high = (h << 4) | h; - + const int k = astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -241,9 +241,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_4bit_3bit_optimal_endpoints[c] = best; - + } // c // ASTC [0,15] 2-bit @@ -254,11 +254,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 16; l++) { uint32_t low = (l << 4) | l; - + for (uint32_t h = 0; h < 16; h++) { uint32_t high = (h << 4) | h; - + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); 
const int err = (k - c) * (k - c); @@ -270,9 +270,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_4bit_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 7 [0,11] 2-bit @@ -283,11 +283,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 12; l++) { uint32_t low = g_astc_sorted_order_unquant[7][l].m_unquant; - + for (uint32_t h = 0; h < 12; h++) { uint32_t high = g_astc_sorted_order_unquant[7][h].m_unquant; - + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -299,9 +299,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_range7_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 13 [0,47] 4-bit @@ -312,11 +312,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 48; l++) { uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; - + for (uint32_t h = 0; h < 48; h++) { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - + const int k = astc_interpolate(low, high, g_astc_weights4[BC7ENC_ASTC_RANGE13_4BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -328,9 +328,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_range13_4bit_optimal_endpoints[c] = best; - + } // c // ASTC range 13 [0,47] 2-bit @@ -341,11 +341,11 @@ void bc7enc_compress_block_init() for (uint32_t l = 0; l < 48; l++) { uint32_t low = g_astc_sorted_order_unquant[13][l].m_unquant; - + for (uint32_t h = 0; h < 48; h++) { uint32_t high = g_astc_sorted_order_unquant[13][h].m_unquant; - + const int k = astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); const int err = (k - c) * (k - c); @@ -357,9 +357,9 @@ void bc7enc_compress_block_init() } } // h } // l - + g_astc_range13_2bit_optimal_endpoints[c] = best; - + } // c // ASTC range 11 [0,31] 5-bit @@ -394,14 +394,14 @@ void bc7enc_compress_block_init() static void compute_least_squares_endpoints_rgba(uint32_t N, const uint8_t 
*pSelectors, const bc7enc_vec4F* pSelector_weights, bc7enc_vec4F* pXl, bc7enc_vec4F* pXh, const color_quad_u8 *pColors) { - // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. double z00 = 0.0f, z01 = 0.0f, z10 = 0.0f, z11 = 0.0f; double q00_r = 0.0f, q10_r = 0.0f, t_r = 0.0f; double q00_g = 0.0f, q10_g = 0.0f, t_g = 0.0f; double q00_b = 0.0f, q10_b = 0.0f, t_b = 0.0f; double q00_a = 0.0f, q10_a = 0.0f, t_a = 0.0f; - + for (uint32_t i = 0; i < N; i++) { const uint32_t sel = pSelectors[i]; @@ -649,7 +649,7 @@ static uint64_t pack_astc_4bit_3bit_to_one_color(const color_cell_compressor_par { uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights3[BC7ENC_ASTC_4BIT_3BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -688,10 +688,10 @@ static uint64_t pack_astc_4bit_2bit_to_one_color_rgba(const color_cell_compresso { uint32_t low = (pResults->m_low_endpoint.m_c[i] << 4) | pResults->m_low_endpoint.m_c[i]; uint32_t high = (pResults->m_high_endpoint.m_c[i] << 4) | pResults->m_high_endpoint.m_c[i]; - + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_4BIT_2BIT_OPTIMAL_INDEX]); } - + uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) total_err += compute_color_distance_rgba(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); @@ -727,7 +727,7 @@ static uint64_t pack_astc_range7_2bit_to_one_color(const color_cell_compressor_p { uint32_t low = g_astc_sorted_order_unquant[7][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = 
g_astc_sorted_order_unquant[7][pResults->m_high_endpoint.m_c[i]].m_unquant; - + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE7_2BIT_OPTIMAL_INDEX]); } p.m_c[3] = 255; @@ -748,7 +748,7 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_ const endpoint_err *pEr = &g_astc_range13_2bit_optimal_endpoints[r]; const endpoint_err *pEg = &g_astc_range13_2bit_optimal_endpoints[g]; const endpoint_err *pEb = &g_astc_range13_2bit_optimal_endpoints[b]; - + color_quad_u8_set(&pResults->m_low_endpoint, pEr->m_lo, pEg->m_lo, pEb->m_lo, 47); color_quad_u8_set(&pResults->m_high_endpoint, pEr->m_hi, pEg->m_hi, pEb->m_hi, 47); pResults->m_pbits[0] = 0; @@ -767,10 +767,10 @@ static uint64_t pack_astc_range13_2bit_to_one_color(const color_cell_compressor_ { uint32_t low = g_astc_sorted_order_unquant[13][pResults->m_low_endpoint.m_c[i]].m_unquant; uint32_t high = g_astc_sorted_order_unquant[13][pResults->m_high_endpoint.m_c[i]].m_unquant; - + p.m_c[i] = (uint8_t)astc_interpolate(low, high, g_bc7_weights2[BC7ENC_ASTC_RANGE13_2BIT_OPTIMAL_INDEX]); } - + uint64_t total_err = 0; for (uint32_t i = 0; i < pParams->m_num_pixels; i++) total_err += compute_color_distance_rgb(&p, &pParams->m_pPixels[i], pParams->m_perceptual, pParams->m_weights); @@ -879,18 +879,18 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 const int dr = actualMaxColor.m_c[0] - lr; const int dg = actualMaxColor.m_c[1] - lg; const int db = actualMaxColor.m_c[2] - lb; - + uint64_t total_err = 0; - + if (pParams->m_pForce_selectors) { for (uint32_t i = 0; i < pParams->m_num_pixels; i++) { const color_quad_u8* pC = &pParams->m_pPixels[i]; - + const uint8_t sel = pParams->m_pForce_selectors[i]; assert(sel < N); - + total_err += (pParams->m_has_alpha ? 
compute_color_distance_rgba : compute_color_distance_rgb)(&weightedColors[sel], pC, pParams->m_perceptual, pParams->m_weights); pResults->m_pSelectors_temp[i] = sel; @@ -931,7 +931,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 --best_sel; } total_err += err1; - + pResults->m_pSelectors_temp[i] = (uint8_t)best_sel; } } @@ -1028,7 +1028,7 @@ static uint64_t evaluate_solution(const color_quad_u8 *pLow, const color_quad_u8 memcpy(pResults->m_pSelectors, pResults->m_pSelectors_temp, sizeof(pResults->m_pSelectors[0]) * pParams->m_num_pixels); } - + return total_err; } @@ -1258,7 +1258,7 @@ static uint64_t find_optimal_solution(uint32_t mode, bc7enc_vec4F xl, bc7enc_vec } } } - + fixDegenerateEndpoints(mode, &bestMinColor, &bestMaxColor, &xl, &xh, iscalep >> 1, 0); if ((pResults->m_best_overall_err == UINT64_MAX) || color_quad_u8_notequals(&bestMinColor, &pResults->m_low_endpoint) || color_quad_u8_notequals(&bestMaxColor, &pResults->m_high_endpoint) || (best_pbits[0] != pResults->m_pbits[0]) || (best_pbits[1] != pResults->m_pbits[1])) @@ -1297,7 +1297,7 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color colors[n-1].m_c[c] = g_astc_unquant[pParams->m_astc_endpoint_range][pResults->m_astc_high_endpoint.m_c[c]].m_unquant; assert(colors[n-1].m_c[c] == g_astc_sorted_order_unquant[pParams->m_astc_endpoint_range][pResults->m_high_endpoint.m_c[c]].m_unquant); } - + for (uint32_t i = 1; i < pParams->m_num_selector_weights - 1; i++) for (uint32_t c = 0; c < 4; c++) colors[i].m_c[c] = (uint8_t)astc_interpolate(colors[0].m_c[c], colors[n - 1].m_c[c], pParams->m_pSelector_weights[i]); @@ -1307,14 +1307,14 @@ void check_best_overall_error(const color_cell_compressor_params *pParams, color { const color_quad_u8 &orig = pParams->m_pPixels[p]; const color_quad_u8 &packed = colors[pResults->m_pSelectors[p]]; - + if (pParams->m_has_alpha) total_err += compute_color_distance_rgba(&orig, &packed, pParams->m_perceptual, 
pParams->m_weights); else total_err += compute_color_distance_rgb(&orig, &packed, pParams->m_perceptual, pParams->m_weights); } assert(total_err == pResults->m_best_overall_err); - + // HACK HACK //if (total_err != pResults->m_best_overall_err) // printf("X"); @@ -1417,12 +1417,12 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param bc7enc_vec4F color = vec4F_from_color(&pParams->m_pPixels[i]); meanColor = vec4F_add(&meanColor, &color); } - + bc7enc_vec4F meanColorScaled = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels)); meanColor = vec4F_mul(&meanColor, 1.0f / (float)(pParams->m_num_pixels * 255.0f)); vec4F_saturate_in_place(&meanColor); - + if (pParams->m_has_alpha) { // Use incremental PCA for RGBA PCA, because it's simple. @@ -1485,7 +1485,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param vec4F_set(&axis, xr, xg, xb, 0); } } - + if (vec4F_dot(&axis, &axis) < .5f) { if (pParams->m_perceptual) @@ -1494,7 +1494,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param vec4F_set(&axis, 1.0f, 1.0f, 1.0f, pParams->m_has_alpha ? 1.0f : 0); vec4F_normalize_in_place(&axis); } - + bc7enc_vec4F minColor, maxColor; float l = 1e+9f, h = -1e+9f; @@ -1519,7 +1519,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param bc7enc_vec4F c1 = vec4F_add(&meanColor, &b1); minColor = vec4F_saturate(&c0); maxColor = vec4F_saturate(&c1); - + bc7enc_vec4F whiteVec; vec4F_set_scalar(&whiteVec, 1.0f); if (vec4F_dot(&minColor, &whiteVec) > vec4F_dot(&maxColor, &whiteVec)) @@ -1543,7 +1543,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param // First find a solution using the block's PCA. 
if (!find_optimal_solution(mode, minColor, maxColor, pParams, pResults)) return 0; - + for (uint32_t i = 0; i < pComp_params->m_least_squares_passes; i++) { // Now try to refine the solution using least squares by computing the optimal endpoints from the current selectors. @@ -1557,11 +1557,11 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; } - + if ((!pParams->m_pForce_selectors) && (pComp_params->m_uber_level > 0)) { // In uber level 1, try varying the selectors a little, somewhat like cluster fit would. First try incrementing the minimum selectors, @@ -1598,7 +1598,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; @@ -1617,7 +1617,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param xl = vec4F_mul(&xl, (1.0f / 255.0f)); xh = vec4F_mul(&xh, (1.0f / 255.0f)); - + if (!find_optimal_solution(mode, xl, xh, pParams, pResults)) return 0; @@ -1674,7 +1674,7 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param } } } - + if (!pParams->m_pForce_selectors) { // Try encoding the partition as a single color by using the optimal single colors tables to encode the block to its mean. 
@@ -1755,13 +1755,13 @@ uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_param #if BC7ENC_CHECK_OVERALL_ERROR check_best_overall_error(pParams, pResults); #endif - + return pResults->m_best_overall_err; } uint64_t color_cell_compression_est_astc( uint32_t num_weights, uint32_t num_comps, const uint32_t *pWeight_table, - uint32_t num_pixels, const color_quad_u8* pPixels, + uint32_t num_pixels, const color_quad_u8* pPixels, uint64_t best_err_so_far, const uint32_t weights[4]) { assert(num_comps == 3 || num_comps == 4); @@ -1808,7 +1808,7 @@ uint64_t color_cell_compression_est_astc( color_quad_u8_set(&lowColor, lr, lg, lb, la); color_quad_u8_set(&highColor, hr, hg, hb, ha); - // Place endpoints at bbox diagonals and compute interpolated colors + // Place endpoints at bbox diagonals and compute interpolated colors color_quad_u8 weightedColors[32]; weightedColors[0] = lowColor; diff --git a/encoder/basisu_bc7enc.h b/encoder/basisu_bc7enc.h index 23469912..b7195c3e 100644 --- a/encoder/basisu_bc7enc.h +++ b/encoder/basisu_bc7enc.h @@ -25,7 +25,7 @@ namespace basisu #define BC7ENC_TRUE (1) #define BC7ENC_FALSE (0) - + typedef struct { float m_c[4]; } bc7enc_vec4F; extern const float g_bc7_weights1x[2 * 4]; @@ -35,9 +35,9 @@ namespace basisu extern const float g_astc_weights4x[16 * 4]; extern const float g_astc_weights5x[32 * 4]; extern const float g_astc_weights_3levelsx[3 * 4]; - + extern basist::astc_quant_bin g_astc_sorted_order_unquant[basist::BC7ENC_TOTAL_ASTC_RANGES][256]; // [sorted unquantized order] - + struct color_cell_compressor_params { uint32_t m_num_pixels; @@ -93,12 +93,12 @@ namespace basisu }; uint64_t color_cell_compression(uint32_t mode, const color_cell_compressor_params* pParams, color_cell_compressor_results* pResults, const bc7enc_compress_block_params* pComp_params); - + uint64_t color_cell_compression_est_astc( uint32_t num_weights, uint32_t num_comps, const uint32_t* pWeight_table, uint32_t num_pixels, const 
basist::color_quad_u8* pPixels, uint64_t best_err_so_far, const uint32_t weights[4]); - + inline void bc7enc_compress_block_params_init_linear_weights(bc7enc_compress_block_params* p) { p->m_perceptual = BC7ENC_FALSE; @@ -127,5 +127,5 @@ namespace basisu // bc7enc_compress_block_init() MUST be called before calling bc7enc_compress_block() (or you'll get artifacts). void bc7enc_compress_block_init(); - + } // namespace basisu diff --git a/encoder/basisu_comp.cpp b/encoder/basisu_comp.cpp index 9a4a1c00..20103f39 100644 --- a/encoder/basisu_comp.cpp +++ b/encoder/basisu_comp.cpp @@ -107,15 +107,15 @@ namespace basisu PRINT_BOOL_VALUE(m_renormalize); PRINT_BOOL_VALUE(m_multithreading); PRINT_BOOL_VALUE(m_disable_hierarchical_endpoint_codebooks); - + PRINT_FLOAT_VALUE(m_hybrid_sel_cb_quality_thresh); - + PRINT_INT_VALUE(m_global_pal_bits); PRINT_INT_VALUE(m_global_mod_bits); PRINT_FLOAT_VALUE(m_endpoint_rdo_thresh); PRINT_FLOAT_VALUE(m_selector_rdo_thresh); - + PRINT_BOOL_VALUE(m_mip_gen); PRINT_BOOL_VALUE(m_mip_renormalize); PRINT_BOOL_VALUE(m_mip_wrapping); @@ -134,7 +134,7 @@ namespace basisu debug_printf("m_userdata0: 0x%X, m_userdata1: 0x%X\n", m_params.m_userdata0, m_params.m_userdata1); debug_printf("m_us_per_frame: %i (%f fps)\n", m_params.m_us_per_frame, m_params.m_us_per_frame ? 
1.0f / (m_params.m_us_per_frame / 1000000.0f) : 0); debug_printf("m_pack_uastc_flags: 0x%X\n", m_params.m_pack_uastc_flags); - + PRINT_BOOL_VALUE(m_rdo_uastc); PRINT_FLOAT_VALUE(m_rdo_uastc_quality_scalar); PRINT_INT_VALUE(m_rdo_uastc_dict_size); @@ -163,7 +163,7 @@ namespace basisu debug_printf("Key: \"%s\"\n", m_params.m_ktx2_key_values[i].m_key.data()); debug_printf("Value size: %u\n", m_params.m_ktx2_key_values[i].m_value.size()); } - + #undef PRINT_BOOL_VALUE #undef PRINT_INT_VALUE #undef PRINT_UINT_VALUE @@ -178,7 +178,7 @@ namespace basisu return true; } - + basis_compressor::error_code basis_compressor::process() { debug_printf("basis_compressor::process\n"); @@ -218,7 +218,7 @@ namespace basisu if (!create_basis_file_and_transcode()) return cECFailedCreateBasisFile; - + if (m_params.m_create_ktx2_file) { if (!create_ktx2_file()) @@ -244,7 +244,7 @@ namespace basisu m_uastc_backend_output.m_slice_desc = m_slice_descs; m_uastc_backend_output.m_slice_image_data.resize(m_slice_descs.size()); m_uastc_backend_output.m_slice_image_crcs.resize(m_slice_descs.size()); - + for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { gpu_image& tex = m_uastc_slice_textures[slice_index]; @@ -255,7 +255,7 @@ namespace basisu const uint32_t num_blocks_y = tex.get_blocks_y(); const uint32_t total_blocks = tex.get_total_blocks(); const image& source_image = m_slice_images[slice_index]; - + std::atomic total_blocks_processed; total_blocks_processed = 0; @@ -271,7 +271,7 @@ namespace basisu { #endif BASISU_NOTE_UNUSED(num_blocks_y); - + uint32_t uastc_flags = m_params.m_pack_uastc_flags; if ((m_params.m_rdo_uastc) && (m_params.m_rdo_uastc_favor_simpler_modes_in_rdo_mode)) uastc_flags |= cPackUASTCFavorSimplerModes; @@ -290,7 +290,7 @@ namespace basisu encode_uastc(&block_pixels[0][0].r, dest_block, uastc_flags); total_blocks_processed++; - + uint32_t val = total_blocks_processed; if ((val & 16383) == 16383) { @@ -318,7 +318,7 @@ namespace basisu 
rdo_params.m_lz_dict_size = m_params.m_rdo_uastc_dict_size; rdo_params.m_smooth_block_max_error_scale = m_params.m_rdo_uastc_max_smooth_block_error_scale; rdo_params.m_max_smooth_block_std_dev = m_params.m_rdo_uastc_smooth_block_max_std_dev; - + bool status = uastc_rdo(tex.get_total_blocks(), (basist::uastc_block*)tex.get_ptr(), (const color_rgba *)m_source_blocks[slice_desc.m_first_block_index].m_pixels, rdo_params, m_params.m_pack_uastc_flags, m_params.m_rdo_uastc_multithreading ? m_params.m_pJob_pool : nullptr, (m_params.m_rdo_uastc_multithreading && m_params.m_pJob_pool) ? basisu::minimum(4, (uint32_t)m_params.m_pJob_pool->get_total_threads()) : 0); @@ -330,11 +330,11 @@ namespace basisu m_uastc_backend_output.m_slice_image_data[slice_index].resize(tex.get_size_in_bytes()); memcpy(&m_uastc_backend_output.m_slice_image_data[slice_index][0], tex.get_ptr(), tex.get_size_in_bytes()); - + m_uastc_backend_output.m_slice_image_crcs[slice_index] = basist::crc16(tex.get_ptr(), tex.get_size_in_bytes(), 0); - + } // slice_index - + return cECSuccess; } @@ -375,12 +375,12 @@ namespace basisu image &level_img = *enlarge_vector(mips, 1); level_img.resize(level_width, level_height); - - int result = stbir_resize_uint8_generic( + + int result = stbir_resize_uint8_generic( (const uint8_t *)img.get_ptr(), img.get_width(), img.get_height(), img.get_pitch() * sizeof(color_rgba), (uint8_t *)level_img.get_ptr(), level_img.get_width(), level_img.get_height(), level_img.get_pitch() * sizeof(color_rgba), has_alpha ? 4 : 3, has_alpha ? 3 : STBIR_ALPHA_CHANNEL_NONE, m_params.m_mip_premultiplied ? STBIR_FLAG_ALPHA_PREMULTIPLIED : 0, - m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, + m_params.m_mip_wrapping ? STBIR_EDGE_WRAP : STBIR_EDGE_CLAMP, filter, m_params.m_mip_srgb ? 
STBIR_COLORSPACE_SRGB : STBIR_COLORSPACE_LINEAR, nullptr); if (result == 0) @@ -388,7 +388,7 @@ namespace basisu error_printf("basis_compressor::generate_mipmaps: stbir_resize_uint8_generic() failed!\n"); return false; } - + if (m_params.m_mip_renormalize) level_img.renormalize_normal_map(); } @@ -446,14 +446,14 @@ namespace basisu basisu::vector source_images; basisu::vector source_filenames; - + // First load all source images, and determine if any have an alpha channel. for (uint32_t source_file_index = 0; source_file_index < total_source_files; source_file_index++) { const char *pSource_filename = ""; image file_image; - + if (m_params.m_read_source_images) { pSource_filename = m_params.m_source_filenames[source_file_index].c_str(); @@ -512,7 +512,7 @@ namespace basisu } alpha_swizzled = m_params.m_swizzle[3] != 3; } - + bool has_alpha = false; if (m_params.m_force_alpha || alpha_swizzled) has_alpha = true; @@ -525,7 +525,7 @@ namespace basisu m_any_source_image_has_alpha = true; debug_printf("Source image index %u filename %s %ux%u has alpha: %u\n", source_file_index, pSource_filename, file_image.get_width(), file_image.get_height(), has_alpha); - + if (m_params.m_y_flip) file_image.flip_y(); @@ -570,7 +570,7 @@ namespace basisu source_filenames.push_back(pSource_filename); } - // Check if the caller has generated their own mipmaps. + // Check if the caller has generated their own mipmaps. if (m_params.m_source_mipmap_images.size()) { // Make sure they've passed us enough mipmap chains. @@ -612,12 +612,12 @@ namespace basisu // Now, for each source image, create the slices corresponding to that image. basisu::vector slices; - + slices.reserve(32); - + // The first (largest) mipmap level. slices.push_back(file_image); - + if (m_params.m_source_mipmap_images.size()) { // User-provided mipmaps for each layer or image in the texture array. 
@@ -652,10 +652,10 @@ namespace basisu uint_vec mip_indices(slices.size()); for (uint32_t i = 0; i < slices.size(); i++) mip_indices[i] = i; - + if ((m_any_source_image_has_alpha) && (!m_params.m_uastc)) { - // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. + // For ETC1S, if source has alpha, then even mips will have RGB, and odd mips will have alpha in RGB. basisu::vector alpha_slices; uint_vec new_mip_indices; @@ -674,7 +674,7 @@ namespace basisu lvl_a(x, y).set_noclamp_rgba(a, a, a, 255); } } - + lvl_rgb.set_alpha(255); alpha_slices.push_back(lvl_rgb); @@ -689,7 +689,7 @@ namespace basisu } assert(slices.size() == mip_indices.size()); - + for (uint32_t slice_index = 0; slice_index < slices.size(); slice_index++) { image& slice_image = slices[slice_index]; @@ -748,7 +748,7 @@ namespace basisu slice_desc.m_num_macroblocks_y = (slice_desc.m_num_blocks_y + 1) >> 1; slice_desc.m_source_file_index = source_file_index; - + slice_desc.m_mip_index = mip_indices[slice_index]; slice_desc.m_alpha = is_alpha_slice; @@ -760,7 +760,7 @@ namespace basisu m_total_blocks += slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; total_macroblocks += slice_desc.m_num_macroblocks_x * slice_desc.m_num_macroblocks_y; - + } // slice_index } // source_file_index @@ -773,7 +773,7 @@ namespace basisu error_printf("Too many slices!\n"); return false; } - + // Basic sanity check on the slices for (uint32_t i = 1; i < m_slice_descs.size(); i++) { @@ -783,7 +783,7 @@ namespace basisu // Make sure images are in order int image_delta = (int)slice_desc.m_source_file_index - (int)prev_slice_desc.m_source_file_index; if (image_delta > 1) - return false; + return false; // Make sure mipmap levels are in order if (!image_delta) @@ -855,20 +855,20 @@ namespace basisu } // Do some basic validation for 2D arrays, cubemaps, video, and volumes. 
- bool basis_compressor::validate_texture_type_constraints() + bool basis_compressor::validate_texture_type_constraints() { debug_printf("basis_compressor::validate_texture_type_constraints\n"); // In 2D mode anything goes (each image may have a different resolution and # of mipmap levels). if (m_params.m_tex_type == basist::cBASISTexType2D) return true; - + uint32_t total_basis_images = 0; for (uint32_t slice_index = 0; slice_index < m_slice_images.size(); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - + total_basis_images = maximum(total_basis_images, slice_desc.m_source_file_index + 1); } @@ -891,7 +891,7 @@ namespace basisu const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; image_mipmap_levels[slice_desc.m_source_file_index] = maximum(image_mipmap_levels[slice_desc.m_source_file_index], slice_desc.m_mip_index + 1); - + if (slice_desc.m_mip_index != 0) continue; @@ -945,7 +945,7 @@ namespace basisu bool basis_compressor::process_frontend() { debug_printf("basis_compressor::process_frontend\n"); - + #if 0 // TODO basis_etc1_pack_params pack_params; @@ -996,21 +996,21 @@ namespace basisu error_printf("Too many selector clusters! (%u but max is %u)\n", selector_clusters, basisu_frontend::cMaxSelectorClusters); return false; } - + if (m_params.m_quality_level != -1) { const float quality = saturate(m_params.m_quality_level / 255.0f); - + const float bits_per_endpoint_cluster = 14.0f; const float max_desired_endpoint_cluster_bits_per_texel = 1.0f; // .15f int max_endpoints = static_cast((max_desired_endpoint_cluster_bits_per_texel * total_texels) / bits_per_endpoint_cluster); - + const float mid = 128.0f / 255.0f; float color_endpoint_quality = quality; const float endpoint_split_point = 0.5f; - + // In v1.2 and in previous versions, the endpoint codebook size at quality 128 was 3072. This wasn't quite large enough. 
const int ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE = 4800; const int MAX_ENDPOINT_CODEBOOK_SIZE = 8192; @@ -1021,7 +1021,7 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < 64) max_endpoints = 64; endpoint_clusters = clamp((uint32_t)(.5f + lerp(32, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); @@ -1032,12 +1032,12 @@ namespace basisu max_endpoints = clamp(max_endpoints, 256, MAX_ENDPOINT_CODEBOOK_SIZE); max_endpoints = minimum(max_endpoints, m_total_blocks); - + if (max_endpoints < ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE) max_endpoints = ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE; endpoint_clusters = clamp((uint32_t)(.5f + lerp(ENDPOINT_CODEBOOK_MID_QUALITY_CODEBOOK_SIZE, static_cast(max_endpoints), color_endpoint_quality)), 32, basisu_frontend::cMaxEndpointClusters); } - + float bits_per_selector_cluster = m_params.m_global_sel_pal ? 
21.0f : 14.0f; const float max_desired_selector_cluster_bits_per_texel = 1.0f; // .15f @@ -1061,7 +1061,7 @@ namespace basisu { if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= .25f; - + if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= .25f; } @@ -1088,7 +1088,7 @@ namespace basisu if (!m_params.m_endpoint_rdo_thresh.was_changed()) m_params.m_endpoint_rdo_thresh *= lerp(1.0f, .75f, l); - + if (!m_params.m_selector_rdo_thresh.was_changed()) m_params.m_selector_rdo_thresh *= lerp(1.0f, .75f, l); } @@ -1146,18 +1146,18 @@ namespace basisu for (uint32_t i = 0; i < m_slice_descs.size(); i++) { char filename[1024]; -#ifdef _WIN32 +#ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_output_blocks_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, true); #ifdef _WIN32 sprintf_s(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); #else snprintf(filename, sizeof(filename), "rdo_frontend_output_api_%u.png", i); -#endif +#endif m_frontend.dump_debug_image(filename, m_slice_descs[i].m_first_block_index, m_slice_descs[i].m_num_blocks_x, m_slice_descs[i].m_num_blocks_y, false); } } @@ -1218,13 +1218,13 @@ namespace basisu backend_params.m_debug_images = m_params.m_debug_images; backend_params.m_etc1s = true; backend_params.m_compression_level = m_params.m_compression_level; - + if (!m_params.m_no_endpoint_rdo) backend_params.m_endpoint_rdo_quality_thresh = m_params.m_endpoint_rdo_thresh; if (!m_params.m_no_selector_rdo) backend_params.m_selector_rdo_quality_thresh = m_params.m_selector_rdo_thresh; - + backend_params.m_use_global_sel_codebook = (m_frontend.get_params().m_pGlobal_sel_codebook != NULL); backend_params.m_global_sel_codebook_pal_bits = 
m_frontend.get_params().m_num_global_sel_codebook_pal_bits; backend_params.m_global_sel_codebook_mod_bits = m_frontend.get_params().m_num_global_sel_codebook_mod_bits; @@ -1256,7 +1256,7 @@ namespace basisu error_printf("basis_compressor::create_basis_file_and_transcode: basisu_backend:init() failed!\n"); return false; } - + const uint8_vec &comp_data = m_basis_file.get_compressed_data(); m_output_basis_file = comp_data; @@ -1281,7 +1281,7 @@ namespace basisu m_decoded_output_textures_bc7.resize(m_slice_descs.size()); m_decoded_output_textures_unpacked_bc7.resize(m_slice_descs.size()); - + tm.start(); if (m_params.m_pGlobal_codebooks) { @@ -1307,12 +1307,12 @@ namespace basisu { gpu_image decoded_texture; decoded_texture.init(m_params.m_uastc ? texture_format::cASTC4x4 : texture_format::cETC1, m_slice_descs[i].m_width, m_slice_descs[i].m_height); - + tm.start(); basist::block_format format = m_params.m_uastc ? basist::block_format::cASTC_4x4 : basist::block_format::cETC1; uint32_t bytes_per_block = m_params.m_uastc ? 
16 : 8; - + if (!decoder.transcode_slice(&comp_data[0], (uint32_t)comp_data.size(), i, reinterpret_cast(decoded_texture.get_ptr()), m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y, format, bytes_per_block)) { @@ -1338,7 +1338,7 @@ namespace basisu total_orig_pixels += m_slice_descs[i].m_orig_width * m_slice_descs[i].m_orig_height; total_texels += m_slice_descs[i].m_width * m_slice_descs[i].m_height; } - + double total_time_bc7 = 0; if (basist::basis_is_format_supported(basist::transcoder_texture_format::cTFBC7_RGBA, basist::basis_tex_format::cUASTC4x4) && @@ -1378,7 +1378,7 @@ namespace basisu debug_printf("Transcoded to BC7 in %3.3fms, %f texels/sec\n", total_time_bc7 * 1000.0f, total_orig_pixels / total_time_bc7); debug_printf("Total .basis output file size: %u, %3.3f bits/texel\n", comp_data.size(), comp_data.size() * 8.0f / total_orig_pixels); - + uint32_t total_orig_texels = 0; for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { @@ -1434,7 +1434,7 @@ namespace basisu uint32_t total_texels = 0; for (uint32_t i = 0; i < m_slice_descs.size(); i++) total_texels += (m_slice_descs[i].m_num_blocks_x * m_slice_descs[i].m_num_blocks_y) * 16; - + m_basis_bits_per_texel = comp_size * 8.0f / total_texels; debug_printf(".basis file size: %u, LZ compressed file size: %u, %3.2f bits/texel\n", @@ -1444,13 +1444,13 @@ namespace basisu } m_stats.resize(m_slice_descs.size()); - + uint32_t total_orig_texels = 0; for (uint32_t slice_index = 0; slice_index < m_slice_descs.size(); slice_index++) { const basisu_backend_slice_desc &slice_desc = m_slice_descs[slice_index]; - + total_orig_texels += slice_desc.m_orig_width * slice_desc.m_orig_height; if (m_params.m_compute_stats) @@ -1462,7 +1462,7 @@ namespace basisu // TODO: We used to output SSIM (during heavy encoder development), but this slowed down compression too much. We'll be adding it back. 
image_metrics em; - + // ---- .basis stats em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 3); em.print(".basis RGB Avg: "); @@ -1474,10 +1474,10 @@ namespace basisu em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 1); em.print(".basis R Avg: "); - + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 1, 1); em.print(".basis G Avg: "); - + em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 2, 1); em.print(".basis B Avg: "); @@ -1497,7 +1497,7 @@ namespace basisu em.calc(m_slice_images[slice_index], m_decoded_output_textures_unpacked[slice_index], 0, 0, true, true); em.print(".basis 601 Luma: "); s.m_basis_luma_601_psnr = static_cast(em.m_psnr); - + if (m_slice_descs.size() == 1) { const uint32_t output_size = comp_size ? (uint32_t)comp_size : (uint32_t)comp_data.size(); @@ -1563,7 +1563,7 @@ namespace basisu s.m_best_etc1s_rgb_avg_psnr = static_cast(em.m_psnr); } } - + std::string out_basename; if (m_params.m_out_filename.size()) string_get_filename(m_params.m_out_filename.c_str(), out_basename); @@ -1614,10 +1614,10 @@ namespace basisu } } } - + return true; } - + // Make sure all the mip 0's have the same dimensions and number of mipmap levels, or we can't encode the KTX2 file. 
bool basis_compressor::validate_ktx2_constraints() { @@ -1663,7 +1663,7 @@ namespace basisu static uint8_t g_ktx2_etc1s_alpha_dfd[60] = { 0x3C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x38,0x0,0xA3,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3F,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF,0x40,0x0,0x3F,0xF,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; static uint8_t g_ktx2_uastc_nonalpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x4,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; static uint8_t g_ktx2_uastc_alpha_dfd[44] = { 0x2C,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x2,0x0,0x28,0x0,0xA6,0x1,0x2,0x0,0x3,0x3,0x0,0x0,0x10,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x7F,0x3,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0xFF,0xFF,0xFF,0xFF }; - + void basis_compressor::get_dfd(uint8_vec &dfd, const basist::ktx2_header &header) { const uint8_t* pDFD; @@ -1695,14 +1695,14 @@ namespace basisu dfd_len = sizeof(g_ktx2_etc1s_nonalpha_dfd); } } - + assert(dfd_len >= 44); dfd.resize(dfd_len); memcpy(dfd.data(), pDFD, dfd_len); uint32_t dfd_bits = basisu::read_le_dword(dfd.data() + 3 * sizeof(uint32_t)); - + dfd_bits &= ~(0xFF << 16); if (m_params.m_ktx2_srgb_transfer_func) @@ -1727,7 +1727,7 @@ namespace basisu if (m_params.m_uastc) { dfd_chan0 &= ~(0xF << 24); - + // TODO: Allow the caller to override this if (m_any_source_image_has_alpha) dfd_chan0 |= (basist::KTX2_DF_CHANNEL_UASTC_RGBA << 24); @@ -1751,7 +1751,7 @@ namespace basisu // Determine the width/height, number of array layers, mipmap levels, and the number of faces (1 for 2D, 6 for cubemap). // This does not support 1D or 3D. 
uint32_t base_width = 0, base_height = 0, total_layers = 0, total_levels = 0, total_faces = 1; - + for (uint32_t i = 0; i < m_slice_descs.size(); i++) { if ((m_slice_descs[i].m_mip_index == 0) && (!base_width)) @@ -1769,7 +1769,7 @@ namespace basisu if (m_params.m_tex_type == basist::cBASISTexTypeCubemapArray) { assert((total_layers % 6) == 0); - + total_layers /= 6; assert(total_layers >= 1); @@ -1855,7 +1855,7 @@ namespace basisu // No supercompression compressed_level_data_bytes = level_data_bytes; } - + uint8_vec etc1s_global_data; // Create ETC1S global supercompressed data @@ -1909,14 +1909,14 @@ namespace basisu append_vector(etc1s_global_data, backend_output.m_endpoint_palette); append_vector(etc1s_global_data, backend_output.m_selector_palette); append_vector(etc1s_global_data, backend_output.m_slice_image_tables); - + header.m_supercompression_scheme = basist::KTX2_SS_BASISLZ; } // Key values basist::ktx2_transcoder::key_value_vec key_values(m_params.m_ktx2_key_values); key_values.enlarge(1); - + const char* pKTXwriter = "KTXwriter"; key_values.back().m_key.resize(strlen(pKTXwriter) + 1); memcpy(key_values.back().m_key.data(), pKTXwriter, strlen(pKTXwriter) + 1); @@ -1977,7 +1977,7 @@ namespace basisu #if BASISU_DISABLE_KTX2_ALIGNMENT_WORKAROUND break; #endif - + // Hack to ensure the KVD block ends on a 16 byte boundary, because we have no other official way of aligning the data. uint32_t kvd_end_file_offset = kvd_file_offset + key_value_data.size(); uint32_t bytes_needed_to_pad = (16 - (kvd_end_file_offset & 15)) & 15; @@ -1995,13 +1995,13 @@ namespace basisu bytes_needed_to_pad += 16; printf("WARNING: Due to a KTX2 validator bug related to mipPadding, we must insert a dummy key into the KTX2 file of %u bytes\n", bytes_needed_to_pad); - - // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. 
+ + // We're not good - need to add a dummy key large enough to force file alignment so the mip level array gets aligned. // We can't just add some bytes before the mip level array because ktx2check will see that as extra data in the file that shouldn't be there in ktxValidator::validateDataSize(). key_values.enlarge(1); for (uint32_t i = 0; i < (bytes_needed_to_pad - 4 - 1 - 1); i++) key_values.back().m_key.push_back(127); - + key_values.back().m_key.push_back(0); key_values.back().m_value.push_back(0); @@ -2009,13 +2009,13 @@ namespace basisu key_values.sort(); key_value_data.resize(0); - + // Try again } basisu::vector level_index_array(total_levels); memset(level_index_array.data(), 0, level_index_array.size_in_bytes()); - + m_output_ktx2_file.clear(); m_output_ktx2_file.reserve(m_output_basis_file.size()); @@ -2024,7 +2024,7 @@ namespace basisu // Level index array append_vector(m_output_ktx2_file, (const uint8_t*)level_index_array.data(), level_index_array.size_in_bytes()); - + // DFD const uint8_t* pDFD = dfd.data(); uint32_t dfd_len = dfd.size(); @@ -2084,7 +2084,7 @@ namespace basisu level_index_array[level].m_byte_offset = m_output_ktx2_file.size(); append_vector(m_output_ktx2_file, compressed_level_data_bytes[level]); } - + // Write final header memcpy(m_output_ktx2_file.data(), &header, sizeof(header)); diff --git a/encoder/basisu_comp.h b/encoder/basisu_comp.h index 748b872c..641b6b3a 100644 --- a/encoder/basisu_comp.h +++ b/encoder/basisu_comp.h @@ -39,10 +39,10 @@ namespace basisu const uint32_t BASISU_MAX_SUPPORTED_TEXTURE_DIMENSION = 16384; // Allow block's color distance to increase by 1.5 while searching for an alternative nearby endpoint. - const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; - + const float BASISU_DEFAULT_ENDPOINT_RDO_THRESH = 1.5f; + // Allow block's color distance to increase by 1.25 while searching the selector history buffer for a close enough match. 
- const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; + const float BASISU_DEFAULT_SELECTOR_RDO_THRESH = 1.25f; const int BASISU_DEFAULT_QUALITY = 128; const float BASISU_DEFAULT_HYBRID_SEL_CB_QUALITY_THRESH = 2.0f; @@ -71,7 +71,7 @@ namespace basisu m_filename.clear(); m_width = 0; m_height = 0; - + m_basis_rgb_avg_psnr = 0.0f; m_basis_rgba_avg_psnr = 0.0f; m_basis_a_avg_psnr = 0.0f; @@ -85,7 +85,7 @@ namespace basisu m_bc7_luma_709_psnr = 0.0f; m_bc7_luma_601_psnr = 0.0f; m_bc7_luma_709_ssim = 0.0f; - + m_best_etc1s_rgb_avg_psnr = 0.0f; m_best_etc1s_luma_709_psnr = 0.0f; m_best_etc1s_luma_601_psnr = 0.0f; @@ -111,7 +111,7 @@ namespace basisu float m_bc7_luma_709_psnr; float m_bc7_luma_601_psnr; float m_bc7_luma_709_ssim; - + // Highest achievable quality ETC1S statistics float m_best_etc1s_rgb_avg_psnr; float m_best_etc1s_luma_709_psnr; @@ -274,7 +274,7 @@ namespace basisu m_no_endpoint_rdo.clear(); m_endpoint_rdo_thresh.clear(); - + m_mip_gen.clear(); m_mip_scale.clear(); m_mip_filter = "kaiser"; @@ -317,25 +317,25 @@ namespace basisu m_pJob_pool = nullptr; } - + // Pointer to the global selector codebook, or nullptr to not use a global selector codebook const basist::etc1_global_selector_codebook *m_pSel_codebook; // True to generate UASTC .basis file data, otherwise ETC1S. bool_param m_uastc; - // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. + // If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG images to read. // Otherwise, the compressor processes the images in m_source_images. basisu::vector m_source_filenames; basisu::vector m_source_alpha_filenames; - + basisu::vector m_source_images; - + // Stores mipmaps starting from level 1. Level 0 is still stored in m_source_images, as usual. // If m_source_mipmaps isn't empty, automatic mipmap generation isn't done. 
m_source_mipmaps.size() MUST equal m_source_images.size() or the compressor returns an error. // The compressor applies the user-provided swizzling (in m_swizzle) to these images. basisu::vector< basisu::vector > m_source_mipmap_images; - + // Filename of the output basis file std::string m_out_filename; @@ -346,11 +346,11 @@ namespace basisu // If true, the compressor will print basis status to stdout during compression. bool_param m_status_output; - + // Output debug information during compression bool_param m_debug; bool_param m_validate; - + // m_debug_images is pretty slow bool_param m_debug_images; @@ -362,7 +362,7 @@ namespace basisu // Frontend/backend codec parameters bool_param m_no_hybrid_sel_cb; - + // Use perceptual sRGB colorspace metrics instead of linear bool_param m_perceptual; @@ -378,17 +378,17 @@ namespace basisu // Write the output basis file to disk using m_out_filename bool_param m_write_output_basis_files; - - // Compute and display image metrics + + // Compute and display image metrics bool_param m_compute_stats; - + // Check to see if any input image has an alpha channel, if so then the output basis file will have alpha channels bool_param m_check_for_alpha; - + // Always put alpha slices in the output basis file, even when the input doesn't have alpha - bool_param m_force_alpha; + bool_param m_force_alpha; bool_param m_multithreading; - + // Split the R channel to RGB and the G channel to alpha, then write a basis file with alpha channels char m_swizzle[4]; @@ -400,25 +400,25 @@ namespace basisu param m_hybrid_sel_cb_quality_thresh; param m_global_pal_bits; param m_global_mod_bits; - + // mipmap generation parameters bool_param m_mip_gen; param m_mip_scale; std::string m_mip_filter; bool_param m_mip_srgb; bool_param m_mip_premultiplied; // not currently supported - bool_param m_mip_renormalize; + bool_param m_mip_renormalize; bool_param m_mip_wrapping; bool_param m_mip_fast; param m_mip_smallest_dimension; - - // Codebook size (quality) 
control. + + // Codebook size (quality) control. // If m_quality_level != -1, it controls the quality level. It ranges from [0,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX]. // Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly. uint32_t m_max_endpoint_clusters; uint32_t m_max_selector_clusters; int m_quality_level; - + // m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the Basis file header. basist::basis_texture_type m_tex_type; uint32_t m_userdata0; @@ -450,7 +450,7 @@ namespace basisu job_pool *m_pJob_pool; }; - + class basis_compressor { BASISU_NO_EQUALS_OR_COPY_CONSTRUCT(basis_compressor); @@ -459,7 +459,7 @@ namespace basisu basis_compressor(); bool init(const basis_compressor_params ¶ms); - + enum error_code { cECSuccess = 0, @@ -479,7 +479,7 @@ namespace basisu // The output .basis file will always be valid of process() succeeded. const uint8_vec &get_output_basis_file() const { return m_output_basis_file; } - + // The output .ktx2 file will only be valid if m_create_ktx2_file was true and process() succeeded. 
const uint8_vec& get_output_ktx2_file() const { return m_output_ktx2_file; } @@ -487,19 +487,19 @@ namespace basisu uint32_t get_basis_file_size() const { return m_basis_file_size; } double get_basis_bits_per_texel() const { return m_basis_bits_per_texel; } - + bool get_any_source_image_has_alpha() const { return m_any_source_image_has_alpha; } - + private: basis_compressor_params m_params; - + basisu::vector m_slice_images; basisu::vector m_stats; uint32_t m_basis_file_size; double m_basis_bits_per_texel; - + basisu_backend_slice_desc_vec m_slice_descs; uint32_t m_total_blocks; @@ -524,7 +524,7 @@ namespace basisu uint8_vec m_output_basis_file; uint8_vec m_output_ktx2_file; - + basisu::vector m_uastc_slice_textures; basisu_backend_output m_uastc_backend_output; diff --git a/encoder/basisu_enc.cpp b/encoder/basisu_enc.cpp index f02fb62c..72e1c24a 100644 --- a/encoder/basisu_enc.cpp +++ b/encoder/basisu_enc.cpp @@ -59,7 +59,7 @@ namespace basisu // This is a Public Domain 8x8 font from here: // https://github.com/dhepper/font8x8/blob/master/font8x8_basic.h - const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = + const uint8_t g_debug_font8x8_basic[127 - 32 + 1][8] = { { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+0020 ( ) { 0x18, 0x3C, 0x3C, 0x18, 0x18, 0x00, 0x18, 0x00}, // U+0021 (!) 
@@ -158,7 +158,7 @@ namespace basisu { 0x6E, 0x3B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, // U+007E (~) { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} // U+007F }; - + // Encoder library initialization (just call once at startup) void basisu_encoder_init() { @@ -176,7 +176,7 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); @@ -222,7 +222,7 @@ namespace basisu #else #error TODO #endif - + interval_timer::interval_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_timer_freq) @@ -256,7 +256,7 @@ namespace basisu timer_ticks delta = stop_time - m_start_time; return delta * g_timer_freq; } - + void interval_timer::init() { if (!g_timer_freq) @@ -282,7 +282,7 @@ namespace basisu init(); return ticks * g_timer_freq; } - + const uint32_t MAX_32BIT_ALLOC_SIZE = 250000000; bool load_bmp(const char* pFilename, image& img) @@ -290,14 +290,14 @@ namespace basisu int w = 0, h = 0; unsigned int n_chans = 0; unsigned char* pImage_data = apg_bmp_read(pFilename, &w, &h, &n_chans); - + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) { error_printf("Failed loading .BMP image \"%s\"!\n", pFilename); if (pImage_data) apg_bmp_free(pImage_data); - + return false; } @@ -313,7 +313,7 @@ namespace basisu return false; } } - + img.resize(w, h); const uint8_t *pSrc = pImage_data; @@ -337,19 +337,19 @@ namespace basisu return true; } - + bool load_tga(const char* pFilename, image& img) { int w = 0, h = 0, n_chans = 0; uint8_t* pImage_data = read_tga(pFilename, w, h, n_chans); - + if ((!pImage_data) || (!w) || (!h) || ((n_chans != 3) && (n_chans != 4))) { error_printf("Failed loading .TGA image \"%s\"!\n", pFilename); if (pImage_data) free(pImage_data); - + return false; } @@ -365,7 +365,7 @@ namespace basisu return false; } } - + img.resize(w, h); const uint8_t *pSrc = pImage_data; @@ -431,7 +431,7 @@ 
namespace basisu return true; } - + bool load_png(const char* pFilename, image& img) { std::vector buffer; @@ -449,9 +449,9 @@ namespace basisu uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering); if (!pImage_data) return false; - + img.init(pImage_data, width, height, 4); - + free(pImage_data); return true; @@ -477,7 +477,7 @@ namespace basisu return false; } - + bool save_png(const char* pFilename, const image &img, uint32_t image_save_flags, uint32_t grayscale_comp) { if (!img.get_total_pixels()) @@ -489,7 +489,7 @@ namespace basisu std::vector out; unsigned err = 0; - + if (image_save_flags & cImageSaveGrayscale) { uint8_vec g_pixels(img.get_width() * img.get_height()); @@ -511,7 +511,7 @@ namespace basisu return false; uint8_vec rgb_pixels(static_cast(total_bytes)); uint8_t *pDst = &rgb_pixels[0]; - + for (uint32_t y = 0; y < img.get_height(); y++) { for (uint32_t x = 0; x < img.get_width(); x++) @@ -538,7 +538,7 @@ namespace basisu return true; } - + bool read_file_to_vec(const char* pFilename, uint8_vec& data) { FILE* pFile = nullptr; @@ -549,7 +549,7 @@ namespace basisu #endif if (!pFile) return false; - + fseek(pFile, 0, SEEK_END); #ifdef _WIN32 int64_t filesize = _ftelli64(pFile); @@ -634,17 +634,17 @@ namespace basisu } bool image_resample(const image &src, image &dst, bool srgb, - const char *pFilter, float filter_scale, + const char *pFilter, float filter_scale, bool wrapping, uint32_t first_comp, uint32_t num_comps) { assert((first_comp + num_comps) <= 4); const int cMaxComps = 4; - + const uint32_t src_w = src.get_width(), src_h = src.get_height(); const uint32_t dst_w = dst.get_width(), dst_h = dst.get_height(); - + if (maximum(src_w, src_h) > BASISU_RESAMPLER_MAX_DIMENSION) { printf("Image is too large!\n"); @@ -653,10 +653,10 @@ namespace basisu if (!src_w || !src_h || !dst_w || !dst_h) return false; - + if ((num_comps < 1) || (num_comps > 
cMaxComps)) return false; - + if ((minimum(dst_w, dst_h) < 1) || (maximum(dst_w, dst_h) > BASISU_RESAMPLER_MAX_DIMENSION)) { printf("Image is too large!\n"); @@ -687,7 +687,7 @@ namespace basisu std::vector samples[cMaxComps]; Resampler *resamplers[cMaxComps]; - + resamplers[0] = new Resampler(src_w, src_h, dst_w, dst_h, wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, pFilter, nullptr, nullptr, filter_scale, filter_scale, 0, 0); @@ -747,7 +747,7 @@ namespace basisu break; const bool linear_flag = !srgb || (comp_index == 3); - + color_rgba *pDst = &dst(0, dst_y); for (uint32_t x = 0; x < dst_w; x++) @@ -791,9 +791,9 @@ namespace basisu A[0].m_key = 1; return; } - + A[0].m_key += A[1].m_key; - + int s = 2, r = 0, next; for (next = 1; next < (num_syms - 1); ++next) { @@ -885,7 +885,7 @@ namespace basisu for (i = 0; i < num_syms; i++) { uint32_t freq = pSyms0[i].m_key; - + // We scale all input frequencies to 16-bits. assert(freq <= UINT16_MAX); @@ -1076,7 +1076,7 @@ namespace basisu uint32_t total_used = tab.get_total_used_codes(); put_bits(total_used, cHuffmanMaxSymsLog2); - + if (!total_used) return 0; @@ -1140,7 +1140,7 @@ namespace basisu const uint32_t l = syms[i] & 63, e = syms[i] >> 6; put_code(l, ct); - + if (l == cHuffmanSmallZeroRunCode) put_bits(e, cHuffmanSmallZeroRunExtraBits); else if (l == cHuffmanBigZeroRunCode) @@ -1167,7 +1167,7 @@ namespace basisu huffman_encoding_table etab; etab.init(h, 16); - + { bitwise_coder c; c.init(1024); @@ -1302,9 +1302,9 @@ namespace basisu // We now have chosen an entry to place in the picked list, now determine which side it goes on. 
const uint32_t entry_to_move = m_entries_to_do[best_entry]; - + float side = pick_side(num_syms, entry_to_move, pDist_func, pCtx, dist_func_weight); - + // Put entry_to_move either on the "left" or "right" side of the picked entries if (side <= 0) m_entries_picked.push_back(entry_to_move); @@ -1482,7 +1482,7 @@ namespace basisu uint32_t hash_hsieh(const uint8_t *pBuf, size_t len) { - if (!pBuf || !len) + if (!pBuf || !len) return 0; uint32_t h = static_cast(len); @@ -1495,23 +1495,23 @@ namespace basisu const uint16_t *pWords = reinterpret_cast(pBuf); h += pWords[0]; - + const uint32_t t = (pWords[1] << 11) ^ h; h = (h << 16) ^ t; - + pBuf += sizeof(uint32_t); - + h += h >> 11; } switch (bytes_left) { - case 1: + case 1: h += *reinterpret_cast(pBuf); h ^= h << 10; h += h >> 1; break; - case 2: + case 2: h += *reinterpret_cast(pBuf); h ^= h << 11; h += h >> 17; @@ -1525,7 +1525,7 @@ namespace basisu default: break; } - + h ^= h << 3; h += h >> 5; h ^= h << 4; @@ -1536,7 +1536,7 @@ namespace basisu return h; } - job_pool::job_pool(uint32_t num_threads) : + job_pool::job_pool(uint32_t num_threads) : m_num_active_jobs(0), m_kill_flag(false) { @@ -1556,17 +1556,17 @@ namespace basisu job_pool::~job_pool() { debug_printf("job_pool::~job_pool\n"); - + // Notify all workers that they need to die right now. m_kill_flag = true; - + m_has_work.notify_all(); // Wait for all workers to die. 
for (uint32_t i = 0; i < m_threads.size(); i++) m_threads[i].join(); } - + void job_pool::add_job(const std::function& job) { std::unique_lock lock(m_mutex); @@ -1586,7 +1586,7 @@ namespace basisu std::unique_lock lock(m_mutex); m_queue.emplace_back(std::move(job)); - + const size_t queue_size = m_queue.size(); lock.unlock(); @@ -1621,7 +1621,7 @@ namespace basisu void job_pool::job_thread(uint32_t index) { debug_printf("job_pool::job_thread: starting %u\n", index); - + while (true) { std::unique_lock lock(m_mutex); @@ -1647,9 +1647,9 @@ namespace basisu --m_num_active_jobs; - // Now check if there are no more jobs remaining. + // Now check if there are no more jobs remaining. const bool all_done = m_queue.empty() && !m_num_active_jobs; - + lock.unlock(); if (all_done) @@ -1708,7 +1708,7 @@ namespace basisu // Simple validation if ((hdr.m_cmap != 0) && (hdr.m_cmap != 1)) return nullptr; - + if (hdr.m_cmap) { if ((hdr.m_cmap_bpp == 0) || (hdr.m_cmap_bpp > 32)) @@ -1867,13 +1867,13 @@ namespace basisu bytes_remaining += bytes_to_skip; } } - + width = hdr.m_width; height = hdr.m_height; const uint32_t source_pitch = width * tga_bytes_per_pixel; const uint32_t dest_pitch = width * n_chans; - + uint8_t *pImage = (uint8_t *)malloc(dest_pitch * height); if (!pImage) return nullptr; @@ -1895,7 +1895,7 @@ namespace basisu int pixels_remaining = width; uint8_t *pDst = &input_line_buf[0]; - do + do { if (!run_remaining) { @@ -2080,7 +2080,7 @@ namespace basisu if (!filedata.size() || (filedata.size() > UINT32_MAX)) return nullptr; - + return read_tga(&filedata[0], (uint32_t)filedata.size(), width, height, n_chans); } @@ -2090,7 +2090,7 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); @@ -2115,7 +2115,7 @@ namespace basisu for (uint32_t x = 0; x < 8; x++) { const uint32_t q = row_bits & (1 << x); - + const color_rgba* pColor = q ? 
&fg : pBG; if (!pColor) continue; @@ -2135,5 +2135,5 @@ namespace basisu } } } - + } // namespace basisu diff --git a/encoder/basisu_enc.h b/encoder/basisu_enc.h index 05c95cbc..24832b02 100644 --- a/encoder/basisu_enc.h +++ b/encoder/basisu_enc.h @@ -60,18 +60,18 @@ namespace basisu return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - inline int32_t clampi(int32_t value, int32_t low, int32_t high) - { - if (value < low) - value = low; - else if (value > high) - value = high; - return value; + inline int32_t clampi(int32_t value, int32_t low, int32_t high) + { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; } inline uint8_t mul_8(uint32_t v, uint32_t a) { - v = v * a + 128; + v = v * a + 128; return (uint8_t)((v + (v >> 8)) >> 8); } @@ -120,10 +120,10 @@ namespace basisu return bits; } - + // Hashing - - inline uint32_t bitmix32c(uint32_t v) + + inline uint32_t bitmix32c(uint32_t v) { v = (v + 0x7ed55d16) + (v << 12); v = (v ^ 0xc761c23c) ^ (v >> 19); @@ -134,7 +134,7 @@ namespace basisu return v; } - inline uint32_t bitmix32(uint32_t v) + inline uint32_t bitmix32(uint32_t v) { v -= (v << 6); v ^= (v >> 17); @@ -336,7 +336,7 @@ namespace basisu inline const T *get_ptr() const { return reinterpret_cast(&m_v[0]); } inline T *get_ptr() { return reinterpret_cast(&m_v[0]); } - + inline vec operator- () const { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = -m_v[i]; return res; } inline vec operator+ () const { return *this; } inline vec &operator+= (const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] += other.m_v[i]; return *this; } @@ -345,14 +345,14 @@ namespace basisu inline vec &operator*=(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] *= other.m_v[i]; return *this; } inline vec &operator/= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] /= s; return *this; } inline vec &operator*= (T s) { for (uint32_t i = 0; i < N; i++) m_v[i] *= s; return *this; } - + friend inline vec operator+(const 
vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] + rhs.m_v[i]; return res; } friend inline vec operator-(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] - rhs.m_v[i]; return res; } friend inline vec operator*(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] * val; return res; } friend inline vec operator*(T val, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = val * rhs.m_v[i]; return res; } friend inline vec operator/(const vec &lhs, T val) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / val; return res; } friend inline vec operator/(const vec &lhs, const vec &rhs) { vec res; for (uint32_t i = 0; i < N; i++) res.m_v[i] = lhs.m_v[i] / rhs.m_v[i]; return res; } - + static inline T dot_product(const vec &lhs, const vec &rhs) { T res = lhs.m_v[0] * rhs.m_v[0]; for (uint32_t i = 1; i < N; i++) res += lhs.m_v[i] * rhs.m_v[i]; return res; } inline T dot(const vec &rhs) const { return dot_product(*this, rhs); } @@ -401,7 +401,7 @@ namespace basisu typedef vec<3, float> vec3F; typedef vec<2, float> vec2F; typedef vec<1, float> vec1F; - + template class matrix { @@ -504,7 +504,7 @@ namespace basisu [pKeys](uint32_t a, uint32_t b) { return pKeys[a] < pKeys[b]; } ); } - + // Very simple job pool with no dependencies. class job_pool { @@ -514,24 +514,24 @@ namespace basisu // num_threads is the TOTAL number of job pool threads, including the calling thread! So 2=1 new thread, 3=2 new threads, etc. 
job_pool(uint32_t num_threads); ~job_pool(); - + void add_job(const std::function& job); void add_job(std::function&& job); void wait_for_all(); size_t get_total_threads() const { return 1 + m_threads.size(); } - + private: std::vector m_threads; std::vector > m_queue; - + std::mutex m_mutex; std::condition_variable m_has_work; std::condition_variable m_no_more_jobs; - + uint32_t m_num_active_jobs; - + std::atomic m_kill_flag; void job_thread(uint32_t index); @@ -574,7 +574,7 @@ namespace basisu return *this; } }; - + class color_rgba { public: @@ -698,7 +698,7 @@ namespace basisu inline const uint8_t &operator[] (uint32_t index) const { assert(index < 4); return m_comps[index]; } inline uint8_t &operator[] (uint32_t index) { assert(index < 4); return m_comps[index]; } - + inline void clear() { m_comps[0] = 0; @@ -734,7 +734,7 @@ namespace basisu } inline int get_601_luma() const { return (19595U * m_comps[0] + 38470U * m_comps[1] + 7471U * m_comps[2] + 32768U) >> 16U; } - inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } + inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); } inline basist::color32 get_color32() const @@ -771,7 +771,7 @@ namespace basisu else return color_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); } - + // TODO: Allow user to control channel weightings. 
inline uint32_t color_distance(bool perceptual, const color_rgba &e1, const color_rgba &e2, bool alpha) { @@ -792,7 +792,7 @@ namespace basisu const float dcb = cb1 - cb2; uint32_t d = static_cast(32.0f*4.0f*dl*dl + 32.0f*2.0f*(.5f / (1.0f - .2126f))*(.5f / (1.0f - .2126f))*dcr*dcr + 32.0f*.25f*(.5f / (1.0f - .0722f))*(.5f / (1.0f - .0722f))*dcb*dcb); - + if (alpha) { int da = static_cast(e1.a) - static_cast(e2.a); @@ -906,11 +906,11 @@ namespace basisu va_list args; va_start(args, pFmt); -#ifdef _WIN32 +#ifdef _WIN32 vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); -#endif +#endif va_end(args); return std::string(buf); @@ -958,7 +958,7 @@ namespace basisu char fname_buf[_MAX_FNAME] = { 0 }; char ext_buf[_MAX_EXT] = { 0 }; - errno_t error = _splitpath_s(p, + errno_t error = _splitpath_s(p, pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, @@ -990,7 +990,7 @@ namespace basisu if ((pDir->size()) && (pDir->back() != '/')) *pDir += "/"; } - + if (pFilename) { *pFilename = pBaseName; @@ -1017,7 +1017,7 @@ namespace basisu return (c == '/'); #endif } - + inline bool is_drive_separator(char c) { #ifdef _WIN32 @@ -1045,7 +1045,7 @@ namespace basisu string_combine_path(dst, p, q); string_combine_path(dst, dst.c_str(), r); } - + inline void string_combine_path_and_extension(std::string &dst, const char *p, const char *q, const char *r, const char *pExt) { string_combine_path(dst, p, q, r); @@ -1247,7 +1247,7 @@ namespace basisu codebook.resize(0); codebook.reserve(max_clusters); - + uint32_t node_index = 0; while (true) @@ -1266,7 +1266,7 @@ namespace basisu node_stack.pop_back(); continue; } - + node_stack.push_back(cur.m_right_index); node_index = cur.m_left_index; } @@ -1304,7 +1304,7 @@ namespace basisu assert(node.is_leaf()); var_heap.delete_top(); - + if (node.m_training_vecs.size() > 1) { if (split_node(node_index, var_heap, 
l_children, r_children)) @@ -1391,7 +1391,7 @@ namespace basisu m_nodes[node_index].m_left_index = l_child_index; m_nodes[node_index].m_right_index = r_child_index; - + m_nodes[node_index].m_codebook_index = m_next_codebook_index; m_next_codebook_index++; @@ -1405,7 +1405,7 @@ namespace basisu if ((l_child.m_var <= 0.0f) && (l_child.m_training_vecs.size() > 1)) { TrainingVectorType v(m_training_vecs[l_child.m_training_vecs[0]].first); - + for (uint32_t i = 1; i < l_child.m_training_vecs.size(); i++) { if (!(v == m_training_vecs[l_child.m_training_vecs[i]].first)) @@ -1432,10 +1432,10 @@ namespace basisu if ((l_child.m_var > 0.0f) && (l_child.m_training_vecs.size() > 1)) var_heap.add_heap(l_child_index, l_child.m_var); - + if ((r_child.m_var > 0.0f) && (r_child.m_training_vecs.size() > 1)) var_heap.add_heap(r_child_index, r_child.m_var); - + return true; } @@ -1516,7 +1516,7 @@ namespace basisu for (uint32_t i = 0; i < node.m_training_vecs.size(); i++) { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; - + l = TrainingVectorType::component_min(l, v); h = TrainingVectorType::component_max(h, v); } @@ -1597,8 +1597,8 @@ namespace basisu const uint32_t cMaxIters = 6; for (uint32_t iter = 0; iter < cMaxIters; iter++) { - l_children.resize(0); - r_children.resize(0); + l_children.resize(0); + r_children.resize(0); TrainingVectorType new_l_child(cZero), new_r_child(cZero); @@ -1639,7 +1639,7 @@ namespace basisu { const TrainingVectorType& v = m_training_vecs[node.m_training_vecs[i]].first; const uint64_t weight = m_training_vecs[node.m_training_vecs[i]].second; - + if ((!i) || (v == firstVec)) { firstVec = v; @@ -1741,7 +1741,7 @@ namespace basisu } Quantizer quantizers[cMaxThreads]; - + bool success_flags[cMaxThreads]; clear_obj(success_flags); @@ -1842,9 +1842,9 @@ namespace basisu uint32_t max_threads, job_pool *pJob_pool) { typedef bit_hasher training_vec_bit_hasher; - typedef std::unordered_map < typename Quantizer::training_vec_type, 
weighted_block_group, + typedef std::unordered_map < typename Quantizer::training_vec_type, weighted_block_group, training_vec_bit_hasher> group_hash; - + group_hash unique_vecs; weighted_block_group g; @@ -1904,7 +1904,7 @@ namespace basisu typename group_hash::const_iterator group_iter = unique_vec_iters[group_index]; const uint_vec& training_vec_indices = group_iter->second.m_indices; - + append_vector(codebook.back(), training_vec_indices); } } @@ -1981,7 +1981,7 @@ namespace basisu const double inv_total = 1.0f / total; const double neg_inv_log2 = -1.0f / log(2.0f); - + double e = 0.0f; for (uint32_t i = 0; i < m_hist.size(); i++) if (m_hist[i]) @@ -1990,7 +1990,7 @@ namespace basisu return e; } }; - + struct sym_freq { uint32_t m_key; @@ -2000,7 +2000,7 @@ namespace basisu sym_freq *canonical_huffman_radix_sort_syms(uint32_t num_syms, sym_freq *pSyms0, sym_freq *pSyms1); void canonical_huffman_calculate_minimum_redundancy(sym_freq *A, int num_syms); void canonical_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size); - + class huffman_encoding_table { public: @@ -2021,7 +2021,7 @@ namespace basisu bool init(uint32_t num_syms, const uint16_t *pFreq, uint32_t max_code_size); bool init(uint32_t num_syms, const uint32_t *pSym_freq, uint32_t max_code_size); - + inline const uint16_vec &get_codes() const { return m_codes; } inline const uint8_vec &get_code_sizes() const { return m_code_sizes; } @@ -2080,7 +2080,7 @@ namespace basisu m_bit_buffer = 0; m_bit_buffer_size = 0; - + return 8; } @@ -2129,7 +2129,7 @@ namespace basisu if (v < u) return put_bits(v, k); - + uint32_t x = v + u; assert((x >> 1) >= u); @@ -2141,20 +2141,20 @@ namespace basisu inline uint32_t put_rice(uint32_t v, uint32_t m) { assert(m); - + const uint64_t start_bits = m_total_bits; uint32_t q = v >> m, r = v & ((1 << m) - 1); // rice coding sanity check assert(q <= 64); - + for (; q > 16; q -= 16) put_bits(0xFFFF, 16); put_bits((1 << q) - 1, q); put_bits(r << 1, 
m + 1); - + return (uint32_t)(m_total_bits - start_bits); } @@ -2164,13 +2164,13 @@ namespace basisu const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t total_bits = 0; for ( ; ; ) { uint32_t next_v = v >> chunk_bits; - + total_bits += put_bits((v & chunk_mask) | (next_v ? chunk_size : 0), chunk_bits + 1); if (!next_v) break; @@ -2182,7 +2182,7 @@ namespace basisu } uint32_t emit_huffman_table(const huffman_encoding_table &tab); - + private: uint8_vec m_bytes; uint32_t m_bit_buffer, m_bit_buffer_size; @@ -2210,7 +2210,7 @@ namespace basisu inline void init(uint32_t bits_per_sym, uint32_t total_syms_per_group) { assert((bits_per_sym * total_syms_per_group) <= 16 && total_syms_per_group >= 1 && bits_per_sym >= 1); - + m_bits_per_sym = bits_per_sym; m_total_syms_per_group = total_syms_per_group; m_cur_sym_bits = 0; @@ -2264,7 +2264,7 @@ namespace basisu return true; } - + inline uint32_t emit_next_sym(bitwise_coder &c) { uint32_t bits = 0; @@ -2294,7 +2294,7 @@ namespace basisu bool huffman_test(int rand_seed); // VQ index reordering - + class palette_index_reorderer { public: @@ -2315,7 +2315,7 @@ namespace basisu typedef float(*pEntry_dist_func)(uint32_t i, uint32_t j, void *pCtx); void init(uint32_t num_indices, const uint32_t *pIndices, uint32_t num_syms, pEntry_dist_func pDist_func, void *pCtx, float dist_func_weight); - + // Table remaps old to new symbol indices inline const uint_vec &get_remap_table() const { return m_remap_table; } @@ -2336,12 +2336,12 @@ namespace basisu class image { public: - image() : + image() : m_width(0), m_height(0), m_pitch(0) { } - image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + image(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -2382,7 +2382,7 @@ namespace basisu image &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -2404,7 +2404,7 @@ namespace basisu void 
init(const uint8_t *pImage, uint32_t width, uint32_t height, uint32_t comps) { assert(comps >= 1 && comps <= 4); - + resize(width, height); for (uint32_t y = 0; y < height; y++) @@ -2501,7 +2501,7 @@ namespace basisu cur_state.swap(m_pixels); m_pixels.resize(p * h); - + for (uint32_t y = 0; y < h; y++) { for (uint32_t x = 0; x < w; x++) @@ -2539,8 +2539,8 @@ namespace basisu y = wrap_v ? posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline image &set_clipped(int x, int y, const color_rgba &c) + + inline image &set_clipped(int x, int y, const color_rgba &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -2680,7 +2680,7 @@ namespace basisu } void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...); - + private: uint32_t m_width, m_height, m_pitch; // all in pixels color_rgba_vec m_pixels; @@ -2693,12 +2693,12 @@ namespace basisu class imagef { public: - imagef() : + imagef() : m_width(0), m_height(0), m_pitch(0) { } - imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : + imagef(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX) : m_width(0), m_height(0), m_pitch(0) { resize(w, h, p); @@ -2733,7 +2733,7 @@ namespace basisu imagef &clear() { - m_width = 0; + m_width = 0; m_height = 0; m_pitch = 0; clear_vector(m_pixels); @@ -2783,7 +2783,7 @@ namespace basisu set_clipped(x + ix, y + iy, c); return *this; } - + imagef &crop(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const vec4F &background = vec4F(0,0,0,1)) { if (p == UINT32_MAX) @@ -2802,7 +2802,7 @@ namespace basisu cur_state.swap(m_pixels); m_pixels.resize(p * h); - + for (uint32_t y = 0; y < h; y++) { for (uint32_t x = 0; x < w; x++) @@ -2840,8 +2840,8 @@ namespace basisu y = wrap_v ? 
posmod(y, m_height) : clamp(y, 0, m_height - 1); return m_pixels[x + y * m_pitch]; } - - inline imagef &set_clipped(int x, int y, const vec4F &c) + + inline imagef &set_clipped(int x, int y, const vec4F &c) { if ((static_cast(x) < m_width) && (static_cast(y) < m_height)) (*this)(x, y) = c; @@ -2904,14 +2904,14 @@ namespace basisu inline const vec4F *get_ptr() const { return &m_pixels[0]; } inline vec4F *get_ptr() { return &m_pixels[0]; } - + private: uint32_t m_width, m_height, m_pitch; // all in pixels vec4F_vec m_pixels; }; // Image metrics - + class image_metrics { public: @@ -2939,27 +2939,27 @@ namespace basisu }; // Image saving/loading/resampling - + bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr); bool load_png(const char* pFilename, image& img); inline bool load_png(const std::string &filename, image &img) { return load_png(filename.c_str(), img); } bool load_bmp(const char* pFilename, image& img); inline bool load_bmp(const std::string &filename, image &img) { return load_bmp(filename.c_str(), img); } - + bool load_tga(const char* pFilename, image& img); inline bool load_tga(const std::string &filename, image &img) { return load_tga(filename.c_str(), img); } bool load_jpg(const char *pFilename, image& img); inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); } - + // Currently loads .BMP, .PNG, or .TGA. 
bool load_image(const char* pFilename, image& img); inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); } uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans); uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans); - + enum { cImageSaveGrayscale = 1, @@ -2968,23 +2968,23 @@ namespace basisu bool save_png(const char* pFilename, const image& img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0); inline bool save_png(const std::string &filename, const image &img, uint32_t image_save_flags = 0, uint32_t grayscale_comp = 0) { return save_png(filename.c_str(), img, image_save_flags, grayscale_comp); } - + bool read_file_to_vec(const char* pFilename, uint8_vec& data); - + bool write_data_to_file(const char* pFilename, const void* pData, size_t len); - + inline bool write_vec_to_file(const char* pFilename, const uint8_vec& v) { return v.size() ? write_data_to_file(pFilename, &v[0], v.size()) : write_data_to_file(pFilename, "", 0); } float linear_to_srgb(float l); float srgb_to_linear(float s); bool image_resample(const image &src, image &dst, bool srgb = false, - const char *pFilter = "lanczos4", float filter_scale = 1.0f, + const char *pFilter = "lanczos4", float filter_scale = 1.0f, bool wrapping = false, uint32_t first_comp = 0, uint32_t num_comps = 4); // Timing - + typedef uint64_t timer_ticks; class interval_timer @@ -2997,7 +2997,7 @@ namespace basisu double get_elapsed_secs() const; inline double get_elapsed_ms() const { return 1000.0f* get_elapsed_secs(); } - + static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_ticks(); @@ -3065,8 +3065,8 @@ namespace basisu inline const T &operator[] (uint32_t i) const { return m_values[i]; } inline T &operator[] (uint32_t i) { return m_values[i]; } - - inline const T &at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width), 
clamp(y, 0, m_height)); } + + inline const T &at_clamped(int x, int y) const { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } inline T &at_clamped(int x, int y) { return (*this)(clamp(x, 0, m_width), clamp(y, 0, m_height)); } void clear() @@ -3121,7 +3121,7 @@ namespace basisu } void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed = 1); - + } // namespace basisu diff --git a/encoder/basisu_etc.cpp b/encoder/basisu_etc.cpp index 232e8965..b9261d03 100644 --- a/encoder/basisu_etc.cpp +++ b/encoder/basisu_etc.cpp @@ -39,7 +39,7 @@ namespace basisu { -16,-48,-64,-80,8,40,56,72 }, { -16,-40,-64,-80,8,32,56,72 }, { -16,-32,-64,-80,8,24,56,72 }, { -16,-40,-56,-80,8,32,48,72 }, { -24,-32,-56,-80,16,24,48,72 }, { -8,-16,-24,-80,0,8,16,72 }, { -32,-48,-64,-72,24,40,56,64 }, { -24,-40,-56,-72,16,32,48,64 } }; - + // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. static uint16_t g_etc1_inverse_lookup[2 * 8 * 4][256]; // [ diff/inten_table/selector][desired_color ] @@ -113,7 +113,7 @@ namespace basisu static uint32_t etc1_decode_value(uint32_t diff, uint32_t inten, uint32_t selector, uint32_t packed_c) { - const uint32_t limit = diff ? 32 : 16; + const uint32_t limit = diff ? 
32 : 16; BASISU_NOTE_UNUSED(limit); assert((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < limit)); int c; @@ -245,7 +245,7 @@ namespace basisu return best_error; } - + const uint32_t BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE = 165; static const struct { uint8_t m_v[4]; } g_cluster_fit_order_tab[BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE] = @@ -284,7 +284,7 @@ namespace basisu { { 2, 1, 2, 3 } },{ { 4, 1, 0, 3 } },{ { 3, 1, 1, 3 } },{ { 1, 1, 2, 4 } },{ { 2, 1, 0, 5 } }, { { 1, 0, 1, 6 } },{ { 0, 2, 1, 5 } },{ { 0, 2, 0, 6 } },{ { 1, 1, 1, 5 } },{ { 1, 1, 0, 6 } } }; - + const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { { -8, -2, 2, 8 }, { -17, -5, 5, 17 }, { -29, -9, 9, 29 }, { -42, -13, 13, 42 }, @@ -584,7 +584,7 @@ namespace basisu const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3, 255); } - + bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha) { const bool diff_flag = block.get_diff_bit(); @@ -707,7 +707,7 @@ namespace basisu { return (n << 4) | n; } - + uint64_t etc_block::evaluate_etc1_error(const color_rgba* pBlock_pixels, bool perceptual, int subblock_index) const { color_rgba unpacked_block[16]; @@ -756,7 +756,7 @@ namespace basisu } } } - + bool etc1_optimizer::compute() { assert(m_pResult->m_pSelectors); @@ -801,19 +801,19 @@ namespace basisu const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; uint64_t actual_error = 0; - + bool perceptual; if (m_pParams->m_quality >= cETCQualityMedium) perceptual = m_pParams->m_perceptual; else perceptual = (m_pParams->m_quality == cETCQualityFast) ? 
false : m_pParams->m_perceptual; - + for (uint32_t i = 0; i < n; i++) actual_error += color_distance(perceptual, pSrc_pixels[i], block_colors[pSelectors[i]], false); assert(actual_error == m_best_solution.m_error); } -#endif +#endif m_pResult->m_error = m_best_solution.m_error; @@ -998,10 +998,10 @@ namespace basisu m_luma.resize(n); m_sorted_luma_indices.resize(n); m_sorted_luma.resize(n); - + int min_r = 255, min_g = 255, min_b = 255; int max_r = 0, max_g = 0, max_b = 0; - + for (uint32_t i = 0; i < n; i++) { const color_rgba& c = m_pParams->m_pSrc_pixels[i]; @@ -1039,7 +1039,7 @@ namespace basisu m_pSorted_luma = &m_sorted_luma[0]; m_pSorted_luma_indices = &m_sorted_luma_indices[0]; - + for (uint32_t i = 0; i < n; i++) m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; } @@ -1070,7 +1070,7 @@ namespace basisu return true; } - + static uint8_t g_eval_dist_tables[8][256] = { // 99% threshold @@ -1238,7 +1238,7 @@ namespace basisu } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; - + #if BASISU_DEBUG_ETC_ENCODER_DEEPER printf("Eval done: %u error: %I64u best error so far: %I64u\n", (trial_solution.m_error < pBest_solution->m_error), trial_solution.m_error, pBest_solution->m_error); #endif @@ -1252,7 +1252,7 @@ namespace basisu success = true; } } - + return success; } @@ -1283,14 +1283,14 @@ namespace basisu } const color_rgba base_color(coords.get_scaled_color()); - + const uint32_t n = m_pParams->m_num_src_pixels; assert(trial_solution.m_selectors.size() == n); trial_solution.m_error = UINT64_MAX; - + const bool perceptual = (m_pParams->m_quality == cETCQualityFast) ? 
false : m_pParams->m_perceptual; - + for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1310,10 +1310,10 @@ namespace basisu // 0 1 2 3 // 01 12 23 const uint32_t block_inten_midpoints[3] = { block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3] }; - + uint64_t total_error = 0; const color_rgba* pSrc_pixels = m_pParams->m_pSrc_pixels; - + if (perceptual) { if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) diff --git a/encoder/basisu_etc.h b/encoder/basisu_etc.h index 1e3ece43..472958b6 100644 --- a/encoder/basisu_etc.h +++ b/encoder/basisu_etc.h @@ -76,7 +76,7 @@ namespace basisu // 000 001 010 011 100 101 110 111 // 0 1 2 3 -4 -3 -2 -1 }; - + extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; extern const uint8_t g_etc1_to_selector_index[cETC1SelectorValues]; extern const uint8_t g_selector_index_to_etc1[cETC1SelectorValues]; @@ -92,7 +92,7 @@ namespace basisu { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -275,7 +275,7 @@ namespace basisu const uint32_t byte_bit_ofs = bit_index & 7; const uint32_t mask = 1 << byte_bit_ofs; - + const uint32_t lsb = etc1_val & 1; const uint32_t msb = etc1_val >> 1; @@ -617,7 +617,7 @@ namespace basisu void set_block_color5_etc1s(const color_rgba &c_unscaled) { set_diff_bit(true); - + set_base5_color(pack_color5(c_unscaled, false)); set_delta3_color(pack_delta3(0, 0, 0)); } @@ -650,11 +650,11 @@ namespace basisu int dr = c1_unscaled.r - c0_unscaled.r; int dg = c1_unscaled.g - c0_unscaled.g; int db = c1_unscaled.b - c0_unscaled.b; - + dr = clamp(dr, cETC1ColorDeltaMin, cETC1ColorDeltaMax); dg = clamp(dg, cETC1ColorDeltaMin, cETC1ColorDeltaMax); db = clamp(db, cETC1ColorDeltaMin, cETC1ColorDeltaMax); - + 
set_delta3_color(pack_delta3(dr, dg, db)); return true; @@ -756,12 +756,12 @@ namespace basisu return static_cast(x); } }; - + typedef basisu::vector etc_block_vec; // Returns false if the unpack fails (could be bogus data or ETC2) bool unpack_etc1(const etc_block& block, color_rgba *pDst, bool preserve_alpha = false); - + enum basis_etc_quality { cETCQualityFast, @@ -1048,13 +1048,13 @@ namespace basisu enum { cSolutionsTriedHashBits = 10, cTotalSolutionsTriedHashSize = 1 << cSolutionsTriedHashBits, cSolutionsTriedHashMask = cTotalSolutionsTriedHashSize - 1 }; uint8_t m_solutions_tried[cTotalSolutionsTriedHashSize / 8]; - + void get_nearby_inten_tables(uint32_t idx, int &first_inten_table, int &last_inten_table) { first_inten_table = maximum(idx - 1, 0); last_inten_table = minimum(cETC1IntenModifierValues, idx + 1); } - + bool check_for_redundant_solution(const etc1_solution_coordinates& coords); bool evaluate_solution_slow(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); @@ -1076,7 +1076,7 @@ namespace basisu { etc1_optimizer m_optimizer; }; - + void pack_etc1_solid_color_init(); uint64_t pack_etc1_block_solid_color(etc_block& block, const uint8_t* pColor); @@ -1148,5 +1148,5 @@ namespace basisu uint64_t pack_eac_a8(pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); void pack_eac_a8(eac_a8_block* pBlock, const uint8_t* pPixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask = UINT32_MAX); - + } // namespace basisu diff --git a/encoder/basisu_frontend.cpp b/encoder/basisu_frontend.cpp index 324fc8e4..6af67ec7 100644 --- a/encoder/basisu_frontend.cpp +++ b/encoder/basisu_frontend.cpp @@ -13,7 +13,7 @@ // See the License for the 
specific language governing permissions and // limitations under the License. // -// TODO: +// TODO: // This code originally supported full ETC1 and ETC1S, so there's some legacy stuff to be cleaned up in here. // Add endpoint tiling support (where we force adjacent blocks to use the same endpoints during quantization), for a ~10% or more increase in bitrate at same SSIM. The backend already supports this. // @@ -39,14 +39,14 @@ namespace basisu const uint32_t BASISU_ENDPOINT_PARENT_CODEBOOK_SIZE = 16; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_01 = 32; const uint32_t BASISU_SELECTOR_PARENT_CODEBOOK_SIZE_COMP_LEVEL_DEFAULT = 16; - + // TODO - How to handle internal verifies in the basisu lib static inline void handle_verify_failure(int line) { fprintf(stderr, "ERROR: basisu_frontend: verify check failed at line %i!\n", line); abort(); } - + bool basisu_frontend::init(const params &p) { #if 0 @@ -98,7 +98,7 @@ namespace basisu p.m_num_global_sel_codebook_mod_bits, p.m_use_hybrid_selector_codebooks, p.m_hybrid_codebook_quality_thresh); - + if ((p.m_max_endpoint_clusters < 1) || (p.m_max_endpoint_clusters > cMaxEndpointClusters)) return false; if ((p.m_max_selector_clusters < 1) || (p.m_max_selector_clusters > cMaxSelectorClusters)) @@ -111,7 +111,7 @@ namespace basisu m_encoded_blocks.resize(m_params.m_num_source_blocks); memset(&m_encoded_blocks[0], 0, m_encoded_blocks.size() * sizeof(m_encoded_blocks[0])); - + m_num_endpoint_codebook_iterations = 1; m_num_selector_codebook_iterations = 1; @@ -181,7 +181,7 @@ namespace basisu if (m_params.m_disable_hierarchical_endpoint_codebooks) m_use_hierarchical_endpoint_codebooks = false; - debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, Selector codebook iters: %u\n", + debug_printf("Endpoint refinement: %u, Hierarchical endpoint codebooks: %u, Hierarchical selector codebooks: %u, Endpoint codebook iters: %u, 
Selector codebook iters: %u\n", m_endpoint_refinement, m_use_hierarchical_endpoint_codebooks, m_use_hierarchical_selector_codebooks, m_num_endpoint_codebook_iterations, m_num_selector_codebook_iterations); return true; @@ -205,7 +205,7 @@ namespace basisu init_endpoint_training_vectors(); generate_endpoint_clusters(); - + for (uint32_t refine_endpoint_step = 0; refine_endpoint_step < m_num_endpoint_codebook_iterations; refine_endpoint_step++) { BASISU_FRONTEND_VERIFY(check_etc1s_constraints()); @@ -250,7 +250,7 @@ namespace basisu dump_endpoint_clusterization_visualization(buf, true); } } - + eliminate_redundant_or_empty_endpoint_clusters(); if (m_params.m_debug_stats) @@ -270,13 +270,13 @@ namespace basisu if (m_use_hierarchical_selector_codebooks) compute_selector_clusters_within_each_parent_cluster(); - + if (m_params.m_compression_level == 0) { create_optimized_selector_codebook(0); find_optimal_selector_clusters_for_each_block(); - + introduce_special_selector_clusters(); } else @@ -289,7 +289,7 @@ namespace basisu find_optimal_selector_clusters_for_each_block(); introduce_special_selector_clusters(); - + if ((m_params.m_compression_level >= 4) || (m_params.m_tex_type == basist::cBASISTexTypeVideoFrames)) { if (!refine_block_endpoints_given_selectors()) @@ -297,7 +297,7 @@ namespace basisu } } } - + optimize_selector_codebook(); if (m_params.m_debug_stats) @@ -323,7 +323,7 @@ namespace basisu const basist::basisu_lowlevel_etc1s_transcoder::endpoint_vec& endpoints = pTranscoder->get_endpoints(); const basist::basisu_lowlevel_etc1s_transcoder::selector_vec& selectors = pTranscoder->get_selectors(); - + m_endpoint_cluster_etc_params.resize(endpoints.size()); for (uint32_t i = 0; i < endpoints.size(); i++) { @@ -430,7 +430,7 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] { #endif - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const etc_block& blk = pass ? 
m_encoded_blocks[block_index] : m_etc1_blocks_etc1s[block_index]; @@ -443,7 +443,7 @@ namespace basisu uint64_t best_err = UINT64_MAX; uint32_t best_index = 0; etc_block best_block(trial_blk); - + for (uint32_t i = 0; i < m_endpoint_cluster_etc_params.size(); i++) { if (m_endpoint_cluster_etc_params[i].m_inten_table[0] > blk.get_inten_table(0)) @@ -566,7 +566,7 @@ namespace basisu m_selector_cluster_block_indices.resize(selectors.size()); for (uint32_t block_index = 0; block_index < m_etc1_blocks_etc1s.size(); block_index++) m_selector_cluster_block_indices[m_block_selector_cluster_index[block_index]].push_back(block_index); - + return true; } @@ -603,9 +603,9 @@ namespace basisu const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size(); m_optimized_cluster_selectors.push_back(blk); - + vector_ensure_element_is_valid(m_selector_cluster_block_indices, new_selector_cluster_index); - + for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++) { if (m_orig_encoded_blocks[block_index].get_raw_selector_bits() != blk.get_raw_selector_bits()) @@ -613,7 +613,7 @@ namespace basisu // See if using flat selectors actually decreases the block's error. 
const uint32_t old_selector_cluster_index = m_block_selector_cluster_index[block_index]; - + etc_block cur_blk; const uint32_t endpoint_cluster_index = get_subblock_endpoint_cluster_index(block_index, 0); cur_blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(endpoint_cluster_index, false)); @@ -629,10 +629,10 @@ namespace basisu if (new_err >= cur_err) continue; - + // Change the block to use the new cluster m_block_selector_cluster_index[block_index] = new_selector_cluster_index; - + m_selector_cluster_block_indices[new_selector_cluster_index].push_back(block_index); block_relocated_flags[block_index] = true; @@ -708,7 +708,7 @@ namespace basisu old_to_new[i] = (find_res.first)->second; continue; } - + old_to_new[i] = total_new_entries++; new_to_old.push_back(i); } @@ -745,7 +745,7 @@ namespace basisu { new_selector_cluster_indices[m_block_selector_cluster_index[i]].push_back(i); } - + m_optimized_cluster_selectors.swap(new_optimized_cluster_selectors); m_optimized_cluster_selector_global_cb_ids.swap(new_optimized_cluster_selector_global_cb_ids); m_selector_cluster_block_indices.swap(new_selector_cluster_indices); @@ -758,7 +758,7 @@ namespace basisu for (uint32_t j = 0; j < m_selector_clusters_within_each_parent_cluster[i].size(); j++) m_selector_clusters_within_each_parent_cluster[i][j] = old_to_new[m_selector_clusters_within_each_parent_cluster[i][j]]; } - + debug_printf("optimize_selector_codebook: Before: %u After: %u\n", orig_total_selector_clusters, total_new_entries); } @@ -768,34 +768,34 @@ namespace basisu interval_timer tm; tm.start(); - + m_etc1_blocks_etc1s.resize(m_total_blocks); const uint32_t N = 4096; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { - const uint32_t first_index = block_index_iter; - const uint32_t last_index = minimum(m_total_blocks, first_index + N); + const uint32_t first_index = block_index_iter; + const uint32_t last_index = minimum(m_total_blocks, first_index + N); 
#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { #endif - for (uint32_t block_index = first_index; block_index < last_index; block_index++) + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const pixel_block &source_blk = get_source_pixel_block(block_index); etc1_optimizer optimizer; etc1_optimizer::params optimizer_params; etc1_optimizer::results optimizer_results; - + if (m_params.m_compression_level == 0) optimizer_params.m_quality = cETCQualityFast; else if (m_params.m_compression_level == 1) optimizer_params.m_quality = cETCQualityMedium; else if (m_params.m_compression_level == BASISU_MAX_COMPRESSION_LEVEL) optimizer_params.m_quality = cETCQualityUber; - + optimizer_params.m_num_src_pixels = 16; optimizer_params.m_pSrc_pixels = source_blk.get_ptr(); optimizer_params.m_perceptual = m_params.m_perceptual; @@ -836,9 +836,9 @@ namespace basisu void basisu_frontend::init_endpoint_training_vectors() { debug_printf("init_endpoint_training_vectors\n"); - + vec6F_quantizer::array_of_weighted_training_vecs &training_vecs = m_endpoint_clusterizer.get_training_vecs(); - + training_vecs.resize(m_total_blocks * 2); const uint32_t N = 16384; @@ -852,12 +852,12 @@ namespace basisu #endif for (uint32_t block_index = first_index; block_index < last_index; block_index++) - { + { const etc_block &blk = m_etc1_blocks_etc1s[block_index]; color_rgba block_colors[2]; blk.get_block_low_high_colors(block_colors, 0); - + vec6F v; v[0] = block_colors[0].r * (1.0f / 255.0f); v[1] = block_colors[0].g * (1.0f / 255.0f); @@ -865,7 +865,7 @@ namespace basisu v[3] = block_colors[1].r * (1.0f / 255.0f); v[4] = block_colors[1].g * (1.0f / 255.0f); v[5] = block_colors[1].b * (1.0f / 255.0f); - + training_vecs[block_index * 2 + 0] = std::make_pair(v, 1); training_vecs[block_index * 2 + 1] = std::make_pair(v, 1); @@ -935,7 +935,7 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < 
m_endpoint_clusters.size(); cluster_index++) { const uint_vec &cluster = m_endpoint_clusters[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { @@ -951,7 +951,7 @@ namespace basisu } } } - + if (m_params.m_debug_stats) debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size()); } @@ -1004,7 +1004,7 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -1017,8 +1017,8 @@ namespace basisu const uint32_t N = 512; for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { @@ -1049,7 +1049,7 @@ namespace basisu const endpoint_cluster_etc_params &etc_params = m_endpoint_cluster_etc_params[cluster_index]; assert(etc_params.m_valid); - + color_rgba block_colors[4]; etc_block::get_block_colors5(block_colors, etc_params.m_color_unscaled[0], etc_params.m_inten_table[0], true); @@ -1081,7 +1081,7 @@ namespace basisu quant_err.m_cluster_subblock_index = cluster_indices_iter; quant_err.m_block_index = block_index; quant_err.m_subblock_index = subblock_index; - + { std::lock_guard lock(m_lock); @@ -1102,7 +1102,7 @@ namespace basisu vector_sort(m_subblock_endpoint_quant_err_vec); } - + void basisu_frontend::introduce_new_endpoint_clusters() { debug_printf("introduce_new_endpoint_clusters\n"); @@ -1171,7 +1171,7 @@ namespace 
basisu BASISU_FRONTEND_VERIFY(cluster_sizes[subblock_to_move.m_cluster_index] >= 2); cluster_sizes[subblock_to_move.m_cluster_index] -= 2; - + ignore_cluster.insert(subblock_to_move.m_cluster_index); num_new_endpoint_clusters--; @@ -1211,8 +1211,8 @@ namespace basisu const uint32_t N = 128; for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, step ] { @@ -1245,7 +1245,7 @@ namespace basisu } endpoint_cluster_etc_params new_subblock_params; - + { etc1_optimizer optimizer; etc1_solution_coordinates solutions[2]; @@ -1276,7 +1276,7 @@ namespace basisu new_subblock_params.m_color_unscaled[0] = cluster_optimizer_results.m_block_color_unscaled; new_subblock_params.m_inten_table[0] = cluster_optimizer_results.m_block_inten_table; new_subblock_params.m_color_error[0] = cluster_optimizer_results.m_error; - } + } endpoint_cluster_etc_params &prev_etc_params = m_endpoint_cluster_etc_params[cluster_index]; @@ -1288,7 +1288,7 @@ namespace basisu assert(prev_etc_params.m_valid); uint64_t total_prev_err = 0; - + { color_rgba block_colors[4]; @@ -1332,7 +1332,7 @@ namespace basisu prev_etc_params = new_subblock_params; } - + } // cluster_index #ifndef __EMSCRIPTEN__ @@ -1376,7 +1376,7 @@ namespace basisu uint32_t basisu_frontend::refine_endpoint_clusterization() { debug_printf("refine_endpoint_clusterization\n"); - + if (m_use_hierarchical_endpoint_codebooks) compute_endpoint_clusters_within_each_parent_cluster(); @@ -1395,9 +1395,9 @@ namespace basisu } // cluster_indices_iter } - + 
//---------------------------------------------------------- - + // Create a new endpoint clusterization uint_vec best_cluster_indices(m_total_blocks); @@ -1427,7 +1427,7 @@ namespace basisu const uint_vec *pCluster_indices = m_endpoint_clusters_within_each_parent_cluster.size() ? &m_endpoint_clusters_within_each_parent_cluster[block_parent_endpoint_cluster_index] : nullptr; const uint32_t total_clusters = m_use_hierarchical_endpoint_codebooks ? (uint32_t)pCluster_indices->size() : (uint32_t)m_endpoint_clusters.size(); - + for (uint32_t i = 0; i < total_clusters; i++) { const uint32_t cluster_iter = m_use_hierarchical_endpoint_codebooks ? (*pCluster_indices)[i] : i; @@ -1448,7 +1448,7 @@ namespace basisu } etc_block::get_block_colors5(subblock_colors, cluster_etc_base_color, cluster_etc_inten); - + #if 0 for (uint32_t p = 0; p < num_subblock_pixels; p++) { @@ -1531,12 +1531,12 @@ namespace basisu { best_cluster_err = total_err; best_cluster_index = cluster_iter; - + if (!best_cluster_err) break; } } // j - + best_cluster_indices[block_index] = best_cluster_index; } // block_index @@ -1544,7 +1544,7 @@ namespace basisu #ifndef __EMSCRIPTEN__ } ); #endif - + } // block_index_iter #ifndef __EMSCRIPTEN__ @@ -1591,7 +1591,7 @@ namespace basisu basisu::vector > new_endpoint_clusters(m_endpoint_clusters.size()); basisu::vector new_subblock_etc_params(m_endpoint_clusters.size()); - + for (uint32_t i = 0; i < m_endpoint_clusters.size(); i++) { uint32_t j = sorted_endpoint_cluster_indices[i]; @@ -1606,7 +1606,7 @@ namespace basisu new_endpoint_clusters.resize(0); new_subblock_etc_params.resize(0); - + for (int i = 0; i < (int)m_endpoint_clusters.size(); ) { if (!m_endpoint_clusters[i].size()) @@ -1624,7 +1624,7 @@ namespace basisu new_endpoint_clusters.push_back(m_endpoint_clusters[i]); new_subblock_etc_params.push_back(m_endpoint_cluster_etc_params[i]); - + for (int k = i + 1; k < j; k++) { append_vector(new_endpoint_clusters.back(), m_endpoint_clusters[k]); @@ -1632,7 
+1632,7 @@ namespace basisu i = j; } - + if (m_endpoint_clusters.size() != new_endpoint_clusters.size()) { if (m_params.m_debug_stats) @@ -1657,7 +1657,7 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index] { #endif - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { uint32_t cluster0 = m_block_endpoint_clusters_indices[block_index][0]; @@ -1670,7 +1670,7 @@ namespace basisu color_rgba unscaled[2] = { m_endpoint_cluster_etc_params[cluster0].m_color_unscaled[0], m_endpoint_cluster_etc_params[cluster1].m_color_unscaled[0] }; uint32_t inten[2] = { m_endpoint_cluster_etc_params[cluster0].m_inten_table[0], m_endpoint_cluster_etc_params[cluster1].m_inten_table[0] }; - + blk.set_block_color5(unscaled[0], unscaled[1]); blk.set_flip_bit(true); @@ -1678,7 +1678,7 @@ namespace basisu blk.set_inten_table(1, inten[1]); blk.determine_selectors(pSource_pixels, m_params.m_perceptual); - + } // block_index #ifndef __EMSCRIPTEN__ @@ -1705,7 +1705,7 @@ namespace basisu for (uint32_t cluster_indices_iter = 0; cluster_indices_iter < cluster_indices.size(); cluster_indices_iter++) { const uint32_t block_index = cluster_indices[cluster_indices_iter]; - + block_selector_cluster_indices[block_index] = cluster_index; } // cluster_indices_iter @@ -1730,7 +1730,7 @@ namespace basisu BASISU_FRONTEND_VERIFY(cluster_indices.size()); vector_sort(cluster_indices); - + auto last = std::unique(cluster_indices.begin(), cluster_indices.end()); cluster_indices.erase(last, cluster_indices.end()); } @@ -1742,9 +1742,9 @@ namespace basisu typedef vec<16, float> vec16F; typedef tree_vector_quant vec16F_clusterizer; - + vec16F_clusterizer::array_of_weighted_training_vecs training_vecs(m_total_blocks); - + const uint32_t N = 4096; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { @@ -1774,10 +1774,10 @@ namespace basisu const uint32_t cColorDistToWeight = 300; const uint32_t 
cMaxWeight = 4096; uint32_t weight = clamp(dist / cColorDistToWeight, 1, cMaxWeight); - + training_vecs[block_index].first = v; training_vecs[block_index].second = weight; - + } // block_index #ifndef __EMSCRIPTEN__ @@ -1840,7 +1840,7 @@ namespace basisu for (uint32_t cluster_index = 0; cluster_index < m_selector_cluster_block_indices.size(); cluster_index++) { const uint_vec &cluster = m_selector_cluster_block_indices[cluster_index]; - + uint32_t parent_cluster_index = 0; for (uint32_t j = 0; j < cluster.size(); j++) { @@ -1879,13 +1879,13 @@ namespace basisu const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &total_clusters_processed, &total_selector_clusters] { #endif - + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { const basisu::vector &cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; @@ -1963,13 +1963,13 @@ namespace basisu const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < total_selector_clusters; cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)total_selector_clusters, cluster_index_iter + N); -#ifndef __EMSCRIPTEN__ +#ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &uses_hybrid_sel_codebook, &total_clusters_processed, 
&total_selector_clusters] { #endif - + for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++) { const basisu::vector &cluster_block_indices = m_selector_cluster_block_indices[cluster_index]; @@ -2074,7 +2074,7 @@ namespace basisu if (uses_hybrid_sel_codebook) { std::lock_guard lock(m_lock); - + total_clusters_processed++; if ((total_clusters_processed % 63) == 0) debug_printf("Global selector palette optimization: %3.1f%% complete\n", total_clusters_processed * 100.0f / total_selector_clusters); @@ -2093,7 +2093,7 @@ namespace basisu #endif } // if (m_params.m_pGlobal_sel_codebook) - + if (m_params.m_debug_images) { uint32_t max_selector_cluster_size = 0; @@ -2119,7 +2119,7 @@ namespace basisu uint32_t block_index = cluster_block_indices[i]; const etc_block &blk = m_orig_encoded_blocks[block_index]; - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) selector_cluster_vis.set_clipped(x_spacer_len + x + 5 * i, selector_cluster_index * 5 + y, color_rgba((blk.get_selector(x, y) * 255) / 3)); @@ -2148,7 +2148,7 @@ namespace basisu } m_block_selector_cluster_index.resize(m_total_blocks); - + if (m_params.m_compression_level == 0) { // Don't do anything, just leave the blocks in their original selector clusters. @@ -2162,7 +2162,7 @@ namespace basisu { // Note that this method may leave some empty clusters (i.e. arrays with no block indices), including at the end. basisu::vector< basisu::vector > new_cluster_indices(m_optimized_cluster_selectors.size()); - + // For each block: Determine which quantized selectors best encode that block, given its quantized endpoints. 
basisu::vector unpacked_optimized_cluster_selectors(16 * m_optimized_cluster_selectors.size()); @@ -2176,7 +2176,7 @@ namespace basisu } } } - + const uint32_t N = 1024; for (uint32_t block_index_iter = 0; block_index_iter < m_total_blocks; block_index_iter += N) { @@ -2192,7 +2192,7 @@ namespace basisu const color_rgba* pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); etc_block& blk = m_encoded_blocks[block_index]; - + color_rgba trial_block_colors[4]; blk.get_block_colors(trial_block_colors, 0); @@ -2255,11 +2255,11 @@ namespace basisu //const etc_block& cluster_blk = m_optimized_cluster_selectors[cluster_index]; uint64_t trial_err = 0; - + for (int i = 0; i < 16; i++) { const uint32_t sel = unpacked_optimized_cluster_selectors[cluster_index * 16 + i]; - + trial_err += trial_errors[sel][i]; if (trial_err > best_cluster_err) goto early_out; @@ -2314,14 +2314,14 @@ namespace basisu blk.set_raw_selector_bits(m_optimized_cluster_selectors[best_cluster_index].get_raw_selector_bits()); m_block_selector_cluster_index[block_index] = best_cluster_index; - + { std::lock_guard lock(m_lock); vector_ensure_element_is_valid(new_cluster_indices, best_cluster_index); new_cluster_indices[best_cluster_index].push_back(block_index); } - + } // block_index #ifndef __EMSCRIPTEN__ @@ -2345,7 +2345,7 @@ namespace basisu uint32_t basisu_frontend::refine_block_endpoints_given_selectors() { debug_printf("refine_block_endpoints_given_selectors\n"); - + for (int block_index = 0; block_index < static_cast(m_total_blocks); block_index++) { //uint32_t selector_cluster = m_block_selector_cluster_index(block_x, block_y); @@ -2526,7 +2526,7 @@ namespace basisu if (m_params.m_debug_stats) debug_printf("Total subblock endpoints refined: %u (%3.1f%%)\n", total_subblocks_refined, total_subblocks_refined * 100.0f / total_subblocks_examined); - + return total_subblocks_refined; } @@ -2618,7 +2618,7 @@ namespace basisu } // The backend has remapped the block endpoints while optimizing the 
output symbols for better rate distortion performance, so let's go and reoptimize the endpoint codebook. - // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. + // This is currently the only place where the backend actually goes and changes the quantization and calls the frontend to fix things up. // This is basically a bottom up clusterization stage, where some leaves can be combined. void basisu_frontend::reoptimize_remapped_endpoints(const uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices) { @@ -2630,12 +2630,12 @@ namespace basisu basisu::vector cluster_valid(new_endpoint_cluster_block_indices.size()); basisu::vector cluster_improved(new_endpoint_cluster_block_indices.size()); - + const uint32_t N = 256; for (uint32_t cluster_index_iter = 0; cluster_index_iter < new_endpoint_cluster_block_indices.size(); cluster_index_iter += N) { - const uint32_t first_index = cluster_index_iter; - const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); + const uint32_t first_index = cluster_index_iter; + const uint32_t last_index = minimum((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N); #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] { @@ -2657,13 +2657,13 @@ namespace basisu blk.set_block_color5_etc1s(get_endpoint_cluster_unscaled_color(cluster_index, false)); blk.set_inten_tables_etc1s(get_endpoint_cluster_inten_table(cluster_index, false)); blk.set_flip_bit(true); - + uint64_t cur_err = 0; for (uint32_t cluster_block_indices_iter = 0; cluster_block_indices_iter < cluster_block_indices.size(); cluster_block_indices_iter++) { const uint32_t block_index = 
cluster_block_indices[cluster_block_indices_iter]; - + const color_rgba *pBlock_pixels = get_source_pixel_block(block_index).get_ptr(); memcpy(&cluster_pixels[cluster_block_indices_iter * 16], pBlock_pixels, 16 * sizeof(color_rgba)); @@ -2675,14 +2675,14 @@ namespace basisu blk.set_raw_selector_bits(blk_selectors.get_raw_selector_bits()); cur_err += blk.evaluate_etc1_error(pBlock_pixels, m_params.m_perceptual); - + for (uint32_t y = 0; y < 4; y++) for (uint32_t x = 0; x < 4; x++) force_selectors[cluster_block_indices_iter * 16 + x + y * 4] = static_cast(blk_selectors.get_selector(x, y)); } endpoint_cluster_etc_params new_endpoint_cluster_etc_params; - + { etc1_optimizer optimizer; etc1_solution_coordinates solutions[2]; @@ -2721,7 +2721,7 @@ namespace basisu if (new_endpoint_cluster_etc_params.m_color_error[0] < cur_err) { m_endpoint_cluster_etc_params[cluster_index] = new_endpoint_cluster_etc_params; - + cluster_improved[cluster_index] = true; } @@ -2738,13 +2738,13 @@ namespace basisu #ifndef __EMSCRIPTEN__ m_params.m_pJob_pool->wait_for_all(); #endif - + uint32_t total_unused_clusters = 0; uint32_t total_improved_clusters = 0; - + old_to_new_endpoint_cluster_indices.resize(m_endpoint_clusters.size()); vector_set_all(old_to_new_endpoint_cluster_indices, -1); - + int total_new_endpoint_clusters = 0; for (uint32_t old_cluster_index = 0; old_cluster_index < m_endpoint_clusters.size(); old_cluster_index++) @@ -2779,7 +2779,7 @@ namespace basisu for (uint32_t block_index = 0; block_index < new_block_endpoints.size(); block_index++) { const uint32_t old_endpoint_cluster_index = new_block_endpoints[block_index]; - + const int new_endpoint_cluster_index = old_to_new_endpoint_cluster_indices[old_endpoint_cluster_index]; BASISU_FRONTEND_VERIFY(new_endpoint_cluster_index >= 0); @@ -2792,13 +2792,13 @@ namespace basisu new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 0); 
new_endpoint_cluster_etc_params[new_endpoint_cluster_index].m_subblocks.push_back(block_index * 2 + 1); - + m_block_endpoint_clusters_indices[block_index][0] = new_endpoint_cluster_index; m_block_endpoint_clusters_indices[block_index][1] = new_endpoint_cluster_index; } debug_printf("basisu_frontend::reoptimize_remapped_endpoints: stage 2\n"); - + m_endpoint_clusters = new_endpoint_clusters; m_endpoint_cluster_etc_params = new_endpoint_cluster_etc_params; @@ -2834,10 +2834,10 @@ namespace basisu debug_printf("Final (post-RDO) endpoint clusters: %u\n", m_endpoint_clusters.size()); } - + //debug_printf("validate_output: %u\n", validate_output()); } - + bool basisu_frontend::validate_output() const { debug_printf("validate_output\n"); @@ -2850,10 +2850,10 @@ namespace basisu //#define CHECK(x) do { if (!(x)) { DebugBreak(); return false; } } while(0) #define CHECK(x) BASISU_FRONTEND_VERIFY(x); - CHECK(get_output_block(block_index).get_flip_bit() == true); - + CHECK(get_output_block(block_index).get_flip_bit()); + const bool diff_flag = get_diff_flag(block_index); - CHECK(diff_flag == true); + CHECK(diff_flag); etc_block blk; memset(&blk, 0, sizeof(blk)); @@ -2865,11 +2865,11 @@ namespace basisu // basisu only supports ETC1S, so these must be equal. 
CHECK(endpoint_cluster0_index == endpoint_cluster1_index); - + CHECK(blk.set_block_color5_check(get_endpoint_cluster_unscaled_color(endpoint_cluster0_index, false), get_endpoint_cluster_unscaled_color(endpoint_cluster1_index, false))); CHECK(get_endpoint_cluster_color_is_used(endpoint_cluster0_index, false)); - + blk.set_inten_table(0, get_endpoint_cluster_inten_table(endpoint_cluster0_index, false)); blk.set_inten_table(1, get_endpoint_cluster_inten_table(endpoint_cluster1_index, false)); diff --git a/encoder/basisu_frontend.h b/encoder/basisu_frontend.h index 4ff6d404..a09ab0e4 100644 --- a/encoder/basisu_frontend.h +++ b/encoder/basisu_frontend.h @@ -58,7 +58,7 @@ namespace basisu enum { cMaxEndpointClusters = 16128, - + cMaxSelectorClusters = 16128, }; @@ -73,7 +73,7 @@ namespace basisu m_perceptual(true), m_debug_stats(false), m_debug_images(false), - + m_dump_endpoint_clusterization(true), m_validate(false), m_multithreaded(false), @@ -85,7 +85,7 @@ namespace basisu m_hybrid_codebook_quality_thresh(0.0f), m_tex_type(basist::cBASISTexType2D), m_pGlobal_codebooks(nullptr), - + m_pJob_pool(nullptr) { } @@ -105,7 +105,7 @@ namespace basisu bool m_validate; bool m_multithreaded; bool m_disable_hierarchical_endpoint_codebooks; - + const basist::etc1_global_selector_codebook *m_pGlobal_sel_codebook; uint32_t m_num_global_sel_codebook_pal_bits; uint32_t m_num_global_sel_codebook_mod_bits; @@ -113,7 +113,7 @@ namespace basisu float m_hybrid_codebook_quality_thresh; basist::basis_texture_type m_tex_type; const basist::basisu_lowlevel_etc1s_transcoder *m_pGlobal_codebooks; - + job_pool *m_pJob_pool; }; @@ -158,7 +158,7 @@ namespace basisu const uint_vec &get_selector_cluster_block_indices(uint32_t selector_cluster_index) const { return m_selector_cluster_block_indices[selector_cluster_index]; } void dump_debug_image(const char *pFilename, uint32_t first_block, uint32_t num_blocks_x, uint32_t num_blocks_y, bool output_blocks); - + void reoptimize_remapped_endpoints(const 
uint_vec &new_block_endpoints, int_vec &old_to_new_endpoint_cluster_indices, bool optimize_final_codebook, uint_vec *pBlock_selector_indices = nullptr); private: @@ -178,15 +178,15 @@ namespace basisu // The quantized ETC1S texture. etc_block_vec m_encoded_blocks; - + // Quantized blocks after endpoint quant, but before selector quant - etc_block_vec m_orig_encoded_blocks; - + etc_block_vec m_orig_encoded_blocks; + // Full quality ETC1S texture etc_block_vec m_etc1_blocks_etc1s; - + typedef vec<6, float> vec6F; - + // Endpoint clusterizer typedef tree_vector_quant vec6F_quantizer; vec6F_quantizer m_endpoint_clusterizer; @@ -197,13 +197,13 @@ namespace basisu // Array of block indices for each parent endpoint cluster basisu::vector m_endpoint_parent_clusters; - + // Each block's parent cluster index - uint8_vec m_block_parent_endpoint_cluster; + uint8_vec m_block_parent_endpoint_cluster; // Array of endpoint cluster indices for each parent endpoint cluster basisu::vector m_endpoint_clusters_within_each_parent_cluster; - + struct endpoint_cluster_etc_params { endpoint_cluster_etc_params() @@ -273,13 +273,13 @@ namespace basisu }; typedef basisu::vector cluster_subblock_etc_params_vec; - - // Each endpoint cluster's ETC1S parameters + + // Each endpoint cluster's ETC1S parameters cluster_subblock_etc_params_vec m_endpoint_cluster_etc_params; // The endpoint cluster index used by each ETC1 subblock. basisu::vector m_block_endpoint_clusters_indices; - + // The block(s) within each selector cluster // Note: If you add anything here that uses selector cluster indicies, be sure to update optimize_selector_codebook()! basisu::vector m_selector_cluster_block_indices; @@ -289,7 +289,7 @@ namespace basisu // The block(s) within each parent selector cluster. 
basisu::vector m_selector_parent_cluster_block_indices; - + // Each block's parent selector cluster uint8_vec m_block_parent_selector_cluster; diff --git a/encoder/basisu_gpu_texture.cpp b/encoder/basisu_gpu_texture.cpp index 3f9fb67b..cf512716 100644 --- a/encoder/basisu_gpu_texture.cpp +++ b/encoder/basisu_gpu_texture.cpp @@ -27,9 +27,9 @@ namespace basisu const eac_a8_block *pBlock = static_cast(pBlock_bits); const int8_t *pTable = g_etc2_eac_tables[pBlock->m_table]; - + const uint64_t selector_bits = pBlock->get_selector_bits(); - + const int32_t base = pBlock->m_base; const int32_t mul = pBlock->m_multiplier; @@ -61,16 +61,16 @@ namespace basisu uint8_t m_low_color[cTotalEndpointBytes]; uint8_t m_high_color[cTotalEndpointBytes]; uint8_t m_selectors[cTotalSelectorBytes]; - + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } - static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) + static void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { r = (c >> 11) & 31; g = (c >> 5) & 63; b = c & 31; - + r = (r << 3) | (r >> 2); g = (g << 2) | (g >> 4); b = (b << 3) | (b >> 2); @@ -116,9 +116,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -126,9 +126,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + 
pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -195,9 +195,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -205,9 +205,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -233,7 +233,7 @@ namespace basisu c[0].set_noclamp_rgba(r0, g0, b0, 255); c[1].set_noclamp_rgba(r1, g1, b1, 255); - + bool used_punchthrough = false; if (l > h) @@ -252,9 +252,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0] = c[pBlock->get_selector(0, y)]; - pPixels[1] = c[pBlock->get_selector(1, y)]; - pPixels[2] = c[pBlock->get_selector(2, y)]; + pPixels[0] = c[pBlock->get_selector(0, y)]; + pPixels[1] = c[pBlock->get_selector(1, y)]; + pPixels[2] = c[pBlock->get_selector(2, y)]; pPixels[3] = c[pBlock->get_selector(3, y)]; } } @@ -262,9 +262,9 @@ namespace basisu { for (uint32_t y = 0; y < 4; y++, pPixels += 4) { - pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); - pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); - pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); + pPixels[0].set_rgb(c[pBlock->get_selector(0, y)]); + pPixels[1].set_rgb(c[pBlock->get_selector(1, y)]); + 
pPixels[2].set_rgb(c[pBlock->get_selector(2, y)]); pPixels[3].set_rgb(c[pBlock->get_selector(3, y)]); } } @@ -284,7 +284,7 @@ namespace basisu inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } inline uint64_t get_selector_bits() const - { + { return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | ((uint32_t)m_selectors[3] << 24U))) | (((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U); @@ -295,7 +295,7 @@ namespace basisu assert((x < 4U) && (y < 4U)); return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); } - + static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) { pDst[0] = static_cast(l); @@ -350,7 +350,7 @@ namespace basisu pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; } } - + // Returns false if the block uses 3-color punchthrough alpha mode, which isn't supported on some GPU's for BC3. bool unpack_bc3(const void *pBlock_bits, color_rgba *pPixels) { @@ -360,7 +360,7 @@ namespace basisu success = false; unpack_bc4(pBlock_bits, &pPixels[0].a, sizeof(color_rgba)); - + return success; } @@ -417,9 +417,9 @@ namespace basisu for (uint32_t i = 0; i < 16; i++) { const uint32_t s = sels & 3; - + pPixels[i] = c[s]; - + sels >>= 2; } } @@ -441,12 +441,12 @@ namespace basisu case 2: return bc7_interp2(l, h, w); case 3: return bc7_interp3(l, h, w); case 4: return bc7_interp4(l, h, w); - default: + default: break; } return 0; } - + bool unpack_bc7_mode0_2(uint32_t mode, const void* pBlock_bits, color_rgba* pPixels) { //const uint32_t SUBSETS = 3; @@ -456,7 +456,7 @@ namespace basisu const uint32_t ENDPOINT_BITS = (mode == 0) ? 4 : 5; const uint32_t PBITS = (mode == 0) ? 
6 : 0; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - + uint32_t bit_offset = 0; const uint8_t* pBuf = static_cast(pBlock_bits); @@ -508,7 +508,7 @@ namespace basisu const uint32_t PBITS = (mode == 1) ? 2 : 4; const uint32_t SHARED_PBITS = (mode == 1) ? true : false; const uint32_t WEIGHT_VALS = 1 << WEIGHT_BITS; - + uint32_t bit_offset = 0; const uint8_t* pBuf = static_cast(pBlock_bits); @@ -520,21 +520,21 @@ namespace basisu for (uint32_t c = 0; c < COMPS; c++) for (uint32_t e = 0; e < ENDPOINTS; e++) endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, ENDPOINT_BITS); - + uint32_t pbits[4]; for (uint32_t p = 0; p < PBITS; p++) pbits[p] = read_bits32(pBuf, bit_offset, 1); - + uint32_t weights[16]; for (uint32_t i = 0; i < 16; i++) weights[i] = read_bits32(pBuf, bit_offset, ((!i) || (i == basist::g_bc7_table_anchor_index_second_subset[part])) ? (WEIGHT_BITS - 1) : WEIGHT_BITS); - + assert(bit_offset == 128); for (uint32_t e = 0; e < ENDPOINTS; e++) for (uint32_t c = 0; c < 4; c++) endpoints[e][c] = (uint8_t)((c == ((mode == 7U) ? 4U : 3U)) ? 255 : bc7_dequant(endpoints[e][c], pbits[SHARED_PBITS ? (e >> 1) : e], ENDPOINT_BITS)); - + color_rgba block_colors[2][8]; for (uint32_t s = 0; s < 2; s++) for (uint32_t i = 0; i < WEIGHT_VALS; i++) @@ -573,11 +573,11 @@ namespace basisu for (uint32_t c = 0; c < COMPS; c++) for (uint32_t e = 0; e < ENDPOINTS; e++) endpoints[e][c] = (uint8_t)read_bits32(pBuf, bit_offset, (c == 3) ? A_ENDPOINT_BITS : ENDPOINT_BITS); - + const uint32_t weight_bits[2] = { index_mode ? A_WEIGHT_BITS : WEIGHT_BITS, index_mode ? WEIGHT_BITS : A_WEIGHT_BITS }; - + uint32_t weights[16], a_weights[16]; - + for (uint32_t i = 0; i < 16; i++) (index_mode ? a_weights : weights)[i] = read_bits32(pBuf, bit_offset, weight_bits[index_mode] - ((!i) ? 
1 : 0)); @@ -679,10 +679,10 @@ namespace basisu { const uint32_t w = basist::g_bc7_weights4[i]; const uint32_t iw = 64 - w; - vals[i].set_noclamp_rgba( - (r0 * iw + r1 * w + 32) >> 6, - (g0 * iw + g1 * w + 32) >> 6, - (b0 * iw + b1 * w + 32) >> 6, + vals[i].set_noclamp_rgba( + (r0 * iw + r1 * w + 32) >> 6, + (g0 * iw + g1 * w + 32) >> 6, + (b0 * iw + b1 * w + 32) >> 6, (a0 * iw + a1 * w + 32) >> 6); } @@ -695,7 +695,7 @@ namespace basisu pPixels[5] = vals[block.m_hi.m_s11]; pPixels[6] = vals[block.m_hi.m_s21]; pPixels[7] = vals[block.m_hi.m_s31]; - + pPixels[8] = vals[block.m_hi.m_s02]; pPixels[9] = vals[block.m_hi.m_s12]; pPixels[10] = vals[block.m_hi.m_s22]; @@ -739,7 +739,7 @@ namespace basisu return false; } - + struct fxt1_block { union @@ -840,7 +840,7 @@ namespace basisu return false; if (pBlock->m_hi.m_alpha == 1) return false; - + color_rgba colors[4]; colors[0].r = pBlock->m_hi.m_r0; @@ -890,7 +890,7 @@ namespace basisu for (uint32_t i = 0; i < 16; i++) { const uint32_t sel = (pBlock->m_sels[4 + (i >> 2)] >> ((i & 3) * 2)) & 3; - + const uint32_t x = i & 3; const uint32_t y = i >> 2; pPixels[4 + x + y * 8] = block1_colors[sel]; @@ -948,7 +948,7 @@ namespace basisu { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), 255); } - + static color_rgba convert_rgba_5554_to_8888(const color_rgba& col) { return color_rgba((col[0] << 3) | (col[0] >> 2), (col[1] << 3) | (col[1] >> 2), (col[2] << 3) | (col[2] >> 2), (col[3] << 4) | col[3]); @@ -971,10 +971,10 @@ namespace basisu { // colora=554 color_rgba color_a(pBlock->m_opaque_color_data.m_red_a, pBlock->m_opaque_color_data.m_green_a, (pBlock->m_opaque_color_data.m_blue_a << 1) | (pBlock->m_opaque_color_data.m_blue_a >> 3), 255); - + // colora=555 color_rgba color_b(pBlock->m_opaque_color_data.m_red_b, pBlock->m_opaque_color_data.m_green_b, pBlock->m_opaque_color_data.m_blue_b, 255); - + colors[0] = convert_rgb_555_to_888(color_a); colors[3] = 
convert_rgb_555_to_888(color_b); @@ -983,11 +983,11 @@ namespace basisu } else { - // colora=4433 + // colora=4433 color_rgba color_a( - (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), + (pBlock->m_trans_color_data.m_red_a << 1) | (pBlock->m_trans_color_data.m_red_a >> 3), (pBlock->m_trans_color_data.m_green_a << 1) | (pBlock->m_trans_color_data.m_green_a >> 3), - (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), + (pBlock->m_trans_color_data.m_blue_a << 2) | (pBlock->m_trans_color_data.m_blue_a >> 1), pBlock->m_trans_color_data.m_alpha_a << 1); //colorb=4443 @@ -1060,9 +1060,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t shift = 45 - ((y + x * 4) * 3); - + const uint32_t sel = (uint32_t)((sels >> shift) & 7); - + int val = base + g_etc2_eac_tables[table][sel] * mul; val = clamp(val, 0, 2047); @@ -1083,12 +1083,12 @@ namespace basisu unpack_etc2_eac_r(pBlock, pPixels, c); } } - + void unpack_uastc(const void* p, color_rgba* pPixels) { basist::unpack_uastc(*static_cast(p), (basist::color32 *)pPixels, false); } - + // Unpacks to RGBA, R, RG, or A bool unpack_block(texture_format fmt, const void* pBlock, color_rgba* pPixels) { @@ -1211,10 +1211,10 @@ namespace basisu if ((m_fmt == texture_format::cPVRTC1_4_RGB) || (m_fmt == texture_format::cPVRTC1_4_RGBA)) { pvrtc4_image pi(m_width, m_height); - + if (get_total_blocks() != pi.get_total_blocks()) return false; - + memcpy(&pi.get_blocks()[0], get_ptr(), get_size_in_bytes()); pi.deswizzle(); @@ -1246,13 +1246,13 @@ namespace basisu return success; } - + static const uint8_t g_ktx_file_id[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; // KTX/GL enums enum { - KTX_ENDIAN = 0x04030201, + KTX_ENDIAN = 0x04030201, KTX_OPPOSITE_ENDIAN = 0x01020304, KTX_ETC1_RGB8_OES = 0x8D64, KTX_RED = 0x1903, @@ -1280,7 +1280,7 @@ namespace basisu KTX_COMPRESSED_R11_EAC = 0x9270, KTX_COMPRESSED_RG11_EAC = 
0x9272 }; - + struct ktx_header { uint8_t m_identifier[12]; @@ -1490,15 +1490,15 @@ namespace basisu return false; } } - + ktx_header header; header.clear(); memcpy(&header.m_identifier, g_ktx_file_id, sizeof(g_ktx_file_id)); header.m_endianness = KTX_ENDIAN; - + header.m_pixelWidth = width; header.m_pixelHeight = height; - + header.m_glInternalFormat = internal_fmt; header.m_glBaseInternalFormat = base_internal_fmt; @@ -1514,7 +1514,7 @@ namespace basisu for (uint32_t level_index = 0; level_index < total_levels; level_index++) { uint32_t img_size = gpu_images[0][level_index].get_size_in_bytes(); - + if ((header.m_numberOfFaces == 1) || (header.m_numberOfArrayElements > 1)) { img_size = img_size * header.m_numberOfFaces * maximum(1, header.m_numberOfArrayElements); @@ -1534,10 +1534,10 @@ namespace basisu const gpu_image& img = gpu_images[cubemap_flag ? (array_index * 6 + face_index) : array_index][level_index]; append_vector(ktx_data, (uint8_t *)img.get_ptr(), img.get_size_in_bytes()); - + bytes_written += img.get_size_in_bytes(); } - + } // array_index } // level_index @@ -1583,7 +1583,7 @@ namespace basisu } //const uint32_t OUT_FILE_MAGIC = 'TEXC'; - struct out_file_header + struct out_file_header { packed_uint<4> m_magic; packed_uint<4> m_pad; @@ -1615,7 +1615,7 @@ namespace basisu fwrite(&hdr, sizeof(hdr), 1, pFile); fwrite(gi.get_ptr(), gi.get_size_in_bytes(), 1, pFile); - + return fclose(pFile) != EOF; } } // basisu diff --git a/encoder/basisu_gpu_texture.h b/encoder/basisu_gpu_texture.h index 619926f5..9a2c8475 100644 --- a/encoder/basisu_gpu_texture.h +++ b/encoder/basisu_gpu_texture.h @@ -48,11 +48,11 @@ namespace basisu } inline texture_format get_format() const { return m_fmt; } - + // Width/height in pixels inline uint32_t get_pixel_width() const { return m_width; } inline uint32_t get_pixel_height() const { return m_height; } - + // Width/height in blocks, row pitch is assumed to be m_blocks_x. 
inline uint32_t get_blocks_x() const { return m_blocks_x; } inline uint32_t get_blocks_y() const { return m_blocks_y; } @@ -67,7 +67,7 @@ namespace basisu inline uint32_t get_row_pitch_in_bytes() const { return get_bytes_per_block() * get_blocks_x(); } inline const uint64_vec &get_blocks() const { return m_blocks; } - + inline const uint64_t *get_ptr() const { return &m_blocks[0]; } inline uint64_t *get_ptr() { return &m_blocks[0]; } @@ -101,7 +101,7 @@ namespace basisu } bool unpack(image& img) const; - + void override_dimensions(uint32_t w, uint32_t h) { m_width = w; @@ -119,9 +119,9 @@ namespace basisu // KTX file writing bool create_ktx_texture_file(uint8_vec &ktx_data, const basisu::vector& gpu_images, bool cubemap_flag); - + bool write_compressed_texture_file(const char *pFilename, const basisu::vector& g, bool cubemap_flag); - + inline bool write_compressed_texture_file(const char *pFilename, const gpu_image_vec &g) { basisu::vector a; @@ -130,7 +130,7 @@ namespace basisu } bool write_compressed_texture_file(const char *pFilename, const gpu_image &g); - + bool write_3dfx_out_file(const char* pFilename, const gpu_image& gi); // GPU texture block unpacking @@ -150,5 +150,5 @@ namespace basisu // unpack_block() is primarily intended to unpack texture data created by the transcoder. // For some texture formats (like ETC2 RGB, PVRTC2, FXT1) it's not a complete implementation. 
bool unpack_block(texture_format fmt, const void *pBlock, color_rgba *pPixels); - + } // namespace basisu diff --git a/encoder/basisu_kernels_imp.h b/encoder/basisu_kernels_imp.h index 04688051..10d2be3c 100644 --- a/encoder/basisu_kernels_imp.h +++ b/encoder/basisu_kernels_imp.h @@ -22,7 +22,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -110,7 +110,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -205,7 +205,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -301,7 +301,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, uint8_t* pSelectors, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_err) { assert(early_out_err >= 0); @@ -397,7 +397,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) void _call(int64_t* pDistance, const color_rgba* pBlock_colors, - const color_rgba* pSrc_pixels, uint32_t n, + const color_rgba* pSrc_pixels, uint32_t n, int64_t early_out_error) { assert(early_out_error >= 0); @@ -453,7 +453,7 @@ namespace CPPSPMD_NAME(basisu_kernels_namespace) int id = ((delta_l * delta_l) >> 7) + ((((delta_cr * delta_cr) >> 7) * 26) >> 7) + ((((delta_cb * delta_cb) >> 7) * 3) >> 7); - + if (id < best_err) { best_err = id; diff 
--git a/encoder/basisu_kernels_sse.cpp b/encoder/basisu_kernels_sse.cpp index 12d2321f..dec54942 100644 --- a/encoder/basisu_kernels_sse.cpp +++ b/encoder/basisu_kernels_sse.cpp @@ -26,7 +26,7 @@ #if __AVX__ || __AVX2__ || __AVX512F__ #error Please check your compiler options #endif - + #if CPPSPMD_SSE2 #if __SSE4_1__ || __SSE3__ || __SSE4_2__ || __SSSE3__ #error SSE4.1/SSE3/SSE4.2/SSSE3 cannot be enabled to use this file diff --git a/encoder/basisu_miniz.h b/encoder/basisu_miniz.h index 8627abe8..fdf06d38 100644 --- a/encoder/basisu_miniz.h +++ b/encoder/basisu_miniz.h @@ -1,8 +1,8 @@ /* miniz.c v1.15 - deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt - - Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ - + + Forked from the public domain/unlicense version at: https://code.google.com/archive/p/miniz/ + Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -488,7 +488,7 @@ size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void // Compresses an image to a compressed PNG file in memory. // On entry: -// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. +// pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. // The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. // level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL // If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). @@ -790,7 +790,7 @@ mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) // This is really over conservative. 
(And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) mz_uint64 a = 128ULL + (source_len * 110ULL) / 100ULL; mz_uint64 b = 128ULL + (mz_uint64)source_len + ((source_len / (31 * 1024)) + 1ULL) * 5ULL; - + mz_uint64 t = MZ_MAX(a, b); if (((mz_ulong)t) != t) t = (mz_ulong)(-1); diff --git a/encoder/basisu_pvrtc1_4.cpp b/encoder/basisu_pvrtc1_4.cpp index 596fc197..fe2c5917 100644 --- a/encoder/basisu_pvrtc1_4.cpp +++ b/encoder/basisu_pvrtc1_4.cpp @@ -131,7 +131,7 @@ namespace basisu uint32_t pvrtc4_swizzle_uv(uint32_t width, uint32_t height, uint32_t x, uint32_t y) { assert((x < width) && (y < height) && basisu::is_pow2(height) && basisu::is_pow2(width)); - + uint32_t min_d = width, max_v = y; if (height < width) { @@ -148,7 +148,7 @@ namespace basisu } max_v >>= shift_ofs; - + // OR in the rest of the bits from the largest dimension swizzled |= (max_v << (2 * shift_ofs)); @@ -169,7 +169,7 @@ namespace basisu r = (packed >> 10) & 31; g = (packed >> 5) & 31; b = (packed >> 1) & 15; - + if (unpack) { b = (b << 1) | (b >> 3); @@ -198,7 +198,7 @@ namespace basisu { a = (a << 1); a = (a << 4) | a; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 2) | (b >> 1); @@ -272,7 +272,7 @@ namespace basisu b = (packed >> 1) & 7; a = a << 1; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 2) | (b >> 1); @@ -285,13 +285,13 @@ namespace basisu b = packed & 15; a = a << 1; - + r = (r << 1) | (r >> 3); g = (g << 1) | (g >> 3); b = (b << 1) | (b >> 3); } } - + assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); return color_rgba(r, g, b, a); @@ -305,12 +305,12 @@ namespace basisu int block_x1 = block_x0 + 1; int block_y0 = (static_cast(y) - 2) >> 2; int block_y1 = block_y0 + 1; - + block_x0 = posmod(block_x0, m_block_width); block_x1 = posmod(block_x1, m_block_width); block_y0 = posmod(block_y0, m_block_height); block_y1 = posmod(block_y1, m_block_height); - + pColors[0] = interpolate(x, y, 
m_blocks(block_x0, block_y0).get_endpoint_5554(0), m_blocks(block_x1, block_y0).get_endpoint_5554(0), m_blocks(block_x0, block_y1).get_endpoint_5554(0), m_blocks(block_x1, block_y1).get_endpoint_5554(0)); pColors[3] = interpolate(x, y, m_blocks(block_x0, block_y0).get_endpoint_5554(1), m_blocks(block_x1, block_y0).get_endpoint_5554(1), m_blocks(block_x0, block_y1).get_endpoint_5554(1), m_blocks(block_x1, block_y1).get_endpoint_5554(1)); @@ -334,7 +334,7 @@ namespace basisu return false; } - + color_rgba pvrtc4_image::get_pixel(uint32_t x, uint32_t y, uint32_t m) const { assert((x < m_width) && (y < m_height)); @@ -343,12 +343,12 @@ namespace basisu int block_x1 = block_x0 + 1; int block_y0 = (static_cast(y) - 2) >> 2; int block_y1 = block_y0 + 1; - + block_x0 = posmod(block_x0, m_block_width); block_x1 = posmod(block_x1, m_block_width); block_y0 = posmod(block_y0, m_block_height); block_y1 = posmod(block_y1, m_block_height); - + if (get_block_uses_transparent_modulation(x >> 2, y >> 2)) { if (m == 0) @@ -471,7 +471,7 @@ namespace basisu color_rgba color_1((int)colors[1][0], (int)colors[1][1], (int)colors[1][2], 0); pvrtc4_block cur_blocks[3][3]; - + for (int y = -1; y <= 1; y++) { for (int x = -1; x <= 1; x++) diff --git a/encoder/basisu_pvrtc1_4.h b/encoder/basisu_pvrtc1_4.h index db6985a4..afe71841 100644 --- a/encoder/basisu_pvrtc1_4.h +++ b/encoder/basisu_pvrtc1_4.h @@ -17,14 +17,14 @@ namespace basisu { - enum - { - PVRTC2_MIN_WIDTH = 16, - PVRTC2_MIN_HEIGHT = 8, - PVRTC4_MIN_WIDTH = 8, - PVRTC4_MIN_HEIGHT = 8 + enum + { + PVRTC2_MIN_WIDTH = 16, + PVRTC2_MIN_HEIGHT = 8, + PVRTC4_MIN_WIDTH = 8, + PVRTC4_MIN_HEIGHT = 8 }; - + struct pvrtc4_block { uint32_t m_modulation; @@ -56,9 +56,9 @@ namespace basisu // Returns raw endpoint or 8888 color_rgba get_endpoint(uint32_t endpoint_index, bool unpack) const; - + color_rgba get_endpoint_5554(uint32_t endpoint_index) const; - + static uint32_t get_component_precision_in_bits(uint32_t c, uint32_t endpoint_index, bool 
opaque_endpoint) { static const uint32_t s_comp_prec[4][4] = @@ -80,7 +80,7 @@ namespace basisu }; return s_color_prec[open_range_check(endpoint_index, 2U) + (opaque_endpoint * 2)]; } - + inline uint32_t get_modulation(uint32_t x, uint32_t y) const { assert((x < 4) && (y < 4)); @@ -121,7 +121,7 @@ namespace basisu assert(endpoint_index < 2); const uint32_t m = m_endpoints & 1; uint32_t r = c[0], g = c[1], b = c[2], a = c[3]; - + uint32_t packed; if (opaque_endpoint) @@ -243,7 +243,7 @@ namespace basisu { return m_blocks(bx, by).is_endpoint_opaque(endpoint_index); } - + color_rgba get_endpoint(uint32_t bx, uint32_t by, uint32_t endpoint_index, bool unpack) const { assert((bx < m_block_width) && (by < m_block_height)); @@ -255,12 +255,12 @@ namespace basisu assert((x < m_width) && (y < m_height)); return m_blocks(x >> 2, y >> 2).get_modulation(x & 3, y & 3); } - + // Returns true if the block uses transparent modulation. bool get_interpolated_colors(uint32_t x, uint32_t y, color_rgba* pColors) const; - + color_rgba get_pixel(uint32_t x, uint32_t y, uint32_t m) const; - + inline color_rgba get_pixel(uint32_t x, uint32_t y) const { assert((x < m_width) && (y < m_height)); @@ -445,12 +445,12 @@ namespace basisu return total_error; } - - public: + + public: uint32_t m_width, m_height; pvrtc4_block_vector2D m_blocks; uint32_t m_block_width, m_block_height; - + bool m_uses_alpha; }; diff --git a/encoder/basisu_resample_filters.cpp b/encoder/basisu_resample_filters.cpp index 597cb3f6..1125be87 100644 --- a/encoder/basisu_resample_filters.cpp +++ b/encoder/basisu_resample_filters.cpp @@ -310,21 +310,21 @@ namespace basisu const resample_filter g_resample_filters[] = { - { "box", box_filter, BOX_FILTER_SUPPORT }, - { "tent", tent_filter, TENT_FILTER_SUPPORT }, - { "bell", bell_filter, BELL_SUPPORT }, + { "box", box_filter, BOX_FILTER_SUPPORT }, + { "tent", tent_filter, TENT_FILTER_SUPPORT }, + { "bell", bell_filter, BELL_SUPPORT }, { "b-spline", B_spline_filter, 
B_SPLINE_SUPPORT }, - { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, - { "blackman", blackman_filter, BLACKMAN_SUPPORT }, + { "mitchell", mitchell_filter, MITCHELL_SUPPORT }, + { "blackman", blackman_filter, BLACKMAN_SUPPORT }, { "lanczos3", lanczos3_filter, LANCZOS3_SUPPORT }, { "lanczos4", lanczos4_filter, LANCZOS4_SUPPORT }, - { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, - { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, - { "kaiser", kaiser_filter, KAISER_SUPPORT }, + { "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, + { "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, + { "kaiser", kaiser_filter, KAISER_SUPPORT }, { "gaussian", gaussian_filter, GAUSSIAN_SUPPORT }, - { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, - { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, - { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, + { "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, + { "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, + { "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, { "quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT }, }; diff --git a/encoder/basisu_resampler.cpp b/encoder/basisu_resampler.cpp index e193ce83..41492082 100644 --- a/encoder/basisu_resampler.cpp +++ b/encoder/basisu_resampler.cpp @@ -147,7 +147,7 @@ namespace basisu n += (right - left + 1); } - // Allocate memory for contributors. + // Allocate memory for contributors. 
if ((n == 0) || ((Pcpool = (Contrib*)calloc(n, sizeof(Contrib))) == NULL)) { @@ -848,5 +848,5 @@ namespace basisu else return g_resample_filters[filter_num].name; } - + } // namespace basisu diff --git a/encoder/basisu_ssim.cpp b/encoder/basisu_ssim.cpp index cceb400b..8e8cdea7 100644 --- a/encoder/basisu_ssim.cpp +++ b/encoder/basisu_ssim.cpp @@ -26,7 +26,7 @@ namespace basisu float g = (1.0f / (sqrtf((float)(2.0f * M_PI * sigma_sqr)))) * pow; return g; } - + // size_x/y should be odd void compute_gaussian_kernel(float *pDst, int size_x, int size_y, float sigma_sqr, uint32_t flags) { @@ -316,14 +316,14 @@ namespace basisu return avg; } - + // Reference: https://ece.uwaterloo.ca/~z70wang/research/ssim/index.html vec4F compute_ssim(const imagef &a, const imagef &b) { imagef axb, a_sq, b_sq, mu1, mu2, mu1_sq, mu2_sq, mu1_mu2, s1_sq, s2_sq, s12, smap, t1, t2, t3; const float C1 = 6.50250f, C2 = 58.52250f; - + pow_image(a, a_sq, vec4F(2)); pow_image(b, b_sq, vec4F(2)); mul_image(a, b, axb, vec4F(1.0f)); diff --git a/encoder/basisu_uastc_enc.cpp b/encoder/basisu_uastc_enc.cpp index ca2b3256..0284fb3c 100644 --- a/encoder/basisu_uastc_enc.cpp +++ b/encoder/basisu_uastc_enc.cpp @@ -223,7 +223,7 @@ namespace basisu default: break; } -#endif +#endif uint32_t total_planes = 1; switch (result.m_uastc_mode) @@ -453,7 +453,7 @@ namespace basisu printf("Total bits: %u, endpoint bits: %u, weight bits: %u\n", block_bit_offset, total_endpoint_bits, total_weight_bits); #endif } - + // MODE 0 // 0. DualPlane: 0, WeightRange: 8 (16), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 19 (192) MODE6 RGB // 18. 
DualPlane: 0, WeightRange: 11 (32), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 11 (32) MODE6 RGB @@ -504,7 +504,7 @@ namespace basisu astc_results.m_endpoints[3] = ccell_results.m_astc_high_endpoint.m_c[1]; astc_results.m_endpoints[4] = ccell_results.m_astc_low_endpoint.m_c[2]; astc_results.m_endpoints[5] = ccell_results.m_astc_high_endpoint.m_c[2]; - + bool invert = false; if (pForce_selectors == nullptr) @@ -1125,7 +1125,7 @@ namespace basisu } // common_pattern } - // MODE 5 + // MODE 5 // DualPlane: 0, WeightRange: 5 (8), Subsets: 1, CEM: 8 (RGB Direct ), EndpointRange: 20 (256) BC7 MODE 6 (or MODE 1 1-subset) static void astc_mode5(const color_rgba block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params) { @@ -1256,7 +1256,7 @@ namespace basisu ccell_results_rgb.m_pSelectors_temp = &ccell_result_selectors_temp[0]; uint64_t part_err_rgb = color_cell_compression(255, &ccell_params_rgb, &ccell_results_rgb, &comp_params); - + color_cell_compressor_params ccell_params_a; memset(&ccell_params_a, 0, sizeof(ccell_params_a)); @@ -1413,9 +1413,9 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { const uint32_t astc_part = bc7_convert_partition_index_3_to_2(g_bc7_partition3[16 * bc7_pattern + x + y * 4], common_pattern_k); -#ifdef _DEBUG +#ifdef _DEBUG assert((int)astc_part == astc_compute_texel_partition(astc_pattern, x, y, 0, 2, true)); -#endif +#endif part_pixel_index[y][x] = num_part_pixels[astc_part]; part_pixels[astc_part][num_part_pixels[astc_part]++] = block[y][x]; @@ -1580,7 +1580,7 @@ namespace basisu } #endif } - + // 9. DualPlane: 0, WeightRange: 2 (4), Subsets: 2, CEM: 12 (RGBA Direct), EndpointRange: 8 (16) - BC7 MODE 7 // 16. 
DualPlane: 0, WeightRange : 2 (4), Subsets : 2, CEM: 4 (LA Direct), EndpointRange : 20 (256) - BC7 MODE 7 static void astc_mode9_or_16(uint32_t mode, const color_rgba source_block[4][4], uastc_encode_results* pResults, uint32_t& total_results, bc7enc_compress_block_params& comp_params, uint32_t estimate_partition_list_size) @@ -2496,7 +2496,7 @@ namespace basisu total_results++; } } - + static void compute_block_error(const color_rgba block[4][4], const color_rgba decoded_block[4][4], uint64_t &total_rgb_err, uint64_t &total_rgba_err, uint64_t &total_la_err) { uint64_t total_err_r = 0, total_err_g = 0, total_err_b = 0, total_err_a = 0; @@ -2543,14 +2543,14 @@ namespace basisu color_rgba tblock_hint0_bc1[4][4]; color_rgba tblock_hint1_bc1[4][4]; - + etc_block etc1_blk; memset(&etc1_blk, 0, sizeof(etc1_blk)); eac_a8_block etc2_blk; memset(&etc2_blk, 0, sizeof(etc2_blk)); etc2_blk.m_multiplier = 1; - + // Pack to UASTC, then unpack, because the endpoints may be swapped. uastc_block temp_ublock; @@ -2558,7 +2558,7 @@ namespace basisu unpacked_uastc_block temp_ublock_unpacked; unpack_uastc(temp_ublock, temp_ublock_unpacked, false); - + unpacked_uastc_block ublock; memset(&ublock, 0, sizeof(ublock)); ublock.m_mode = best_results.m_uastc_mode; @@ -2587,7 +2587,7 @@ namespace basisu else { transcode_uastc_to_bc1_hint0(ublock, &b); - + unpack_block(texture_format::cBC1, &b, &tblock_hint0_bc1[0][0]); } @@ -2609,7 +2609,7 @@ namespace basisu const float err_thresh0 = 1.075f; const float err_thresh1 = 1.075f; - + if ((g_uastc_mode_has_bc1_hint0[best_mode]) && (t_err_hint0 <= t_err * err_thresh0)) bc1_hint0 = true; @@ -2776,7 +2776,7 @@ namespace basisu uint32_t first_flip = 0, last_flip = 2; uint32_t first_individ = 0, last_individ = 2; - + if (flags & cPackUASTCETC1DisableFlipAndIndividual) { last_flip = 1; @@ -2788,7 +2788,7 @@ namespace basisu first_flip = 1; last_flip = first_flip + 1; } - + for (uint32_t flip = first_flip; flip < last_flip; flip++) { 
trial_block.set_flip_bit(flip != 0); @@ -2796,7 +2796,7 @@ namespace basisu for (uint32_t individ = first_individ; individ < last_individ; individ++) { const uint32_t mul = individ ? 15 : 31; - + trial_block.set_diff_bit(individ == 0); color_rgba unbiased_block_colors[2]; @@ -2812,7 +2812,7 @@ namespace basisu { const etc_coord2 &c = g_etc1_pixel_coords[flip][subset][j]; const color_rgba& p = decoded_uastc_block[c.m_y][c.m_x]; - + avg_color[0] += p.r; avg_color[1] += p.g; avg_color[2] += p.b; @@ -2830,13 +2830,13 @@ namespace basisu unbiased_block_colors[subset][1] = (uint8_t)((avg_color[1] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][2] = (uint8_t)((avg_color[2] * mul + 1020) / (8 * 255)); unbiased_block_colors[subset][3] = 0; - + } // subset - + for (uint32_t bias_iter = 0; bias_iter < last_bias; bias_iter++) { const uint32_t bias = use_faster_bias_mode_table ? s_sorted_bias_modes[bias_iter] : bias_iter; - + color_rgba block_colors[2]; for (uint32_t subset = 0; subset < 2; subset++) block_colors[subset] = has_bias ? apply_etc1_bias((color32&)unbiased_block_colors[subset], bias, mul, subset) : unbiased_block_colors[subset]; @@ -2870,7 +2870,7 @@ namespace basisu uint64_t best_subset_err = UINT64_MAX; const uint32_t inten_table_limit = (level == cPackUASTCLevelVerySlow) ? 8 : ((range[subset] > 51) ? 8 : (range[subset] >= 7 ? 
4 : 2)); - + for (uint32_t inten_table = 0; inten_table < inten_table_limit; inten_table++) { trial_block.set_inten_table(subset, inten_table); @@ -3005,7 +3005,7 @@ namespace basisu uint32_t m_table; uint32_t m_multiplier; }; - + static uint64_t uastc_pack_eac_a8(uastc_pack_eac_a8_results& results, const uint8_t* pPixels, uint32_t num_pixels, uint32_t base_search_rad, uint32_t mul_search_rad, uint32_t table_mask) { assert(num_pixels <= 16); @@ -3149,7 +3149,7 @@ namespace basisu solid_results.m_common_pattern = 0; solid_results.m_solid_color = first_color; memset(&solid_results.m_astc, 0, sizeof(solid_results.m_astc)); - + etc_block etc1_blk; uint32_t etc1_bias = 0; @@ -3165,16 +3165,16 @@ namespace basisu return; } - + int level = flags & 7; const bool favor_uastc_error = (flags & cPackUASTCFavorUASTCError) != 0; const bool favor_bc7_error = !favor_uastc_error && ((flags & cPackUASTCFavorBC7Error) != 0); //const bool etc1_perceptual = true; - + uastc_encode_results results[MAX_ENCODE_RESULTS]; - + level = clampi(level, cPackUASTCLevelFastest, cPackUASTCLevelVerySlow); - + // Set all options to slowest, then configure from there depending on the selected level. 
uint32_t mode_mask = UINT32_MAX; uint32_t uber_level = 6; @@ -3185,12 +3185,12 @@ namespace basisu uint32_t least_squares_passes = 2; bool bc1_hints = true; bool only_use_la_on_transparent_blocks = false; - + switch (level) { case cPackUASTCLevelFastest: { - mode_mask = (1 << 0) | (1 << 8) | + mode_mask = (1 << 0) | (1 << 8) | (1 << 11) | (1 << 12) | (1 << 15); always_try_alpha_modes = false; @@ -3216,7 +3216,7 @@ namespace basisu estimate_partition = true; break; } - case cPackUASTCLevelDefault: + case cPackUASTCLevelDefault: { mode_mask = (1 << 0) | (1 << 1) | (1 << 4) | (1 << 5) | (1 << 6) | (1 << 8) | (1 << 9) | (1 << 10) | (1 << 11) | (1 << 12) | (1 << 13) | @@ -3254,9 +3254,9 @@ namespace basisu // HACK HACK //mode_mask &= ~(1 << 18); //mode_mask = (1 << 18)| (1 << 10); - + uint32_t total_results = 0; - + if (only_use_la_on_transparent_blocks) { if ((is_la) && (!has_alpha)) @@ -3264,7 +3264,7 @@ namespace basisu } const bool try_alpha_modes = has_alpha || always_try_alpha_modes; - + bc7enc_compress_block_params comp_params; memset(&comp_params, 0, sizeof(comp_params)); comp_params.m_max_partitions_mode1 = 64; @@ -3339,7 +3339,7 @@ namespace basisu } assert(total_results); - + // Fix up the errors so we consistently have LA, RGB, or RGBA error. 
for (uint32_t i = 0; i < total_results; i++) { @@ -3373,7 +3373,7 @@ namespace basisu } } } - + unpacked_uastc_block unpacked_ublock; memset(&unpacked_ublock, 0, sizeof(unpacked_ublock)); @@ -3540,7 +3540,7 @@ namespace basisu const uastc_encode_results& best_results = results[best_index]; const uint32_t best_mode = best_results.m_uastc_mode; const astc_block_desc& best_astc_results = best_results.m_astc; - + color_rgba decoded_uastc_block[4][4]; bool success = unpack_uastc(best_mode, best_results.m_common_pattern, best_results.m_solid_color.get_color32(), best_astc_results, (basist::color32 *)&decoded_uastc_block[0][0], false); (void)success; @@ -3558,14 +3558,14 @@ namespace basisu basist::uastc_block temp_block; pack_uastc(temp_block, best_results, etc1_blk, 0, etc_eac_a8_blk, false, false); - + basist::color32 temp_block_unpacked[4][4]; success = basist::unpack_uastc(temp_block, (basist::color32 *)temp_block_unpacked, false); VALIDATE(success); - + // Now round trip to packed ASTC and back, then decode to pixels. 
uint32_t astc_data[4]; - + if (best_results.m_uastc_mode == UASTC_MODE_INDEX_SOLID_COLOR) pack_astc_solid_block(astc_data, (color32 &)best_results.m_solid_color); else @@ -3583,7 +3583,7 @@ namespace basisu for (uint32_t x = 0; x < 4; x++) { VALIDATE(decoded_astc_block[y][x] == decoded_uastc_block[y][x]); - + VALIDATE(temp_block_unpacked[y][x].c[0] == decoded_uastc_block[y][x].r); VALIDATE(temp_block_unpacked[y][x].c[1] == decoded_uastc_block[y][x].g); VALIDATE(temp_block_unpacked[y][x].c[2] == decoded_uastc_block[y][x].b); @@ -3597,7 +3597,7 @@ namespace basisu bool bc1_hint0 = false, bc1_hint1 = false; if (bc1_hints) compute_bc1_hints(bc1_hint0, bc1_hint1, best_results, block, decoded_uastc_block); - + eac_a8_block eac_a8_blk; if ((g_uastc_mode_has_alpha[best_mode]) && (best_mode != UASTC_MODE_INDEX_SOLID_COLOR)) { @@ -3609,7 +3609,7 @@ namespace basisu uastc_pack_eac_a8_results eac8_a8_results; memset(&eac8_a8_results, 0, sizeof(eac8_a8_results)); uastc_pack_eac_a8(eac8_a8_results, decoded_uastc_block_alpha, 16, 0, eac_a8_mul_search_rad, eac_a8_table_mask); - + // All we care about for hinting is the table and multiplier. 
eac_a8_blk.m_table = eac8_a8_results.m_table; eac_a8_blk.m_multiplier = eac8_a8_results.m_multiplier; @@ -3833,8 +3833,8 @@ namespace basisu uint64_t m_total; uint64_t m_total2; }; - - static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, + + static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth) { debug_printf("uastc_rdo_blocks: Processing blocks %u to %u\n", first_index, last_index); @@ -3843,7 +3843,7 @@ namespace basisu const bool perceptual = false; std::unordered_map selector_history; - + for (uint32_t block_index = first_index; block_index < last_index; block_index++) { const basist::uastc_block& blk = pBlocks[block_index]; @@ -3893,7 +3893,7 @@ namespace basisu color_rgba decoded_b7_blk[4][4]; unpack_block(texture_format::cBC7, &b7_block, &decoded_b7_blk[0][0]); - + uint64_t bc7_err = 0; for (uint32_t i = 0; i < 16; i++) bc7_err += color_distance(perceptual, pPixels[i], ((color_rgba*)decoded_b7_blk)[i], true); @@ -3948,7 +3948,7 @@ namespace basisu float best_t = cur_ms_err * smooth_block_error_scale + cur_bits * params.m_lambda; - // Now scan through previous blocks, insert their selector bit patterns into the current block, and find + // Now scan through previous blocks, insert their selector bit patterns into the current block, and find // selector bit patterns which don't increase the overall block error too much. 
for (int prev_block_index = last_block_to_check; prev_block_index >= first_block_to_check; --prev_block_index) { @@ -4070,7 +4070,7 @@ namespace basisu color_rgba decoded_trial_uastc_block[4][4]; bool success = unpack_uastc(results.m_uastc_mode, results.m_common_pattern, results.m_solid_color.get_color32(), results.m_astc, (basist::color32*) & decoded_trial_uastc_block[0][0], false); assert(success); - + BASISU_NOTE_UNUSED(success); uint64_t trial_uastc_err = 0; @@ -4097,7 +4097,7 @@ namespace basisu // Write the modified block pBlocks[block_index] = best_block; - + } // if (best_block_index != block_index) { @@ -4113,8 +4113,8 @@ namespace basisu return true; } - - // This function implements a basic form of rate distortion optimization (RDO) for UASTC. + + // This function implements a basic form of rate distortion optimization (RDO) for UASTC. // It only changes selectors and then updates the hints. It uses very approximate LZ bitprice estimation. // There's A LOT that can be done better in here, but it's a start. // One nice advantage of the method used here is that it works for any input, no matter which or how many modes it uses. @@ -4155,7 +4155,7 @@ namespace basisu { std::lock_guard lck(stat_mutex); - + all_succeeded = all_succeeded && status; total_skipped += job_skipped; total_modified += job_modified; @@ -4178,7 +4178,7 @@ namespace basisu } debug_printf("uastc_rdo: Total modified: %3.2f%%, total skipped: %3.2f%%, total refined: %3.2f%%, total smooth: %3.2f%%\n", total_modified * 100.0f / num_blocks, total_skipped * 100.0f / num_blocks, total_refined * 100.0f / num_blocks, total_smooth * 100.0f / num_blocks); - + return status; } } // namespace basisu diff --git a/encoder/basisu_uastc_enc.h b/encoder/basisu_uastc_enc.h index ba39a558..bbc01659 100644 --- a/encoder/basisu_uastc_enc.h +++ b/encoder/basisu_uastc_enc.h @@ -25,15 +25,15 @@ namespace basisu { // Fastest is the lowest quality, although it's stil substantially higher quality vs. BC1/ETC1. 
It supports 5 modes. // The output may be somewhat blocky because this setting doesn't support 2/3-subset UASTC modes, but it should be less blocky vs. BC1/ETC1. - // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. + // This setting doesn't write BC1 hints, so BC1 transcoding will be slower. // Transcoded ETC1 quality will be lower because it only considers 2 hints out of 32. // Avg. 43.45 dB cPackUASTCLevelFastest = 0, - + // Faster is ~3x slower than fastest. It supports 9 modes. // Avg. 46.49 dB cPackUASTCLevelFaster = 1, - + // Default is ~5.5x slower than fastest. It supports 14 modes. // Avg. 47.47 dB cPackUASTCLevelDefault = 2, @@ -42,7 +42,7 @@ namespace basisu // Avg. 48.01 dB cPackUASTCLevelSlower = 3, - // VerySlow is ~200x slower than fastest. + // VerySlow is ~200x slower than fastest. // The best quality the codec is capable of, but you'll need to be patient or have a lot of cores. // Avg. 48.24 dB cPackUASTCLevelVerySlow = 4, @@ -53,13 +53,13 @@ namespace basisu // These flags allow you to favor only optimizing for lowest UASTC error, or lowest BC7 error. cPackUASTCFavorUASTCError = 8, cPackUASTCFavorBC7Error = 16, - + cPackUASTCETC1FasterHints = 64, cPackUASTCETC1FastestHints = 128, cPackUASTCETC1DisableFlipAndIndividual = 256, - + // Favor UASTC modes 0 and 10 more than the others (this is experimental, it's useful for RDO compression) - cPackUASTCFavorSimplerModes = 512, + cPackUASTCFavorSimplerModes = 512, }; // pRGBAPixels: Pointer to source 4x4 block of RGBA pixels (R first in memory). 
@@ -75,18 +75,18 @@ namespace basisu color_rgba m_solid_color; uint64_t m_astc_err; }; - + void pack_uastc(basist::uastc_block& blk, const uastc_encode_results& result, const etc_block& etc1_blk, uint32_t etc1_bias, const eac_a8_block& etc_eac_a8_blk, bool bc1_hint0, bool bc1_hint1); const uint32_t UASCT_RDO_DEFAULT_LZ_DICT_SIZE = 4096; const float UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO = 10.0f; const float UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH = 8.0f; - + // The RDO encoder computes a smoothness factor, from [0,1], for each block. To do this it computes each block's maximum component variance, then it divides this by this factor and clamps the result. // Larger values will result in more blocks being protected from too much distortion. const float UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV = 18.0f; - + // The RDO encoder can artifically boost the error of smooth blocks, in order to suppress distortions on smooth areas of the texture. // The encoder will use this value as the maximum error scale to use on smooth blocks. The larger this value, the better smooth bocks will look. Set to 1.0 to disable this completely. const float UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE = 10.0f; @@ -106,30 +106,30 @@ namespace basisu m_skip_block_rms_thresh = UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH; m_endpoint_refinement = true; m_lz_literal_cost = 100; - + m_max_smooth_block_std_dev = UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV; m_smooth_block_max_error_scale = UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE; } - + // m_lz_dict_size: Size of LZ dictionary to simulate in bytes. The larger this value, the slower the encoder but the higher the quality per LZ compressed bit. uint32_t m_lz_dict_size; // m_lambda: The post-processor tries to reduce distortion+rate*lambda (rate is approximate LZ bits and distortion is scaled MS error). // Larger values push the postprocessor towards optimizing more for lower rate, and smaller values more for distortion. 0=minimal distortion. 
float m_lambda; - + // m_max_allowed_rms_increase_ratio: How much the RMS error of a block is allowed to increase before a trial is rejected. 1.0=no increase allowed, 1.05=5% increase allowed, etc. float m_max_allowed_rms_increase_ratio; - - // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. + + // m_skip_block_rms_thresh: Blocks with this much RMS error or more are completely skipped by the RDO encoder. float m_skip_block_rms_thresh; - // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. + // m_endpoint_refinement: If true, the post-process will attempt to refine the endpoints of blocks with modified selectors. bool m_endpoint_refinement; float m_max_smooth_block_std_dev; float m_smooth_block_max_error_scale; - + uint32_t m_lz_literal_cost; }; diff --git a/encoder/cppspmd_flow.h b/encoder/cppspmd_flow.h index f6930476..cbb756fe 100644 --- a/encoder/cppspmd_flow.h +++ b/encoder/cppspmd_flow.h @@ -48,7 +48,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_return() m_kernel_exec = andnot(m_exec, m_kernel_exec); m_exec = exec_mask::all_off(); } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaskedBody) { @@ -61,7 +61,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_unmasked(const UnmaskedBody& unmaske m_kernel_exec = m_kernel_exec & orig_kernel_exec; m_exec = m_exec & orig_exec; - + check_masks(); } @@ -69,9 +69,9 @@ struct scoped_unmasked_restorer { spmd_kernel *m_pKernel; exec_mask m_orig_exec, m_orig_kernel_exec; - - CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_unmasked_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_kernel_exec(pKernel->m_kernel_exec) { @@ -79,15 +79,15 @@ struct scoped_unmasked_restorer pKernel->m_exec = exec_mask::all_on(); } - CPPSPMD_FORCE_INLINE 
~scoped_unmasked_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_unmasked_restorer() + { m_pKernel->m_kernel_exec = m_pKernel->m_kernel_exec & m_orig_kernel_exec; m_pKernel->m_exec = m_pKernel->m_exec & m_orig_exec; m_pKernel->check_masks(); } }; -#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); +#define SPMD_UNMASKED_BEGIN { scoped_unmasked_restorer _unmasked_restorer(this); #define SPMD_UNMASKED_END } #if 0 @@ -113,9 +113,9 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if_break(const vbool& cond) #ifdef _DEBUG assert(m_in_loop); #endif - + exec_mask cond_exec(cond); - + m_exec = andnot(m_exec & cond_exec, m_exec); check_masks(); @@ -157,7 +157,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_sifelse(const vbool& cond, const IfB m_exec = em; elseBody(); } - + m_exec = orig_exec; } @@ -165,7 +165,7 @@ template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_if(const vbool& cond, const IfBody& ifBody) { exec_mask cond_exec(cond); - + exec_mask pre_if_exec = cond_exec & m_exec; if (any(pre_if_exec)) @@ -188,7 +188,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_ifelse(const vbool& cond, const IfBo bool all_flag = false; exec_mask cond_exec(cond); - + { exec_mask pre_if_exec = cond_exec & m_exec; @@ -290,17 +290,17 @@ struct scoped_exec_restorer2 { spmd_kernel *m_pKernel; exec_mask m_unexecuted_lanes; - - CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : + + CPPSPMD_FORCE_INLINE scoped_exec_restorer2(spmd_kernel *pKernel, const vbool &cond) : m_pKernel(pKernel) - { + { exec_mask cond_exec(cond); m_unexecuted_lanes = andnot(cond_exec, pKernel->m_exec); pKernel->m_exec = cond_exec & pKernel->m_exec; } - CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() - { + CPPSPMD_FORCE_INLINE ~scoped_exec_restorer2() + { m_pKernel->m_exec = m_pKernel->m_exec | m_unexecuted_lanes; m_pKernel->check_masks(); } @@ -327,17 +327,17 @@ class scoped_exec_saver inline scoped_exec_saver(spmd_kernel *pKernel) : m_exec(pKernel->m_exec), 
m_kernel_exec(pKernel->m_kernel_exec), m_continue_mask(pKernel->m_continue_mask), m_pKernel(pKernel) - { + { #ifdef _DEBUG m_in_loop = pKernel->m_in_loop; #endif } - + inline ~scoped_exec_saver() - { - m_pKernel->m_exec = m_exec; - m_pKernel->m_continue_mask = m_continue_mask; - m_pKernel->m_kernel_exec = m_kernel_exec; + { + m_pKernel->m_exec = m_exec; + m_pKernel->m_continue_mask = m_continue_mask; + m_pKernel->m_kernel_exec = m_kernel_exec; #ifdef _DEBUG m_pKernel->m_in_loop = m_in_loop; m_pKernel->check_masks(); @@ -353,7 +353,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo { if (begin == end) return; - + if (!any(m_exec)) return; @@ -362,12 +362,12 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo std::swap(begin, end); exec_mask prev_continue_mask = m_continue_mask, prev_exec = m_exec; - + int total_full = (end - begin) / PROGRAM_COUNT; int total_partial = (end - begin) % PROGRAM_COUNT; lint_t loop_index = begin + program_index; - + const int total_loops = total_full + (total_partial ? 
1 : 0); m_continue_mask = exec_mask::all_off(); @@ -390,7 +390,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_foreach(int begin, int end, const Fo m_continue_mask = exec_mask::all_off(); check_masks(); - + store_all(loop_index, loop_index + PROGRAM_COUNT); } @@ -443,9 +443,9 @@ struct scoped_while_restorer #ifdef _DEBUG bool m_prev_in_loop; #endif - - CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : - m_pKernel(pKernel), + + CPPSPMD_FORCE_INLINE scoped_while_restorer(spmd_kernel *pKernel) : + m_pKernel(pKernel), m_orig_exec(pKernel->m_exec), m_orig_continue_mask(pKernel->m_continue_mask) { @@ -457,8 +457,8 @@ struct scoped_while_restorer #endif } - CPPSPMD_FORCE_INLINE ~scoped_while_restorer() - { + CPPSPMD_FORCE_INLINE ~scoped_while_restorer() + { m_pKernel->m_exec = m_orig_exec & m_pKernel->m_kernel_exec; m_pKernel->m_continue_mask = m_orig_continue_mask; #ifdef _DEBUG @@ -514,7 +514,7 @@ struct scoped_simple_while_restorer m_pKernel(pKernel), m_orig_exec(pKernel->m_exec) { - + #ifdef _DEBUG m_prev_in_loop = pKernel->m_in_loop; pKernel->m_in_loop = true; @@ -536,18 +536,18 @@ struct scoped_simple_while_restorer #define SPMD_SWHILE(cond) { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); \ while(true) { \ exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; -#define SPMD_SWEND } } +#define SPMD_SWEND } } // Cannot use SPMD break, continue, or return inside simple do #define SPMD_SDO { scoped_simple_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { -#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; } } +#define SPMD_SEND_DO(cond) exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(cond)); m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if 
(!any(m_exec)) break; } } #undef SPMD_FOR #undef SPMD_END_FOR #define SPMD_FOR(for_init, for_cond) { for_init; scoped_while_restorer CPPSPMD_GLUER2(_while_restore_, __LINE__)(this); while(true) { exec_mask CPPSPMD_GLUER2(cond_exec, __LINE__) = exec_mask(vbool(for_cond)); \ m_exec = m_exec & CPPSPMD_GLUER2(cond_exec, __LINE__); if (!any(m_exec)) break; #define SPMD_END_FOR(for_inc) m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); for_inc; } } - + template CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, const ForCondBody& forCondBody, const ForIncrBody& forIncrBody, const ForBody& forBody) { @@ -576,7 +576,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::spmd_for(const ForInitBody& forInitBody, m_exec = m_exec | m_continue_mask; m_continue_mask = exec_mask::all_off(); check_masks(); - + forIncrBody(); } diff --git a/encoder/cppspmd_math.h b/encoder/cppspmd_math.h index e7b3202b..0b46d6c6 100644 --- a/encoder/cppspmd_math.h +++ b/encoder/cppspmd_math.h @@ -15,21 +15,21 @@ // limitations under the License. // The general goal of these vectorized estimated math functions is scalability/performance. -// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. -// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper +// There are explictly no checks NaN's/Inf's on the input arguments. There are no assertions either. +// These are fast estimate functions - if you need more than that, use stdlib. Please do a proper // engineering analysis before relying on them. // I have chosen functions written by others, ported them to CppSPMD, then measured their abs/rel errors. // I compared each to the ones in DirectXMath and stdlib's for accuracy/performance. 
-CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) -{ - vfloat c = frac(abs(a * b_inv)) * abs(b); - return spmd_ternaryf(a < 0, -c, c); +CPPSPMD_FORCE_INLINE vfloat fmod_inv(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + vfloat c = frac(abs(a * b_inv)) * abs(b); + return spmd_ternaryf(a < 0, -c, c); } -CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) -{ - return frac(a * b_inv) * b; +CPPSPMD_FORCE_INLINE vfloat fmod_inv_p(const vfloat& a, const vfloat& b, const vfloat& b_inv) +{ + return frac(a * b_inv) * b; } // Avoids dividing by zero or very small values. @@ -87,13 +87,13 @@ inline vfloat spmd_kernel::log2_est(vfloat v) vint greater = ux1_i & 0x00400000; // true if signif > 1.5 SPMD_SIF(greater != 0) { - // signif >= 1.5 so need to divide by 2. Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 + // signif >= 1.5 so need to divide by 2. Accomplish this by stuffing exp = 126 which corresponds to an exponent of -1 store_all(ux2_i, (ux1_i & 0x007FFFFF) | 0x3f000000); store_all(ux2_f, cast_vint_to_vfloat(ux2_i)); // 126 instead of 127 compensates for division by 2 - store_all(fexp, vfloat(exp - 126)); + store_all(fexp, vfloat(exp - 126)); } SPMD_SELSE(greater != 0) { @@ -113,9 +113,9 @@ inline vfloat spmd_kernel::log2_est(vfloat v) vfloat xm1 = signif; vfloat xm1sqr = xm1 * xm1; - + return fexp + ((a * (xm1sqr * xm1) + b * xm1sqr + c * xm1) / (xm1sqr + d * xm1 + e)); - + // fma lowers accuracy for SSE4.1 - no idea why (compiler reordering?) //return fexp + ((vfma(a, (xm1sqr * xm1), vfma(b, xm1sqr, c * xm1))) / (xm1sqr + vfma(d, xm1, e))); } @@ -130,15 +130,15 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ { // Assume we're using equation (2) store_all(adjustment, 0); - + // integer part of the input argument vint int_arg = (vint)arg; - + // if frac(arg) is in [0.5, 1.0]... 
- SPMD_SIF((arg - int_arg) > 0.5f) + SPMD_SIF((arg - int_arg) > 0.5f) { store(adjustment, 1); - + // then change it to [0.0, 0.5] store(arg, arg - 0.5f); } @@ -146,17 +146,17 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ // arg == just the fractional part store_all(arg, arg - (vfloat)int_arg); - - // Now compute 2** (int) arg. + + // Now compute 2** (int) arg. store_all(int_arg, min(int_arg + 127, 254)); - + store_all(two_int_a, cast_vint_to_vfloat(VINT_SHIFT_LEFT(int_arg, 23))); } /* clang 9.0.0 for win /fp:precise release f range : -50.0000000000000000 49.9999940395355225, vals : 16777216 - + exp2_est(): Total passed near - zero check : 16777216 Total sign diffs : 0 @@ -164,7 +164,7 @@ CPPSPMD_FORCE_INLINE void spmd_kernel::reduce_expb(vfloat& arg, vfloat& two_int_ max rel err: 0.0000015642030031 avg abs err: 10793794.4007573910057545 avg rel err: 0.0000003890893282 - + XMVectorExp2(): Total passed near-zero check: 16777216 Total sign diffs: 0 @@ -191,11 +191,11 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) const vfloat P01 = +0.0576900723731f; const vfloat Q00 = +20.8189237930062f; const vfloat Q01 = +1.0f; - const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling + const vfloat sqrt2 = 1.4142135623730950488f; // sqrt(2) for scaling vfloat result = 0.0f; - // Return 0 if arg is too large. + // Return 0 if arg is too large. // We're not introducing inf/nan's into calculations, or risk doing so by returning huge default values. SPMD_IF(abs(arg) > 126.0f) { @@ -204,13 +204,13 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) SPMD_END_IF // 2**(int(a)) - vfloat two_int_a; - + vfloat two_int_a; + // set to 1 by reduce_expb vint adjustment; - + // 0 if arg is +; 1 if negative - vint negative = 0; + vint negative = 0; // If the input is negative, invert it. At the end we'll take the reciprocal, since n**(-1) = 1/(n**x). 
SPMD_SIF(arg < 0.0f) @@ -232,15 +232,15 @@ inline vfloat spmd_kernel::exp2_est(vfloat arg) // Q(x**2) vfloat Q = vfma(Q01, (arg * arg), Q00); - + // x*P(x**2) vfloat x_P = arg * (vfma(P01, arg * arg, P00)); - + vfloat answer = (Q + x_P) / (Q - x_P); // Now correct for the scaling factor of 2**(int(a)) store_all(answer, answer * two_int_a); - + // If the result had a fractional part > 0.5, correct for that store_all(answer, spmd_ternaryf(adjustment != 0, answer * sqrt2, answer)); @@ -295,35 +295,35 @@ inline vfloat spmd_kernel::sincos_est_a(vfloat a, bool sin_flag) store_all(r1_x, sin_flag ? vfms(c1_w, a, c1_x) : c1_w * a); - store_all(r1_y, frac(r1_x)); - - store_all(r2_x, (vfloat)(r1_y < c1_x)); + store_all(r1_y, frac(r1_x)); + + store_all(r2_x, (vfloat)(r1_y < c1_x)); - store_all(r2_y, (vfloat)(r1_y >= c1_y)); - store_all(r2_z, (vfloat)(r1_y >= c1_z)); + store_all(r2_y, (vfloat)(r1_y >= c1_y)); + store_all(r2_z, (vfloat)(r1_y >= c1_z)); store_all(r2_y, vfma(r2_x, c4_z, vfma(r2_y, c4_w, r2_z * c4_z))); - store_all(r0_x, c0_x - r1_y); - store_all(r0_y, c0_y - r1_y); - store_all(r0_z, c0_z - r1_y); - + store_all(r0_x, c0_x - r1_y); + store_all(r0_y, c0_y - r1_y); + store_all(r0_z, c0_z - r1_y); + store_all(r0_x, r0_x * r0_x); store_all(r0_y, r0_y * r0_y); store_all(r0_z, r0_z * r0_z); - store_all(r1_x, vfma(c2_x, r0_x, c2_z)); - store_all(r1_y, vfma(c2_y, r0_y, c2_w)); - store_all(r1_z, vfma(c2_x, r0_z, c2_z)); - + store_all(r1_x, vfma(c2_x, r0_x, c2_z)); + store_all(r1_y, vfma(c2_y, r0_y, c2_w)); + store_all(r1_z, vfma(c2_x, r0_z, c2_z)); + store_all(r1_x, vfma(r1_x, r0_x, c3_x)); store_all(r1_y, vfma(r1_y, r0_y, c3_y)); store_all(r1_z, vfma(r1_z, r0_z, c3_x)); - + store_all(r1_x, vfma(r1_x, r0_x, c3_z)); store_all(r1_y, vfma(r1_y, r0_y, c3_w)); store_all(r1_z, vfma(r1_z, r0_z, c3_z)); - + store_all(r1_x, vfma(r1_x, r0_x, c4_x)); store_all(r1_y, vfma(r1_y, r0_y, c4_y)); store_all(r1_z, vfma(r1_z, r0_z, c4_x)); @@ -347,9 +347,9 @@ CPPSPMD_FORCE_INLINE vfloat 
spmd_kernel::recip_est1(const vfloat& q) vfloat l = spmd_ternaryf(q >= fMinThresh, q, cast_vint_to_vfloat(vint(mag))); vint x_l = vint(mag) - cast_vfloat_to_vint(l); - + vfloat rcp_l = cast_vint_to_vfloat(x_l); - + return rcp_l * vfnma(rcp_l, q, 2.0f); } @@ -395,12 +395,12 @@ CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) { vfloat t1 = abs(y); vfloat t3 = abs(x); - + vfloat t0 = max(t3, t1); store_all(t1, min(t3, t1)); store_all(t3, t1 / t0); - + vfloat t4 = t3 * t3; store_all(t0, vfma(-0.013480470f, t4, 0.057477314f)); store_all(t0, vfms(t0, t4, 0.121239071f)); @@ -452,7 +452,7 @@ CPPSPMD_FORCE_INLINE vfloat spmd_kernel::atan2_est(vfloat y, vfloat x) max abs err: 0.8989131818294709 max rel err: 0.0573181403173166 avg rel err: 0.0000030791301203 - + Originally from: http://www.ganssle.com/approx.htm */ @@ -495,7 +495,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) vfloat z = tan82(y); vfloat r; - + vbool octant_one_or_two = (octant == 1) || (octant == 2); // SPMD optimization - skip costly divide if we can @@ -503,7 +503,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) { const float fDivThresh = .4371e-7f; vfloat one_over_z = 1.0f / spmd_ternaryf(abs(z) > fDivThresh, z, spmd_ternaryf(z < 0.0f, -fDivThresh, fDivThresh)); - + vfloat b = spmd_ternaryf(octant_one_or_two, one_over_z, z); store_all(r, spmd_ternaryf((octant & 2) != 0, -b, b)); } @@ -511,7 +511,7 @@ inline vfloat spmd_kernel::tan_est(vfloat x) { store_all(r, spmd_ternaryf(octant == 0, z, -z)); } - + // Small angle approximation, to decrease the max rel error near Pi. 
SPMD_SIF(x >= (1.0f - .0003125f*4.0f)) { @@ -523,25 +523,25 @@ inline vfloat spmd_kernel::tan_est(vfloat x) } inline void spmd_kernel::seed_rand(rand_context& x, vint seed) -{ - store(x.a, 0xf1ea5eed); - store(x.b, seed ^ 0xd8487b1f); - store(x.c, seed ^ 0xdbadef9a); - store(x.d, seed); - for (int i = 0; i < 20; ++i) - (void)get_randu(x); +{ + store(x.a, 0xf1ea5eed); + store(x.b, seed ^ 0xd8487b1f); + store(x.c, seed ^ 0xdbadef9a); + store(x.d, seed); + for (int i = 0; i < 20; ++i) + (void)get_randu(x); } // https://burtleburtle.net/bob/rand/smallprng.html // Returns 32-bit unsigned random numbers. inline vint spmd_kernel::get_randu(rand_context& x) -{ - vint e = x.a - VINT_ROT(x.b, 27); - store(x.a, x.b ^ VINT_ROT(x.c, 17)); - store(x.b, x.c + x.d); - store(x.c, x.d + e); - store(x.d, e + x.a); - return x.d; +{ + vint e = x.a - VINT_ROT(x.b, 27); + store(x.a, x.b ^ VINT_ROT(x.c, 17)); + store(x.b, x.c + x.d); + store(x.c, x.d + e); + store(x.d, e + x.a); + return x.d; } // Returns random numbers between [low, high), or low if low >= high @@ -552,7 +552,7 @@ inline vint spmd_kernel::get_randi(rand_context& x, vint low, vint high) vint range = high - low; vint rnd_range = mulhiu(rnd, range); - + return spmd_ternaryi(low < high, low + rnd_range, low); } @@ -637,25 +637,25 @@ CPPSPMD_FORCE_INLINE vint spmd_kernel::count_trailing_zeros(vint x) { // cast the least significant bit in v to a float vfloat f = (vfloat)(x & -x); - + // extract exponent and adjust return VUINT_SHIFT_RIGHT(cast_vfloat_to_vint(f), 23) - 0x7F; } CPPSPMD_FORCE_INLINE vint spmd_kernel::count_set_bits(vint x) { - vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); - vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); + vint v = x - (VUINT_SHIFT_RIGHT(x, 1) & 0x55555555); + vint v1 = (v & 0x33333333) + (VUINT_SHIFT_RIGHT(v, 2) & 0x33333333); return VUINT_SHIFT_RIGHT(((v1 + VUINT_SHIFT_RIGHT(v1, 4) & 0xF0F0F0F) * 0x1010101), 24); } -CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint 
&a, const vint &b) -{ - return cmpeq_epi16(subs_epu16(a, b), vint(0)); +CPPSPMD_FORCE_INLINE vint cmple_epu16(const vint &a, const vint &b) +{ + return cmpeq_epi16(subs_epu16(a, b), vint(0)); } -CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) -{ +CPPSPMD_FORCE_INLINE vint cmpge_epu16(const vint &a, const vint &b) +{ return cmple_epu16(b, a); } @@ -679,29 +679,29 @@ CPPSPMD_FORCE_INLINE vint cmple_epi16(const vint &a, const vint &b) return cmpge_epi16(b, a); } -void spmd_kernel::print_vint(vint v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%i ", extract(v, i)); - printf("\n"); +void spmd_kernel::print_vint(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i)); + printf("\n"); } -void spmd_kernel::print_vbool(vbool v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%i ", extract(v, i) ? 1 : 0); - printf("\n"); +void spmd_kernel::print_vbool(vbool v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%i ", extract(v, i) ? 
1 : 0); + printf("\n"); } - -void spmd_kernel::print_vint_hex(vint v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("0x%X ", extract(v, i)); - printf("\n"); + +void spmd_kernel::print_vint_hex(vint v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("0x%X ", extract(v, i)); + printf("\n"); } -void spmd_kernel::print_active_lanes(const char *pPrefix) -{ +void spmd_kernel::print_active_lanes(const char *pPrefix) +{ CPPSPMD_DECL(int, flags[PROGRAM_COUNT]); memset(flags, 0, sizeof(flags)); storeu_linear(flags, vint(1)); @@ -709,17 +709,17 @@ void spmd_kernel::print_active_lanes(const char *pPrefix) if (pPrefix) printf("%s", pPrefix); - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) { if (flags[i]) printf("%u ", i); } printf("\n"); } - -void spmd_kernel::print_vfloat(vfloat v) -{ - for (uint32_t i = 0; i < PROGRAM_COUNT; i++) - printf("%f ", extract(v, i)); - printf("\n"); + +void spmd_kernel::print_vfloat(vfloat v) +{ + for (uint32_t i = 0; i < PROGRAM_COUNT; i++) + printf("%f ", extract(v, i)); + printf("\n"); } diff --git a/encoder/cppspmd_math_declares.h b/encoder/cppspmd_math_declares.h index cdb6447b..cb186f33 100644 --- a/encoder/cppspmd_math_declares.h +++ b/encoder/cppspmd_math_declares.h @@ -54,7 +54,7 @@ CPPSPMD_FORCE_INLINE vfloat atan2_est(vfloat y, vfloat x); CPPSPMD_FORCE_INLINE vfloat atan_est(vfloat x) { return atan2_est(x, vfloat(1.0f)); } -// Don't call this for angles close to 90/270! +// Don't call this for angles close to 90/270! 
inline vfloat tan_est(vfloat x); // https://burtleburtle.net/bob/rand/smallprng.html diff --git a/encoder/cppspmd_sse.h b/encoder/cppspmd_sse.h index b39cb82a..34b0fac3 100644 --- a/encoder/cppspmd_sse.h +++ b/encoder/cppspmd_sse.h @@ -134,8 +134,8 @@ CPPSPMD_DECL(const uint32_t, g_x_128[4]) = { UINT32_MAX, 0, 0, 0 }; CPPSPMD_DECL(const float, g_onef_128[4]) = { 1.0f, 1.0f, 1.0f, 1.0f }; CPPSPMD_DECL(const uint32_t, g_oneu_128[4]) = { 1, 1, 1, 1 }; -CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = -{ +CPPSPMD_DECL(const uint32_t, g_lane_masks_128[4][4]) = +{ { UINT32_MAX, 0, 0, 0 }, { 0, UINT32_MAX, 0, 0 }, { 0, 0, UINT32_MAX, 0 }, @@ -240,7 +240,7 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) // Just emulate _mm_shuffle_epi8. This is very slow, but what else can we do? CPPSPMD_ALIGN(16) uint8_t av[16]; _mm_store_si128((__m128i*)av, a); - + CPPSPMD_ALIGN(16) uint8_t bvi[16]; _mm_store_ps((float*)bvi, _mm_and_ps(_mm_castsi128_ps(b), _mm_castsi128_ps(_mm_set1_epi32(0x0F0F0F0F)))); @@ -250,7 +250,7 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) result[1] = av[bvi[1]]; result[2] = av[bvi[2]]; result[3] = av[bvi[3]]; - + result[4] = av[bvi[4]]; result[5] = av[bvi[5]]; result[6] = av[bvi[6]]; @@ -269,9 +269,9 @@ inline __m128i shuffle_epi8(const __m128i& a, const __m128i& b) return _mm_andnot_si128(_mm_cmplt_epi8(b, _mm_setzero_si128()), _mm_load_si128((__m128i*)result)); } #else -CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) -{ - return _mm_shuffle_epi8(a, b); +CPPSPMD_FORCE_INLINE __m128i shuffle_epi8(const __m128i& a, const __m128i& b) +{ + return _mm_shuffle_epi8(a, b); } #endif @@ -390,7 +390,7 @@ struct spmd_kernel typedef int int_t; typedef vint vint_t; typedef lint lint_t; - + // Exec mask struct exec_mask { @@ -402,7 +402,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit exec_mask(const __m128i& mask) : m_mask(mask) { } CPPSPMD_FORCE_INLINE void enable_lane(uint32_t lane) { m_mask 
= _mm_load_si128((const __m128i *)&g_lane_masks_128[lane][0]); } - + static CPPSPMD_FORCE_INLINE exec_mask all_on() { return exec_mask{ _mm_load_si128((const __m128i*)g_allones_128) }; } static CPPSPMD_FORCE_INLINE exec_mask all_off() { return exec_mask{ _mm_setzero_si128() }; } @@ -425,20 +425,20 @@ struct spmd_kernel friend CPPSPMD_FORCE_INLINE exec_mask operator^ (const exec_mask& a, const exec_mask& b); friend CPPSPMD_FORCE_INLINE exec_mask operator& (const exec_mask& a, const exec_mask& b); friend CPPSPMD_FORCE_INLINE exec_mask operator| (const exec_mask& a, const exec_mask& b); - + exec_mask m_exec; exec_mask m_kernel_exec; exec_mask m_continue_mask; #ifdef _DEBUG bool m_in_loop; #endif - + CPPSPMD_FORCE_INLINE uint32_t get_movemask() const { return m_exec.get_movemask(); } - + void init(const exec_mask& kernel_exec); - + // Varying bool - + struct vbool { __m128i m_value; @@ -451,25 +451,25 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit operator vfloat() const; CPPSPMD_FORCE_INLINE explicit operator vint() const; - + private: vbool& operator=(const vbool&); }; friend vbool operator!(const vbool& v); - + CPPSPMD_FORCE_INLINE vbool& store(vbool& dst, const vbool& src) { dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); return dst; } - + CPPSPMD_FORCE_INLINE vbool& store_all(vbool& dst, const vbool& src) { dst.m_value = src.m_value; return dst; } - + // Varying float struct vfloat { @@ -498,7 +498,7 @@ struct spmd_kernel dst.m_value = blendv_mask_ps(dst.m_value, src.m_value, _mm_castsi128_ps(m_exec.m_mask)); return dst; } - + CPPSPMD_FORCE_INLINE vfloat& store_all(vfloat& dst, const vfloat& src) { dst.m_value = src.m_value; @@ -539,7 +539,7 @@ struct spmd_kernel _mm_storeu_ps(dst.m_pValue, blendv_mask_ps(_mm_loadu_ps(dst.m_pValue), src.m_value, _mm_castsi128_ps(m_exec.m_mask))); return dst; } - + CPPSPMD_FORCE_INLINE const float_lref& store_all(const float_lref& dst, const vfloat& src) { _mm_storeu_ps(dst.m_pValue, src.m_value); @@ 
-556,13 +556,13 @@ struct spmd_kernel { return vfloat{ _mm_and_ps(_mm_loadu_ps(src.m_pValue), _mm_castsi128_ps(m_exec.m_mask)) }; } - + // Varying ref to floats struct float_vref { __m128i m_vindex; float* m_pValue; - + private: float_vref& operator=(const float_vref&); }; @@ -572,7 +572,7 @@ struct spmd_kernel { __m128i m_vindex; vfloat* m_pValue; - + private: vfloat_vref& operator=(const vfloat_vref&); }; @@ -582,14 +582,14 @@ struct spmd_kernel { __m128i m_vindex; vint* m_pValue; - + private: vint_vref& operator=(const vint_vref&); }; CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref& dst, const vfloat& src); CPPSPMD_FORCE_INLINE const float_vref& store(const float_vref&& dst, const vfloat& src); - + CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref& dst, const vfloat& src); CPPSPMD_FORCE_INLINE const float_vref& store_all(const float_vref&& dst, const vfloat& src); @@ -629,7 +629,7 @@ struct spmd_kernel private: int_lref& operator=(const int_lref&); }; - + CPPSPMD_FORCE_INLINE const int_lref& store(const int_lref& dst, const vint& src) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); @@ -692,7 +692,7 @@ struct spmd_kernel dst.m_pValue[i] = static_cast(stored[i]); return dst; } - + CPPSPMD_FORCE_INLINE vint load(const int16_lref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -716,7 +716,7 @@ struct spmd_kernel return vint{ t }; } - + // Linear ref to constant ints struct cint_lref { @@ -737,7 +737,7 @@ struct spmd_kernel { return vint{ _mm_loadu_si128((const __m128i *)src.m_pValue) }; } - + // Varying ref to ints struct int_vref { @@ -777,7 +777,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE explicit vint(const vfloat& other) : m_value(_mm_cvttps_epi32(other.m_value)) { } - CPPSPMD_FORCE_INLINE explicit operator vbool() const + CPPSPMD_FORCE_INLINE explicit operator vbool() const { return vbool{ _mm_xor_si128( _mm_load_si128((const __m128i*)g_allones_128), _mm_cmpeq_epi32(m_value, _mm_setzero_si128())) }; } @@ -840,7 +840,7 
@@ struct spmd_kernel { _mm_store_si128((__m128i*)pDst, src.m_value); } - + CPPSPMD_FORCE_INLINE vint loadu_linear(const int *pSrc) { __m128i v = _mm_loadu_si128((const __m128i*)pSrc); @@ -885,7 +885,7 @@ struct spmd_kernel { _mm_store_ps((float*)pDst, src.m_value); } - + CPPSPMD_FORCE_INLINE vfloat loadu_linear(const float *pSrc) { __m128 v = _mm_loadu_ps((const float*)pSrc); @@ -904,7 +904,7 @@ struct spmd_kernel { return vfloat{ _mm_load_ps((float*)pSrc) }; } - + CPPSPMD_FORCE_INLINE vint& store(vint& dst, const vint& src) { dst.m_value = blendv_mask_epi32(dst.m_value, src.m_value, m_exec.m_mask); @@ -927,13 +927,13 @@ struct spmd_kernel } return dst; } - + CPPSPMD_FORCE_INLINE vint& store_all(vint& dst, const vint& src) { dst.m_value = src.m_value; return dst; } - + CPPSPMD_FORCE_INLINE const int_vref& store_all(const int_vref& dst, const vint& src) { CPPSPMD_ALIGN(16) int vindex[4]; @@ -964,7 +964,7 @@ struct spmd_kernel return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; } - + CPPSPMD_FORCE_INLINE vint load_all(const int_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -977,7 +977,7 @@ struct spmd_kernel return vint{ _mm_castps_si128( _mm_load_ps((const float*)values)) }; } - + CPPSPMD_FORCE_INLINE vint load(const cint_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -994,7 +994,7 @@ struct spmd_kernel return vint{ _mm_castps_si128(_mm_and_ps(_mm_castsi128_ps(m_exec.m_mask), _mm_load_ps((const float*)values))) }; } - + CPPSPMD_FORCE_INLINE vint load_all(const cint_vref& src) { CPPSPMD_ALIGN(16) int values[4]; @@ -1037,7 +1037,7 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void store_strided(int *pDst, uint32_t stride, const vint &v) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) pDst[0] = extract_x(v.m_value); if (mask & 2) pDst[stride] = extract_y(v.m_value); if (mask & 4) pDst[stride*2] = extract_z(v.m_value); @@ -1073,7 +1073,7 @@ struct spmd_kernel 
CPPSPMD_FORCE_INLINE vint load_strided(const int *pSrc, uint32_t stride) { int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + #if CPPSPMD_SSE2 CPPSPMD_ALIGN(16) int vals[4] = { 0, 0, 0, 0 }; if (mask & 1) vals[0] = pSrc[0]; @@ -1122,7 +1122,7 @@ struct spmd_kernel vals[2] = pSrc[stride * 2]; vals[3] = pSrc[stride * 3]; return vint{ _mm_load_si128((__m128i*)vals) }; -#else +#else const float* pSrcF = (const float*)pSrc; __m128 v = _mm_load_ss(pSrcF); v = _mm_insert_ps(v, _mm_load_ss(pSrcF + stride), 0x10); @@ -1154,7 +1154,7 @@ struct spmd_kernel { // TODO: There's surely a better way int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(_mm_castps_si128(src.m_value)); if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(_mm_castps_si128(src.m_value)); if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(_mm_castps_si128(src.m_value)); @@ -1182,7 +1182,7 @@ struct spmd_kernel { // TODO: There's surely a better way int mask = _mm_movemask_ps(_mm_castsi128_ps(m_exec.m_mask)); - + if (mask & 1) ((int *)(&dst.m_pValue[extract_x(dst.m_vindex)]))[0] = extract_x(src.m_value); if (mask & 2) ((int *)(&dst.m_pValue[extract_y(dst.m_vindex)]))[1] = extract_y(src.m_value); if (mask & 4) ((int *)(&dst.m_pValue[extract_z(dst.m_vindex)]))[2] = extract_z(src.m_value); @@ -1218,7 +1218,7 @@ struct spmd_kernel return vint{ k }; } - + // Linear integer struct lint { @@ -1238,7 +1238,7 @@ struct spmd_kernel return vint{ m_value }; } - CPPSPMD_FORCE_INLINE int get_first_value() const + CPPSPMD_FORCE_INLINE int get_first_value() const { return _mm_cvtsi128_si32(m_value); } @@ -1272,9 +1272,9 @@ struct spmd_kernel dst.m_value = src.m_value; return dst; } - + const lint program_index = lint{ _mm_set_epi32( 3, 2, 1, 0 ) }; - + // SPMD condition helpers template @@ -1301,7 +1301,7 @@ struct spmd_kernel template CPPSPMD_FORCE_INLINE void 
spmd_foreach(int begin, int end, const ForeachBody& foreachBody); - + #ifdef _DEBUG CPPSPMD_FORCE_INLINE void check_masks(); #else @@ -1310,9 +1310,9 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void spmd_break(); CPPSPMD_FORCE_INLINE void spmd_continue(); - + CPPSPMD_FORCE_INLINE void spmd_return(); - + template CPPSPMD_FORCE_INLINE void spmd_unmasked(const UnmaskedBody& unmaskedBody); @@ -1324,8 +1324,8 @@ struct spmd_kernel CPPSPMD_FORCE_INLINE void swap(vfloat &a, vfloat &b) { vfloat temp = a; store(a, b); store(b, temp); } CPPSPMD_FORCE_INLINE void swap(vbool &a, vbool &b) { vbool temp = a; store(a, b); store(b, temp); } - CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) - { + CPPSPMD_FORCE_INLINE float reduce_add(vfloat v) + { __m128 k3210 = _mm_castsi128_ps(blendv_mask_epi32(_mm_setzero_si128(), _mm_castps_si128(v.m_value), m_exec.m_mask)); //#if CPPSPMD_SSE2 @@ -1374,14 +1374,14 @@ using float_vref = spmd_kernel::float_vref; using vfloat_vref = spmd_kernel::vfloat_vref; using vint_vref = spmd_kernel::vint_vref; -CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const -{ - return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vfloat() const +{ + return vfloat { _mm_and_ps( _mm_castsi128_ps(m_value), *(const __m128 *)g_onef_128 ) }; } - + // Returns UINT32_MAX's for true, 0 for false. (Should it return 1's?) 
-CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const -{ +CPPSPMD_FORCE_INLINE spmd_kernel::vbool::operator vint() const +{ return vint { m_value }; } @@ -1462,9 +1462,9 @@ CPPSPMD_FORCE_INLINE vfloat round_truncate(const vfloat& a) { __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU) ); __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); - + __m128i ai = _mm_cvttps_epi32(a.m_value); - + __m128 af = _mm_cvtepi32_ps(ai); return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; } @@ -1487,11 +1487,11 @@ CPPSPMD_FORCE_INLINE vfloat ceil(const vfloat& a) { __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); __m128i has_fractional = _mm_cmplt_epi32(abs_a, _mm_castps_si128(_mm_set1_ps(8388608.0f))); - + __m128i ai = _mm_cvtps_epi32(a.m_value); __m128 af = _mm_cvtepi32_ps(ai); __m128 changed = _mm_cvtepi32_ps(_mm_castps_si128(_mm_cmplt_ps(af, a.m_value))); - + af = _mm_sub_ps(af, changed); return vfloat{ blendv_mask_ps(a.m_value, af, _mm_castsi128_ps(has_fractional)) }; @@ -1524,12 +1524,12 @@ CPPSPMD_FORCE_INLINE vfloat round_nearest(const vfloat& a) __m128i sign_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x80000000U)); __m128 force_int = _mm_castsi128_ps(_mm_or_si128(no_fract_fp_bits, sign_a)); - + // Can't use individual _mm_add_ps/_mm_sub_ps - this will be optimized out with /fp:fast by clang and probably other compilers. 
//__m128 temp1 = _mm_add_ps(a.m_value, force_int); //__m128 temp2 = _mm_sub_ps(temp1, force_int); __m128 temp2 = add_sub(a.m_value, force_int); - + __m128i abs_a = _mm_and_si128(_mm_castps_si128(a.m_value), _mm_set1_epi32(0x7FFFFFFFU)); __m128i has_fractional = _mm_cmplt_epi32(abs_a, no_fract_fp_bits); return vfloat{ blendv_mask_ps(a.m_value, temp2, _mm_castsi128_ps(has_fractional)) }; @@ -1837,7 +1837,7 @@ CPPSPMD_FORCE_INLINE vint vuint_shift_right(const vint& a, const vint& b) #else //vint inv_shift = 32 - b; //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); - + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); @@ -1856,7 +1856,7 @@ CPPSPMD_FORCE_INLINE vint vuint_shift_right_not_zero(const vint& a, const vint& { //vint inv_shift = 32 - b; //vfloat f = cast_vint_to_vfloat(vint(_mm_slli_epi32(inv_shift.m_value, 23)) + cast_vfloat_to_vint(vfloat(1.0f))); - + // Take float rep of 1.0f (0x3f800000), subtract (32<<23), subtract (shift<<23), cast to float. 
vfloat f = cast_vint_to_vfloat(vint(_mm_sub_epi32(_mm_set1_epi32(0x4f800000), _mm_slli_epi32(b.m_value, 23)))); @@ -1900,7 +1900,7 @@ CPPSPMD_FORCE_INLINE vint operator>> (const vint& a, const vint& b) // Shift left/right by a uniform immediate constant #define VINT_SHIFT_LEFT(a, b) vint(_mm_slli_epi32( (a).m_value, (b) ) ) -#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) +#define VINT_SHIFT_RIGHT(a, b) vint( _mm_srai_epi32( (a).m_value, (b) ) ) #define VUINT_SHIFT_RIGHT(a, b) vint( _mm_srli_epi32( (a).m_value, (b) ) ) #define VINT_ROT(x, k) (VINT_SHIFT_LEFT((x), (k)) | VUINT_SHIFT_RIGHT((x), 32 - (k))) diff --git a/encoder/jpgd.cpp b/encoder/jpgd.cpp index 46083440..3b417b38 100644 --- a/encoder/jpgd.cpp +++ b/encoder/jpgd.cpp @@ -3,7 +3,7 @@ // Supports box and linear chroma upsampling. // // Released under two licenses. You are free to choose which license you want: -// License 1: +// License 1: // Public Domain // // License 2: @@ -149,7 +149,7 @@ namespace jpgd { { static void idct(int* pTemp, const jpgd_block_t* pSrc) { - (void)pTemp; + (void)pTemp; (void)pSrc; } }; @@ -264,10 +264,10 @@ namespace jpgd { 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, }; - static const uint8 s_idct_col_table[] = - { - 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 + static const uint8 s_idct_col_table[] = + { + 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; // Scalar "fast pathing" IDCT. 
diff --git a/encoder/jpgd.h b/encoder/jpgd.h index 86a7814c..92e53335 100644 --- a/encoder/jpgd.h +++ b/encoder/jpgd.h @@ -10,7 +10,7 @@ #include #ifdef _MSC_VER -#define JPGD_NORETURN __declspec(noreturn) +#define JPGD_NORETURN __declspec(noreturn) #elif defined(__GNUC__) #define JPGD_NORETURN __attribute__ ((noreturn)) #else @@ -140,7 +140,7 @@ namespace jpgd int begin_decoding(); // Returns the next scan line. - // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). // Returns JPGD_SUCCESS if a scan line has been returned. // Returns JPGD_DONE if all scan lines have been returned. diff --git a/spec/basis_spec.txt b/spec/basis_spec.txt index ba9fa4bc..4ccc24c8 100644 --- a/spec/basis_spec.txt +++ b/spec/basis_spec.txt @@ -7,24 +7,24 @@ Version 1.01 1.0 Introduction ---------------- -The Basis Universal GPU texture codec supports reading and writing ".basis" files. +The Basis Universal GPU texture codec supports reading and writing ".basis" files. The .basis file format supports ETC1S or UASTC 4x4 texture data. * ETC1S is a simplified subset of ETC1. -The mode is always differential (diff bit=1), the Rd, Gd, and Bd color deltas -are always (0,0,0), and the flip bit is always set. ETC1S texture data is fully -compliant with all existing software and hardware ETC1 decoders. Existing encoders +The mode is always differential (diff bit=1), the Rd, Gd, and Bd color deltas +are always (0,0,0), and the flip bit is always set. ETC1S texture data is fully +compliant with all existing software and hardware ETC1 decoders. Existing encoders can be easily modified to limit their output to ETC1S. 
-* UASTC 4x4 is a 19 mode subset of the ASTC texture format. Its specification is +* UASTC 4x4 is a 19 mode subset of the ASTC texture format. Its specification is [here](https://github.com/BinomialLLC/basis_universal/wiki/UASTC-Texture-Specification). UASTC texture data can always be losslessly transcoded to ASTC. 2.0 High-Level File Structure ----------------------------- A .basis file consists of multiple sections. Apart from the header, which must always -be at the start of the file, the other sections may appear in any order. +be at the start of the file, the other sections may appear in any order. Here's the high level organization of a typical .basis file: @@ -32,8 +32,8 @@ Here's the high level organization of a typical .basis file: * Optional ETC1S compressed endpoint/selector codebooks * Optional ETC1S Huffman table information * A required "slice" description array describing the resolutions and file offset/compressed sizes of each texture slice present in the file -* 1 or more slices containing ETC1S or UASTC compressed texture data. -* For future expansion, the format supports an "extended" header which may be located anywhere in the file. This section contains .PNG-like chunked data. +* 1 or more slices containing ETC1S or UASTC compressed texture data. +* For future expansion, the format supports an "extended" header which may be located anywhere in the file. This section contains .PNG-like chunked data. 
3.0 File Enums -------------- @@ -41,11 +41,11 @@ Here's the high level organization of a typical .basis file: // basis_file_header::m_tex_type enum basis_texture_type { - cBASISTexType2D = 0, - cBASISTexType2DArray = 1, - cBASISTexTypeCubemapArray = 2, - cBASISTexTypeVideoFrames = 3, - cBASISTexTypeVolume = 4, + cBASISTexType2D = 0, + cBASISTexType2DArray = 1, + cBASISTexTypeCubemapArray = 2, + cBASISTexTypeVideoFrames = 3, + cBASISTexTypeVolume = 4, cBASISTexTypeTotal }; @@ -53,17 +53,17 @@ enum basis_texture_type enum basis_slice_desc_flags { cSliceDescFlagsHasAlpha = 1, - cSliceDescFlagsFrameIsIFrame = 2 + cSliceDescFlagsFrameIsIFrame = 2 }; -// basis_file_header::m_tex_format +// basis_file_header::m_tex_format enum basis_tex_format { cETC1S = 0, cUASTC4x4 = 1 }; -// basis_file_header::m_flags +// basis_file_header::m_flags enum basis_header_flags { cBASISHeaderFlagETC1S = 1, @@ -74,7 +74,7 @@ enum basis_header_flags 4.0 File Structures ------------------- -All individual members in all file structures are byte aligned and little endian. The structs +All individual members in all file structures are byte aligned and little endian. The structs have no padding (i.e. they are declared with #pragma pack(1)). 
4.1 "basis_file_header" structure @@ -92,7 +92,7 @@ struct basis_file_header uint32 m_data_size; // The total size of all data after the header uint16 m_data_crc16; // The CRC16 of all data after the header - uint24 m_total_slices; // The number of compressed slices + uint24 m_total_slices; // The number of compressed slices uint24 m_total_images; // The total # of images byte m_tex_format; // enum basis_tex_format @@ -104,11 +104,11 @@ struct basis_file_header uint32 m_userdata0; // For client use uint32 m_userdata1; // For client use - uint16 m_total_endpoints; // ETC1S: The number of endpoints in the endpoint codebook + uint16 m_total_endpoints; // ETC1S: The number of endpoints in the endpoint codebook uint32 m_endpoint_cb_file_ofs; // ETC1S: The compressed endpoint codebook's file offset relative to the start of the file uint24 m_endpoint_cb_file_size; // ETC1S: The compressed endpoint codebook's size in bytes - uint16 m_total_selectors; // ETC1S: The number of selectors in the selector codebook + uint16 m_total_selectors; // ETC1S: The number of selectors in the selector codebook uint32 m_selector_cb_file_ofs; // ETC1S: The compressed selector codebook's file offset relative to the start of the file uint24 m_selector_cb_file_size; // ETC1S: The compressed selector codebook's size in bytes @@ -135,7 +135,7 @@ struct basis_file_header * m_us_per_frame: Microseconds per frame, only valid for cBASISTexTypeVideoFrames texture types. * m_total_endpoints, m_endpoint_cb_file_ofs, m_endpoint_cb_file_size: Information about the compressed ETC1S endpoint codebook: The total # of entries, the offset to the compressed data, and the compressed data's size. * m_total_selectors, m_selector_cb_file_ofs, m_selector_cb_file_size: Information about the compressed ETC1S selector codebook: The total # of entries, the offset to the compressed data, and the compressed data's size. 
-* m_tables_file_ofs, m_tables_file_size: The file offset and size of the compressed Huffman tables for ETC1S format files. +* m_tables_file_ofs, m_tables_file_size: The file offset and size of the compressed Huffman tables for ETC1S format files. * m_slice_desc_file_ofs: The file offset to the array of slice description structures. There will be m_total_slices structures at this file offset. * m_extended_file_ofs, m_extended_file_size: The "extended" header, for future expansion. Currently unused. @@ -144,20 +144,20 @@ struct basis_file_header struct basis_slice_desc { - uint24 m_image_index; - uint8 m_level_index; - uint8 m_flags; + uint24 m_image_index; + uint8 m_level_index; + uint8 m_flags; - uint16 m_orig_width; - uint16 m_orig_height; + uint16 m_orig_width; + uint16 m_orig_height; - uint16 m_num_blocks_x; - uint16 m_num_blocks_y; + uint16 m_num_blocks_x; + uint16 m_num_blocks_y; - uint32 m_file_ofs; - uint32 m_file_size; + uint32 m_file_ofs; + uint32 m_file_size; - uint16 m_slice_data_crc16; + uint16 m_slice_data_crc16; }; 4.2.1 Details: @@ -168,7 +168,7 @@ struct basis_slice_desc * m_orig_width: The original image width (may not be a multiple of 4 pixels) * m_orig_height: The original image height (may not be a multiple of 4 pixels) * m_num_blocks_x: The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. -* m_num_blocks_y: The slice's block Y dimensions. +* m_num_blocks_y: The slice's block Y dimensions. * m_file_ofs: Offset from the start of the file to the start of the slice's data * m_file_size: The size of the compressed slice data in bytes * m_slice_data_crc16: The CRC16 of the compressed slice data, for extra-paranoid use cases @@ -176,7 +176,7 @@ struct basis_slice_desc 5.0 CRC-16 Function ------------------- -.basis files use CRC-16/genibus(aka CRC-16 EPC, CRC-16 I-CODE, CRC-16 DARC) format CRC-16's. 
+.basis files use CRC-16/genibus(aka CRC-16 EPC, CRC-16 I-CODE, CRC-16 DARC) format CRC-16's. Here's an example function in C++: @@ -216,39 +216,39 @@ First, some enums: enum { // Max supported Huffman code size is 16-bits - cHuffmanMaxSupportedCodeSize = 16, + cHuffmanMaxSupportedCodeSize = 16, // The maximum number of symbols is 2^14 - cHuffmanMaxSymsLog2 = 14, + cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs may range from 3-10 entries - cHuffmanSmallZeroRunSizeMin = 3, - cHuffmanSmallZeroRunSizeMax = 10, + cHuffmanSmallZeroRunSizeMin = 3, + cHuffmanSmallZeroRunSizeMax = 10, cHuffmanSmallZeroRunExtraBits = 3, // Big zero runs may range from 11-138 entries - cHuffmanBigZeroRunSizeMin = 11, - cHuffmanBigZeroRunSizeMax = 138, + cHuffmanBigZeroRunSizeMin = 11, + cHuffmanBigZeroRunSizeMax = 138, cHuffmanBigZeroRunExtraBits = 7, // Small non-zero runs may range from 3-6 entries - cHuffmanSmallRepeatSizeMin = 3, - cHuffmanSmallRepeatSizeMax = 6, + cHuffmanSmallRepeatSizeMin = 3, + cHuffmanSmallRepeatSizeMax = 6, cHuffmanSmallRepeatExtraBits = 2, // Big non-zero run may range from 7-134 entries - cHuffmanBigRepeatSizeMin = 7, - cHuffmanBigRepeatSizeMax = 134, + cHuffmanBigRepeatSizeMin = 7, + cHuffmanBigRepeatSizeMax = 134, cHuffmanBigRepeatExtraBits = 7, // There are a maximum of 21 symbols in a compressed Huffman code length table. - cHuffmanTotalCodelengthCodes = 21, - + cHuffmanTotalCodelengthCodes = 21, + // Symbols [0,16] indicate code sizes. 
Other symbols indicate zero runs or repeats: - cHuffmanSmallZeroRunCode = 17, - cHuffmanBigZeroRunCode = 18, - cHuffmanSmallRepeatCode = 19, + cHuffmanSmallZeroRunCode = 17, + cHuffmanBigZeroRunCode = 18, + cHuffmanSmallRepeatCode = 19, cHuffmanBigRepeatCode = 20 }; @@ -261,41 +261,41 @@ Each table begins with a small fixed header: 14 bits: total_used_syms [1, cHuffmanMaxSyms] 5 bits: num_codelength_codes [1, cHuffmanTotalCodelengthCodes] - + Next, the code lengths for the small Huffman table which is used to send the compressed codelengths (and RLE/repeat codes) are sent uncompressed but in a reordered manner: - + 3*num_codelength_codes bits: Code size of each Huffman symbol for the compressed Huffman codelength table. - + These code lengths are sent in this order (to help reduce the number that must be sent): - - { - cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, - 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 + + { + cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode, cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, + 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 }; - + A canonical Huffman decoding table (of up to 21 symbols) should be built from these code lengths. Immediately following this data are the Huffman symbols (sometimes intermixed with raw bits) which describe how to unpack the codelengths of each symbol in the Huffman table: - Symbols [0,16] indicate a specific symbol code length in bits. - + - Symbol cHuffmanSmallZeroRunCode (17) indicates a short run of symbols with 0 bit code lengths. cHuffmanSmallZeroRunExtraBits (3) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanSmallZeroRunSizeMin). - - - Symbol cHuffmanBigZeroRunCode (18) indicates a long run of symbols with 0 bit code lengths. + + - Symbol cHuffmanBigZeroRunCode (18) indicates a long run of symbols with 0 bit code lengths. 
cHuffmanBigZeroRunExtraBits (7) bits are sent after this symbol, which indicates the run's size after adding the minimum size (cHuffmanBigZeroRunSizeMin) - Symbol cHuffmanSmallRepeatCode (19) indicates a short run of symbols that repeat the previous symbol's code length. - cHuffmanSmallRepeatExtraBits (2) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, + cHuffmanSmallRepeatExtraBits (2) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, after adding the minimum size (cHuffmanSmallRepeatSizeMin). Cannot be the first symbol, and the previous symbol cannot have a code length of 0. - + - Symbol cHuffmanBigRepeatCode (20) indicates a short run of symbols that repeat the previous symbol's code length. cHuffmanBigRepeatExtraBits (7) bits are sent after this symbol, which indicates the number of times to repeat the previous symbol's code length, after adding the minimum size (cHuffmanBigRepeatSizeMin). Cannot be the first symbol, and the previous symbol cannot have a code length of 0. - + There should be exactly total_used_syms code lengths stored in the compressed Huffman table. If not the stream is either corrupted or invalid. After all the symbol codelengths are uncompressed, the symbol codes can be computed and the canonical Huffman decoding tables can be built. @@ -318,9 +318,9 @@ appear in this order: 4. inten_delta_model Following the data for these Huffman tables is a single 1-bit code which -indicates if the color endpoint codebook is grayscale or not. +indicates if the color endpoint codebook is grayscale or not. -Immediately following this code is the compressed color endpoint codebook data. +Immediately following this code is the compressed color endpoint codebook data. A simple form of DPCM (Delta Pulse Code Modulation) coding is used to send the ETC1S intensity table indices and color values. 
Here is the procedure to decode the endpoint codebook: @@ -393,11 +393,11 @@ byte corresponds to four 2-bit ETC1S selectors. The first selector of each group of 4 selectors starts at the LSB (least significant bit) of each byte, and is 2-bits wide. -If the third bit is 0, the selectors have been DPCM coded with Huffman coding. +If the third bit is 0, the selectors have been DPCM coded with Huffman coding. The "delta_selector_pal_model" Huffman table will immediately follow the third bit, and is stored using the procedure outlined in section 6.0. -Immediately following the Huffman table is the compressed selector codebook. +Immediately following the Huffman table is the compressed selector codebook. Here is the DPCM decoding procedure: uint8_t prev_bytes[4] = { 0, 0, 0, 0 }; @@ -439,7 +439,7 @@ Any bytes in this section following the selector codebook bits can be safely ign Each ETC1S slice is compressed with four Huffman tables stored using the procedure outlined in section 6.0. These Huffman tables are stored at file -offset basis_file_header::m_tables_file_ofs. This section will be +offset basis_file_header::m_tables_file_ofs. This section will be basis_file_header::m_tables_file_size bytes long. The following four Huffman tables are sent, in this order: @@ -458,7 +458,7 @@ history buffer. Any remaining bits may be safely ignored. ETC1S slices consist of a compressed 2D array of ETC1S blocks, always compressed in top-down/left-right raster order. For texture video, the previous slice's already decoded contents may be referred to when blocks are encoded using -Conditional Replenishment (also known as "skip blocks"). +Conditional Replenishment (also known as "skip blocks"). Each ETC1S block is encoded by using references to the color endpoint codebook and the selector codebook. 
Sections 10.1 and 10.2 describe the helper procedures @@ -508,7 +508,7 @@ decoding: m_values[index] = x; } } - + private: std::vector m_values; uint32_t m_rover; @@ -526,7 +526,7 @@ sends raw bits using variable-size chunks. Here is the VLC decoding procedure: const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -538,7 +538,7 @@ sends raw bits using variable-size chunks. Here is the VLC decoding procedure: if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -583,11 +583,11 @@ selector codebook indices are decoded. const uint32_t NUM_ENDPOINT_PREDS = 3; const uint32_t CR_ENDPOINT_PRED_INDEX = NUM_ENDPOINT_PREDS - 1; const uint32_t NO_ENDPOINT_PRED_INDEX = 3; - + // Endpoint/selector codebooks - decoded previously. See sections 7.0 and 8.0. endpoint endpoints[endpoint_codebook_size]; - selector selectors[selector_codebook_size]; - + selector selectors[selector_codebook_size]; + // Array of per-block values used for endpoint index prediction (enough for 2 rows). struct block_preds { @@ -595,22 +595,22 @@ selector codebook indices are decoded. uint8_t m_pred_bits; }; block_preds block_endpoint_preds[2][num_blocks_x]; - + // Some constants and state used during block decoding const uint32_t SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX = selector_codebook_size; const uint32_t SELECTOR_HISTORY_BUF_RLE_SYMBOL_INDEX = selector_history_buf_size + SELECTOR_HISTORY_BUF_FIRST_SYMBOL_INDEX; uint32_t cur_selector_rle_count = 0; - + uint32_t cur_pred_bits = 0; int prev_endpoint_pred_sym = 0; int endpoint_pred_repeat_count = 0; uint32_t prev_endpoint_index = 0; // This array is only used for texture video. It holds the previous frame's endpoint and selector indices (each 16-bits, for 32-bits total). - uint32_t prev_frame_indices[num_blocks_x][num_blocks_y]; - + uint32_t prev_frame_indices[num_blocks_x][num_blocks_y]; + // Selector history buffer - See section 10.1. 
- // For the selector history buffer's size, see section 9.0. + // For the selector history buffer's size, see section 9.0. approx_move_to_front selector_history_buf(selector_history_buf_size); // Loop over all slice blocks in raster order @@ -629,7 +629,7 @@ selector codebook indices are decoded. { // We're on an even row and column of blocks. Decode the combined endpoint index predictor symbols for 2x2 blocks. // This symbol tells the decoder how the endpoints are decoded for each block in a 2x2 group of blocks. - + // Are we in an RLE run? if (endpoint_pred_repeat_count) { @@ -655,7 +655,7 @@ selector codebook indices are decoded. } } - // The symbol has enough endpoint prediction information for 4 blocks (2 bits per block), so 8 bits total. + // The symbol has enough endpoint prediction information for 4 blocks (2 bits per block), so 8 bits total. // Remember the prediction information we should use for the next row of 2 blocks beneath the current block. block_endpoint_preds[cur_block_endpoint_pred_array ^ 1][block_x].m_pred_bits = (uint8_t)(cur_pred_bits >> 4); } @@ -665,7 +665,7 @@ selector codebook indices are decoded. cur_pred_bits = block_endpoint_preds[cur_block_endpoint_pred_array][block_x].m_pred_bits; } } - + // Decode the current block's endpoint and selector indices. uint32_t endpoint_index, selector_index = 0; @@ -673,8 +673,8 @@ selector codebook indices are decoded. const uint32_t pred = cur_pred_bits & 3; // Get the next block's endpoint prediction bits ready. - cur_pred_bits >>= 2; - + cur_pred_bits >>= 2; + // Now check to see if we should reuse a previously encoded block's endpoints. if (pred == 0) { @@ -712,7 +712,7 @@ selector codebook indices are decoded. 
const uint32_t delta_sym = decode_huffman(delta_endpoint_model); endpoint_index = delta_sym + prev_endpoint_index; - + // Wrap around if the index goes beyond the end of the endpoint codebook if (endpoint_index >= endpoints.size()) endpoint_index -= (int)endpoints.size(); @@ -723,12 +723,12 @@ selector codebook indices are decoded. // Remember the endpoint index used prev_endpoint_index = endpoint_index; - - // Now we have fully decoded the ETC1S endpoint codebook index, in endpoint_index. - + + // Now we have fully decoded the ETC1S endpoint codebook index, in endpoint_index. + // Now decode the selector index (see the next block of code, below). < selector decoding - see below > - + } // block_x } // block_y @@ -756,7 +756,7 @@ The code to decode the selector codebook index immediately follows the code abov const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - // Decode selector index, unless it's texture video and the endpoint predictor indicated that the + // Decode selector index, unless it's texture video and the endpoint predictor indicated that the // block's endpoints were reused from the previous frame. if ((!is_video) || (pred != CR_ENDPOINT_PRED_INDEX)) { @@ -820,7 +820,7 @@ The code to decode the selector codebook index immediately follows the code abov selector_history_buf.add(selector_index); } } - + // For texture video, remember the endpoint and selector indices used by the block on this frame, for later reuse on the next frame. if (is_video) prev_frame_indices[block_x][block_y] = endpoint_index | (selector_index << 16); @@ -836,24 +836,24 @@ texture data, or it can immedately transcode the ETC1S data to another GPU textu 11.0 Alpha Channels in ETC1S Format Files ----------------------------------------- -ETC1S .basis files can have optional alpha channels, stored in odd slices. 
If any slice needs an alpha channel, -all slices must have alpha channels. basis_file_header::m_flags will be logically OR'd with -cBASISHeaderFlagHasAlphaSlices. Alpha channel ETC1S files will contain two slices for each mipmap level -(or face, or video frame, etc.). The basis_slice_desc::m_flags field will be logically OR'd with -cSliceDescFlagsHasAlpha for all odd alpha slices. +ETC1S .basis files can have optional alpha channels, stored in odd slices. If any slice needs an alpha channel, +all slices must have alpha channels. basis_file_header::m_flags will be logically OR'd with +cBASISHeaderFlagHasAlphaSlices. Alpha channel ETC1S files will contain two slices for each mipmap level +(or face, or video frame, etc.). The basis_slice_desc::m_flags field will be logically OR'd with +cSliceDescFlagsHasAlpha for all odd alpha slices. -The even slices will contain the RGB data, and the odd slices will contain the alpha data, both stored in ETC1S -format. Alpha channel ETC1S files must always have an even total number of slices. A decoder can first decode -the RGB data slice, then the next alpha channel slice, or it can decode them in parallel using multithreading. +The even slices will contain the RGB data, and the odd slices will contain the alpha data, both stored in ETC1S +format. Alpha channel ETC1S files must always have an even total number of slices. A decoder can first decode +the RGB data slice, then the next alpha channel slice, or it can decode them in parallel using multithreading. The ETC1S green channel (on the odd slices) contains the alpha values. 12.0 Texture Video ------------------ Both ETC1S and UASTC format files support texture video. Texture video files can be optionally mipmapped, and can -contain optional alpha channels (stored as separate slices in ETC1S format files). 
Currently, the first frame is -always an i-frame, and all subsequent frames are p-frames, but the file format and transcoder supports any -frame being an i-frame (and the encoder will be enhanced to support this feature). Decoders must track the previously +contain optional alpha channels (stored as separate slices in ETC1S format files). Currently, the first frame is +always an i-frame, and all subsequent frames are p-frames, but the file format and transcoder supports any +frame being an i-frame (and the encoder will be enhanced to support this feature). Decoders must track the previously decoded frame's endpoints/selectors for all mipmap levels (if any), not just the top level's. Skip blocks always refer to the previous frame. i-frames cannot use skip blocks (encoded as endpoint predictor index 2). diff --git a/transcoder/basisu.h b/transcoder/basisu.h index f33baf67..05643f03 100644 --- a/transcoder/basisu.h +++ b/transcoder/basisu.h @@ -115,7 +115,7 @@ namespace basisu void enable_debug_printf(bool enabled); void debug_printf(const char *pFmt, ...); - + template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } @@ -124,7 +124,7 @@ namespace basisu template inline S maximum(S a, S b) { return (a > b) ? a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - + template inline S minimum(S a, S b) { return (a < b) ? a : b; } template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } @@ -148,7 +148,7 @@ namespace basisu inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } inline uint64_t iabs64(int64_t i) { return (i < 0) ? 
static_cast(-i) : static_cast(i); } - template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } + template inline void clear_vector(T &vec) { vec.erase(vec.begin(), vec.end()); } template inline typename T::value_type *enlarge_vector(T &vec, size_t n) { size_t cs = vec.size(); vec.resize(cs + n); return &vec[cs]; } inline bool is_pow2(uint32_t x) { return x && ((x & (x - 1U)) == 0U); } @@ -161,8 +161,8 @@ namespace basisu template inline T saturate(T val) { return clamp(val, 0.0f, 1.0f); } - template inline void append_vector(T &vec, const R *pObjs, size_t n) - { + template inline void append_vector(T &vec, const R *pObjs, size_t n) + { if (n) { const size_t cur_s = vec.size(); @@ -208,7 +208,7 @@ namespace basisu for (size_t i = 0; i < vec.size(); i++) vec[i] = obj; } - + inline uint64_t read_be64(const void *p) { uint64_t val = 0; @@ -269,7 +269,7 @@ namespace basisu pBytes[2] = (uint8_t)(val >> 16U); pBytes[3] = (uint8_t)(val >> 24U); } - + // Always little endian 1-8 byte unsigned int template struct packed_uint @@ -279,17 +279,17 @@ namespace basisu inline packed_uint() { static_assert(NumBytes <= sizeof(uint64_t), "Invalid NumBytes"); } inline packed_uint(uint64_t v) { *this = v; } inline packed_uint(const packed_uint& other) { *this = other; } - - inline packed_uint& operator= (uint64_t v) - { - for (uint32_t i = 0; i < NumBytes; i++) - m_bytes[i] = static_cast(v >> (i * 8)); - return *this; + + inline packed_uint& operator= (uint64_t v) + { + for (uint32_t i = 0; i < NumBytes; i++) + m_bytes[i] = static_cast(v >> (i * 8)); + return *this; } - inline packed_uint& operator= (const packed_uint& rhs) - { - memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); + inline packed_uint& operator= (const packed_uint& rhs) + { + memcpy(m_bytes, rhs.m_bytes, sizeof(m_bytes)); return *this; } @@ -297,19 +297,19 @@ namespace basisu { switch (NumBytes) { - case 1: + case 1: { return m_bytes[0]; } - case 2: + case 2: { return (m_bytes[1] << 8U) | m_bytes[0]; 
} - case 3: + case 3: { return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0]; } - case 4: + case 4: { return read_le_dword(m_bytes); } @@ -331,13 +331,13 @@ namespace basisu uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4]; return static_cast(l) | (static_cast(h) << 32U); } - case 8: + case 8: { uint32_t l = read_le_dword(m_bytes); uint32_t h = read_le_dword(m_bytes + 4); return static_cast(l) | (static_cast(h) << 32U); } - default: + default: { assert(0); return 0; @@ -348,14 +348,14 @@ namespace basisu enum eZero { cZero }; enum eNoClamp { cNoClamp }; - + // Rice/Huffman entropy coding - + // This is basically Deflate-style canonical Huffman, except we allow for a lot more symbols. enum { - cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, - cHuffmanFastLookupBits = 10, + cHuffmanMaxSupportedCodeSize = 16, cHuffmanMaxSupportedInternalCodeSize = 31, + cHuffmanFastLookupBits = 10, cHuffmanMaxSymsLog2 = 14, cHuffmanMaxSyms = 1 << cHuffmanMaxSymsLog2, // Small zero runs @@ -381,13 +381,13 @@ namespace basisu enum class texture_format { cInvalidTextureFormat = -1, - + // Block-based formats cETC1, // ETC1 cETC1S, // ETC1 (subset: diff colors only, no subblocks) cETC2_RGB, // ETC2 color block (basisu doesn't support ETC2 planar/T/H modes - just basic ETC1) cETC2_RGBA, // ETC2 EAC alpha block followed by ETC2 color block - cETC2_ALPHA, // ETC2 EAC alpha block + cETC2_ALPHA, // ETC2 EAC alpha block cBC1, // DXT1 cBC3, // DXT5 (BC4/DXT5A block followed by a BC1/DXT1 block) cBC4, // DXT5A @@ -402,10 +402,10 @@ namespace basisu cPVRTC2_4_RGBA, cETC2_R11_EAC, cETC2_RG11_EAC, - cUASTC4x4, + cUASTC4x4, cBC1_NV, cBC1_AMD, - + // Uncompressed/raw pixels cRGBA32, cRGB565, @@ -463,6 +463,6 @@ namespace basisu BASISU_NOTE_UNUSED(fmt); return 4; } - + } // namespace basisu diff --git a/transcoder/basisu_containers.h b/transcoder/basisu_containers.h index 1ca4bab3..cc4cdcb4 100644 --- a/transcoder/basisu_containers.h +++ 
b/transcoder/basisu_containers.h @@ -650,7 +650,7 @@ namespace basisu insert(m_size, p, n); return *this; } - + inline void erase(uint32_t start, uint32_t n) { assert((start + n) <= m_size); @@ -681,7 +681,7 @@ namespace basisu } else { - // Type is not bitwise copyable or movable. + // Type is not bitwise copyable or movable. // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end. T* pDst_end = pDst + num_to_move; while (pDst != pDst_end) @@ -901,7 +901,7 @@ namespace basisu if (!m) break; cmp = -cmp; i += (((m + 1) >> 1) ^ cmp) - cmp; - if (i < 0) + if (i < 0) break; } } @@ -1044,7 +1044,7 @@ namespace basisu public: class iterator; class const_iterator; - + private: friend class iterator; friend class const_iterator; @@ -1232,7 +1232,7 @@ namespace basisu if (new_hash_size > m_values.size()) rehash((uint32_t)new_hash_size); } - + class iterator { friend class hash_map; @@ -1667,7 +1667,7 @@ namespace basisu inline void grow() { uint64_t n = m_values.size() * 3ULL; // was * 2 - + if (!helpers::is_power_of_2(n)) n = helpers::next_pow2(n); @@ -1884,11 +1884,11 @@ namespace basisu template struct bitwise_movable< hash_map > { enum { cFlag = true }; }; - + #if BASISU_HASHMAP_TEST extern void hash_map_test(); #endif - + } // namespace basisu namespace std diff --git a/transcoder/basisu_containers_impl.h b/transcoder/basisu_containers_impl.h index 65551714..6529f3b9 100644 --- a/transcoder/basisu_containers_impl.h +++ b/transcoder/basisu_containers_impl.h @@ -34,7 +34,7 @@ namespace basisu abort(); } } - + const size_t desired_size = element_size * new_capacity; size_t actual_size = 0; if (!pMover) @@ -94,7 +94,7 @@ namespace basisu if (m_p) free(m_p); - + m_p = new_p; } diff --git a/transcoder/basisu_file_headers.h b/transcoder/basisu_file_headers.h index 4316d738..e99972d8 100644 --- a/transcoder/basisu_file_headers.h +++ b/transcoder/basisu_file_headers.h @@ -21,10 +21,10 @@ namespace basist enum 
basis_slice_desc_flags { cSliceDescFlagsHasAlpha = 1, - + // Video only: Frame doesn't refer to previous frame (no usage of conditional replenishment pred symbols) // Currently the first frame is always an I-Frame, all subsequent frames are P-Frames. This will eventually be changed to periodic I-Frames. - cSliceDescFlagsFrameIsIFrame = 2 + cSliceDescFlagsFrameIsIFrame = 2 }; #pragma pack(push) @@ -39,7 +39,7 @@ namespace basist basisu::packed_uint<2> m_orig_height; // The original image height (may not be a multiple of 4 pixels) basisu::packed_uint<2> m_num_blocks_x; // The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2. - basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. + basisu::packed_uint<2> m_num_blocks_y; // The slice's block Y dimensions. basisu::packed_uint<4> m_file_ofs; // Offset from the start of the file to the start of the slice's data basisu::packed_uint<4> m_file_size; // The size of the compressed slice data in bytes @@ -51,24 +51,24 @@ namespace basist enum basis_header_flags { // Always set for ETC1S files. Not set for UASTC files. - cBASISHeaderFlagETC1S = 1, - + cBASISHeaderFlagETC1S = 1, + // Set if the texture had to be Y flipped before encoding. The actual interpretation of this (is Y up or down?) is up to the user. - cBASISHeaderFlagYFlipped = 2, - + cBASISHeaderFlagYFlipped = 2, + // Set if any slices contain alpha (for ETC1S, if the odd slices contain alpha data) - cBASISHeaderFlagHasAlphaSlices = 4, - - // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. - cBASISHeaderFlagUsesGlobalCodebook = 8, - - // Set if the texture data is sRGB, otherwise it's linear. + cBASISHeaderFlagHasAlphaSlices = 4, + + // For ETC1S files, this will be true if the file utilizes a codebook from another .basis file. + cBASISHeaderFlagUsesGlobalCodebook = 8, + + // Set if the texture data is sRGB, otherwise it's linear. 
// In reality, we have no idea if the texture data is actually linear or sRGB. This is the m_perceptual parameter passed to the compressor. - cBASISHeaderFlagSRGB = 16, + cBASISHeaderFlagSRGB = 16, }; // The image type field attempts to describe how to interpret the image data in a Basis file. - // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. + // The encoder library doesn't really do anything special or different with these texture types, this is mostly here for the benefit of the user. // We do make sure the various constraints are followed (2DArray/cubemap/videoframes/volume implies that each image has the same resolution and # of mipmap levels, etc., cubemap implies that the # of image slices is a multiple of 6) enum basis_texture_type { @@ -111,7 +111,7 @@ namespace basist basisu::packed_uint<3> m_total_slices; // The total # of compressed slices (1 slice per image, or 2 for alpha .basis files) basisu::packed_uint<3> m_total_images; // The total # of images - + basisu::packed_uint<1> m_tex_format; // enum basis_tex_format basisu::packed_uint<2> m_flags; // enum basist::header_flags basisu::packed_uint<1> m_tex_type; // enum basist::basis_texture_type @@ -121,11 +121,11 @@ namespace basist basisu::packed_uint<4> m_userdata0; // For client use basisu::packed_uint<4> m_userdata1; // For client use - basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook + basisu::packed_uint<2> m_total_endpoints; // The number of endpoints in the endpoint codebook basisu::packed_uint<4> m_endpoint_cb_file_ofs; // The compressed endpoint codebook's file offset relative to the start of the file basisu::packed_uint<3> m_endpoint_cb_file_size; // The compressed endpoint codebook's size in bytes - basisu::packed_uint<2> m_total_selectors; // The number of selectors in the endpoint codebook + basisu::packed_uint<2> m_total_selectors; // The number of 
selectors in the endpoint codebook basisu::packed_uint<4> m_selector_cb_file_ofs; // The compressed selectors codebook's file offset relative to the start of the file basisu::packed_uint<3> m_selector_cb_file_size; // The compressed selector codebook's size in bytes @@ -133,7 +133,7 @@ namespace basist basisu::packed_uint<4> m_tables_file_size; // The file size in bytes of the compressed huffman codelength tables basisu::packed_uint<4> m_slice_desc_file_ofs; // The file offset to the slice description array, usually follows the header - + basisu::packed_uint<4> m_extended_file_ofs; // The file offset of the "extended" header and compressed data, for future use basisu::packed_uint<4> m_extended_file_size; // The file size in bytes of the "extended" header and compressed data, for future use }; diff --git a/transcoder/basisu_global_selector_palette.h b/transcoder/basisu_global_selector_palette.h index 8bedf947..f682df74 100644 --- a/transcoder/basisu_global_selector_palette.h +++ b/transcoder/basisu_global_selector_palette.h @@ -1,6 +1,6 @@ // basisu_global_selector_palette.h // Copyright (C) 2019-2021 Binomial LLC. All Rights Reserved. -// +// // TODO: NONE of this is used in .basis/.ktx2 files. It will be deleted soon. // // Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/transcoder/basisu_transcoder.cpp b/transcoder/basisu_transcoder.cpp index 29eb3c0d..5c4a7dad 100644 --- a/transcoder/basisu_transcoder.cpp +++ b/transcoder/basisu_transcoder.cpp @@ -162,7 +162,7 @@ namespace basisu void debug_printf(const char* pFmt, ...) 
{ -#if BASISU_FORCE_DEVEL_MESSAGES +#if BASISU_FORCE_DEVEL_MESSAGES g_debug_printf = true; #endif if (g_debug_printf) @@ -322,14 +322,14 @@ namespace basist //const uint8_t g_etc1_to_selector_index[cETC1SelectorValues] = { 2, 3, 1, 0 }; const uint8_t g_selector_index_to_etc1[cETC1SelectorValues] = { 3, 2, 0, 1 }; - + static const uint8_t g_etc_5_to_8[32] = { 0, 8, 16, 24, 33, 41, 49, 57, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 198, 206, 214, 222, 231, 239, 247, 255 }; struct decoder_etc_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 - // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 + // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64_t m_uint64; @@ -597,7 +597,7 @@ namespace basist { return (m_bytes[3] & 2) != 0; } - + inline uint32_t get_inten_table(uint32_t subblock_id) const { assert(subblock_id < 2); @@ -612,7 +612,7 @@ namespace basist const uint32_t b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } - + void get_block_colors(color32* pBlock_colors, uint32_t subblock_index) const { color32 b; @@ -730,7 +730,7 @@ namespace basist g = c.g; b = c.b; } - + static void unpack_color5(color32& result, uint16_t packed_color5, bool scaled) { result = unpack_color5(packed_color5, scaled, 255); @@ -859,7 +859,7 @@ namespace basist static void get_block_color5_r(const color32& base_color5, uint32_t inten_table, uint32_t index, uint32_t &r) { assert(index < 4); - + uint32_t br = (base_color5.r << 3) | (base_color5.r >> 2); const int* pInten_table = g_etc1_inten_tables[inten_table]; @@ -1035,7 +1035,7 @@ namespace basist { 1, 2, 2, 2 }, { 1, 2, 3, 3 }, }; - + static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; static uint8_t g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[NUM_ETC1_TO_DXT1_SELECTOR_MAPPINGS][256]; @@ -1427,9 +1427,9 @@ namespace basist return best_err; } #endif // BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES - + 
static -#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES +#if !BASISD_WRITE_NEW_ETC2_EAC_A8_TABLES const #endif etc1_g_to_eac_conversion s_etc1_g_to_etc2_a8[32 * 8][NUM_ETC2_EAC_SELECTOR_RANGES] = @@ -1919,18 +1919,18 @@ namespace basist #endif static bool g_transcoder_initialized; - + // Library global initialization. Requires ~9 milliseconds when compiled and executed natively on a Core i7 2.2 GHz. // If this is too slow, these computed tables can easilky be moved to be compiled in. void basisu_transcoder_init() { if (g_transcoder_initialized) { - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Called more than once\n"); return; } - - BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); + + BASISU_DEVEL_ERROR("basisu_transcoder::basisu_transcoder_init: Initializing (this is not an error)\n"); #if BASISD_SUPPORT_UASTC uastc_init(); @@ -1939,7 +1939,7 @@ namespace basist #if BASISD_SUPPORT_ASTC transcoder_init_astc(); #endif - + #if BASISD_WRITE_NEW_ASTC_TABLES create_etc1_to_astc_conversion_table_0_47(); create_etc1_to_astc_conversion_table_0_255(); @@ -2195,7 +2195,7 @@ namespace basist std::swap(l, h); pSelectors_xlat_256 = &g_etc1_to_dxt1_selector_mappings_raw_dxt1_inv_256[best_mapping][0]; } - + pDst_block->set_low_color(static_cast(l)); pDst_block->set_high_color(static_cast(h)); @@ -2355,7 +2355,7 @@ namespace basist fxt1_block* pBlock = static_cast(pDst); // CC_MIXED is basically DXT1 with different encoding tricks. - // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. + // So transcode ETC1S to DXT1, then transcode that to FXT1 which is easy and nearly lossless. // (It's not completely lossless because FXT1 rounds in its color lerps while DXT1 doesn't, but it should be good enough.) 
dxt1_block blk; convert_etc1s_to_dxt1(&blk, pEndpoints, pSelectors, false); @@ -2368,7 +2368,7 @@ namespace basist uint32_t g0 = color0.g & 1; uint32_t g1 = color1.g & 1; - + color0.g >>= 1; color1.g >>= 1; @@ -2376,7 +2376,7 @@ namespace basist blk.m_selectors[1] = conv_dxt1_to_fxt1_sels(blk.m_selectors[1]); blk.m_selectors[2] = conv_dxt1_to_fxt1_sels(blk.m_selectors[2]); blk.m_selectors[3] = conv_dxt1_to_fxt1_sels(blk.m_selectors[3]); - + if ((blk.get_selector(0, 0) >> 1) != (g0 ^ g1)) { std::swap(color0, color1); @@ -2390,7 +2390,7 @@ namespace basist if (fxt1_subblock == 0) { - pBlock->m_hi.m_mode = 1; + pBlock->m_hi.m_mode = 1; pBlock->m_hi.m_alpha = 0; pBlock->m_hi.m_glsb = g1 | (g1 << 1); pBlock->m_hi.m_r0 = color0.r; @@ -2711,7 +2711,7 @@ namespace basist { uint32_t r; decoder_etc_block::get_block_color5_r(base_color, inten_table, low_selector, r); - + pDst_block->set_low_alpha(r); pDst_block->set_high_alpha(r); pDst_block->m_selectors[0] = 0; @@ -2794,7 +2794,7 @@ namespace basist static const uint8_t g_pvrtc_4[16] = { 0,16,33,49,66,82,99,115,140,156,173,189,206,222,239,255 }; static const uint8_t g_pvrtc_3[8] = { 0,33,74,107,148,181,222,255 }; static const uint8_t g_pvrtc_alpha[9] = { 0,34,68,102,136,170,204,238,255 }; - + static const uint8_t g_pvrtc_5_floor[256] = { 0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3, @@ -2818,7 +2818,7 @@ namespace basist 24,24,24,24,24,24,24,25,25,25,25,25,25,25,25,26,26,26,26,26,26,26,26,27,27,27,27,27,27,27,27,28, 28,28,28,28,28,28,28,28,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,30,31,31,31,31,31,31,31,31 }; - + static const uint8_t g_pvrtc_4_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -2842,7 +2842,7 @@ namespace basist 12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,14, 14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 }; - + static const uint8_t g_pvrtc_3_floor[256] = { 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -2866,7 +2866,7 @@ namespace basist 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 }; - + static const uint8_t g_pvrtc_alpha_floor[256] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, @@ -2973,10 +2973,10 @@ namespace basist } assert((r < 32) && (g < 32) && (b < 32) && (a < 16)); - + return color32(r, g, b, a); } - + inline color32 get_endpoint_8888(uint32_t endpoint_index) const { assert(endpoint_index < 2); @@ -3023,7 +3023,7 @@ namespace basist a = g_pvrtc_alpha[a]; } - + return color32(r, g, b, a); } @@ -3032,7 +3032,7 @@ namespace basist color32 c(get_endpoint_8888(endpoint_index)); return c.r + c.g + c.b + c.a; } - + inline uint32_t get_opaque_endpoint_l0() const { uint32_t packed = m_endpoints & 0xFFFE; @@ -3147,7 +3147,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + // opaque endpoints: 554 or 555 // transparent endpoints: 3443 or 3444 inline void set_endpoint_raw(uint32_t endpoint_index, const color32& c, bool opaque_endpoint) @@ -3200,7 +3200,7 @@ namespace basist else m_endpoints = (m_endpoints & 0xFFFF0000U) | packed; } - + inline void set_endpoint_floor(uint32_t endpoint_index, const color32& c) { assert(endpoint_index < 2); @@ -3425,7 +3425,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3433,7 +3433,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3582,8 +3582,8 @@ namespace basist } static void fixup_pvrtc1_4_modulation_rgba( - const decoder_etc_block* pETC_Blocks, - const uint32_t* pPVRTC_endpoints, + const decoder_etc_block* pETC_Blocks, + const uint32_t* pPVRTC_endpoints, void* pDst_blocks, uint32_t num_blocks_x, uint32_t 
num_blocks_y, void *pAlpha_blocks, const endpoint* pEndpoints, const selector* pSelectors) { @@ -3606,7 +3606,7 @@ namespace basist for (int ey = 0; ey < 3; ey++) { - int by = y + ey - 1; + int by = y + ey - 1; const uint32_t* pE = &pPVRTC_endpoints[(by & y_mask) * num_blocks_x]; @@ -3614,7 +3614,7 @@ namespace basist for (int ex = 0; ex < 3; ex++) { - int bx = 0 + ex - 1; + int bx = 0 + ex - 1; const uint32_t e = pE[bx & x_mask]; @@ -3628,13 +3628,13 @@ namespace basist for (int x = 0; x < static_cast(num_blocks_x); x++, block_index++) { const decoder_etc_block& src_block = pETC_Blocks[block_index]; - + const uint16_t* pSrc_alpha_block = reinterpret_cast(static_cast(pAlpha_blocks) + x + (y * num_blocks_x)); const endpoint* pAlpha_endpoints = &pEndpoints[pSrc_alpha_block[0]]; const selector* pAlpha_selectors = &pSelectors[pSrc_alpha_block[1]]; - + const uint32_t x_swizzle = (g_pvrtc_swizzle_table[x >> 8] << 17) | (g_pvrtc_swizzle_table[x & 0xFF] << 1); - + uint32_t swizzled = x_swizzle | y_swizzle; if (num_blocks_x != num_blocks_y) { @@ -3777,7 +3777,7 @@ namespace basist const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES = sizeof(g_etc1_to_bc7_m5_selector_ranges) / sizeof(g_etc1_to_bc7_m5_selector_ranges[0]); static uint32_t g_etc1_to_bc7_m5_selector_range_index[4][4]; - + const uint32_t NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS = 10; static const uint8_t g_etc1_to_bc7_m5_selector_mappings[NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS][4] = { @@ -3799,11 +3799,11 @@ namespace basist uint8_t m_hi; uint16_t m_err; }; - + static const etc1_to_bc7_m5_solution g_etc1_to_bc7_m5_color[32 * 8 * NUM_ETC1_TO_BC7_M5_SELECTOR_MAPPINGS * NUM_ETC1_TO_BC7_M5_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_bc7_m5_color.inc" }; - + static dxt_selector_range g_etc1_to_bc7_m5a_selector_ranges[] = { { 0, 3 }, @@ -3828,7 +3828,7 @@ namespace basist { #include "basisu_transcoder_tables_bc7_m5_alpha.inc" }; - + static inline uint32_t set_block_bits(uint8_t* pBytes, uint32_t val, uint32_t 
num_bits, uint32_t cur_ofs) { assert(num_bits < 32); @@ -3975,7 +3975,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_bc7_m5_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -4054,7 +4054,7 @@ namespace basist int mapping_err = block_colors[s].g - colors[k]; mapping_err *= mapping_err; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) mapping_err *= 5; @@ -4065,7 +4065,7 @@ namespace basist best_k = k; } } // k - + total_err += best_mapping_err; output_selectors |= (best_k << (s * 2)); } // s @@ -4080,7 +4080,7 @@ namespace basist } // lo } // hi - + fprintf(pFile, "{%u,%u,%u},", best_lo, best_hi, best_output_selectors); n++; if ((n & 31) == 31) @@ -4119,7 +4119,7 @@ namespace basist {127,104},{126,105},{126,106},{127,106},{127,107},{126,108},{125,109},{127,109},{126,110},{126,111},{127,111},{127,112},{126,113},{126,114},{127,114},{127,115}, {126,116},{126,117},{127,117},{127,118},{126,119},{126,120},{127,120},{127,121},{126,122},{126,123},{127,123},{127,124},{126,125},{126,126},{127,126},{127,127} }; - + static void transcoder_init_bc7_mode5() { #if 0 @@ -4147,9 +4147,9 @@ namespace basist } } // hi - + } // lo - + printf("{%u,%u},", g_bc7_m5_equals_1[i].m_hi, g_bc7_m5_equals_1[i].m_lo); if ((i & 15) == 15) printf("\n"); } @@ -4173,7 +4173,7 @@ namespace basist static void convert_etc1s_to_bc7_m5_color(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { bc7_mode_5* pDst_block = static_cast(pDst); - + // First ensure the block is cleared to all 0's static_cast(pDst)[0] = 0; static_cast(pDst)[1] = 0; @@ -4299,7 +4299,7 @@ namespace basist pDst_block->m_lo.m_r1 = pTable_r[best_mapping].m_lo; pDst_block->m_lo.m_g1 = pTable_g[best_mapping].m_lo; pDst_block->m_lo.m_b1 = pTable_b[best_mapping].m_lo; - + s_inv = 3; } else @@ -4320,7 +4320,7 @@ namespace basist for (uint32_t x = 0; x < 4; x++) { const uint32_t s = pSelector->get_selector(x, y); - + const uint32_t os = pSelectors_xlat[s] ^ s_inv; output_bits |= (os << output_bit_ofs); @@ -4350,7 +4350,7 @@ namespace basist pDst_block->m_lo.m_a0 = r; pDst_block->m_lo.m_a1_0 = r & 63; pDst_block->m_hi.m_a1_1 = r >> 6; - + return; } else if (pSelector->m_num_unique_selectors == 2) @@ -4400,7 +4400,7 @@ namespace basist } const uint32_t 
selector_range_table = g_etc1_to_bc7_m5a_selector_range_index[low_selector][high_selector]; - + const etc1_g_to_bc7_m5a_conversion* pTable = &g_etc1_g_to_bc7_m5a[inten_table * (32 * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES) + base_color_r * NUM_ETC1_TO_BC7_M5A_SELECTOR_RANGES + selector_range_table]; pDst_block->m_lo.m_a0 = pTable->m_lo; @@ -4870,7 +4870,7 @@ namespace basist // The best selector mapping to use given a base base+inten table and used selector range for converting grayscale data. static uint8_t g_etc1_to_astc_best_grayscale_mapping[32][8][NUM_ETC1_TO_ASTC_SELECTOR_RANGES]; - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY static const etc1_to_astc_solution g_etc1_to_astc_0_255[32 * 8 * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS * NUM_ETC1_TO_ASTC_SELECTOR_RANGES] = { #include "basisu_transcoder_tables_astc_0_255.inc" @@ -4935,7 +4935,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 8; @@ -4956,7 +4956,7 @@ namespace basist mapping_best_high[m] = best_hi; mapping_best_err[m] = best_err; highest_best_err = basisu::maximum(highest_best_err, best_err); - + } // m for (uint32_t m = 0; m < NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS; m++) @@ -5032,7 +5032,7 @@ namespace basist { int err = block_colors[s].g - colors[g_etc1_to_astc_selector_mappings[m][s]]; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. 
In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. int err_scale = 1; if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) @@ -5061,9 +5061,9 @@ namespace basist uint64_t err = mapping_best_err[m]; err = basisu::minimum(err, 0xFFFF); - + fprintf(pFile, "{%u,%u,%u},", mapping_best_low[m], mapping_best_high[m], (uint32_t)err); - + n++; if ((n & 31) == 31) fprintf(pFile, "\n"); @@ -5146,14 +5146,14 @@ namespace basist struct astc_block_params { // 2 groups of 5, but only a max of 8 are used (RRGGBBAA00) - uint8_t m_endpoints[10]; + uint8_t m_endpoints[10]; uint8_t m_weights[32]; }; - - // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). + + // Packs a single format ASTC block using Color Endpoint Mode 12 (LDR RGBA direct), endpoint BISE range 13, 2-bit weights (range 2). // We're always going to output blocks containing alpha, even if the input doesn't have alpha, for simplicity. // Each block always has 4x4 weights, uses range 13 BISE encoding on the endpoints (0-47), and each weight ranges from 0-3. This encoding should be roughly equal in quality vs. BC1 for color. - // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. + // 8 total endpoints, stored as RGBA LH LH LH LH order, each ranging from 0-47. // Note the input [0,47] endpoint values are not linear - they are encoded as outlined in the ASTC spec: // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-endpoint-unquantization // 32 total weights, stored as 16 CA CA, each ranging from 0-3. 
@@ -5175,7 +5175,7 @@ namespace basist astc_encode_trits(pOutput, pBlock->m_endpoints + 5, bit_pos, 4); // Pack 32 2-bit weights, which are stored from the top down into the block in opposite bit order. - + for (uint32_t i = 0; i < 32; i++) { static const uint8_t s_reverse_bits[4] = { 0, 2, 1, 3 }; @@ -5184,7 +5184,7 @@ namespace basist } } - // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights + // CEM mode 12 (LDR RGBA Direct), 8-bit endpoints, 1-bit weights // This ASTC mode is basically block truncation coding (BTC) using 1-bit weights and 8-bit/component endpoints - very convenient. static void astc_pack_block_cem_12_weight_range0(uint32_t* pOutput, const astc_block_params* pBlock) { @@ -5222,7 +5222,7 @@ namespace basist // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x84; pBytes[2] = 0x00; pBytes[3] = 0x00; pBytes[4] = 0x00; pBytes[5] = 0x00; pBytes[6] = 0x00; pBytes[7] = 0xc0; - + pOutput[2] = 0; pOutput[3] = 0; @@ -5248,7 +5248,7 @@ namespace basist // Write constant block mode, color component selector, number of partitions, color endpoint mode // https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#_block_mode pBytes[0] = 0x42; pBytes[1] = 0x00; pBytes[2] = 0x01; pBytes[3] = 0x00; - + pOutput[1] = 0; pOutput[2] = 0; pOutput[3] = 0; @@ -5276,7 +5276,7 @@ namespace basist { uint8_t m_lo, m_hi; } g_astc_single_color_encoding_1[256]; - + static void transcoder_init_astc() { for (uint32_t base_color = 0; base_color < 32; base_color++) @@ -5354,7 +5354,7 @@ namespace basist g_ise_to_unquant[bit | (trit << 4)] = unq; } } - + // Compute table used for optimal single color encoding. 
for (int i = 0; i < 256; i++) { @@ -5369,9 +5369,9 @@ namespace basist int l = lo_v | (lo_v << 8); int h = hi_v | (hi_v << 8); - + int v = ((l * (64 - 21) + (h * 21) + 32) / 64) >> 8; - + int e = abs(v - i); if (e < lowest_e) @@ -5393,7 +5393,7 @@ namespace basist for (int lo = 0; lo < 48; lo++) { const int lo_v = g_ise_to_unquant[lo]; - + int e = abs(lo_v - i); if (e < lowest_e) @@ -5408,7 +5408,7 @@ namespace basist // Converts opaque or color+alpha ETC1S block to ASTC 4x4. // This function tries to use the best ASTC mode given the block's actual contents. - static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, + static void convert_etc1s_to_astc_4x4(void* pDst_block, const endpoint* pEndpoints, const selector* pSelector, bool transcode_alpha, const endpoint *pEndpoint_codebook, const selector *pSelector_codebook) { astc_block_params blk; @@ -5452,7 +5452,7 @@ namespace basist // See https://www.khronos.org/registry/DataFormat/specs/1.2/dataformat.1.2.html#astc-void-extent-blocks uint32_t r, g, b; decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); - + uint32_t* pOutput = static_cast(pDst_block); uint8_t* pBytes = reinterpret_cast(pDst_block); @@ -5472,7 +5472,7 @@ namespace basist } else if ((pSelector->m_num_unique_selectors <= 2) && (num_unique_alpha_selectors <= 2)) { - // Both color and alpha use <= 2 unique selectors each. + // Both color and alpha use <= 2 unique selectors each. // Use block truncation coding, which is lossless with ASTC (8-bit endpoints, 1-bit weights). color32 block_colors[4]; decoder_etc_block::get_block_colors5(block_colors, base_color, inten_table); @@ -5519,7 +5519,7 @@ namespace basist { uint32_t s = alpha_selectors.get_selector(x, y); s = (s == alpha_high_selector) ? 
1 : 0; - + blk.m_weights[(x + y * 4) * 2 + 1] = static_cast(s); } // x } // y @@ -5552,12 +5552,12 @@ namespace basist return; } - + // Either alpha and/or color use > 2 unique selectors each, so we must do something more complex. - + #if BASISD_SUPPORT_ASTC_HIGHER_OPAQUE_QUALITY // The optional higher quality modes use 8-bits endpoints vs. [0,47] endpoints. - + // If the block's base color is grayscale, all pixels are grayscale, so encode the block as Luminance+Alpha. if ((base_color.r == base_color.g) && (base_color.r == base_color.b)) { @@ -5591,7 +5591,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -5599,7 +5599,7 @@ namespace basist blk.m_endpoints[2] = pTable_g[best_mapping].m_lo; blk.m_endpoints[3] = pTable_g[best_mapping].m_hi; - + const uint8_t* pSelectors_xlat = &g_etc1_to_astc_selector_mappings[best_mapping][0]; for (uint32_t y = 0; y < 4; y++) @@ -5643,10 +5643,10 @@ namespace basist { // Convert ETC1S alpha const uint32_t selector_range_table = g_etc1_to_astc_selector_range_index[low_selector][high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc_0_255[(inten_table * 32 + base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; - + const uint32_t best_mapping = g_etc1_to_astc_best_grayscale_mapping_0_255[base_color.g][inten_table][selector_range_table]; blk.m_endpoints[0] = pTable_g[best_mapping].m_lo; @@ -5788,7 +5788,7 @@ namespace basist { // Convert ETC1S alpha const uint32_t alpha_selector_range_table = 
g_etc1_to_astc_selector_range_index[alpha_low_selector][alpha_high_selector]; - + //[32][8][RANGES][MAPPING] const etc1_to_astc_solution* pTable_g = &g_etc1_to_astc[(alpha_inten_table * 32 + alpha_base_color.g) * (NUM_ETC1_TO_ASTC_SELECTOR_RANGES * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS) + alpha_selector_range_table * NUM_ETC1_TO_ASTC_SELECTOR_MAPPINGS]; @@ -5832,7 +5832,7 @@ namespace basist const uint32_t r = block_colors[low_selector].r; const uint32_t g = block_colors[low_selector].g; const uint32_t b = block_colors[low_selector].b; - + blk.m_endpoints[0] = g_astc_single_color_encoding_1[r].m_lo; blk.m_endpoints[1] = g_astc_single_color_encoding_1[r].m_hi; @@ -5934,7 +5934,7 @@ namespace basist blk.m_endpoints[4] = pTable_b[best_mapping].m_lo; blk.m_endpoints[5] = pTable_b[best_mapping].m_hi; - + int s0 = g_ise_to_unquant[blk.m_endpoints[0]] + g_ise_to_unquant[blk.m_endpoints[2]] + g_ise_to_unquant[blk.m_endpoints[4]]; int s1 = g_ise_to_unquant[blk.m_endpoints[1]] + g_ise_to_unquant[blk.m_endpoints[3]] + g_ise_to_unquant[blk.m_endpoints[5]]; bool invert = false; @@ -6099,8 +6099,8 @@ namespace basist static void transcoder_init_atc() { prepare_atc_single_color_table(g_pvrtc2_match45_equals_1, 16, 32, 1); - prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); - prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); + prepare_atc_single_color_table(g_atc_match55_equals_1, 32, 32, 1); + prepare_atc_single_color_table(g_atc_match56_equals_1, 32, 64, 1); prepare_atc_single_color_table(g_pvrtc2_match4, 1, 16, 3); prepare_atc_single_color_table(g_atc_match5, 1, 32, 3); @@ -6154,7 +6154,7 @@ namespace basist pBlock->set_low_color(g_atc_match55_equals_1[r].m_lo, g_atc_match56_equals_1[g].m_lo, g_atc_match55_equals_1[b].m_lo); pBlock->set_high_color(g_atc_match55_equals_1[r].m_hi, g_atc_match56_equals_1[g].m_hi, g_atc_match55_equals_1[b].m_hi); - + pBlock->m_sels[0] = 0x55; pBlock->m_sels[1] = 0x55; pBlock->m_sels[2] = 0x55; @@ -6289,7 +6289,7 @@ 
namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6363,7 +6363,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6393,7 +6393,7 @@ namespace basist } // inten fclose(pFile); - + // PVRTC2 45 fopen_s(&pFile, "basisu_transcoder_tables_pvrtc2_45.inc", "w"); @@ -6438,7 +6438,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6515,7 +6515,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6592,7 +6592,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6669,7 +6669,7 @@ namespace basist int err = block_colors[s].g - colors[g_etc1s_to_atc_selector_mappings[m][s]]; int err_scale = 1; - // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor + // Special case when the intensity table is 7, low_selector is 0, and high_selector is 3. In this extreme case, it's likely the encoder is trying to strongly favor // the low/high selectors which are clamping to either 0 or 255. 
if (((inten == 7) && (low_selector == 0) && (high_selector == 3)) && ((s == 0) || (s == 3))) err_scale = 5; @@ -6797,12 +6797,12 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_trans_match44[256]; - + static struct { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33[256]; - + static struct { uint8_t m_l, m_h; @@ -6812,7 +6812,7 @@ namespace basist { uint8_t m_l, m_h; } g_pvrtc2_alpha_match33_3[256]; - + // PVRTC2 can be forced to look like a slightly weaker variant of ATC/BC1, so that's what we do here for simplicity. static void convert_etc1s_to_pvrtc2_rgb(void* pDst, const endpoint* pEndpoints, const selector* pSelector) { @@ -6926,7 +6926,7 @@ namespace basist } typedef struct { float c[4]; } vec4F; - + static inline vec4F* vec4F_set_scalar(vec4F* pV, float x) { pV->c[0] = x; pV->c[1] = x; pV->c[2] = x; pV->c[3] = x; return pV; } static inline vec4F* vec4F_set(vec4F* pV, float x, float y, float z, float w) { pV->c[0] = x; pV->c[1] = y; pV->c[2] = z; pV->c[3] = w; return pV; } static inline vec4F* vec4F_saturate_in_place(vec4F* pV) { pV->c[0] = saturate(pV->c[0]); pV->c[1] = saturate(pV->c[1]); pV->c[2] = saturate(pV->c[2]); pV->c[3] = saturate(pV->c[3]); return pV; } @@ -6944,9 +6944,9 @@ namespace basist } static inline int sq(int x) { return x * x; } - - // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. - // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! + + // PVRTC2 is a slightly borked format for alpha: In Non-Interpolated mode, the way AlphaB8 is exanded from 4 to 8 bits means it can never be 0. + // This is actually very bad, because on 100% transparent blocks which have non-trivial color pixels, part of the color channel will leak into alpha! // And there's nothing straightforward we can do because using the other modes is too expensive/complex. 
I can see why Apple didn't adopt it. static void convert_etc1s_to_pvrtc2_rgba(void* pDst, const endpoint* pEndpoints, const selector* pSelector, const endpoint* pEndpoint_codebook, const selector* pSelector_codebook) { @@ -7001,13 +7001,13 @@ namespace basist const uint32_t high_selector = pSelector->m_hi_selector; const int num_unique_color_selectors = pSelector->m_num_unique_selectors; - + // We need to reencode the block at the pixel level, unfortunately, from two ETC1S planes. // Do 4D incremental PCA, project all pixels to this hyperline, then quantize to packed endpoints and compute the modulation values. const int br = (base_color.r << 3) | (base_color.r >> 2); const int bg = (base_color.g << 3) | (base_color.g >> 2); const int bb = (base_color.b << 3) | (base_color.b >> 2); - + color32 block_cols[4]; for (uint32_t i = 0; i < 4; i++) { @@ -7036,14 +7036,14 @@ namespace basist decoder_etc_block::get_block_color5(base_color, inten_table, low_selector, r, g, b); // Mod 0 - uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; + uint32_t lr0 = (r * 15 + 128) / 255, lg0 = (g * 15 + 128) / 255, lb0 = (b * 7 + 128) / 255; uint32_t la0 = g_pvrtc2_alpha_match33_0[constant_alpha_val].m_l; uint32_t cr0 = (lr0 << 1) | (lr0 >> 3); uint32_t cg0 = (lg0 << 1) | (lg0 >> 3); uint32_t cb0 = (lb0 << 2) | (lb0 >> 1); uint32_t ca0 = (la0 << 1); - + cr0 = (cr0 << 3) | (cr0 >> 2); cg0 = (cg0 << 3) | (cg0 >> 2); cb0 = (cb0 << 3) | (cb0 >> 2); @@ -7072,14 +7072,14 @@ namespace basist uint32_t cg3 = (lg3 << 1) | (lg3 >> 3); uint32_t cb3 = (lb3 << 1) | (lb3 >> 3); uint32_t ca3 = (la3 << 1) | 1; - + cr3 = (cr3 << 3) | (cr3 >> 2); cg3 = (cg3 << 3) | (cg3 >> 2); cb3 = (cb3 << 3) | (cb3 >> 2); ca3 = (ca3 << 4) | ca3; uint32_t err3 = sq(cr3 - r) + sq(cg3 - g) + sq(cb3 - b) + sq(ca3 - constant_alpha_val) * 2; - + // Mod 1 uint32_t lr1 = g_pvrtc2_trans_match44[r].m_l, lg1 = g_pvrtc2_trans_match44[g].m_l, lb1 = g_pvrtc2_trans_match34[b].m_l; uint32_t hr1 = 
g_pvrtc2_trans_match44[r].m_h, hg1 = g_pvrtc2_trans_match44[g].m_h, hb1 = g_pvrtc2_trans_match34[b].m_h; @@ -7154,7 +7154,7 @@ namespace basist // It's a solid color block. uint32_t low_a = block_cols[alpha_selectors.m_lo_selector].a; uint32_t high_a = block_cols[alpha_selectors.m_hi_selector].a; - + const float S = 1.0f / 255.0f; vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, low_a * S); vec4F_set(&maxColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, high_a * S); @@ -7166,7 +7166,7 @@ namespace basist vec4F_set(&minColor, block_cols[low_selector].r * S, block_cols[low_selector].g * S, block_cols[low_selector].b * S, constant_alpha_val * S); vec4F_set(&maxColor, block_cols[high_selector].r * S, block_cols[high_selector].g * S, block_cols[high_selector].b * S, constant_alpha_val * S); } - // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). + // See if any of the block colors got clamped - if so the principle axis got distorted (it's no longer just the ETC1S luma axis). // To keep quality up we need to use full 4D PCA in this case. 
else if ((block_cols[low_selector].c[0] == 0) || (block_cols[high_selector].c[0] == 255) || (block_cols[low_selector].c[1] == 0) || (block_cols[high_selector].c[1] == 255) || @@ -7217,7 +7217,7 @@ namespace basist } vec4F_normalize_in_place(&axis); - + if (vec4F_dot(&axis, &axis) < .5f) vec4F_set_scalar(&axis, .5f); @@ -7317,10 +7317,10 @@ namespace basist // 4433 4443 color32 trialMinColor, trialMaxColor; - + trialMinColor.set_clamped((int)(minColor.c[0] * 15.0f + .5f), (int)(minColor.c[1] * 15.0f + .5f), (int)(minColor.c[2] * 7.0f + .5f), (int)(minColor.c[3] * 7.0f + .5f)); trialMaxColor.set_clamped((int)(maxColor.c[0] * 15.0f + .5f), (int)(maxColor.c[1] * 15.0f + .5f), (int)(maxColor.c[2] * 15.0f + .5f), (int)(maxColor.c[3] * 7.0f + .5f)); - + pBlock->set_trans_low_color(trialMinColor.r, trialMinColor.g, trialMinColor.b, trialMinColor.a); pBlock->set_trans_high_color(trialMaxColor.r, trialMaxColor.g, trialMaxColor.b, trialMaxColor.a); @@ -7393,7 +7393,7 @@ namespace basist } } } - + static void transcoder_init_pvrtc2() { for (uint32_t v = 0; v < 256; v++) @@ -7499,7 +7499,7 @@ namespace basist g_pvrtc2_trans_match34[v].m_l = (uint8_t)best_l; g_pvrtc2_trans_match34[v].m_h = (uint8_t)best_h; } - + for (uint32_t v = 0; v < 256; v++) { int best_l = 0, best_h = 0, lowest_err = INT_MAX; @@ -7628,7 +7628,7 @@ namespace basist sym_codec.stop(); m_local_selectors.resize(num_selectors); - + if (!sym_codec.init(pSelectors_data, selectors_data_size)) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::decode_palettes: fail 5\n"); @@ -7946,7 +7946,7 @@ namespace basist if (!output_rows_in_pixels) output_rows_in_pixels = orig_height; } - + basisu::vector* pPrev_frame_indices = nullptr; if (is_video) { @@ -7974,7 +7974,7 @@ namespace basist } approx_move_to_front selector_history_buf(m_selector_history_buf_size); - + uint32_t cur_selector_rle_count = 0; decoder_etc_block block; @@ -8224,7 +8224,7 @@ namespace basist case block_format::cETC1: { decoder_etc_block* pDst_block 
= reinterpret_cast(static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8275,7 +8275,7 @@ namespace basist const uint32_t low_selector = pSelector->m_lo_selector; const uint32_t high_selector = pSelector->m_hi_selector; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 block_colors[2]; decoder_etc_block::get_block_colors5_bounds(block_colors, base_color, inten_table, low_selector, high_selector); @@ -8291,7 +8291,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8299,7 +8299,7 @@ namespace basist { #if BASISD_SUPPORT_PVRTC1 assert(pAlpha_blocks); - + block.set_base5_color(decoder_etc_block::pack_color5(pEndpoints->m_color5, false)); block.set_inten_table(0, pEndpoints->m_inten5); block.set_inten_table(1, pEndpoints->m_inten5); @@ -8307,7 +8307,7 @@ namespace basist ((decoder_etc_block*)pPVRTC_work_mem)[block_x + block_y * num_blocks_x] = block; - // Get block's RGBA bounding box + // Get block's RGBA bounding box const color32& base_color = pEndpoints->m_color5; const uint32_t inten_table = pEndpoints->m_inten5; const uint32_t low_selector = pSelector->m_lo_selector; @@ -8342,7 +8342,7 @@ namespace basist pPVRTC_endpoints[block_x + block_y * num_blocks_x] = temp.m_endpoints; #else assert(0); -#endif +#endif break; } @@ -8426,7 +8426,7 @@ namespace basist assert(transcode_alpha); void* pDst_block = static_cast(pDst_blocks) + (block_x + block_y * output_row_pitch_in_blocks_or_pixels) * output_block_or_pixel_stride_in_bytes; - + convert_etc1s_to_pvrtc2_rgba(pDst_block, pEndpoints, pSelector, &endpoints[0], &selectors[0]); #endif break; @@ -8442,10 +8442,10 @@ namespace basist { 
assert(sizeof(uint32_t) == output_block_or_pixel_stride_in_bytes); uint8_t* pDst_pixels = static_cast(pDst_blocks) + (block_x * 4 + block_y * 4 * output_row_pitch_in_blocks_or_pixels) * sizeof(uint32_t); - + const uint32_t max_x = basisu::minimum(4, (int)output_row_pitch_in_blocks_or_pixels - (int)block_x * 4); const uint32_t max_y = basisu::minimum(4, (int)output_rows_in_pixels - (int)block_y * 4); - + int colors[4]; decoder_etc_block::get_block_colors5_g(colors, pEndpoints->m_color5, pEndpoints->m_inten5); @@ -8459,7 +8459,7 @@ namespace basist pDst_pixels[3+4] = static_cast(colors[(s >> 2) & 3]); pDst_pixels[3+8] = static_cast(colors[(s >> 4) & 3]); pDst_pixels[3+12] = static_cast(colors[(s >> 6) & 3]); - + pDst_pixels += output_row_pitch_in_blocks_or_pixels * sizeof(uint32_t); } } @@ -8488,7 +8488,7 @@ namespace basist color32 colors[4]; decoder_etc_block::get_block_colors5(colors, pEndpoints->m_color5, pEndpoints->m_inten5); - + for (uint32_t y = 0; y < max_y; y++) { const uint32_t s = pSelector->m_selectors[y]; @@ -8609,7 +8609,7 @@ namespace basist cur = byteswap_uint16(cur); cur = (cur & 0xF) | packed_colors[(s >> (x * 2)) & 3]; - + if (BASISD_IS_BIG_ENDIAN) cur = byteswap_uint16(cur); @@ -8821,7 +8821,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. 
target_format = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + const bool transcode_alpha_data_to_opaque_formats = (decode_flags & cDecodeFlagsTranscodeAlphaDataToOpaqueFormats) != 0; const uint32_t bytes_per_block_or_pixel = basis_get_bytes_per_block_or_pixel(target_format); const uint32_t total_slice_blocks = num_blocks_x * num_blocks_y; @@ -8852,7 +8852,7 @@ namespace basist { //status = transcode_slice(pData, data_size, slice_index_to_decode, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pData, data_len, block_format::cETC1, bytes_per_block_or_pixel, false, is_video, is_alpha_slice, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_etc1s_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -8977,7 +8977,7 @@ namespace basist if (basis_file_has_alpha_slices) { - // First decode the alpha data + // First decode the alpha data //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC2_EAC_A8, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cETC2_EAC_A8, bytes_per_block_or_pixel, false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); } @@ -9015,8 +9015,8 @@ namespace basist return false; #else assert(bytes_per_block_or_pixel == 16); - - // First decode the alpha data + + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, 
output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9145,7 +9145,7 @@ namespace basist #else assert(bytes_per_block_or_pixel == 16); - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) { //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cBC4, 16, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); @@ -9205,7 +9205,7 @@ namespace basist } else { - // Now decode the color data and transcode to PVRTC2 RGBA. + // Now decode the color data and transcode to PVRTC2 RGBA. //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, decode_flags | cDecodeFlagsOutputHasAlphaIndices, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + rgb_offset, rgb_length, block_format::cPVRTC2_4_RGBA, bytes_per_block_or_pixel, false, is_video, false, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, true, nullptr, output_rows_in_pixels); } @@ -9226,7 +9226,7 @@ namespace basist { // Raw 32bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. 
- // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cA32, sizeof(uint32_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cA32, sizeof(uint32_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -9267,7 +9267,7 @@ namespace basist { // Raw 16bpp pixels, decoded in the usual raster order (NOT block order) into an image in memory. - // First decode the alpha data + // First decode the alpha data if (basis_file_has_alpha_slices) //status = transcode_slice(pData, data_size, slice_index + 1, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), decode_flags, output_row_pitch_in_blocks_or_pixels, pState, nullptr, output_rows_in_pixels); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + alpha_offset, alpha_length, block_format::cRGBA4444_ALPHA, sizeof(uint16_t), false, is_video, true, level_index, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, false, nullptr, output_rows_in_pixels); @@ -9369,7 +9369,7 @@ namespace basist return status; } - + basisu_lowlevel_uastc_transcoder::basisu_lowlevel_uastc_transcoder() { } @@ -9435,7 +9435,7 @@ namespace basist for (uint32_t block_y = 0; block_y < num_blocks_y; ++block_y) { void* pDst_block = (uint8_t*)pDst_blocks + block_y * output_row_pitch_in_blocks_or_pixels * output_block_or_pixel_stride_in_bytes; - + for (uint32_t block_x = 0; block_x < num_blocks_x; ++block_x, ++pSource_block, pDst_block = (uint8_t *)pDst_block + output_block_or_pixel_stride_in_bytes) { 
switch (fmt) @@ -9465,7 +9465,7 @@ namespace basist } case block_format::cBC4: { - if (channel0 < 0) + if (channel0 < 0) channel0 = 0; status = transcode_uastc_to_bc4(*pSource_block, pDst_block, high_quality, channel0); break; @@ -9628,7 +9628,7 @@ namespace basist return false; #endif } - + bool basisu_lowlevel_uastc_transcoder::transcode_image( transcoder_texture_format target_format, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, @@ -9650,7 +9650,7 @@ namespace basist { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: source data buffer too small\n"); return false; - } + } if ((target_format == transcoder_texture_format::cTFPVRTC1_4_RGB) || (target_format == transcoder_texture_format::cTFPVRTC1_4_RGBA)) { @@ -9677,7 +9677,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: output buffer size too small\n"); return false; } - + bool status = false; // UASTC4x4 @@ -9688,7 +9688,7 @@ namespace basist //status = transcode_slice(pData, data_size, slice_index, pOutput_blocks, output_blocks_buf_size_in_blocks_or_pixels, block_format::cETC1, bytes_per_block_or_pixel, decode_flags, output_row_pitch_in_blocks_or_pixels, pState); status = transcode_slice(pOutput_blocks, num_blocks_x, num_blocks_y, pCompressed_data + slice_offset, slice_length, block_format::cETC1, bytes_per_block_or_pixel, false, has_alpha, orig_width, orig_height, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels, channel0, channel1); - + if (!status) { BASISU_DEVEL_ERROR("basisu_lowlevel_uastc_transcoder::transcode_image: transcode_slice() to ETC1 failed\n"); @@ -9905,7 +9905,7 @@ namespace basist return status; } - + basisu_transcoder::basisu_transcoder(const etc1_global_selector_codebook* pGlobal_sel_codebook) : m_lowlevel_etc1s_decoder(pGlobal_sel_codebook), m_ready_to_transcode(false) @@ -9934,7 +9934,7 @@ namespace basist return false; } } -#endif +#endif return true; } @@ -10021,7 +10021,7 @@ 
namespace basist return false; } } - + // This flag dates back to pre-Basis Universal, when .basis supported full ETC1 too. if ((pHeader->m_flags & cBASISHeaderFlagETC1S) == 0) { @@ -10037,7 +10037,7 @@ namespace basist return false; } } - + if ((pHeader->m_slice_desc_file_ofs >= data_size) || ((data_size - pHeader->m_slice_desc_file_ofs) < (sizeof(basis_slice_desc) * pHeader->m_total_slices)) ) @@ -10153,12 +10153,12 @@ namespace basist image_info.m_image_index = image_index; image_info.m_total_levels = total_levels; - + image_info.m_alpha_flag = false; // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) - image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; + image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; @@ -10281,13 +10281,13 @@ namespace basist image_info.m_image_index = image_index; image_info.m_level_index = level_index; - + // For ETC1S, if anything has alpha all images have alpha. For UASTC, we only report alpha when the image actually has alpha. 
if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) image_info.m_alpha_flag = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; else image_info.m_alpha_flag = (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0; - + image_info.m_iframe_flag = (slice_desc.m_flags & cSliceDescFlagsFrameIsIFrame) != 0; image_info.m_width = slice_desc.m_num_blocks_x * 4; image_info.m_height = slice_desc.m_num_blocks_y * 4; @@ -10345,7 +10345,7 @@ namespace basist file_info.m_tex_format = static_cast(static_cast(pHeader->m_tex_format)); file_info.m_etc1s = (pHeader->m_tex_format == (int)basis_tex_format::cETC1S); - + file_info.m_y_flipped = (pHeader->m_flags & cBASISHeaderFlagYFlipped) != 0; file_info.m_has_alpha_slices = (pHeader->m_flags & cBASISHeaderFlagHasAlphaSlices) != 0; @@ -10410,7 +10410,7 @@ namespace basist return true; } - + bool basisu_transcoder::start_transcoding(const void* pData, uint32_t data_size) { if (!validate_header_quick(pData, data_size)) @@ -10518,7 +10518,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); } } - + m_ready_to_transcode = true; return true; @@ -10529,7 +10529,7 @@ namespace basist m_lowlevel_etc1s_decoder.clear(); m_ready_to_transcode = false; - + return true; } @@ -10568,7 +10568,7 @@ namespace basist const basis_slice_desc& slice_desc = reinterpret_cast(pDataU8 + pHeader->m_slice_desc_file_ofs)[slice_index]; uint32_t total_4x4_blocks = slice_desc.m_num_blocks_x * slice_desc.m_num_blocks_y; - + if (basis_block_format_is_uncompressed(fmt)) { // Assume the output buffer is orig_width by orig_height @@ -10631,7 +10631,7 @@ namespace basist BASISU_DEVEL_ERROR("basisu_transcoder::transcode_slice: invalid slice_desc.m_file_size, or passed in buffer too small\n"); return false; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { return m_lowlevel_uastc_decoder.transcode_slice(pOutput_blocks, slice_desc.m_num_blocks_x, slice_desc.m_num_blocks_y, @@ -10719,7 +10719,7 @@ namespace basist if 
(!output_row_pitch_in_blocks_or_pixels) output_row_pitch_in_blocks_or_pixels = num_blocks_x; - + if ((fmt == block_format::cETC2_EAC_A8) || (fmt == block_format::cETC2_EAC_R11)) { #if BASISD_SUPPORT_ETC2_EAC_A8 @@ -10805,7 +10805,7 @@ namespace basist if (slice_index < 0) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: failed finding slice index\n"); - // Unable to find the requested image/level + // Unable to find the requested image/level return false; } @@ -10814,7 +10814,7 @@ namespace basist // Switch to PVRTC1 RGB if the input doesn't have alpha. fmt = transcoder_texture_format::cTFPVRTC1_4_RGB; } - + if (pHeader->m_tex_format == (int)basis_tex_format::cETC1S) { if (pSlice_descs[slice_index].m_flags & cSliceDescFlagsHasAlpha) @@ -10851,7 +10851,7 @@ namespace basist } } } - + bool status = false; const uint32_t total_slice_blocks = pSlice_descs[slice_index].m_num_blocks_x * pSlice_descs[slice_index].m_num_blocks_y; @@ -10859,11 +10859,11 @@ namespace basist if (((fmt == transcoder_texture_format::cTFPVRTC1_4_RGB) || (fmt == transcoder_texture_format::cTFPVRTC1_4_RGBA)) && (output_blocks_buf_size_in_blocks_or_pixels > total_slice_blocks)) { // The transcoder doesn't write beyond total_slice_blocks, so we need to clear the rest ourselves. - // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. + // For GL usage, PVRTC1 4bpp image size is (max(width, 8)* max(height, 8) * 4 + 7) / 8. // However, for KTX and internally in Basis this formula isn't used, it's just ((width+3)/4) * ((height+3)/4) * bytes_per_block_or_pixel. This is all the transcoder actually writes to memory. 
memset(static_cast(pOutput_blocks) + total_slice_blocks * bytes_per_block_or_pixel, 0, (output_blocks_buf_size_in_blocks_or_pixels - total_slice_blocks) * bytes_per_block_or_pixel); } - + if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) { const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -10875,7 +10875,7 @@ namespace basist pSlice_desc->m_file_ofs, pSlice_desc->m_file_size, decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } - else + else { // ETC1S const basis_slice_desc* pSlice_desc = &pSlice_descs[slice_index]; @@ -10901,14 +10901,14 @@ namespace basist decode_flags, basis_file_has_alpha_slices, pHeader->m_tex_type == cBASISTexTypeVideoFrames, output_row_pitch_in_blocks_or_pixels, pState, output_rows_in_pixels); } // if (pHeader->m_tex_format == (int)basis_tex_format::cUASTC4x4) - + if (!status) { BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning false\n"); } else { - //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); + //BASISU_DEVEL_ERROR("basisu_transcoder::transcode_image_level: Returning true\n"); } return status; @@ -11122,13 +11122,13 @@ namespace basist } return false; } - + uint32_t basis_get_uncompressed_bytes_per_pixel(transcoder_texture_format fmt) { switch (fmt) { case transcoder_texture_format::cTFRGBA32: - return sizeof(uint32_t); + return sizeof(uint32_t); case transcoder_texture_format::cTFRGB565: case transcoder_texture_format::cTFBGR565: case transcoder_texture_format::cTFRGBA4444: @@ -11138,7 +11138,7 @@ namespace basist } return 0; } - + uint32_t basis_get_block_width(transcoder_texture_format tex_type) { switch (tex_type) @@ -11156,7 +11156,7 @@ namespace basist BASISU_NOTE_UNUSED(tex_type); return 4; } - + bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt) { if (fmt == basis_tex_format::cUASTC4x4) @@ 
-11214,7 +11214,7 @@ namespace basist case transcoder_texture_format::cTFETC2_RGBA: return true; #endif -#if BASISD_SUPPORT_ASTC +#if BASISD_SUPPORT_ASTC case transcoder_texture_format::cTFASTC_4x4_RGBA: return true; #endif @@ -11245,9 +11245,9 @@ namespace basist return false; } - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // UASTC - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_UASTC const astc_bc7_common_partition2_desc g_astc_bc7_common_partitions2[TOTAL_ASTC_BC7_COMMON_PARTITIONS2] = @@ -11972,7 +11972,7 @@ namespace basist if (group_size) { - // Range has trits or quints - pack each group of 5 or 3 values + // Range has trits or quints - pack each group of 5 or 3 values const int total_groups = (group_size == 5) ? ((num_vals + 4) / 5) : ((num_vals + 2) / 3); for (int group_index = 0; group_index < total_groups; group_index++) @@ -12262,7 +12262,7 @@ namespace basist bool unpack_uastc(const uastc_block& blk, unpacked_uastc_block& unpacked, bool blue_contract_check, bool read_hints) { //memset(&unpacked, 0, sizeof(unpacked)); - + #if 0 uint8_t table[128]; memset(table, 0xFF, sizeof(table)); @@ -12317,7 +12317,7 @@ namespace basist return true; } - + if (read_hints) { if (g_uastc_mode_has_bc1_hint0[mode]) @@ -12350,7 +12350,7 @@ namespace basist } else bit_ofs += g_uastc_mode_total_hint_bits[mode]; - + uint32_t subsets = 1; switch (mode) { @@ -12563,7 +12563,7 @@ namespace basist { // All other modes have <= 64 weight bits. 
uint64_t bits; - + // Read the weight bits if ((BASISD_IS_BIG_ENDIAN) || (!BASISD_USE_UNALIGNED_WORD_READS)) bits = read_bits64(blk.m_bytes, bit_ofs, basisu::minimum(64, 128 - (int)bit_ofs)); @@ -12575,31 +12575,31 @@ namespace basist #else bits = blk.m_qwords[1]; #endif - + if (bit_ofs >= 64U) bits >>= (bit_ofs - 64U); else { assert(bit_ofs >= 56U); - + uint32_t bits_needed = 64U - bit_ofs; bits <<= bits_needed; bits |= (blk.m_bytes[7] >> (8U - bits_needed)); } } - + bit_ofs = 0; const uint32_t mask = (1U << weight_bits) - 1U; const uint32_t anchor_mask = (1U << (weight_bits - 1U)) - 1U; - + if (total_planes == 2) { // Dual plane modes always have a single subset, and the first 2 weights are anchors. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); - + unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> bit_ofs) & anchor_mask); bit_ofs += (weight_bits - 1); @@ -12617,7 +12617,7 @@ namespace basist if (weight_bits == 4) { assert(bit_ofs == 0); - + // Specialize the most common case: 4-bit weights. unpacked.m_astc.m_weights[0] = (uint8_t)((uint32_t)(bits) & 7); unpacked.m_astc.m_weights[1] = (uint8_t)((uint32_t)(bits >> 3) & 15); @@ -13163,7 +13163,7 @@ namespace basist } case 2: { - // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 + // 2. DualPlane: 0, WeightRange : 5 (8), Subsets : 2, EndpointRange : 8 (16) - BC7 MODE1 dst_blk.m_mode = 1; dst_blk.m_partition = g_astc_bc7_common_partitions2[unpacked_src_blk.m_common_pattern].m_bc7; @@ -14102,7 +14102,7 @@ namespace basist bool flip = pack_etc1_y_estimate_flipped(&block_y[0][0], upper_avg, lower_avg, left_avg, right_avg); // non-flipped: | | - // vs. + // vs. 
// flipped: -- // -- @@ -14713,7 +14713,7 @@ namespace basist static const uint8_t s_uastc2_to_bc1[4] = { 0, 2, 3, 1 }; static const uint8_t s_uastc1_to_bc1[2] = { 0, 1 }; const uint8_t* s_uastc_to_bc1_weights[6] = { nullptr, s_uastc1_to_bc1, s_uastc2_to_bc1, s_uastc3_to_bc1, s_uastc4_to_bc1, s_uastc5_to_bc1 }; - + void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride) { uint32_t min0_v, max0_v, min1_v, max1_v,min2_v, max2_v, min3_v, max3_v; @@ -14801,7 +14801,7 @@ namespace basist a2 |= (s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U); a3 |= (s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U); } - + { const int v0 = pPixels[8 * stride] * 14 + bias; const int v1 = pPixels[9 * stride] * 14 + bias; @@ -14825,7 +14825,7 @@ namespace basist } const uint64_t f = a0 | a1 | a2 | a3; - + pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -14848,7 +14848,7 @@ namespace basist int dots[4]; for (uint32_t i = 0; i < 4; i++) dots[i] = (int)block_r[i] * ar + (int)block_g[i] * ag + (int)block_b[i] * ab; - + int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3]; ar *= 2; ag *= 2; ab *= 2; @@ -14857,7 +14857,7 @@ namespace basist { const int d = pSrc_pixels[i].r * ar + pSrc_pixels[i].g * ag + pSrc_pixels[i].b * ab; static const uint8_t s_sels[4] = { 3, 2, 1, 0 }; - + // Rounding matters here! // d <= t0: <=, not <, to the later LS step "sees" a wider range of selectors. It matters for quality. sels[i] = s_sels[(d <= t0) + (d < t1) + (d < t2)]; @@ -14900,11 +14900,11 @@ namespace basist } struct vec3F { float c[3]; }; - + static bool compute_least_squares_endpoints_rgb(const color32* pColors, const uint8_t* pSelectors, vec3F* pXl, vec3F* pXh) { // Derived from bc7enc16's LS function. 
- // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf + // Least squares using normal equations: http://www.cs.cornell.edu/~bindel/class/cs3220-s12/notes/lec10.pdf // I did this in matrix form first, expanded out all the ops, then optimized it a bit. uint32_t uq00_r = 0, uq10_r = 0, ut_r = 0, uq00_g = 0, uq10_g = 0, ut_g = 0, uq00_b = 0, uq10_b = 0, ut_b = 0; @@ -14978,7 +14978,7 @@ namespace basist return true; } - void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb) { dxt1_block* pDst_block = static_cast(pDst); @@ -15030,19 +15030,19 @@ namespace basist { const color32* pSrc_pixels = (const color32*)pPixels; dxt1_block* pDst_block = static_cast(pDst); - + int avg_r = -1, avg_g = 0, avg_b = 0; int lr = 0, lg = 0, lb = 0, hr = 0, hg = 0, hb = 0; uint8_t sels[16]; - + const bool use_sels = (flags & cEncodeBC1UseSelectors) != 0; if (use_sels) { // Caller is jamming in their own selectors for us to try. const uint32_t s = pDst_block->m_selectors[0] | (pDst_block->m_selectors[1] << 8) | (pDst_block->m_selectors[2] << 16) | (pDst_block->m_selectors[3] << 24); - + static const uint8_t s_sel_tran[4] = { 0, 3, 1, 2 }; - + for (uint32_t i = 0; i < 16; i++) sels[i] = s_sel_tran[(s >> (i * 2)) & 3]; } @@ -15054,13 +15054,13 @@ namespace basist for (j = 1; j < 16; j++) if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; - + if (j == 16) { encode_bc1_solid_block(pDst, fr, fg, fb); return; } - + // Select 2 colors along the principle axis. (There must be a faster/simpler way.) int total_r = fr, total_g = fg, total_b = fb; int max_r = fr, max_g = fg, max_b = fb; @@ -15094,7 +15094,7 @@ namespace basist float cov[6]; for (uint32_t i = 0; i < 6; i++) cov[i] = static_cast(icov[i])* (1.0f / 255.0f); - + #if 0 // Seems silly to use full PCA to choose 2 colors. The diff in avg. 
PSNR between using PCA vs. not is small (~.025 difference). // TODO: Try 2 or 3 different normalized diagonal vectors, choose the one that results in the largest dot delta @@ -15126,7 +15126,7 @@ namespace basist saxis_b = (int)(xb * m); } #endif - + int low_dot = INT_MAX, high_dot = INT_MIN, low_c = 0, high_c = 0; for (uint32_t i = 0; i < 16; i++) { @@ -15150,7 +15150,7 @@ namespace basist hr = to_5(pSrc_pixels[high_c].r); hg = to_6(pSrc_pixels[high_c].g); hb = to_5(pSrc_pixels[high_c].b); - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } // if (use_sels) @@ -15197,13 +15197,13 @@ namespace basist hg = basisu::clamp((int)((xh.c[1]) * (63.0f / 255.0f) + .5f), 0, 63); hb = basisu::clamp((int)((xh.c[2]) * (31.0f / 255.0f) + .5f), 0, 31); } - + bc1_find_sels(pSrc_pixels, lr, lg, lb, hr, hg, hb, sels); } uint32_t lc16 = dxt1_block::pack_unscaled_color(lr, lg, lb); uint32_t hc16 = dxt1_block::pack_unscaled_color(hr, hg, hb); - + // Always forbid 3 color blocks if (lc16 == hc16) { @@ -15255,7 +15255,7 @@ namespace basist pDst_block->m_selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } - + void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags) { const color32* pSrc_pixels = (const color32*)pPixels; @@ -15304,8 +15304,8 @@ namespace basist min_r = basisu::minimum(min_r, r); min_g = basisu::minimum(min_g, g); min_b = basisu::minimum(min_b, b); total_r += r; total_g += g; total_b += b; } - - if (grayscale_flag) + + if (grayscale_flag) { // Grayscale blocks are a common enough case to specialize. 
if ((max_r - min_r) < 2) @@ -15622,7 +15622,7 @@ namespace basist // Always forbid 3 color blocks uint16_t lc16 = (uint16_t)b.get_low_color(); uint16_t hc16 = (uint16_t)b.get_high_color(); - + uint8_t mask = 0; // Make l > h @@ -15852,7 +15852,7 @@ namespace basist blk.m_base = static_cast(a); blk.m_table = 13; blk.m_multiplier = 0; - + memcpy(blk.m_selectors, g_etc2_eac_a8_sel4, sizeof(g_etc2_eac_a8_sel4)); return; @@ -16542,7 +16542,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGB bounding box + // Get block's RGB bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); if (from_alpha) @@ -16601,7 +16601,7 @@ namespace basist if (!unpack_uastc(pSrc_blocks[x + y * num_blocks_x], block_pixels, false)) return false; - // Get block's RGBA bounding box + // Get block's RGBA bounding box color32 low_color(255, 255, 255, 255), high_color(0, 0, 0, 0); for (uint32_t i = 0; i < 16; i++) @@ -16717,9 +16717,9 @@ namespace basist #endif // #if BASISD_SUPPORT_UASTC -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ // KTX2 -// ------------------------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 const uint8_t g_ktx2_file_identifier[12] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A }; @@ -16741,7 +16741,7 @@ namespace basist m_key_values.clear(); memset(&m_etc1s_header, 0, sizeof(m_etc1s_header)); m_etc1s_image_descs.clear(); - + m_format = basist::basis_tex_format::cETC1S; m_dfd_color_model = 0; @@ -16753,9 +16753,9 @@ namespace basist m_dfd_chan1 = KTX2_DF_CHANNEL_UASTC_RGB; m_etc1s_transcoder.clear(); - + 
m_def_transcoder_state.clear(); - + m_has_alpha = false; m_is_video = false; } @@ -16826,7 +16826,7 @@ namespace basist return false; } } - + // 3.7 levelCount: "levelCount=0 is allowed, except for block-compressed formats" if (m_header.m_level_count < 1) { @@ -16883,7 +16883,7 @@ namespace basist } memcpy(&m_levels[0], m_pData + sizeof(ktx2_header), level_index_size_in_bytes); - + // Sanity check the level offsets and byte sizes for (uint32_t i = 0; i < m_levels.size(); i++) { @@ -16903,9 +16903,9 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset and/or length\n"); return false; } - + const uint64_t MAX_SANE_LEVEL_UNCOMP_SIZE = 2048ULL * 1024ULL * 1024ULL; - + if (m_levels[i].m_uncompressed_byte_length >= MAX_SANE_LEVEL_UNCOMP_SIZE) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid level offset (too large)\n"); @@ -16942,7 +16942,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Invalid DFD offset and/or length\n"); return false; } - + const uint8_t* pDFD = m_pData + m_header.m_dfd_byte_offset; if (!m_dfd.try_resize(m_header.m_dfd_byte_length)) @@ -16952,17 +16952,17 @@ namespace basist } memcpy(m_dfd.data(), pDFD, m_header.m_dfd_byte_length); - + // This is all hard coded for only ETC1S and UASTC. 
uint32_t dfd_total_size = basisu::read_le_dword(pDFD); - + // 3.10.3: Sanity check if (dfd_total_size != m_header.m_dfd_byte_length) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: DFD size validation failed (1)\n"); return false; } - + // 3.10.3: More sanity checking if (m_header.m_kvd_byte_length) { @@ -16975,7 +16975,7 @@ namespace basist const uint32_t dfd_bits = basisu::read_le_dword(pDFD + 3 * sizeof(uint32_t)); const uint32_t sample_channel0 = basisu::read_le_dword(pDFD + 7 * sizeof(uint32_t)); - + m_dfd_color_model = dfd_bits & 255; m_dfd_color_prims = (ktx2_df_color_primaries)((dfd_bits >> 8) & 255); m_dfd_transfer_func = (dfd_bits >> 16) & 255; @@ -16991,11 +16991,11 @@ namespace basist if (m_dfd_color_model == KTX2_KDF_DF_MODEL_ETC1S) { m_format = basist::basis_tex_format::cETC1S; - - // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count." + + // 3.10.2: "Whether the image has 1 or 2 slices can be determined from the DFD’s sample count." // If m_has_alpha is true it may be 2-channel RRRG or 4-channel RGBA, but we let the caller deal with that. m_has_alpha = (m_header.m_dfd_byte_length == 60); - + m_dfd_samples = m_has_alpha ? 2 : 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); @@ -17011,7 +17011,7 @@ namespace basist m_dfd_samples = 1; m_dfd_chan0 = (ktx2_df_channel_id)((sample_channel0 >> 24) & 15); - + // We're assuming "DATA" means RGBA so it has alpha. 
m_has_alpha = (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RGBA) || (m_dfd_chan0 == KTX2_DF_CHANNEL_UASTC_RRRG); } @@ -17021,7 +17021,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::init: Unsupported DFD color model\n"); return false; } - + if (!read_key_values()) { BASISU_DEVEL_ERROR("ktx2_transcoder::init: read_key_values() failed\n"); @@ -17065,7 +17065,7 @@ namespace basist return nullptr; } - + bool ktx2_transcoder::start_transcoding() { if (!m_pData) @@ -17074,7 +17074,7 @@ namespace basist return false; } - if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) + if (m_header.m_supercompression_scheme == KTX2_SS_BASISLZ) { // Check if we've already decompressed the ETC1S global data. If so don't unpack it again. if (!m_etc1s_transcoder.get_endpoints().empty()) @@ -17085,7 +17085,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::start_transcoding: decompress_etc1s_global_data() failed\n"); return false; } - + if (!m_is_video) { // See if there are any P-frames. If so it must be a video, even if there wasn't a KTXanimData key. 
@@ -17141,7 +17141,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::get_image_level_info: layer_index >= maximum(m_header.m_layer_count, 1)\n"); return false; } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; @@ -17171,9 +17171,9 @@ namespace basist return true; } - + bool ktx2_transcoder::transcode_image_level( - uint32_t level_index, uint32_t layer_index, uint32_t face_index, + uint32_t level_index, uint32_t layer_index, uint32_t face_index, void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels, basist::transcoder_texture_format fmt, uint32_t decode_flags, uint32_t output_row_pitch_in_blocks_or_pixels, uint32_t output_rows_in_pixels, int channel0, int channel1, @@ -17187,7 +17187,7 @@ namespace basist if (!pState) pState = &m_def_transcoder_state; - + if (level_index >= m_levels.size()) { BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: level_index >= m_levels.size()\n"); @@ -17216,7 +17216,7 @@ namespace basist const uint8_t* pComp_level_data = m_pData + m_levels[level_index].m_byte_offset; uint64_t comp_level_data_size = m_levels[level_index].m_byte_length; - + const uint8_t* pUncomp_level_data = pComp_level_data; uint64_t uncomp_level_data_size = comp_level_data_size; @@ -17225,7 +17225,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::transcode_image_2D: uncomp_level_data_size > UINT32_MAX\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { // Check if we've already decompressed this level's supercompressed data. 
@@ -17243,12 +17243,12 @@ namespace basist pUncomp_level_data = pState->m_level_uncomp_data.data(); uncomp_level_data_size = pState->m_level_uncomp_data.size(); } - + const uint32_t level_width = basisu::maximum(m_header.m_pixel_width >> level_index, 1); const uint32_t level_height = basisu::maximum(m_header.m_pixel_height >> level_index, 1); const uint32_t num_blocks_x = (level_width + 3) >> 2; const uint32_t num_blocks_y = (level_height + 3) >> 2; - + if (m_format == basist::basis_tex_format::cETC1S) { // Ensure start_transcoding() was called. @@ -17262,7 +17262,7 @@ namespace basist (level_index * basisu::maximum(m_header.m_layer_count, 1) * m_header.m_face_count) + layer_index * m_header.m_face_count + face_index; - + // Sanity check if (etc1s_image_index >= m_etc1s_image_descs.size()) { @@ -17297,7 +17297,7 @@ namespace basist // Compute length and offset to uncompressed 2D UASTC texture data, given the face/layer indices. assert(uncomp_level_data_size == m_levels[level_index].m_uncompressed_byte_length); const uint32_t total_2D_image_size = num_blocks_x * num_blocks_y * KTX2_UASTC_BLOCK_SIZE; - + const uint32_t uncomp_ofs = (layer_index * m_header.m_face_count + face_index) * total_2D_image_size; // Sanity checks @@ -17333,12 +17333,12 @@ namespace basist return true; } - + bool ktx2_transcoder::decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data) { const uint8_t* pComp_data = m_levels[level_index].m_byte_offset + m_pData; const uint64_t comp_size = m_levels[level_index].m_byte_length; - + const uint64_t uncomp_size = m_levels[level_index].m_uncompressed_byte_length; if (((size_t)comp_size) != comp_size) @@ -17357,7 +17357,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_level_data: Out of memory\n"); return false; } - + if (m_header.m_supercompression_scheme == KTX2_SS_ZSTANDARD) { #if BASISD_SUPPORT_KTX2_ZSTD @@ -17380,7 +17380,7 @@ namespace basist return true; } - + bool 
ktx2_transcoder::decompress_etc1s_global_data() { // Note: we don't actually support 3D textures in here yet @@ -17419,13 +17419,13 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: SGD byte length is too small, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_image_descs.try_resize(image_count)) { BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: Out of memory\n"); return false; } - + memcpy(m_etc1s_image_descs.data(), pSrc, sizeof(ktx2_etc1s_image_desc) * image_count); pSrc += sizeof(ktx2_etc1s_image_desc) * image_count; @@ -17459,7 +17459,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_tables() failed, file is invalid or corrupted\n"); return false; } - + if (!m_etc1s_transcoder.decode_palettes( m_etc1s_header.m_endpoint_count, pEndpoint_data, m_etc1s_header.m_endpoints_byte_length, m_etc1s_header.m_selector_count, pSelector_data, m_etc1s_header.m_selectors_byte_length)) @@ -17467,7 +17467,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::decompress_etc1s_global_data: decode_palettes() failed, file is likely corrupted\n"); return false; } - + return true; } @@ -17508,7 +17508,7 @@ namespace basist while (src_left > sizeof(uint32_t)) { uint32_t l = basisu::read_le_dword(pSrc); - + pSrc += sizeof(uint32_t); src_left -= sizeof(uint32_t); @@ -17529,7 +17529,7 @@ namespace basist BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); return false; } - + basisu::uint8_vec& key_data = m_key_values.back().m_key; basisu::uint8_vec& value_data = m_key_values.back().m_value; @@ -17551,7 +17551,7 @@ namespace basist l--; } while (key_data.back()); - + if (!value_data.try_resize(l)) { BASISU_DEVEL_ERROR("ktx2_transcoder::read_key_values: Out of memory\n"); @@ -17580,7 +17580,7 @@ namespace basist return true; } - + #endif // BASISD_SUPPORT_KTX2 bool basisu_transcoder_supports_ktx2() diff --git a/transcoder/basisu_transcoder.h 
b/transcoder/basisu_transcoder.h index bf3aed3d..4bb8ab50 100644 --- a/transcoder/basisu_transcoder.h +++ b/transcoder/basisu_transcoder.h @@ -22,7 +22,7 @@ #define BASISD_SUPPORT_KTX2 1 #endif -// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support +// Set BASISD_SUPPORT_KTX2_ZSTD to 0 to disable Zstd usage and KTX2 UASTC Zstd supercompression support #ifndef BASISD_SUPPORT_KTX2_ZSTD #define BASISD_SUPPORT_KTX2_ZSTD 1 #endif @@ -42,7 +42,7 @@ namespace basist // High-level composite texture formats supported by the transcoder. // Each of these texture formats directly correspond to OpenGL/D3D/Vulkan etc. texture formats. // Notes: - // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a + // - If you specify a texture format that supports alpha, but the .basis file doesn't have alpha, the transcoder will automatically output a // fully opaque (255) alpha channel. // - The PVRTC1 texture formats only support power of 2 dimension .basis files, but this may be relaxed in a future version. // - The PVRTC1 transcoders are real-time encoders, so don't expect the highest quality. We may add a slower encoder with improved quality. @@ -71,7 +71,7 @@ namespace basist // ATC (mobile, Adreno devices, this is a niche format) cTFATC_RGB = 11, // Opaque, RGB or alpha if cDecodeFlagsTranscodeAlphaDataToOpaqueFormats flag is specified. ATI ATC (GL_ATC_RGB_AMD) - cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) + cTFATC_RGBA = 12, // Opaque+alpha, alpha channel will be opaque for opaque .basis files. ATI ATC (GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD) // FXT1 (desktop, Intel devices, this is a super obscure format) cTFFXT1_RGB = 17, // Opaque only, uses exclusively CC_MIXED blocks. Notable for having a 8x4 block size. 
GL_3DFX_texture_compression_FXT1 is supported on Intel integrated GPU's (such as HD 630). @@ -170,7 +170,7 @@ namespace basist basisu::vector m_block_endpoint_preds[2]; enum { cMaxPrevFrameLevels = 16 }; - basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] + basisu::vector m_prev_frame_indices[2][cMaxPrevFrameLevels]; // [alpha_flag][level_index] void clear() { @@ -279,7 +279,7 @@ namespace basist // This flag is used internally when decoding to BC3. cDecodeFlagsBC1ForbidThreeColorBlocks = 8, - // The output buffer contains alpha endpoint/selector indices. + // The output buffer contains alpha endpoint/selector indices. // Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format. cDecodeFlagsOutputHasAlphaIndices = 16, @@ -486,11 +486,11 @@ namespace basist // transcode_image_level() decodes a single mipmap level from the .basis file to any of the supported output texture formats. // It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5). // If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's). - // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. + // Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. // output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32. // output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling). 
// output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4). - // Notes: + // Notes: // - basisu_transcoder_init() must have been called first to initialize the transcoder lookup tables before calling this function. // - This method assumes the output texture buffer is readable. In some cases to handle alpha, the transcoder will write temporary data to the output texture in // a first pass, which will be read in a second pass. @@ -545,7 +545,7 @@ namespace basist // basisu_transcoder_init() MUST be called before a .basis file can be transcoded. void basisu_transcoder_init(); - + enum debug_flags_t { cDebugFlagVisCRs = 1, @@ -555,10 +555,10 @@ namespace basist uint32_t get_debug_flags(); void set_debug_flags(uint32_t f); - // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ // Optional .KTX2 file format support // KTX2 reading optionally requires miniz or Zstd decompressors for supercompressed UASTC files. 
- // ------------------------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------------------------ #if BASISD_SUPPORT_KTX2 #pragma pack(push) #pragma pack(1) @@ -701,12 +701,12 @@ namespace basist { case KTX2_DF_PRIMARIES_UNSPECIFIED: return "UNSPECIFIED"; case KTX2_DF_PRIMARIES_BT709: return "BT709"; - case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; + case KTX2_DF_PRIMARIES_BT601_EBU: return "EBU"; case KTX2_DF_PRIMARIES_BT601_SMPTE: return "SMPTE"; case KTX2_DF_PRIMARIES_BT2020: return "BT2020"; case KTX2_DF_PRIMARIES_CIEXYZ: return "CIEXYZ"; case KTX2_DF_PRIMARIES_ACES: return "ACES"; - case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; + case KTX2_DF_PRIMARIES_ACESCC: return "ACESCC"; case KTX2_DF_PRIMARIES_NTSC1953: return "NTSC1953"; case KTX2_DF_PRIMARIES_PAL525: return "PAL525"; case KTX2_DF_PRIMARIES_DISPLAYP3: return "DISPLAYP3"; @@ -714,7 +714,7 @@ namespace basist default: break; } return "?"; - } + } // Information about a single 2D texture "image" in a KTX2 file. struct ktx2_image_level_info @@ -745,7 +745,7 @@ namespace basist // true if the image is an I-Frame. Currently, for ETC1S textures, the first frame will always be an I-Frame, and subsequent frames will always be P-Frames. bool m_iframe_flag; }; - + // Thread-specific ETC1S/supercompressed UASTC transcoder state. (If you're not doing multithreading transcoding you can ignore this.) struct ktx2_transcoder_state { @@ -763,9 +763,9 @@ namespace basist // This class is quite similar to basisu_transcoder. It treats KTX2 files as a simple container for ETC1S/UASTC texture data. // It does not support 1D or 3D textures. - // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. + // It only supports 2D and cubemap textures, with or without mipmaps, texture arrays of 2D/cubemap textures, and texture video files. 
// It only supports raw non-supercompressed UASTC, ETC1S, UASTC+Zstd, or UASTC+zlib compressed files. - // DFD (Data Format Descriptor) parsing is purposely as simple as possible. + // DFD (Data Format Descriptor) parsing is purposely as simple as possible. // If you need to know how to interpret the texture channels you'll need to parse the DFD yourself after calling get_dfd(). class ktx2_transcoder { @@ -806,7 +806,7 @@ namespace basist uint32_t get_layers() const { return m_header.m_layer_count; } // Returns cETC1S or cUASTC4x4. Valid after init(). - basist::basis_tex_format get_format() const { return m_format; } + basist::basis_tex_format get_format() const { return m_format; } bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; } @@ -825,7 +825,7 @@ namespace basist // Returns the DFD color primary. // We do not validate the color primaries, so the returned value may not be in the ktx2_df_color_primaries enum. ktx2_df_color_primaries get_dfd_color_primaries() const { return m_dfd_color_prims; } - + // Returns KTX2_KHR_DF_TRANSFER_LINEAR or KTX2_KHR_DF_TRANSFER_SRGB. uint32_t get_dfd_transfer_func() const { return m_dfd_transfer_func; } @@ -833,9 +833,9 @@ namespace basist // Returns 1 (ETC1S/UASTC) or 2 (ETC1S with an internal alpha channel). uint32_t get_dfd_total_samples() const { return m_dfd_samples; } - - // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. - // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. + + // Returns the channel mapping for each DFD "sample". UASTC always has 1 sample, ETC1S can have one or two. + // Note the returned value SHOULD be one of the ktx2_df_channel_id enums, but we don't validate that. // It's up to the caller to decide what to do if the value isn't in the enum. 
ktx2_df_channel_id get_dfd_channel_id0() const { return m_dfd_chan0; } ktx2_df_channel_id get_dfd_channel_id1() const { return m_dfd_chan1; } @@ -873,18 +873,18 @@ namespace basist // is_video() is only valid after start_transcoding() is called. // For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames. bool is_video() const { return m_is_video; } - + // start_transcoding() MUST be called before calling transcode_image(). // This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively. bool start_transcoding(); - + // get_image_level_info() be called after init(), but the m_iframe_flag's won't be valid until start_transcoding() is called. // You can call this method before calling transcode_image_level() to retrieve basic information about the mipmap level's dimensions, etc. bool get_image_level_info(ktx2_image_level_info& level_info, uint32_t level_index, uint32_t layer_index, uint32_t face_index) const; // transcode_image_level() transcodes a single 2D texture or cubemap face from the KTX2 file. // Internally it uses the same low-level transcode API's as basisu_transcoder::transcode_image_level(). - // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is + // If the file is UASTC and is supercompressed with Zstandard, and the file is a texture array or cubemap, it's highly recommended that each mipmap level is // completely transcoded before switching to another level. Every time the mipmap level is changed all supercompressed level data must be decompressed using Zstandard as a single unit. // Currently ETC1S videos must always be transcoded from first to last frame (or KTX2 "layer"), in order, with no skipping of frames. 
// By default this method is not thread safe unless you specify a pointer to a user allocated thread-specific transcoder_state struct. @@ -894,7 +894,7 @@ namespace basist basist::transcoder_texture_format fmt, uint32_t decode_flags = 0, uint32_t output_row_pitch_in_blocks_or_pixels = 0, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, ktx2_transcoder_state *pState = nullptr); - + private: const uint8_t* m_pData; uint32_t m_data_size; @@ -903,22 +903,22 @@ namespace basist basisu::vector m_levels; basisu::uint8_vec m_dfd; key_value_vec m_key_values; - + ktx2_etc1s_global_data_header m_etc1s_header; basisu::vector m_etc1s_image_descs; basist::basis_tex_format m_format; - + uint32_t m_dfd_color_model; ktx2_df_color_primaries m_dfd_color_prims; uint32_t m_dfd_transfer_func; uint32_t m_dfd_flags; uint32_t m_dfd_samples; ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1; - + basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder; basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder; - + ktx2_transcoder_state m_def_transcoder_state; bool m_has_alpha; diff --git a/transcoder/basisu_transcoder_internal.h b/transcoder/basisu_transcoder_internal.h index 2422d788..5cd91c79 100644 --- a/transcoder/basisu_transcoder_internal.h +++ b/transcoder/basisu_transcoder_internal.h @@ -44,9 +44,9 @@ namespace basist // You probably don't care about these enum's unless you are going pretty low-level and calling the transcoder to decode individual slices. 
enum class block_format { - cETC1, // ETC1S RGB + cETC1, // ETC1S RGB cETC2_RGBA, // full ETC2 EAC RGBA8 block - cBC1, // DXT1 RGB + cBC1, // DXT1 RGB cBC3, // BC4 block followed by a four color BC1 block cBC4, // DXT5A (alpha block only) cBC5, // two BC4 blocks @@ -56,9 +56,9 @@ namespace basist cBC7_M5_COLOR, // RGB BC7 mode 5 color (writes an opaque mode 5 block) cBC7_M5_ALPHA, // alpha portion of BC7 mode 5 (cBC7_M5_COLOR output data must have been written to the output buffer first to set the mode/rot fields etc.) cETC2_EAC_A8, // alpha block of ETC2 EAC (first 8 bytes of the 16-bit ETC2 EAC RGBA format) - cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC + cASTC_4x4, // ASTC 4x4 (either color-only or color+alpha). Note that the transcoder always currently assumes sRGB is not enabled when outputting ASTC // data. If you use a sRGB ASTC format you'll get ~1 LSB of additional error, because of the different way ASTC decoders scale 8-bit endpoints to 16-bits during unpacking. 
- + cATC_RGB, cATC_RGBA_INTERPOLATED_ALPHA, cFXT1_RGB, // Opaque-only, has oddball 8x4 pixel block size @@ -68,21 +68,21 @@ namespace basist cETC2_EAC_R11, cETC2_EAC_RG11, - + cIndices, // Used internally: Write 16-bit endpoint and selector indices directly to output (output block must be at least 32-bits) cRGB32, // Writes RGB components to 32bpp output pixels cRGBA32, // Writes RGB255 components to 32bpp output pixels cA32, // Writes alpha component to 32bpp output pixels - + cRGB565, cBGR565, - + cRGBA4444_COLOR, cRGBA4444_ALPHA, cRGBA4444_COLOR_OPAQUE, cRGBA4444, - + cTotalBlockFormats }; @@ -103,9 +103,9 @@ namespace basist const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_THRESH = 3; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_BITS = 6; const uint32_t SELECTOR_HISTORY_BUF_RLE_COUNT_TOTAL = (1 << SELECTOR_HISTORY_BUF_RLE_COUNT_BITS); - + uint16_t crc16(const void *r, size_t size, uint16_t crc); - + class huffman_decoding_table { friend class bitwise_decoder; @@ -223,7 +223,7 @@ namespace basist return false; else if (idx >= (int)m_tree.size()) m_tree.resize(idx + 1); - + if (!m_tree[idx]) { m_tree[idx] = (int16_t)tree_next; @@ -392,14 +392,14 @@ namespace basist for (;;) { uint32_t k = peek_bits(16); - + uint32_t l = 0; while (k & 1) { l++; k >>= 1; } - + q += l; remove_bits(l); @@ -417,7 +417,7 @@ namespace basist const uint32_t chunk_size = 1 << chunk_bits; const uint32_t chunk_mask = chunk_size - 1; - + uint32_t v = 0; uint32_t ofs = 0; @@ -429,7 +429,7 @@ namespace basist if ((s & chunk_size) == 0) break; - + if (ofs >= 32) { assert(0); @@ -445,7 +445,7 @@ namespace basist assert(ct.m_code_sizes.size()); const uint32_t huffman_fast_lookup_size = 1 << fast_lookup_bits; - + while (m_bit_buf_size < 16) { uint32_t c = 0; @@ -456,7 +456,7 @@ namespace basist m_bit_buf_size += 8; assert(m_bit_buf_size <= 32); } - + int code_len; int sym; @@ -641,7 +641,7 @@ namespace basist }; struct decoder_etc_block; - + inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 
0xFFFFFF00U) ? (~(i >> 31)) : i); @@ -665,7 +665,7 @@ namespace basist }; uint8_t c[4]; - + uint32_t m; }; @@ -787,7 +787,7 @@ namespace basist }; bool basis_block_format_is_uncompressed(block_format tex_type); - + } // namespace basist diff --git a/transcoder/basisu_transcoder_uastc.h b/transcoder/basisu_transcoder_uastc.h index d501a2af..c38a34b0 100644 --- a/transcoder/basisu_transcoder_uastc.h +++ b/transcoder/basisu_transcoder_uastc.h @@ -5,8 +5,8 @@ namespace basist { struct color_quad_u8 - { - uint8_t m_c[4]; + { + uint8_t m_c[4]; }; const uint32_t TOTAL_UASTC_MODES = 19; @@ -101,9 +101,9 @@ namespace basist int m_ccs; // color component selector (dual plane only) bool m_dual_plane; // true if dual plane - // Weight and endpoint BISE values. + // Weight and endpoint BISE values. // Note these values are NOT linear, they must be BISE encoded. See Table 97 and Table 107. - uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order + uint8_t m_endpoints[18]; // endpoint values, in RR GG BB etc. order uint8_t m_weights[64]; // weight index values, raster order, in P0 P1, P0 P1, etc. or P0, P0, P0, P0, etc. order }; @@ -198,7 +198,7 @@ namespace basist #ifdef _DEBUG int astc_compute_texel_partition(int seed, int x, int y, int z, int partitioncount, bool small_block); #endif - + struct uastc_block { union @@ -238,10 +238,10 @@ namespace basist }; color32 apply_etc1_bias(const color32 &block_color, uint32_t bias, uint32_t limit, uint32_t subblock); - + struct decoder_etc_block; struct eac_block; - + bool unpack_uastc(uint32_t mode, uint32_t common_pattern, const color32& solid_color, const astc_block_desc& astc, color32* pPixels, bool srgb); bool unpack_uastc(const unpacked_uastc_block& unpacked_blk, color32* pPixels, bool srgb); @@ -263,7 +263,7 @@ namespace basist // Packs 16 scalar values to BC4. Same PSNR as stb_dxt's BC4 encoder, around 13% faster. 
void encode_bc4(void* pDst, const uint8_t* pPixels, uint32_t stride); - + void encode_bc1_solid_block(void* pDst, uint32_t fr, uint32_t fg, uint32_t fb); enum @@ -273,7 +273,7 @@ namespace basist cEncodeBC1UseSelectors = 4, }; void encode_bc1(void* pDst, const uint8_t* pPixels, uint32_t flags); - + // Alternate PCA-free encoder, around 15% faster, same (or slightly higher) avg. PSNR void encode_bc1_alt(void* pDst, const uint8_t* pPixels, uint32_t flags); @@ -290,7 +290,7 @@ namespace basist bool transcode_uastc_to_pvrtc1_4_rgb(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality, bool from_alpha); bool transcode_uastc_to_pvrtc1_4_rgba(const uastc_block* pSrc_blocks, void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, bool high_quality); - + // uastc_init() MUST be called before using this module. void uastc_init(); diff --git a/webgl/README.md b/webgl/README.md index 79b6cb90..bfb7131a 100644 --- a/webgl/README.md +++ b/webgl/README.md @@ -42,7 +42,7 @@ extension that is [currently in development](https://github.com/KhronosGroup/glT ## Compressor (encode_test) -This demo shows how to use the compressor from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. The compressor will dynamically generate a .basis file in memory which will then be immediately transcoded and displayed. Hit the "Download!" button to locally download the generated .basis file. +This demo shows how to use the compressor from JavaScript. To use it, select a .PNG file then hit the "Encode!" button. The compressor will dynamically generate a .basis file in memory which will then be immediately transcoded and displayed. Hit the "Download!" button to locally download the generated .basis file. To view the compressor's textual debug output, open your browser's developer debug console (under Developer Tools in Chrome) and enable the Debug checkbox before hitting the "Encode!" button. 
Multithreading is not currently supported when the compressor is compiled to WebAssembly, so compression will be slower than using the stand-alone command line tool. diff --git a/webgl/encode_test/dxt-to-rgb565.js b/webgl/encode_test/dxt-to-rgb565.js index 8e35be4d..7827cf69 100644 --- a/webgl/encode_test/dxt-to-rgb565.js +++ b/webgl/encode_test/dxt-to-rgb565.js @@ -37,7 +37,7 @@ function dxtToRgb565(src, src16Offset, width, height) { i = src16Offset + 4 * (blockY * blockWidth + blockX); c[0] = src[i]; c[1] = src[i + 1]; - + r0 = c[0] & 0x1f; g0 = c[0] & 0x7e0; b0 = c[0] & 0xf800; @@ -50,7 +50,7 @@ function dxtToRgb565(src, src16Offset, width, height) { // decoder in many GPUs does :) // rg FIXME: This is most likely leading to wrong results vs. a GPU - + c[2] = ((5 * r0 + 3 * r1) >> 3) | (((5 * g0 + 3 * g1) >> 3) & 0x7e0) | (((5 * b0 + 3 * b1) >> 3) & 0xf800); diff --git a/webgl/encode_test/index.html b/webgl/encode_test/index.html index ad74da13..193ff789 100644 --- a/webgl/encode_test/index.html +++ b/webgl/encode_test/index.html @@ -124,8 +124,8 @@ cTFFXT1_RGB: 17, cTFPVRTC2_4_RGB: 18, cTFPVRTC2_4_RGBA: 19, - cTFETC2_EAC_R11: 20, - cTFETC2_EAC_RG11: 21 + cTFETC2_EAC_R11: 20, + cTFETC2_EAC_RG11: 21 }; BASIS_FORMAT_NAMES = {}; @@ -136,7 +136,7 @@ DXT_FORMAT_MAP = {}; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC1] = COMPRESSED_RGB_S3TC_DXT1_EXT; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC3] = COMPRESSED_RGBA_S3TC_DXT5_EXT; -DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; +DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; var astcSupported = false; var etcSupported = false; @@ -159,7 +159,7 @@ { var basisFileDesc = basisFile.getFileDesc(); - log('------'); + log('------'); log('getFileDesc():'); log('version: ' + basisFileDesc.version); log('us per frame: ' + basisFileDesc.usPerFrame); @@ -167,7 +167,7 @@ log('userdata0: ' + basisFileDesc.userdata0 + ' userdata1: ' + basisFileDesc.userdata1); log('texFormat: ' + basisFileDesc.texFormat); log('yFlipped: 
' + basisFileDesc.yFlipped + ' hasAlphaSlices: ' + basisFileDesc.hasAlphaSlices); - + if (basisFileDesc.texFormat == Module.basis_tex_format.cETC1S.value) { log('numEndpoints: ' + basisFileDesc.numEndpoints); @@ -185,9 +185,9 @@ for (image_index = 0; image_index < basisFileDesc.totalImages; image_index++) { log('image: ' + image_index); - + var basisImageDesc = basisFile.getImageDesc(image_index); - + log('origWidth: ' + basisImageDesc.origWidth + ' origWidth: ' + basisImageDesc.origHeight); log('numBlocksX: ' + basisImageDesc.numBlocksX + ' origWidth: ' + basisImageDesc.numBlocksY); log('numLevels: ' + basisImageDesc.numLevels); @@ -198,15 +198,15 @@ for (level_index = 0; level_index < basisImageDesc.numLevels; level_index++) { var basisImageLevelDesc = basisFile.getImageLevelDesc(image_index, level_index); - - log('level: ' + level_index + + + log('level: ' + level_index + ' rgb_file_offset: ' + basisImageLevelDesc.rgbFileOfs + ' rgb_file_len: ' + basisImageLevelDesc.rgbFileLen); - if (basisFileDesc.hasAlphaSlices) + if (basisFileDesc.hasAlphaSlices) log('alpha_file_offset: ' + basisImageLevelDesc.alphaFileOfs + ' alpha_file_len: ' + basisImageLevelDesc.alphaFileLen); } } - + log('------'); } @@ -226,7 +226,7 @@ images = basisFile.getNumImages(); levels = basisFile.getNumLevels(0); has_alpha = basisFile.getHasAlpha(); - + dumpBasisFileDesc(basisFile); if (!width || !height || !images || !levels) { @@ -235,9 +235,9 @@ basisFile.delete(); return; } - + // Note: If the file is UASTC, the preferred formats are ASTC/BC7. - // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. + // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. 
var formatString = 'UNKNOWN'; if (astcSupported) @@ -275,7 +275,7 @@ formatString = 'PVRTC1_RGB'; format = BASIS_FORMAT.cTFPVRTC1_4_RGB; } - + if ( ((width & (width - 1)) != 0) || ((height & (height - 1)) != 0) ) @@ -284,7 +284,7 @@ } if (width != height) { - log('ERROR: PVRTC1 requires square power of 2 textures'); + log('ERROR: PVRTC1 requires square power of 2 textures'); } } else if (etcSupported) @@ -311,7 +311,7 @@ const dstSize = basisFile.getImageTranscodedSizeInBytes(0, 0, format); const dst = new Uint8Array(dstSize); - + //log(dstSize); // if (!basisFile.transcodeImage(dst, 0, 0, format, 1, 0)) { @@ -320,7 +320,7 @@ console.warn('transcodeImage failed'); basisFile.close(); basisFile.delete(); - + return; } @@ -338,7 +338,7 @@ alignedWidth = (width + 3) & ~3; alignedHeight = (height + 3) & ~3; - + displayWidth = alignedWidth; displayHeight = alignedHeight; @@ -373,9 +373,9 @@ displayWidth = width; displayHeight = height; - // Create 565 texture. + // Create 565 texture. var dstTex = new Uint16Array(width * height); - + // Convert the array of bytes to an array of uint16's. var pix = 0; for (var y = 0; y < height; y++) @@ -388,16 +388,16 @@ redraw(); } -function download_file(filename, body) +function download_file(filename, body) { var element = document.createElement('a'); - + //element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); - const blob = new Blob([body]); + const blob = new Blob([body]); const url = URL.createObjectURL(blob); element.setAttribute('href', url); - + element.setAttribute('download', filename); element.style.display = 'none'; @@ -413,14 +413,14 @@ function PNGDataLoaded(data) { const { BasisFile, BasisEncoder, initializeBasis, encodeBasisTexture } = Module; - + initializeBasis(); - + // Create a destination buffer to hold the compressed .basis file data. If this buffer isn't large enough compression will fail. 
var basisFileData = new Uint8Array(1024*1024*10); - + var num_output_bytes; - + // Compress using the BasisEncoder class. log('BasisEncoder::encode() started:'); @@ -437,22 +437,22 @@ basisEncoder.setQualityLevel(qualityLevel); basisEncoder.setUASTC(uastcFlag); basisEncoder.setMipGen(elem('Mipmaps').checked); - + if (!uastcFlag) log('Encoding at ETC1S quality level ' + qualityLevel); - + const startTime = performance.now(); - + num_output_bytes = basisEncoder.encode(basisFileData); - + const elapsed = performance.now() - startTime; - + logTime('encoding time', elapsed.toFixed(2)); - + var actualBasisFileData = new Uint8Array(basisFileData.buffer, 0, num_output_bytes); basisEncoder.delete(); - + if (num_output_bytes == 0) { log('encodeBasisTexture() failed!'); @@ -460,12 +460,12 @@ else { log('encodeBasisTexture() succeeded, output size ' + num_output_bytes); - + encodedBasisFile = actualBasisFileData; - + //download("test.basis", actualBasisFileData); } - + if (num_output_bytes != 0) { dataLoaded(actualBasisFileData); @@ -486,10 +486,10 @@ function viewRGB() { drawMode = 1; redraw(); } function viewAlpha() { drawMode = 2; redraw(); } -function downloadEncodedFile() +function downloadEncodedFile() { if (encodedBasisFile) - { + { if (encodedBasisFile.length) download_file("encoded_file.basis", encodedBasisFile); } @@ -511,7 +511,7 @@
- +
.png file: @@ -533,15 +533,15 @@
UASTC: - +
- + ETC1S Quality: - - + +
- +
@@ -558,17 +558,17 @@ - + diff --git a/webgl/ktx2_encode_test/dxt-to-rgb565.js b/webgl/ktx2_encode_test/dxt-to-rgb565.js index 8e35be4d..7827cf69 100644 --- a/webgl/ktx2_encode_test/dxt-to-rgb565.js +++ b/webgl/ktx2_encode_test/dxt-to-rgb565.js @@ -37,7 +37,7 @@ function dxtToRgb565(src, src16Offset, width, height) { i = src16Offset + 4 * (blockY * blockWidth + blockX); c[0] = src[i]; c[1] = src[i + 1]; - + r0 = c[0] & 0x1f; g0 = c[0] & 0x7e0; b0 = c[0] & 0xf800; @@ -50,7 +50,7 @@ function dxtToRgb565(src, src16Offset, width, height) { // decoder in many GPUs does :) // rg FIXME: This is most likely leading to wrong results vs. a GPU - + c[2] = ((5 * r0 + 3 * r1) >> 3) | (((5 * g0 + 3 * g1) >> 3) & 0x7e0) | (((5 * b0 + 3 * b1) >> 3) & 0xf800); diff --git a/webgl/ktx2_encode_test/index.html b/webgl/ktx2_encode_test/index.html index 1835ae75..81588176 100644 --- a/webgl/ktx2_encode_test/index.html +++ b/webgl/ktx2_encode_test/index.html @@ -125,8 +125,8 @@ cTFFXT1_RGB: 17, cTFPVRTC2_4_RGB: 18, cTFPVRTC2_4_RGBA: 19, - cTFETC2_EAC_R11: 20, - cTFETC2_EAC_RG11: 21 + cTFETC2_EAC_R11: 20, + cTFETC2_EAC_RG11: 21 }; BASIS_FORMAT_NAMES = {}; @@ -137,7 +137,7 @@ DXT_FORMAT_MAP = {}; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC1] = COMPRESSED_RGB_S3TC_DXT1_EXT; DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC3] = COMPRESSED_RGBA_S3TC_DXT5_EXT; -DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; +DXT_FORMAT_MAP[BASIS_FORMAT.cTFBC7] = COMPRESSED_RGBA_BPTC_UNORM; var astcSupported = false; var etcSupported = false; @@ -158,8 +158,8 @@ function dumpKTX2FileDesc(ktx2File) { - log('------'); - + log('------'); + log('Width: ' + ktx2File.getWidth()); log('Height: ' + ktx2File.getHeight()); log('Faces: ' + ktx2File.getFaces()); @@ -179,7 +179,7 @@ log('DFD Channel0: ' + ktx2File.getDFDChannelID0()); log('DFD Channel1: ' + ktx2File.getDFDChannelID1()); log('Is Video: ' + ktx2File.isVideo()); - + var dfdSize = ktx2File.getDFDSize(); var dvdData = new Uint8Array(dfdSize); ktx2File.getDFD(dvdData); 
@@ -188,17 +188,17 @@ log('--'); log('--'); - log('Key values:'); + log('Key values:'); var key_index; - for (key_index = 0; key_index < ktx2File.getTotalKeys(); key_index++) + for (key_index = 0; key_index < ktx2File.getTotalKeys(); key_index++) { var key_name = ktx2File.getKey(key_index); log('Key ' + key_index + ': "' + key_name + '"'); - + var valSize = ktx2File.getKeyValueSize(key_name); if (valSize != 0) - { + { var val_data = new Uint8Array(valSize); var status = ktx2File.getKeyValue(key_name, val_data); if (!status) @@ -207,21 +207,21 @@ { log('value size: ' + val_data.length); var i, str = ""; - + for (i = 0; i < val_data.length; i++) { var c = val_data[i]; str = str + String.fromCharCode(c); } - + log(str); } - + } else log(''); } - + log('--'); log('Image level information:'); var level_index; @@ -229,14 +229,14 @@ { var layer_index; for (layer_index = 0; layer_index < Math.max(1, ktx2File.getLayers()); layer_index++) - { + { var face_index; for (face_index = 0; face_index < ktx2File.getFaces(); face_index++) { var imageLevelInfo = ktx2File.getImageLevelInfo(level_index, layer_index, face_index); - + log('level: ' + level_index + ' layer: ' + layer_index + ' face: ' + face_index); - + log('orig_width: ' + imageLevelInfo.origWidth); log('orig_height: ' + imageLevelInfo.origHeight); log('width: ' + imageLevelInfo.width); @@ -248,7 +248,7 @@ log('iframeFlag: ' + imageLevelInfo.iframeFlag); if (ktx2File.isETC1S()) log('ETC1S image desc image flags: ' + ktx2File.getETC1SImageDescImageFlags(level_index, layer_index, face_index)); - + log('--'); } } @@ -256,7 +256,7 @@ log('--'); log('KTX2 header:'); var hdr = ktx2File.getHeader(); - + log('vkFormat: ' + hdr.vkFormat); log('typeSize: ' + hdr.typeSize); log('pixelWidth: ' + hdr.pixelWidth); @@ -272,7 +272,7 @@ log('kvdByteLength: ' + hdr.kvdByteLength); log('sgdByteOffset: ' + hdr.sgdByteOffset); log('sgdByteLength: ' + hdr.sgdByteLength); - + log('------'); } @@ -286,7 +286,7 @@ const startTime = 
performance.now(); const ktx2File = new KTX2File(new Uint8Array(data)); - + if (!ktx2File.isValid()) { console.warn('Invalid or unsupported .ktx2 file'); @@ -301,16 +301,16 @@ levels = ktx2File.getLevels(); faces = ktx2File.getFaces(); has_alpha = ktx2File.getHasAlpha(); - + if (!width || !height || !levels) { console.warn('Invalid .ktx2 file'); ktx2File.close(); ktx2File.delete(); return; } - + // Note: If the file is UASTC, the preferred formats are ASTC/BC7. - // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. + // If the file is ETC1S and doesn't have alpha, the preferred formats are ETC1 and BC1. For alpha, the preferred formats are ETC2, BC3 or BC7. var formatString = 'UNKNOWN'; if (astcSupported) @@ -348,7 +348,7 @@ formatString = 'PVRTC1_RGB'; format = BASIS_FORMAT.cTFPVRTC1_4_RGB; } - + if ( ((width & (width - 1)) != 0) || ((height & (height - 1)) != 0) ) @@ -357,7 +357,7 @@ } if (width != height) { - log('ERROR: PVRTC1 requires square power of 2 textures'); + log('ERROR: PVRTC1 requires square power of 2 textures'); } } else if (etcSupported) @@ -381,12 +381,12 @@ basisFile.delete(); return; } - + dumpKTX2FileDesc(ktx2File); const dstSize = ktx2File.getImageTranscodedSizeInBytes(0, 0, 0, format); const dst = new Uint8Array(dstSize); - + //log(dstSize); if (!ktx2File.transcodeImage(dst, 0, 0, 0, format, 0, -1, -1)) { @@ -394,7 +394,7 @@ console.warn('transcodeImage failed'); ktx2File.close(); ktx2File.delete(); - + return; } @@ -413,7 +413,7 @@ alignedWidth = (width + 3) & ~3; alignedHeight = (height + 3) & ~3; - + displayWidth = alignedWidth; displayHeight = alignedHeight; @@ -448,9 +448,9 @@ displayWidth = width; displayHeight = height; - // Create 565 texture. + // Create 565 texture. var dstTex = new Uint16Array(width * height); - + // Convert the array of bytes to an array of uint16's. 
var pix = 0; for (var y = 0; y < height; y++) @@ -463,16 +463,16 @@ redraw(); } -function download_file(filename, body) +function download_file(filename, body) { var element = document.createElement('a'); - + //element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(text)); - const blob = new Blob([body]); + const blob = new Blob([body]); const url = URL.createObjectURL(blob); element.setAttribute('href', url); - + element.setAttribute('download', filename); element.style.display = 'none'; @@ -488,14 +488,14 @@ function PNGDataLoaded(data) { const { BasisFile, BasisEncoder, initializeBasis, encodeBasisTexture } = Module; - + initializeBasis(); - + // Create a destination buffer to hold the compressed .basis file data. If this buffer isn't large enough compression will fail. var ktx2FileData = new Uint8Array(1024*1024*10); - + var num_output_bytes; - + // Compress using the BasisEncoder class. log('BasisEncoder::encode() started:'); @@ -507,7 +507,7 @@ basisEncoder.setCreateKTX2File(true); basisEncoder.setKTX2UASTCSupercompression(true); basisEncoder.setKTX2SRGBTransferFunc(true); - + basisEncoder.setSliceSourceImage(0, new Uint8Array(data), 0, 0, true); basisEncoder.setDebug(elem('Debug').checked); basisEncoder.setComputeStats(elem('ComputeStats').checked); @@ -516,22 +516,22 @@ basisEncoder.setQualityLevel(qualityLevel); basisEncoder.setUASTC(uastcFlag); basisEncoder.setMipGen(elem('Mipmaps').checked); - + if (!uastcFlag) log('Encoding at ETC1S quality level ' + qualityLevel); - + const startTime = performance.now(); - + num_output_bytes = basisEncoder.encode(ktx2FileData); - + const elapsed = performance.now() - startTime; - + logTime('encoding time', elapsed.toFixed(2)); - + var actualKTX2FileData = new Uint8Array(ktx2FileData.buffer, 0, num_output_bytes); basisEncoder.delete(); - + if (num_output_bytes == 0) { log('encodeBasisTexture() failed!'); @@ -539,12 +539,12 @@ else { log('encodeBasisTexture() succeeded, output size ' + 
num_output_bytes); - + encodedKTX2File = actualKTX2FileData; - + //download("test.ktx2", actualKTX2FileData); } - + if (num_output_bytes != 0) { dataLoaded(actualKTX2FileData); @@ -565,10 +565,10 @@ function viewRGB() { drawMode = 1; redraw(); } function viewAlpha() { drawMode = 2; redraw(); } -function downloadEncodedFile() +function downloadEncodedFile() { if (encodedKTX2File) - { + { if (encodedKTX2File.length) download_file("encoded_file.ktx2", encodedKTX2File); } @@ -591,7 +591,7 @@
- +
.png file: @@ -613,15 +613,15 @@
UASTC: - +
- + ETC1S Quality: - - + +
- +
@@ -638,17 +638,17 @@