From 72b2b3b1d8b646a81a800c8bdd9da692d741f8a7 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Thu, 21 Mar 2024 15:46:39 +0800 Subject: [PATCH 001/244] long range attachment --- projects/CUDA/Utils.hpp | 6 +- .../CuLagrange/fem/FleshDynamicStepping.cu | 26 --- .../collision_energy/evaluate_collision.hpp | 2 +- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 150 +++++++++++++++++- projects/CuLagrange/pbd/ConstraintsSolver.cu | 14 +- .../constraint_function_kernel/constraint.cuh | 47 +++--- .../constraint_types.hpp | 1 + projects/ZenoFX/nw.cpp | 12 +- zeno/src/nodes/StringNodes.cpp | 73 ++++++++- 9 files changed, 268 insertions(+), 63 deletions(-) diff --git a/projects/CUDA/Utils.hpp b/projects/CUDA/Utils.hpp index 9938132296..ed93bd4464 100644 --- a/projects/CUDA/Utils.hpp +++ b/projects/CUDA/Utils.hpp @@ -103,8 +103,8 @@ retrieve_bounding_volumes(Pol &pol, const TileVecT &vtemp, template zs::Vector retrieve_bounding_volumes(Pol &pol, const TileVecT &vtemp, - float radius = 0.f, - const zs::SmallString &xTag = "xn") { + const float& radius, + const zs::SmallString &xTag) { using namespace zs; using bv_t = typename ZenoParticles::lbvh_t::Box; constexpr auto space = Pol::exec_tag::value; @@ -225,7 +225,7 @@ void retrieve_bounding_volumes(Pol &pol, const TileVecT &vtemp, } // for ccd -template +template zs::Vector retrieve_bounding_volumes( Pol &pol, const TileVecT0 &verts, const typename ZenoParticles::particles_t &eles, const TileVecT1 &vtemp, diff --git a/projects/CuLagrange/fem/FleshDynamicStepping.cu b/projects/CuLagrange/fem/FleshDynamicStepping.cu index d0dc444031..ceec5f142c 100644 --- a/projects/CuLagrange/fem/FleshDynamicStepping.cu +++ b/projects/CuLagrange/fem/FleshDynamicStepping.cu @@ -1361,14 +1361,6 @@ struct FleshDynamicStepping : INode { TILEVEC_OPS::copy(cudaPol,inbbw,"w",bbw,"w"); TILEVEC_OPS::copy(cudaPol,inbbw,"strength",bbw,"strength"); TILEVEC_OPS::copy(cudaPol,inbbw,"cnorm",bbw,"cnorm"); - - // if(zsbones_verts.has_attr("drivenStrength")) - // 
ompExec(zs::range(zsbones_verts.size()), - // [bverts = proxy(bverts),&zsbones_verts] (int i) mutable { - // auto v = zsbones_verts[i]; - // bverts[i] = zs::vec{v[0],v[1],v[2]}; - // }); - } // bverts = bverts.clone({zs::memsrc_e::device,0}); // std::cout << "bverts.size() = " << bverts.size() << std::endl; @@ -1385,24 +1377,6 @@ struct FleshDynamicStepping : INode { {"inds",3}, {"nrm",3}},0,zs::memsrc_e::device,0); - - // dtiles_t surf_tris_buffer{tris.get_allocator(),{ - // {"inds",3}, - // {"nrm",3}, - // {"he_inds",1} - // },tris.size()}; - - // dtiles_t surf_verts_buffer{points.get_allocator(),{ - // {"inds",1}, - // {"xn",3}, - // {"is_loop_vertex",1}, - // {"mustExclude",1} - // },points.size()}; - // TILEVEC_OPS::copy(cudaPol,points,"inds",surf_verts_buffer,"inds"); - // TILEVEC_OPS::copy(cudaPol,tris,"inds",surf_tris_buffer,"inds"); - // TILEVEC_OPS::copy(cudaPol,tris,"he_inds",surf_tris_buffer,"he_inds"); - // reorder_topology(cudaPol,points,surf_tris_buffer); - // zs::Vector nodal_colors{surf_verts_buffer.get_allocator(),surf_verts_buffer.size()}; dtiles_t gia_res{points.get_allocator(),{ {"ring_mask",1}, {"type_mask",1}, diff --git a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp index f11557b5b6..e6996dc496 100644 --- a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp @@ -865,7 +865,7 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, if(verts("collision_cancel",vi) > 1e-3) return; auto p = verts.pack(dim_c<3>,xtag,vi); - auto bv = bv_t{get_bounding_box(p - thickness/(T)2,p + thickness/(T)2)}; + auto bv = bv_t{ (p - thickness/(T)2,p + thickness/(T)2)}; zs::vec ts[3] = {}; diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index cad66159fe..a6372a0296 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ 
b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -11,6 +11,7 @@ #include #include #include +#include "../../Utils.hpp" #include "constraint_function_kernel/constraint.cuh" #include "../geometry/kernel/tiled_vector_ops.hpp" @@ -88,10 +89,6 @@ virtual void apply() override { auto uniform_xpbd_affiliation = get_input2("xpbd_affiliation"); auto damping_coeff = get_input2("damping_coeff"); - - // auto paramsList = get_input("paramList")->getLiterial(); - - auto make_empty = get_input2("make_empty_constraint"); if(!make_empty) { @@ -113,6 +110,150 @@ virtual void apply() override { auto do_constraint_topological_coloring = get_input2("do_constraint_topological_coloring"); + if(type == "lra_stretch") { + constraint->setMeta(CONSTRAINT_KEY,category_c::long_range_attachment); + auto radii = get_input2("thickness"); + auto attach_group_name = get_input2("group_name"); + auto has_group = verts.hasProperty(attach_group_name); + + if(!has_group) { + std::cout << "the input vertices has no specified group tag : " << attach_group_name << std::endl; + throw std::runtime_error("the input vertices has no specified LRA group"); + } + + zs::bht attachAnchors{verts.get_allocator(),(size_t)verts.size()}; + attachAnchors.reset(cudaPol,true); + + cudaPol(zs::range(verts.size()),[ + verts = proxy({},verts), + attachAnchors = proxy(attachAnchors)] ZS_LAMBDA(int vi) mutable { + if(verts("minv",vi) == 0.) 
+ attachAnchors.insert(vi); + }); + + auto nmAttachAnchors = attachAnchors.size(); + dtiles_t vtemp{verts.get_allocator(),{ + {"x",3}, + {"id",1} + },nmAttachAnchors}; + + // std::cout << "nmAttachAnchors : " << nmAttachAnchors << std::endl; + + cudaPol(zip(zs::range(nmAttachAnchors),attachAnchors._activeKeys),[ + verts = proxy({},verts), + // thickness = thickness, + vtemp = proxy({},vtemp)] ZS_LAMBDA(auto ci,const auto& pvec) mutable { + auto vi = pvec[0]; + vtemp.tuple(dim_c<3>,"x",ci) = verts.pack(dim_c<3>,"x",vi); + vtemp("id",ci) = zs::reinterpret_bits(vi); + }); + + auto attBvh = bvh_t{}; + auto attBvs = retrieve_bounding_volumes(cudaPol,vtemp,radii,"x"); + attBvh.build(cudaPol,attBvs); + + zs::Vector maxNmPairsBuffer{verts.get_allocator(),1}; + maxNmPairsBuffer.setVal(0); + + constexpr auto exec_tag = wrapv{}; + + cudaPol(zs::range(verts.size()),[ + exec_tag = exec_tag, + verts = proxy({},verts), + vtemp = proxy({},vtemp), + attBvh = proxy(attBvh), + thickness = radii, + maxNmPairsBuffer = proxy(maxNmPairsBuffer), + attach_group_name = zs::SmallString(attach_group_name)] ZS_LAMBDA(int vi) mutable { + auto p = verts.pack(dim_c<3>,"x",vi); + if(verts("minv",vi) == 0.0) + return; + + auto bv = bv_t(p,p); + + auto do_close_proximity_detection = [&](int ai) mutable { + auto ap = vtemp.pack(dim_c<3>,"x",ai); + auto avi = zs::reinterpret_bits(vtemp("id",ai)); + if(avi == vi) + return; + + auto groupVi = verts(attach_group_name,vi); + auto groupAVi = verts(attach_group_name,avi); + if(zs::abs(groupVi - groupAVi) > 0.5) + return; + + // we need to switch the distance evaluation from euclidean distance to geodesic one + auto dist = (ap - p).norm(); + if(dist < thickness) { + atomic_add(exec_tag,&maxNmPairsBuffer[0],1); + } + }; + attBvh.iter_neighbors(bv,do_close_proximity_detection); + }); + + auto maxNmPairs = maxNmPairsBuffer.getVal(0); + std::cout << "maxNmPairs : " << maxNmPairs << std::endl; + zs::bht attachPairs{verts.get_allocator(),(size_t)maxNmPairs}; + 
attachPairs.reset(cudaPol,true); + + cudaPol(zs::range(verts.size()),[ + // exec_tag = exec_tag, + verts = proxy({},verts), + vtemp = proxy({},vtemp), + attBvh = proxy(attBvh), + thickness = radii, + attachPairs = proxy(attachPairs), + attach_group_name = zs::SmallString(attach_group_name)] ZS_LAMBDA(int vi) mutable { + auto p = verts.pack(dim_c<3>,"x",vi); + auto bv = bv_t(p,p); + + auto do_close_proximity_detection = [&](int ai) mutable { + auto ap = vtemp.pack(dim_c<3>,"x",ai); + auto avi = zs::reinterpret_bits(vtemp("id",ai)); + if(avi == vi) + return; + + auto groupVi = verts(attach_group_name,vi); + auto groupAVi = verts(attach_group_name,avi); + if(zs::abs(groupVi - groupAVi) > 0.5) + return; + + // we need to switch the distance evaluation from euclidean distance to geodesic one + auto dist = (ap - p).norm(); + if(dist < thickness) { + attachPairs.insert(vec2i(vi,avi)); + } + }; + attBvh.iter_neighbors(bv,do_close_proximity_detection); + }); + + auto rest_scale = get_input2("rest_scale"); + std::cout << "number of attach pairs : " << attachPairs.size() << std::endl; + eles.resize(attachPairs.size()); + eles.append_channels(cudaPol,{{"inds",2},{"r",1}}); + cudaPol(zip(zs::range(attachPairs.size()),attachPairs._activeKeys),[ + rest_scale = rest_scale, + eles = proxy({},eles), + verts = proxy({},verts)] ZS_LAMBDA(auto ai,const auto& pair) mutable { + eles.tuple(dim_c<2>,"inds",ai) = pair.reinterpret_bits(); + auto v0 = verts.pack(dim_c<3>,"x",pair[0]); + auto v1 = verts.pack(dim_c<3>,"x",pair[1]); + eles("r",ai) = (v0 - v1).norm() * rest_scale; + }); + + zs::Vector> point_topos{verts.get_allocator(),0}; + point_topos.resize(eles.size()); + cudaPol(zip(zs::range(attachPairs.size()),attachPairs._activeKeys),[ + point_topos = proxy(point_topos)] ZS_LAMBDA(auto ai,const auto& pair) mutable { + point_topos[ai] = pair[0]; + }); + + if(do_constraint_topological_coloring) { + topological_coloring(cudaPol,point_topos,colors); + 
sort_topology_by_coloring_tag(cudaPol,colors,reordered_map,color_offset); + } + } + if(type == "stretch") { constraint->setMeta(CONSTRAINT_KEY,category_c::edge_length_constraint); auto quads_vec = tilevec_topo_to_zsvec_topo(cudaPol,quads,wrapv<3>{}); @@ -1270,6 +1411,7 @@ ZENDEFNODE(MakeSurfaceConstraintTopology, {{ {"float","thickness","0.1"}, {"int","substep_id","0"}, {"int","nm_substeps","1"}, + {"int","max_constraint_pairs","1"}, {"bool","make_empty_constraint","0"}, {"bool","do_constraint_topological_coloring","1"}, {"float","damping_coeff","0.0"}, diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index cf92eafd50..b524e57e44 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -170,7 +170,7 @@ struct XPBDSolve : INode { verts.tuple(dim_c<3>,ptag,pi) = kc + knrm * rd; } - if(category == category_c::edge_length_constraint || category == category_c::dihedral_spring_constraint) { + if(category == category_c::edge_length_constraint || category == category_c::dihedral_spring_constraint || category == category_c::long_range_attachment) { // printf("do xpbd solve\n"); auto edge = cquads.pack(dim_c<2>,"inds",coffset + gi,int_c); @@ -194,6 +194,7 @@ struct XPBDSolve : INode { // lambda, // dp0,dp1)) // return; + bool stretch_resistence_only = category == category_c::long_range_attachment; if(!CONSTRAINT::solve_DistanceConstraint( p0,minv0, p1,minv1, @@ -203,7 +204,8 @@ struct XPBDSolve : INode { kd, dt, lambda, - dp0,dp1)) + dp0,dp1, + stretch_resistence_only)) return; verts.tuple(dim_c<3>,ptag,edge[0]) = p0 + dp0; @@ -758,7 +760,7 @@ struct XPBDSolveSmoothAll : INode { auto category = constraint_ptr->readMeta(CONSTRAINT_KEY,wrapt{}); const auto& cquads = constraint_ptr->getQuadraturePoints(); - if(category == category_c::edge_length_constraint || category == category_c::dihedral_bending_constraint) { + if(category == category_c::edge_length_constraint || 
category == category_c::dihedral_bending_constraint || category == category_c::long_range_attachment) { cudaPol(zs::range(cquads.size()),[ cquads = proxy({},cquads), dt = dt, @@ -793,6 +795,7 @@ struct XPBDSolveSmoothAll : INode { vec3 dp[2] = {}; auto lambda = (T)0; + bool do_stretch_resistence_only = category == category_c::long_range_attachment; if(!CONSTRAINT::solve_DistanceConstraint( p0,minv0, p1,minv1, @@ -802,12 +805,13 @@ struct XPBDSolveSmoothAll : INode { kd, dt, lambda, - dp[0],dp[1])) + dp[0],dp[1], + do_stretch_resistence_only)) return; // printf("smooth stretch update : %f %f\n",(float)dp[0].norm(),(float)dp[1].norm()); for(int i = 0;i != 2;++i) { if(isnan(dp[i].norm())) - printf("nan dp[%d] detected at stretch\n",i); + printf("nan dp[%d] detected at bending\n",i); // atomic_add(exec_tag,&weight_sum[edge[i]],w); atomic_add(exec_tag,&verts(wOffset,edge[i]),w); for(int d = 0;d != 3;++d) diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh index 2837974cab..3fad41cee4 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh @@ -76,14 +76,14 @@ namespace zeno { namespace CONSTRAINT { constexpr bool solve_DistanceConstraint( const VECTOR3d &p0, const SCALER& invMass0, const VECTOR3d &p1, const SCALER& invMass1, - const VECTOR3d &pp0, - const VECTOR3d &pp1, + const VECTOR3d &pp0,const VECTOR3d &pp1, const SCALER& restLength, const SCALER& xpbd_affiliation, const SCALER& kdamp_ratio, const SCALER& dt, SCALER& lambda, - VECTOR3d &corr0, VECTOR3d &corr1) + VECTOR3d &corr0, VECTOR3d &corr1, + bool stretch_resistence_only = false) { SCALER wsum = invMass0 + invMass1; if(wsum < static_cast(1e-6)) { @@ -92,42 +92,47 @@ namespace zeno { namespace CONSTRAINT { VECTOR3d n = p0 - p1; SCALER d = n.norm(); - SCALER C = d - restLength; - - if (d > static_cast(1e-6)) + if(d > 
static_cast(1e-6)) { n /= d; - else - { + } + else { corr0 = VECTOR3d::uniform(0); corr1 = VECTOR3d::uniform(0); return false; - } + } + SCALER C = d - restLength; + SCALER alpha = 0.0; - if (xpbd_affiliation != 0.0) - { + if (xpbd_affiliation != 0.0){ alpha = static_cast(1.0) / (xpbd_affiliation * dt * dt); } - const auto& gradC = n; - SCALER dsum = 0.0, gamma = 1.0; + if(stretch_resistence_only && C < 0) { + corr0 = VECTOR3d::uniform(0); + corr1 = VECTOR3d::uniform(0); + return false; + } + + auto gradC = n; + + SCALER dsum = 0.0, gamma = 0.0; if(kdamp_ratio > 0) { auto beta = kdamp_ratio * dt * dt; gamma = alpha * beta / dt; - dsum = gamma * n.dot((p0 - pp0) - (p1 - pp1)); + dsum = gamma * gradC.dot((p0 - pp0) - (p1 - pp1)); // gamma += 1.0; } - const SCALER delta_lambda = -(C + alpha * lambda + dsum) / ((gamma + static_cast(1.0)) * wsum + alpha); + const SCALER dL = -(C + alpha * lambda + dsum) / ((gamma + static_cast(1.0))* wsum + alpha); + const VECTOR3d dp = n * dL; - lambda += delta_lambda; - - const VECTOR3d pt = n * delta_lambda; + lambda += dL; - corr0 = invMass0 * pt; - corr1 = -invMass1 * pt; + corr0 = invMass0 * dp; + corr1 = -invMass1 * dp; return true; } @@ -408,7 +413,7 @@ namespace zeno { namespace CONSTRAINT { alpha = static_cast(1.0) / (xpbd_affliation * dt * dt); SCALER dsum = 0.0; - SCALER gamma = 1.0; + SCALER gamma = 0.0; if(kdamp_ratio > 0) { auto beta = kdamp_ratio * dt * dt; gamma = alpha * beta / dt; diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp index eb277a49c3..b503178da5 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp @@ -21,6 +21,7 @@ constexpr auto PBD_USE_HARD_CONSTRAINT = "PBD_USE_HARD_CONSTRAINT"; // constexpr auto DCD_COLLISIONS_MESH_COLLIDER = "DCD_COLLISION_MESH_COLLIDER"; enum category_c : int { + 
long_range_attachment, edge_length_constraint, isometric_bending_constraint, dihedral_bending_constraint, diff --git a/projects/ZenoFX/nw.cpp b/projects/ZenoFX/nw.cpp index b8e078550e..340f4b22f6 100644 --- a/projects/ZenoFX/nw.cpp +++ b/projects/ZenoFX/nw.cpp @@ -85,7 +85,17 @@ struct NumericWrangle : zeno::INode { auto par = zeno::objectToLiterial(obj); auto dim = std::visit([&] (auto const &v) { using T = std::decay_t; - if constexpr (std::is_convertible_v) { + if constexpr (std::is_convertible_v) { + parvals.push_back(v[0]); + parvals.push_back(v[1]); + parvals.push_back(v[2]); + parvals.push_back(v[3]); + parnames.emplace_back(key, 0); + parnames.emplace_back(key, 1); + parnames.emplace_back(key, 2); + parnames.emplace_back(key, 3); + return 4; + } else if constexpr (std::is_convertible_v) { parvals.push_back(v[0]); parvals.push_back(v[1]); parvals.push_back(v[2]); diff --git a/zeno/src/nodes/StringNodes.cpp b/zeno/src/nodes/StringNodes.cpp index 2bb08140a7..a644e9057b 100644 --- a/zeno/src/nodes/StringNodes.cpp +++ b/zeno/src/nodes/StringNodes.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace zeno { @@ -87,7 +88,7 @@ ZENDEFNODE(StringEqual, { struct PrintString : zeno::INode { virtual void apply() override { auto str = get_input2("str"); - printf("PrintString: %s\n", str.c_str()); + printf("%s\n", str.c_str()); } }; @@ -230,9 +231,18 @@ ZENDEFNODE(StringFormatNumStr, { struct StringRegexMatch : zeno::INode { virtual void apply() override { + using namespace std::regex_constants; + auto str = get_input2("str"); auto regex_str = get_input2("regex"); - std::regex self_regex(regex_str); + + auto case_sensitive = get_input2("case_sensitive"); + + auto default_flags = ECMAScript; + if(!case_sensitive) + default_flags |= icase; + + std::regex self_regex(regex_str,default_flags); int output = std::regex_match(str, self_regex); set_output2("output", output); @@ -243,6 +253,7 @@ ZENDEFNODE(StringRegexMatch, { { {"string", "str", ""}, 
{"string", "regex", ""}, + {"bool","case_sensitive","1"} }, { {"int", "output"} @@ -251,6 +262,64 @@ ZENDEFNODE(StringRegexMatch, { {"string"}, }); +struct StringRegexSearch : zeno::INode { + virtual void apply() override { + using namespace std::regex_constants; + + auto str = get_input2("str"); + + auto regex_str = get_input2("regex"); + auto case_sensitive = get_input2("case_sensitive"); + + std::smatch res{}; + + auto flags = ECMAScript; + if(!case_sensitive) + flags |= icase; + + std::regex self_regex(regex_str,flags); + + auto matched_substr_list = std::make_shared(); + + // int search_success = std::regex_search(str,res,self_regex); + int search_success = 0; + + + while(std::regex_search(str,res,self_regex)) { + search_success = 1; + auto is_first_matched = true; + for(auto w : res) { + if(is_first_matched) { + is_first_matched = false; + continue; + } + auto zstr = std::make_shared(); + zstr->set(w.str()); + matched_substr_list->arr.push_back(std::move(zstr)); + } + + str = res.suffix().str(); + } + + set_output2("search_success",search_success); + set_output("res",std::move(matched_substr_list)); + } +}; + +ZENDEFNODE(StringRegexSearch, { + { + {"string", "str", ""}, + {"string", "regex", ""}, + {"bool","case_sensitive","1"} + }, + { + {"int", "search_success"}, + {"res"} + }, + {}, + {"string"}, +}); + struct StringSplitAndMerge: zeno::INode{ From 74d7d796b0f7346f6db2f6f84fc9275ac573f938 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Tue, 26 Mar 2024 01:26:55 +0800 Subject: [PATCH 002/244] shape matching and collision group --- .../collision_energy/evaluate_collision.hpp | 171 +++++++++++++++--- projects/CuLagrange/pbd/CollisionSolver.cu | 15 +- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 163 ++++++++++++++++- projects/CuLagrange/pbd/ConstraintsSolver.cu | 109 +++++++++++ .../constraint_types.hpp | 8 + 5 files changed, 430 insertions(+), 36 deletions(-) diff --git a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp 
b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp index e6996dc496..da18c01346 100644 --- a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp @@ -50,6 +50,9 @@ using vec3 = zs::vec; using vec4 = zs::vec; using vec4i = zs::vec; +#define COLLISION_AMONG_SAME_GROUP 1 +#define COLLISION_AMONG_DIFFERENT_GROUP 2 + template 0.1) { + return; + } + } + } + } + if(!(collision_group_strategy & COLLISION_AMONG_SAME_GROUP)) { + for(int i = 0;i != 2;++i) { + auto eaGroup = verts(collision_group_name,ea[i]); + for(int j = 0;j != 2;++j){ + auto ebGroup = verts(collision_group_name,eb[j]); + if(zs::abs(eaGroup - ebGroup) < 0.1) { + return; + } + } + } + } + } + + if(has_collision_cancel) for(int i = 0;i != 2;++i) if(verts("collision_cancel",eb[i]) > 1e-3) @@ -712,7 +744,8 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, ProximityBuffer& proximity_buffer, PTHashMap& csPT, bool skip_too_close_pair_at_rest_configuration = false, - bool use_collision_group = false) { + bool use_collision_group = false, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) { using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; constexpr auto eps = (T)1e-6; @@ -736,6 +769,7 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, thickness = thickness, thickness2 = thickness * thickness, triBvh = proxy(triBvh), + collision_group_strategy = collision_group_strategy, proximity_buffer = proxy({},proximity_buffer), csPT = proxy(csPT)] ZS_LAMBDA(int vi) mutable { @@ -751,6 +785,27 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, if(tri[i] == vi) return; + if(has_collision_group) { + if(!(collision_group_strategy & COLLISION_AMONG_DIFFERENT_GROUP)) { + auto vgroup = verts(collisionGroupTag,vi); + for(int i = 0;i != 3;++i) { + auto tgroup = verts(collisionGroupTag,tri[i]); + // if they belong to two different groups + 
if(zs::abs(vgroup - tgroup) > 0.1) { + return; + } + } + } + if(!(collision_group_strategy & COLLISION_AMONG_SAME_GROUP)) { + auto vgroup = verts(collisionGroupTag,vi); + for(int i = 0;i != 3;++i) { + auto tgroup = verts(collisionGroupTag,tri[i]); + if(zs::abs(vgroup - tgroup) < 0.1) + return; + } + } + } + for(int i = 0;i != 3;++i) if(verts.hasProperty("collision_cancel") && verts("collision_cancel",tri[i]) > eps) return; @@ -833,7 +888,8 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, const TriBvh& triBvh, PTHashMap& csPT, bool skip_too_close_pair_at_rest_configuration = false, - bool use_collision_group = false) { + bool use_collision_group = false, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) { using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; constexpr auto eps = (T)1e-6; @@ -846,6 +902,7 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, pol(zs::range(verts.size()),[ collisionGroupTag = collisionGroupTag, + collision_group_strategy = collision_group_strategy, has_collision_cancel = has_collision_cancel, has_rest_shape = has_rest_shape, has_collision_group = has_collision_group, @@ -875,6 +932,27 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, if(tri[i] == vi) return; + if(has_collision_group) { + if(!(collision_group_strategy & COLLISION_AMONG_DIFFERENT_GROUP)) { + auto vgroup = verts(collisionGroupTag,vi); + for(int i = 0;i != 3;++i) { + auto tgroup = verts(collisionGroupTag,tri[i]); + // if they belong to two different groups + if(zs::abs(vgroup - tgroup) > 0.1) { + return; + } + } + } + if(!(collision_group_strategy & COLLISION_AMONG_SAME_GROUP)) { + auto vgroup = verts(collisionGroupTag,vi); + for(int i = 0;i != 3;++i) { + auto tgroup = verts(collisionGroupTag,tri[i]); + if(zs::abs(vgroup - tgroup) < 0.1) + return; + } + } + } + if(has_collision_cancel) for(int i = 0;i != 3;++i) if(verts("collision_cancel",tri[i]) > eps) @@ -952,7 +1030,8 @@ void 
calc_imminent_self_EE_collision_impulse(Pol& pol, const REAL& thickness, size_t buffer_offset, const EdgeBvh& edgeBvh, - CollisionBuffer& imminent_collision_buffer,EEHashMap& csEE) { + CollisionBuffer& imminent_collision_buffer,EEHashMap& csEE, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) { using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; // constexpr auto exec_tag = wrapv{}; @@ -967,6 +1046,7 @@ void calc_imminent_self_EE_collision_impulse(Pol& pol, csEE.reset(pol,true); pol(zs::range(edges.size()),[ + collision_group_strategy = collision_group_strategy, xtag = zs::SmallString(xtag), verts = proxy({},verts), edges = proxy({},edges), @@ -995,6 +1075,9 @@ void calc_imminent_self_EE_collision_impulse(Pol& pol, if(eb[i] == ea[0] || eb[i] == ea[1]) return; } + + + for(int i = 0;i != 2;++i) if(verts.hasProperty("collision_cancel") && verts("collision_cancel",eb[i]) > 1e-3) return; @@ -1012,19 +1095,7 @@ void calc_imminent_self_EE_collision_impulse(Pol& pol, vec3 pbs[2] = {}; for(int i = 0;i != 2;++i) pbs[i] = verts.pack(dim_c<3>,xtag,eb[i]); - - // auto cp = (pas[0] - pas[1]).cross(pbs[0] - pbs[1]).norm(); - - // vec3 int_a{},int_b{}; - // COLLISION_UTILS::IntersectLineSegments(pas[0],pas[1],pbs[0],pbs[1],int_a,int_b); - // auto dist = (int_a - int_b).norm(); - // if(dist > thickness) - // return; - - // auto lb = (pbs[0] - pbs[1]).norm(); - - // auto ra = (pas[0] - int_a).norm() / la; - // auto rb = (pbs[0] - int_b).norm() / lb; + vec2 bary{}; LSL_GEO::get_edge_edge_barycentric_coordinates(pas[0],pas[1],pbs[0],pbs[1],bary); @@ -1129,7 +1200,8 @@ void calc_continous_self_PT_collision_impulse(Pol& pol, // bool recalc_collision_pairs = true, bool skip_too_close_pair_at_rest_configuration = false, bool use_collision_group = false, - bool output_debug_inform = false) { + bool output_debug_inform = false, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) 
{ using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; // constexpr auto exec_tag = wrapv{}; @@ -1843,8 +1915,8 @@ void calc_continous_self_PT_collision_impulse_with_toc(Pol& pol, ImpulseCount& impulse_count, bool skip_too_close_pair_at_rest_configuration = false, bool use_collision_group = false, - // bool recalc_collision_pairs = true, - bool output_debug_inform = false) { + bool output_debug_inform = false, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) { using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; // constexpr auto exec_tag = wrapv{}; @@ -1858,11 +1930,6 @@ void calc_continous_self_PT_collision_impulse_with_toc(Pol& pol, auto execTag = wrapv{}; - // std::cout << "do continous PT collilsion detection" << std::endl; - - // std::cout << "build continous PT spacial structure" << std::endl; - - auto bvs = retrieve_bounding_volumes(pol,verts,tris,verts,wrapv<3>{},(T)1.0,(T)thickness,xtag,vtag); if(refit_bvh) triCCDBvh.refit(pol,bvs); @@ -1879,6 +1946,7 @@ void calc_continous_self_PT_collision_impulse_with_toc(Pol& pol, pol(zs::range(verts.size()),[tocs = proxy(tocs)] ZS_LAMBDA(auto vi) mutable {tocs[vi] = 1;}); pol(zs::range(verts.size()),[ + collision_group_strategy = collision_group_strategy, use_collision_group = use_collision_group, skip_rest = skip_too_close_pair_at_rest_configuration, has_collision_group = has_collision_group, @@ -1888,7 +1956,6 @@ void calc_continous_self_PT_collision_impulse_with_toc(Pol& pol, vtag = vtag, verts = proxy({},verts), tris = proxy({},tris), - // csPT = proxy(csPT), thickness = thickness, igore_rest_shape_thickness = igore_rest_shape_thickness, tocs = proxy(tocs), @@ -1925,6 +1992,28 @@ void calc_continous_self_PT_collision_impulse_with_toc(Pol& pol, if(!has_dynamic_points) return; + if(has_collision_group) { + if(!(collision_group_strategy & COLLISION_AMONG_DIFFERENT_GROUP)) { + auto vgroup = verts("collision_group",vi); + 
for(int i = 0;i != 3;++i) { + auto tgroup = verts("collision_group",tri[i]); + // if they belong to two different groups + if(zs::abs(vgroup - tgroup) > 0.1) { + return; + } + } + } + if(!(collision_group_strategy & COLLISION_AMONG_SAME_GROUP)) { + auto vgroup = verts("collision_group",vi); + for(int i = 0;i != 3;++i) { + auto tgroup = verts("collision_group",tri[i]); + if(zs::abs(vgroup - tgroup) < 0.1) + return; + } + } + } + + if(skip_rest && has_rest_shape) { auto rp = verts.pack(dim_c<3>,"X",vi); vec3 rts[3] = {}; @@ -2606,7 +2695,8 @@ void calc_continous_self_EE_collision_impulse_with_toc(Pol& pol, ImpulseCountBuffer& impulse_count, bool skip_too_close_pair_at_rest_configuration = false, bool use_collision_group = false, - bool output_debug_inform = false) { + bool output_debug_inform = false, + int collision_group_strategy = COLLISION_AMONG_SAME_GROUP | COLLISION_AMONG_DIFFERENT_GROUP) { using namespace zs; constexpr auto space = RM_CVREF_T(pol)::exec_tag::value; // constexpr auto exec_tag = wrapv{}; @@ -2646,6 +2736,7 @@ void calc_continous_self_EE_collision_impulse_with_toc(Pol& pol, auto execTag = wrapv{}; pol(zs::range(nm_test_edges),[ + collision_group_strategy = collision_group_strategy, use_collision_group = use_collision_group, skip_rest = skip_too_close_pair_at_rest_configuration, has_collision_group = has_collision_group, @@ -2699,6 +2790,32 @@ void calc_continous_self_EE_collision_impulse_with_toc(Pol& pol, if(verts.hasProperty("collision_cancel") && verts("collision_cancel",eb[i]) > 1e-3) return; + if(has_collision_group) { + if(!(collision_group_strategy & COLLISION_AMONG_DIFFERENT_GROUP)) { + for(int i = 0;i != 2;++i) { + auto eaGroup = verts("collision_group",ea[i]); + for(int j = 0;j != 2;++j){ + auto ebGroup = verts("collision_group",eb[j]); + if(zs::abs(eaGroup - ebGroup) > 0.1) { + return; + } + } + } + } + if(!(collision_group_strategy & COLLISION_AMONG_SAME_GROUP)) { + for(int i = 0;i != 2;++i) { + auto eaGroup = 
verts("collision_group",ea[i]); + for(int j = 0;j != 2;++j){ + auto ebGroup = verts("collision_group",eb[j]); + if(zs::abs(eaGroup - ebGroup) < 0.1) { + return; + } + } + } + } + } + + auto has_dynamic_points = false; for(int i = 0;i != 2;++i) { if(invMass("minv",ea[i]) > eps) diff --git a/projects/CuLagrange/pbd/CollisionSolver.cu b/projects/CuLagrange/pbd/CollisionSolver.cu index bead8b5bcc..0953d76d97 100644 --- a/projects/CuLagrange/pbd/CollisionSolver.cu +++ b/projects/CuLagrange/pbd/CollisionSolver.cu @@ -64,6 +64,13 @@ struct DetangleCCDCollisionWithBoundary : INode { auto kboundary = get_input2("boundary"); + auto among_same_group = get_input2("among_same_group"); + auto among_different_groups = get_input2("among_different_groups"); + + int group_strategy = 0; + group_strategy |= (among_same_group ? 1 : 0); + group_strategy |= (among_different_groups ? 2 : 0); + auto boundary_velocity_scale = get_input2("boundary_velocity_scale"); // auto current_kx_tag = get_input2("current_kx_tag"); // auto pre_kx_tag = get_input2("previous_kx_tag"); @@ -208,7 +215,8 @@ struct DetangleCCDCollisionWithBoundary : INode { do_bvh_refit, csPT, impulse_buffer, - impulse_count,true,true); + impulse_count,true,true,false,group_strategy); + std::cout << "nm_PT_continuous_collisions : " << csPT.size() << std::endl; } } @@ -243,7 +251,8 @@ struct DetangleCCDCollisionWithBoundary : INode { do_bvh_refit, csEE, impulse_buffer, - impulse_count,true,true); + impulse_count,true,true,false,group_strategy); + std::cout << "nm_EE_continuous_collisions : " << csPT.size() << std::endl; } } @@ -317,6 +326,8 @@ ZENDEFNODE(DetangleCCDCollisionWithBoundary, {{{"zsparticles"}, {"int","substep_id","0"}, {"int","nm_substeps","1"}, {"float","boundary_velocity_scale","1"}, + {"bool","among_same_group","1"}, + {"bool","among_different_groups","1"} }, {{"zsparticles"}}, {}, diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index 
a6372a0296..250c6ad71a 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -66,6 +66,7 @@ virtual void apply() override { using vec3 = zs::vec; using vec4 = zs::vec; using vec9 = zs::vec; + using mat3 = zs::vec; using vec2i = zs::vec; using vec3i = zs::vec; using vec4i = zs::vec; @@ -110,6 +111,143 @@ virtual void apply() override { auto do_constraint_topological_coloring = get_input2("do_constraint_topological_coloring"); + + if(type == "shape_matching") { + constraint->setMeta(CONSTRAINT_KEY,category_c::shape_matching_constraint); + auto radii = get_input2("thickness"); + auto shape_matching_group_name = get_input2("group_name"); + + zs::bht shape_matching_set{verts.get_allocator(),verts.size()}; + shape_matching_set.reset(cudaPol,true); + + int nm_shapes = 1; + + auto has_group = verts.hasProperty(shape_matching_group_name); + int shape_group_offset = -1; + + constexpr auto exec_tag = wrapv{}; + + if(has_group) { + shape_group_offset = verts.getPropertyOffset(shape_matching_group_name); + + zs::Vector maxGroupID{verts.get_allocator(),1}; + maxGroupID.setVal(0); + + cudaPol(zs::range(verts.size()),[ + exec_tag = exec_tag, + verts = proxy({},verts), + maxGroupID = proxy(maxGroupID), + shape_group_offset = shape_group_offset] ZS_LAMBDA(int vi) mutable { + auto groupID = (int)verts(shape_group_offset,vi); + atomic_max(exec_tag,&maxGroupID[0],groupID); + }); + + auto maxGroupID_val = maxGroupID.getVal(0); + nm_shapes = maxGroupID_val + 1; + } + + std::cout << "shape_matching::nm_shapes : " << nm_shapes << std::endl; + + std::vector shape_matching_rest_cm((size_t)nm_shapes,vec3::zeros()); + std::vector shape_matching_weight_sum((size_t)nm_shapes,0.f); + + zs::Vector nm_vertices_every_shape{verts.get_allocator(),(size_t)nm_shapes}; + cudaPol(zs::range(nm_vertices_every_shape),[] ZS_LAMBDA(auto& cnt) mutable {cnt = 0;}); + + eles.resize(verts.size()); + eles.append_channels(cudaPol,{{"inds",1}}); + 
TILEVEC_OPS::fill(cudaPol,eles,"inds",zs::reinterpret_bits((int)-1)); + + zs::Vector shape_matching_shape_offsets{verts.get_allocator(),(size_t)(nm_shapes + 1)}; + cudaPol(zs::range(shape_matching_shape_offsets),[] ZS_LAMBDA(auto& offset) mutable {offset = 0;}); + + // find the number of groups + for(int shape_id = 0;shape_id < nm_shapes;++shape_id) { + cudaPol(zs::range(verts.size()),[ + verts = proxy({},verts), + eles = proxy({},eles), + shape_matching_set = proxy(shape_matching_set), + shape_id = shape_id, + has_group = has_group, + nm_vertices_every_shape = proxy(nm_vertices_every_shape), + shape_group_offset = shape_group_offset] ZS_LAMBDA(int vi) mutable { + if(has_group) { + auto groupID = (int)verts(shape_group_offset,vi); + if(groupID == shape_id) { + auto ei = shape_matching_set.insert(vi); + atomic_add(exec_tag,&nm_vertices_every_shape[shape_id],1); + eles("inds",ei) = zs::reinterpret_bits(vi); + } + } else { + auto ei = shape_matching_set.insert(vi); + atomic_add(exec_tag,&nm_vertices_every_shape[shape_id],1); + eles("inds",ei) = zs::reinterpret_bits(vi); + } + }); + } + + exclusive_scan(cudaPol,std::begin(nm_vertices_every_shape),std::end(nm_vertices_every_shape),std::begin(shape_matching_shape_offsets)); + shape_matching_shape_offsets.setVal(shape_matching_set.size(),nm_shapes); + + for(int shape_id = 0;shape_id < nm_shapes;++shape_id) { + zs::Vector restCm{verts.get_allocator(),1}; + restCm.setVal(vec3::zeros()); + zs::Vector wsum{verts.get_allocator(),1}; + wsum.setVal(static_cast(0)); + + // std::cout << "shapeMatching::compute barycentric point" << std::endl; + + auto shape_size = nm_vertices_every_shape.getVal(shape_id); + auto offset = shape_matching_shape_offsets.getVal(shape_id); + + // std::cout << "shape[" << shape_id << "] : " << shape_size << "\t" << offset << std::endl; + + cudaPol(zs::range(shape_size),[ + offset = shape_matching_shape_offsets.getVal(shape_id), + exec_tag = exec_tag, + eles = proxy({},eles), + xtagOffset = 
verts.getPropertyOffset("x"), + minvTagOffset = verts.getPropertyOffset("minv"), + verts = proxy({},verts), + restCm = proxy(restCm), + wsum = proxy(wsum)] ZS_LAMBDA(int vi_within_shape) mutable { + auto vi = zs::reinterpret_bits(eles("inds",vi_within_shape + offset)); + + auto xi = verts.pack(dim_c<3>,xtagOffset,vi); + auto minvi = verts(minvTagOffset,vi); + auto wi = static_cast(1.0) / (minvi + static_cast(1e-6)); + + auto xw = xi * wi; + + for(int d = 0;d != 3;++d) { + atomic_add(exec_tag,&restCm[0][d],xw[d]); + } + atomic_add(exec_tag,&wsum[0],wi); + }); + + auto rCm = restCm.getVal(0); + auto ws = wsum.getVal(0); + rCm = rCm / ws; + + // std::cout << "rCm[" << shape_id << "] : " << rCm[0] << "\t" << rCm[1] << "\t" << rCm[2] << std::endl; + + // shape_matching_rest_cm.setVal(rCm,shape_id); + shape_matching_rest_cm[shape_id] = rCm; + shape_matching_weight_sum[shape_id] = ws; + // shape_matching_weight_sum.setVal(ws,shape_id); + + } + + zs::Vector dAsBuffer{verts.get_allocator(),eles.size()}; + + constraint->setMeta(SHAPE_MATCHING_REST_CM,shape_matching_rest_cm); + constraint->setMeta(SHAPE_MATCHING_WEIGHT_SUM,shape_matching_weight_sum); + constraint->setMeta(SHAPE_MATCHING_SHAPE_OFFSET,shape_matching_shape_offsets); + constraint->setMeta(SHAPE_MATCHING_MATRIX_BUFFER,dAsBuffer); + + // std::cout << "shapeMatching::finish set aux data" << std::endl; + } + if(type == "lra_stretch") { constraint->setMeta(CONSTRAINT_KEY,category_c::long_range_attachment); auto radii = get_input2("thickness"); @@ -192,7 +330,7 @@ virtual void apply() override { }); auto maxNmPairs = maxNmPairsBuffer.getVal(0); - std::cout << "maxNmPairs : " << maxNmPairs << std::endl; + // std::cout << "maxNmPairs : " << maxNmPairs << std::endl; zs::bht attachPairs{verts.get_allocator(),(size_t)maxNmPairs}; attachPairs.reset(cudaPol,true); @@ -228,7 +366,7 @@ virtual void apply() override { }); auto rest_scale = get_input2("rest_scale"); - std::cout << "number of attach pairs : " << attachPairs.size() 
<< std::endl; + // std::cout << "number of attach pairs : " << attachPairs.size() << std::endl; eles.resize(attachPairs.size()); eles.append_channels(cudaPol,{{"inds",2},{"r",1}}); cudaPol(zip(zs::range(attachPairs.size()),attachPairs._activeKeys),[ @@ -373,6 +511,13 @@ virtual void apply() override { eles.append_channels(cudaPol,{{"inds",4},{"bary",4},{"type",1}}); eles.resize(MAX_IMMINENT_COLLISION_PAIRS); + auto among_same_group = get_input2("among_same_group"); + auto among_different_groups = get_input2("among_different_groups"); + + int group_strategy = 0; + group_strategy |= (among_same_group ? 1 : 0); + group_strategy |= (among_different_groups ? 2 : 0); + const auto &edges = (*source)[ZenoParticles::s_surfEdgeTag]; auto has_input_collider = has_input("target"); @@ -505,7 +650,8 @@ virtual void apply() override { eles, csPT, true, - true); + true, + group_strategy); // std::cout << "nm_imminent_csPT : " << csPT.size() << std::endl; @@ -521,7 +667,8 @@ virtual void apply() override { edgeBvh, eles,csEE, true, - true); + true, + group_strategy); // std::cout << "nm_imminent_csEE : " << csEE.size() << std::endl; // std::cout << "csEE + csPT = " << csPT.size() + csEE.size() << std::endl; if(!verts.hasProperty("dcd_collision_tag")) @@ -1157,8 +1304,8 @@ virtual void apply() override { point_topos[id] = pvec[0]; }); - std::cout << "binder name : " << pin_point_group_name << std::endl; - std::cout << "nm binder point : " << point_topos.size() << std::endl; + // std::cout << "binder name : " << pin_point_group_name << std::endl; + // std::cout << "nm binder point : " << point_topos.size() << std::endl; if(do_constraint_topological_coloring) { topological_coloring(cudaPol,point_topos,colors,false); @@ -1416,7 +1563,9 @@ ZENDEFNODE(MakeSurfaceConstraintTopology, {{ {"bool","do_constraint_topological_coloring","1"}, {"float","damping_coeff","0.0"}, {"bool","enable_sliding","0"}, - {"bool","use_hard_constraint","0"} + {"bool","use_hard_constraint","0"}, + 
{"bool","among_same_group","1"}, + {"bool","among_different_groups","1"} }, {{"constraint"}}, { diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index b524e57e44..efa2899add 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -21,6 +21,8 @@ #include "constraint_function_kernel/constraint_types.hpp" #include "../fem/collision_energy/evaluate_collision.hpp" +#include "zensim/math/matrix/QRSVD.hpp" + namespace zeno { @@ -728,6 +730,7 @@ struct XPBDSolveSmoothAll : INode { using vec2 = zs::vec; using vec3 = zs::vec; using vec4 = zs::vec; + using mat3 = zs::vec; using vec2i = zs::vec; using vec3i = zs::vec; using vec4i = zs::vec; @@ -760,6 +763,112 @@ struct XPBDSolveSmoothAll : INode { auto category = constraint_ptr->readMeta(CONSTRAINT_KEY,wrapt{}); const auto& cquads = constraint_ptr->getQuadraturePoints(); + if(category == category_c::shape_matching_constraint) { + auto shape_matching_rest_cm = constraint_ptr->readMeta>(SHAPE_MATCHING_REST_CM); + auto shape_matching_weight_sum = constraint_ptr->readMeta>(SHAPE_MATCHING_WEIGHT_SUM); + auto shape_matching_offsets = constraint_ptr->readMeta>(SHAPE_MATCHING_SHAPE_OFFSET); + auto nm_shapes = shape_matching_rest_cm.size(); + + auto dAs = constraint_ptr->readMeta>(SHAPE_MATCHING_MATRIX_BUFFER); + + zs::Vector cmVec{verts.get_allocator(),1}; + + // auto wsum = constraint_ptr->readMeta(SHAPE_MATCHING_WEIGHT_SUM); + // auto restCM = constraint_ptr->readMeta(SHAPE_MATCHING_REST_CM); + for(int shape_id = 0;shape_id != nm_shapes;++shape_id) { + auto shape_size = shape_matching_offsets.getVal(shape_id + 1) - shape_matching_offsets.getVal(shape_id); + if(shape_size == 0) + continue; + + auto restCM = shape_matching_rest_cm[shape_id]; + auto wsum = shape_matching_weight_sum[shape_id]; + + cmVec.setVal(vec3::zeros()); + cudaPol(zs::range(shape_size),[ + exec_tag = exec_tag, + verts = proxy({},verts), + offset = 
shape_matching_offsets.getVal(shape_id), + ptagOffset = verts.getPropertyOffset(ptag), + minvOffset = verts.getPropertyOffset("minv"), + indsOffset = cquads.getPropertyOffset("inds"), + cquads = proxy({},cquads), + cmVec = proxy(cmVec)] ZS_LAMBDA(int ci) mutable { + auto vi = zs::reinterpret_bits(cquads(indsOffset,ci + offset)); + auto pi = verts.pack(dim_c<3>,ptagOffset,vi); + auto wi = static_cast(1.0) / (static_cast(1e-6) + verts(minvOffset,vi)); + auto pw = pi * wi; + for(int d = 0;d != 3;++d) + atomic_add(exec_tag,&cmVec[0][d],pw[d]); + }); + + auto cm = cmVec.getVal(0) / wsum; + // dAs.setVal(mat3::zeros()); + + cudaPol(zs::range(shape_size),[ + offset = shape_matching_offsets.getVal(shape_id), + cquads = proxy({},cquads), + verts = proxy({},verts), + XtagOffset = verts.getPropertyOffset("X"), + minvOffset = verts.getPropertyOffset("minv"), + ptagOffset = verts.getPropertyOffset(ptag), + restCM = restCM, + cm = cm, + dAs = proxy(dAs)] ZS_LAMBDA(int ci) mutable { + auto vi = zs::reinterpret_bits(cquads("inds",ci + offset)); + auto q = verts.pack(dim_c<3>,XtagOffset,vi) - restCM; + auto p = verts.pack(dim_c<3>,ptagOffset,vi) - cm; + auto w = static_cast(1.0) / (verts(minvOffset,vi) + static_cast(1e-6)); + p *= w; + dAs[ci + offset] = dyadic_prod(p,q); + }); + + zs::Vector A{verts.get_allocator(),1}; + A.setVal(mat3::zeros()); + + cudaPol(zs::range(shape_size * 9),[ + exec_tag = exec_tag, + offset = shape_matching_offsets.getVal(shape_id), + A = proxy(A), + dAs = proxy(dAs)] ZS_LAMBDA(int dof) mutable { + auto dAid = dof / 9; + auto Aoffset = dof % 9; + auto r = Aoffset / 3; + auto c = Aoffset % 3; + const auto& dA = dAs[dAid + offset]; + atomic_add(exec_tag,&A[0][r][c],dA[r][c]); + }); + + auto Am = A.getVal(0); + Am /= wsum; + + auto [R,S] = math::polar_decomposition(Am); + cudaPol(zs::range(shape_size),[ + offset = shape_matching_offsets.getVal(shape_id), + cquads = proxy({},cquads), + stiffnessOffset = cquads.getPropertyOffset("relative_stiffness"), + R = R, 
+ cm = cm, + restCM = restCM, + wOffset = verts.getPropertyOffset("w"), + dptagOffset = verts.getPropertyOffset(dptag), + verts = proxy({},verts), + XtagOffset = verts.getPropertyOffset("X"), + ptagOffset = verts.getPropertyOffset(ptag)] ZS_LAMBDA(int ci) mutable { + auto vi = zs::reinterpret_bits(cquads("inds",ci + offset)); + auto Xi = verts.pack(dim_c<3>,XtagOffset,vi); + auto w = cquads(stiffnessOffset,ci + offset); + auto goal = cm + R * (Xi - restCM); + + auto dp = goal - verts.pack(dim_c<3>,ptagOffset,vi); + + verts.tuple(dim_c<3>,dptagOffset,vi) = verts.pack(dim_c<3>,dptagOffset,vi) + dp * w; + verts(wOffset,vi) += w; + }); + + } + + } + if(category == category_c::edge_length_constraint || category == category_c::dihedral_bending_constraint || category == category_c::long_range_attachment) { cudaPol(zs::range(cquads.size()),[ cquads = proxy({},cquads), diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp index b503178da5..d2e4cb24fd 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp @@ -18,9 +18,17 @@ constexpr auto TARGET_CELL_BUFFER = "TARGET_CELL_BUFFER"; constexpr auto PBD_USE_HARD_CONSTRAINT = "PBD_USE_HARD_CONSTRAINT"; +constexpr auto SHAPE_MATCHING_REST_CM = "SHAPE_MATCHING_REST_CM"; +constexpr auto SHAPE_MATCHING_WEIGHT_SUM = "SHAPE_MATCHING_WEIGHT_SUM"; + +constexpr auto SHAPE_MATCHING_SHAPE_OFFSET = "SHAPE_MATCHING_SHAPE_OFFSET"; + +constexpr auto SHAPE_MATCHING_MATRIX_BUFFER = "SHAPE_MATCHING_MATRIX_BUFFER"; + // constexpr auto DCD_COLLISIONS_MESH_COLLIDER = "DCD_COLLISION_MESH_COLLIDER"; enum category_c : int { + shape_matching_constraint, long_range_attachment, edge_length_constraint, isometric_bending_constraint, From 14a2d852b43da2e8986be6e5d7456188bc31c895 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Thu, 28 Mar 2024 
17:53:35 +0800 Subject: [PATCH 003/244] xpbd stride --- projects/CUDA/CMakeLists.txt | 7 ++++--- projects/CuLagrange/pbd/CollisionSolver.cu | 7 ++++--- projects/CuLagrange/pbd/ConstraintsSolver.cu | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 7d7e9f5e6f..62630fc909 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -45,9 +45,10 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) set(ENV{ZENO_BIN_DIR} "${ZENO_BIN_DIR}") add_custom_command( TARGET copy_py - POST_BUILD - COMMAND ${CMAKE_COMMAND} -E env "ZENO_BIN_DIR=${ZENO_BIN_DIR}" - ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit + POST_BUILD + COMMAND + ${CMAKE_COMMAND} -E env ZENO_BIN_DIR=${ZENO_BIN_DIR} + ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit --verbose --user COMMENT "installing pyzpc into the specified conda environment: ${ZS_OVERWRITE_PYTHON_EXECUTABLE}" ) if (WIN32) diff --git a/projects/CuLagrange/pbd/CollisionSolver.cu b/projects/CuLagrange/pbd/CollisionSolver.cu index 0953d76d97..7d6a1a6757 100644 --- a/projects/CuLagrange/pbd/CollisionSolver.cu +++ b/projects/CuLagrange/pbd/CollisionSolver.cu @@ -122,7 +122,8 @@ struct DetangleCCDCollisionWithBoundary : INode { auto cur_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - w) + kverts.pack(dim_c<3>,"x",kvi) * w; auto pre_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - pw) + kverts.pack(dim_c<3>,"x",kvi) * pw; vtemp("collision_group",kvi + voffset) = kverts(collision_group_name,kvi); - vtemp.tuple(dim_c<3>,"X",kvi + voffset) = kverts.pack(dim_c<3>,"X",kvi); + // for alignment, we directly assign the current boundary as reference shape + vtemp.tuple(dim_c<3>,"X",kvi + voffset) = kverts.pack(dim_c<3>,"x",kvi); vtemp.tuple(dim_c<3>,"x",kvi + voffset) = pre_kvert; vtemp.tuple(dim_c<3>,"v",kvi + voffset) = (cur_kvert - pre_kvert) * boundary_velocity_scale; 
vtemp("minv",kvi + voffset) = (T)0; @@ -216,7 +217,7 @@ struct DetangleCCDCollisionWithBoundary : INode { csPT, impulse_buffer, impulse_count,true,true,false,group_strategy); - std::cout << "nm_PT_continuous_collisions : " << csPT.size() << std::endl; + // std::cout << "nm_PT_continuous_collisions : " << csPT.size() << std::endl; } } @@ -252,7 +253,7 @@ struct DetangleCCDCollisionWithBoundary : INode { csEE, impulse_buffer, impulse_count,true,true,false,group_strategy); - std::cout << "nm_EE_continuous_collisions : " << csPT.size() << std::endl; + // std::cout << "nm_EE_continuous_collisions : " << csPT.size() << std::endl; } } diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index efa2899add..32b39c9574 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -757,12 +757,26 @@ struct XPBDSolveSmoothAll : INode { } TILEVEC_OPS::fill(cudaPol,verts,"w",0); + + auto iter_id = get_input2("iter_id"); for(auto& constraint_ptr : constraint_ptr_list) { auto category = constraint_ptr->readMeta(CONSTRAINT_KEY,wrapt{}); const auto& cquads = constraint_ptr->getQuadraturePoints(); + + if(constraint_ptr->userData().has("stride")) { + auto stride = objectToLiterial(constraint_ptr->userData().get("stride")); + // std::cout << "find constraint with stride = " << stride << std::endl; + if(iter_id % stride != 0) { + // std::cout << "skip constraint solving due to stride-skipping" << std::endl; + continue; + } + } else { + // std::cout << "the constraint has no stride information" << std::endl; + } + if(category == category_c::shape_matching_constraint) { auto shape_matching_rest_cm = constraint_ptr->readMeta>(SHAPE_MATCHING_REST_CM); auto shape_matching_weight_sum = constraint_ptr->readMeta>(SHAPE_MATCHING_WEIGHT_SUM); @@ -1392,6 +1406,7 @@ ZENDEFNODE(XPBDSolveSmoothAll, {{{"zsparticles"}, {"float","dt","1.0"}, {"int","nm_substeps","1"}, {"int","substep_id","0"}, + 
{"int","iter_id","0"} }, {{"zsparticles"},{"constraints"}}, {}, From a93e3c9b1cd06423701dfb35514708fa4a4a59ce Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Mon, 8 Apr 2024 16:52:46 +0800 Subject: [PATCH 004/244] fiber stretch --- projects/CuLagrange/fem/FleshDynamicStepping.cu | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/projects/CuLagrange/fem/FleshDynamicStepping.cu b/projects/CuLagrange/fem/FleshDynamicStepping.cu index ceec5f142c..5becde4e24 100644 --- a/projects/CuLagrange/fem/FleshDynamicStepping.cu +++ b/projects/CuLagrange/fem/FleshDynamicStepping.cu @@ -1489,10 +1489,11 @@ struct FleshDynamicStepping : INode { // auto max_collision_pairs = tris.size() / 10; dtiles_t etemp(eles.get_allocator(), { // {"H", 12 * 12}, - {"ActInv",3*3},\ + {"ActInv",3*3}, {"dfiber",3}, // {"muscle_ID",1}, - {"is_inverted",1} + {"is_inverted",1}, + {"fiberStretch",1} }, eles.size() ); @@ -1593,6 +1594,12 @@ struct FleshDynamicStepping : INode { if(!eles.hasProperty("Act")) eles.append_channels(cudaPol,{{"Act",1}}); + + if(!eles.hasProperty("fiberStretch")) + eles.append_channels(cudaPol,{{"fiberStretch",1}}); + + TILEVEC_OPS::fill(cudaPol,eles,"fiberStretch",1.f); + if(!eles.hasProperty("fiber")) fmt::print(fg(fmt::color::red),"the quadrature has no \"fiber\"\n"); if(!verts.hasProperty(muscle_id_tag)) @@ -1665,13 +1672,17 @@ struct FleshDynamicStepping : INode { verts.template pack<3>("x",inds[3]), eles.template pack<3,3>("IB",ei)); auto dfiber = F * fiber; + auto dfiberN = dfiber.norm(); + auto fiberN = fiber.norm(); dfiber = dfiber / dfiber.norm(); etemp.tuple(dim_c<3>,"dfiber",ei) = dfiber; + eles("fiberStretch",ei) = dfiberN / fiberN; } }else{ fiber = zs::vec(1.0,0.0,0.0); act = vec3{1,1,1}; eles("Act",ei) = (T)0.0; + } if(fabs(fiber.norm() - 1.0) > 0.1) { printf("invalid fiber[%d] detected : %f %f %f\n",(int)ei, From 851056578356e5256729292d99882b21c474e336 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Mon, 8 Apr 2024 16:53:15 
+0800 Subject: [PATCH 005/244] barycentric computing floating point error fix --- projects/CuLagrange/geometry/kernel/geo_math.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/CuLagrange/geometry/kernel/geo_math.hpp b/projects/CuLagrange/geometry/kernel/geo_math.hpp index 9aebc609f6..35196bd248 100644 --- a/projects/CuLagrange/geometry/kernel/geo_math.hpp +++ b/projects/CuLagrange/geometry/kernel/geo_math.hpp @@ -570,8 +570,8 @@ constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1 auto b0 = x13.dot(x43); auto b1 = x23.dot(x43); auto detA = A00 * A11 - A01 * A01; - bary[0] = ( A11 * b0 - A01 * b1) / detA; - bary[1] = (-A01 * b0 + A00 * b1) / detA; + bary[0] = ( A11 * b0 - A01 * b1) / (detA + eps); + bary[1] = (-A01 * b0 + A00 * b1) / (detA + eps); bary[2] = 1 - bary[0] - bary[1]; } @@ -587,8 +587,8 @@ constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1 auto b0 = x13.dot(x43); auto b1 = x23.dot(x43); detA = A00 * A11 - A01 * A01; - bary[0] = ( A11 * b0 - A01 * b1) / detA; - bary[1] = (-A01 * b0 + A00 * b1) / detA; + bary[0] = ( A11 * b0 - A01 * b1) / (detA + eps); + bary[1] = (-A01 * b0 + A00 * b1) / (detA + eps); bary[2] = 1 - bary[0] - bary[1]; } @@ -604,8 +604,8 @@ constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1 auto b1 = -x43.dot(x31); auto detA = A00 * A11 - A01 * A01; - bary[0] = ( A11 * b0 - A01 * b1) / detA; - bary[1] = (-A01 * b0 + A00 * b1) / detA; + bary[0] = ( A11 * b0 - A01 * b1) / (detA + eps); + bary[1] = (-A01 * b0 + A00 * b1) / (detA + eps); // }else { // the two edge is almost parallel // } From 1a36fe1493729571585f86e286d724c2fd915130 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Mon, 8 Apr 2024 16:53:36 +0800 Subject: [PATCH 006/244] collision solve output debug inform --- projects/CuLagrange/pbd/ConstraintsSolver.cu | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index 32b39c9574..850010a597 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -490,9 +490,14 @@ struct XPBDSolveSmooth : INode { continue; if(isnan(imps[i].norm())) { - printf("nan imps detected : %f %f %f %f %f %f %f\n", + printf("nan imps detected : %f %f %f %f %f %f %f\nvs : %d %d %d %d\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", (float)imps[i][0],(float)imps[i][1],(float)imps[i][2], - (float)bary[0],(float)bary[1],(float)bary[2],(float)bary[3]); + (float)bary[0],(float)bary[1],(float)bary[2],(float)bary[3], + inds[0],inds[1],inds[2],inds[3], + (float)ps[0][0],(float)ps[0][1],(float)ps[0][2], + (float)ps[1][0],(float)ps[1][1],(float)ps[1][2], + (float)ps[2][0],(float)ps[2][1],(float)ps[2][2], + (float)ps[3][0],(float)ps[3][1],(float)ps[3][2]); return; } atomic_add(exec_tag,&dp_count[inds[i]],(int)1); From 82c8b120dce66c40c2289291084eb9ed3ea06008 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 16 Apr 2024 18:36:08 +0800 Subject: [PATCH 007/244] CreateFolder --- zeno/src/nodes/prim/SimpleGeometry.cpp | 19 +++++++++++++++++++ zeno/src/nodes/prim/UVProjectFromPlane.cpp | 6 +++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/zeno/src/nodes/prim/SimpleGeometry.cpp b/zeno/src/nodes/prim/SimpleGeometry.cpp index 9a2a440159..229eca1bc5 100644 --- a/zeno/src/nodes/prim/SimpleGeometry.cpp +++ b/zeno/src/nodes/prim/SimpleGeometry.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #define ROTATE_COMPUTE \ auto gp = glm::vec3(p[0], p[1], p[2]); \ @@ -1412,5 +1413,23 @@ ZENDEFNODE(CreateCylinder, { {}, {"create"}, }); +struct CreateFolder : zeno::INode { + virtual void apply() override { + namespace fs = std::filesystem; + auto folderPath = fs::u8path(get_input2("folderPath")); + if (!fs::exists(folderPath)) { + fs::create_directories(folderPath); + } + } +}; + 
+ZENDEFNODE(CreateFolder, { + { + {"directory", "folderPath"} + }, + {}, + {}, + {"create"}, +}); } } diff --git a/zeno/src/nodes/prim/UVProjectFromPlane.cpp b/zeno/src/nodes/prim/UVProjectFromPlane.cpp index 4515ee1299..866d4de8b3 100644 --- a/zeno/src/nodes/prim/UVProjectFromPlane.cpp +++ b/zeno/src/nodes/prim/UVProjectFromPlane.cpp @@ -730,6 +730,10 @@ struct WriteImageFile_v2 : INode { stbi_flip_vertically_on_write(1); stbi_write_png(path.c_str(), w, h, n, data.data(),0); } + else if(type == "hdr"){ + stbi_flip_vertically_on_write(1); + stbi_write_hdr(path.c_str(), w, h, 3, (float*)image->verts.data()); + } else if(type == "exr"){ std::vector data2(w * h * n); for (int i = 0; i < w * h; i++) { @@ -768,7 +772,7 @@ ZENDEFNODE(WriteImageFile_v2, { { {"image"}, {"writepath", "path"}, - {"enum png jpg exr pfm", "type", "png"}, + {"enum png jpg exr pfm hdr", "type", "png"}, {"mask"}, {"bool", "linear_to_srgb_when_save", "0"}, }, From 19816e635f34f4794e1482dfd5a3c221f29a7a34 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 17 Apr 2024 14:22:36 +0800 Subject: [PATCH 008/244] RemoveFolder --- zeno/src/nodes/prim/SimpleGeometry.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/zeno/src/nodes/prim/SimpleGeometry.cpp b/zeno/src/nodes/prim/SimpleGeometry.cpp index 229eca1bc5..91ce527e58 100644 --- a/zeno/src/nodes/prim/SimpleGeometry.cpp +++ b/zeno/src/nodes/prim/SimpleGeometry.cpp @@ -1431,5 +1431,23 @@ ZENDEFNODE(CreateFolder, { {}, {"create"}, }); + +struct RemoveFolder : zeno::INode { + virtual void apply() override { + namespace fs = std::filesystem; + auto folderPath = fs::u8path(get_input2("folderPath")); + std::error_code errorCode; + fs::remove_all(folderPath, errorCode); + } +}; + +ZENDEFNODE(RemoveFolder, { + { + {"directory", "folderPath"} + }, + {}, + {}, + {"create"}, +}); } } From 74a4dbc325ccfd82ddefdf2975ff8b4dc1ba72b2 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 17 Apr 2024 14:29:35 
+0800 Subject: [PATCH 009/244] clean --- zeno/src/nodes/prim/SimpleGeometry.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/zeno/src/nodes/prim/SimpleGeometry.cpp b/zeno/src/nodes/prim/SimpleGeometry.cpp index 91ce527e58..224a927490 100644 --- a/zeno/src/nodes/prim/SimpleGeometry.cpp +++ b/zeno/src/nodes/prim/SimpleGeometry.cpp @@ -1436,14 +1436,20 @@ struct RemoveFolder : zeno::INode { virtual void apply() override { namespace fs = std::filesystem; auto folderPath = fs::u8path(get_input2("folderPath")); - std::error_code errorCode; - fs::remove_all(folderPath, errorCode); + if (fs::exists(folderPath)) { + std::error_code errorCode; + fs::remove_all(folderPath, errorCode); + if (get_input2("clean")) { + fs::create_directories(folderPath); + } + } } }; ZENDEFNODE(RemoveFolder, { { - {"directory", "folderPath"} + {"directory", "folderPath"}, + {"bool", "clean", "false"}, }, {}, {}, From 500aded8cdd041d73515e6576b87f4ff6fe51e26 Mon Sep 17 00:00:00 2001 From: teachmain Date: Fri, 19 Apr 2024 15:50:57 +0800 Subject: [PATCH 010/244] update --- zenovis/xinxinoptix/DisneyBSDF.h | 54 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/zenovis/xinxinoptix/DisneyBSDF.h b/zenovis/xinxinoptix/DisneyBSDF.h index 12766a4a99..e0803d465c 100644 --- a/zenovis/xinxinoptix/DisneyBSDF.h +++ b/zenovis/xinxinoptix/DisneyBSDF.h @@ -407,17 +407,18 @@ namespace DisneyBSDF{ f = f + h; return f * abs(wi.z); } - if(diffPr > 0.0 && reflect) - { + if(reflect){ + + if(diffPr > 0.0f){ vec3 d = BRDFBasics::EvalDisneyDiffuse(thin? 
mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen, Csheen, wo, wi, wm, tmpPdf) * dielectricWt; dterm = dterm + d; f = f + d; fPdf += tmpPdf * diffPr ; - } - if(dielectricPr>0.0 && reflect) - { + } + + if(dielectricPr > 0.0f){ float F = BRDFBasics::SchlickDielectic(abs(dot(wm, wo)), mat.ior); float ax, ay; BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); @@ -427,9 +428,9 @@ namespace DisneyBSDF{ sterm = sterm + s; f = f + s; fPdf += tmpPdf * dielectricPr; - } - if(metalPr>0.0 && reflect) - { + } + + if(metalPr>0.0f){ vec3 F = mix(mix(mat.basecolor, mat.diffractColor, mat.diffraction), vec3(1.0), BRDFBasics::SchlickWeight(HoV)); float ax, ay; BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); @@ -439,12 +440,28 @@ namespace DisneyBSDF{ sterm = sterm + s; f = f + s; fPdf += tmpPdf * metalPr; + } + + if(clearCtPr>0.0f){ + vec3 wm = normalize(wi + wo); + float ax, ay; + BRDFBasics::CalculateAnisotropicParams(mat.clearcoatRoughness,0,ax,ay); + //ior related clearCt + float F = BRDFBasics::SchlickDielectic(abs(dot(wm, wo)), mat.clearcoatIOR); + vec3 s = mix(vec3(0.04f), vec3(1.0f), F) * + BRDFBasics::EvalClearcoat(mat.clearcoatRoughness, wo, wi, + wm, tmpPdf) * 0.25 * mat.clearcoat; + sterm = sterm + s; + f = f + s; + fPdf += tmpPdf * clearCtPr; + } + } - if(glassPr>0.0) + + if(glassPr>0.0f) { bool entering = wo.z>0?true:false; - //float F = BRDFBasics::DielectricFresnel(, eta); float ax, ay; BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); if (reflect) { @@ -490,22 +507,7 @@ namespace DisneyBSDF{ } } - if(clearCtPr>0.0 && reflect) - { - vec3 wm = normalize(wi + wo); - float ax, ay; - BRDFBasics::CalculateAnisotropicParams(mat.clearcoatRoughness,0,ax,ay); - //ior related clearCt - float F = BRDFBasics::SchlickDielectic(abs(dot(wm, wo)), mat.clearcoatIOR); - vec3 s = mix(vec3(0.04f), vec3(1.0f), F) * - 
BRDFBasics::EvalClearcoat(mat.clearcoatRoughness, wo, wi, - wm, tmpPdf) * 0.25 * mat.clearcoat; - sterm = sterm + s; - f = f + s; - fPdf += tmpPdf * clearCtPr; - } - if((sssPr>0.0&&reflectance) || (sssPr>0.0 && dot(wo, N2)<0.0) || (sssPr>0.0 && (thin))) - { + if(sssPr > 0.0f && (reflectance || dot(wo,N2) < 0.0f || thin)){ bool trans = (dot(wi, N2) * dot(wo, N2)<0) && (wi.z * wo.z<0); float FL = BRDFBasics::SchlickWeight(abs(wi.z)); float FV = BRDFBasics::SchlickWeight(abs(wo.z)); From 969c8c1541f503a498925991daef55fa4b3ea61c Mon Sep 17 00:00:00 2001 From: teachmain Date: Fri, 19 Apr 2024 18:34:27 +0800 Subject: [PATCH 011/244] fix inner reflection problem --- zenovis/xinxinoptix/DeflMatShader.cu | 11 +---------- zenovis/xinxinoptix/DisneyBRDF.h | 2 +- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index b79153f4f0..9a0b7940f1 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -921,16 +921,7 @@ extern "C" __global__ void __closesthit__radiance() prd->direction = normalize(wi); - if(mats.thin<0.5f && mats.doubleSide<0.5f){ - //auto p_prim = vec3(prd->origin) + optixGetRayTmax() * vec3(prd->direction); - //float3 p = p_prim; - prd->origin = next_ray_is_going_inside? backPos : frontPos; - } - else { - //auto p_prim = vec3(prd->origin) + optixGetRayTmax() * vec3(prd->direction); - //float3 p = p_prim; - prd->origin = dot(prd->direction, prd->geometryNormal) < 0? backPos : frontPos; - } + prd->origin = dot(prd->direction, prd->geometryNormal) < 0? 
backPos : frontPos; if (prd->medium != DisneyBSDF::vacuum) { prd->_mask_ = (uint8_t)(EverythingMask ^ VolumeMatMask); diff --git a/zenovis/xinxinoptix/DisneyBRDF.h b/zenovis/xinxinoptix/DisneyBRDF.h index 14581596b2..709dc54f15 100644 --- a/zenovis/xinxinoptix/DisneyBRDF.h +++ b/zenovis/xinxinoptix/DisneyBRDF.h @@ -318,7 +318,7 @@ float DielectricFresnel(float cosThetaI, float eta) float eta2 = eta * eta; float cos2t = 1.0f - sin2 / eta2; - if(cos2t < 0) return 1.0f; + if(cos2t < 0.0f) return 1.0f; float t0 = sqrt(cos2t); float t1 = eta * t0; From d06bd25856c8f13494f7c972df8528cc5427113c Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Mon, 22 Apr 2024 20:50:08 +0800 Subject: [PATCH 012/244] rigid transform matcher --- projects/CuLagrange/CMakeLists.txt | 3 + projects/CuLagrange/geometry/ShapeMatching.cu | 175 ++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 projects/CuLagrange/geometry/ShapeMatching.cu diff --git a/projects/CuLagrange/CMakeLists.txt b/projects/CuLagrange/CMakeLists.txt index cc82d8067f..1ece87b638 100644 --- a/projects/CuLagrange/CMakeLists.txt +++ b/projects/CuLagrange/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(zeno PRIVATE pbd/ConstraintsBuilder.cu pbd/ConstraintsSolver.cu pbd/CollisionSolver.cu + pbd/ConstraintsUpdator.cu ) # fem-cloth @@ -125,4 +126,6 @@ target_sources(zeno PRIVATE geometry/Intersections.cu # CHECK THIS geometry/Detangle.cu geometry/HalfEdgeStructures.cu + + geometry/ShapeMatching.cu ) diff --git a/projects/CuLagrange/geometry/ShapeMatching.cu b/projects/CuLagrange/geometry/ShapeMatching.cu new file mode 100644 index 0000000000..90e1216f01 --- /dev/null +++ b/projects/CuLagrange/geometry/ShapeMatching.cu @@ -0,0 +1,175 @@ +#include "Structures.hpp" +#include "zensim/Logger.hpp" +// #include "zensim/cuda/execution/ExecutionPolicy.cuh" +#include "zensim/omp/execution/ExecutionPolicy.hpp" +#include "zensim/io/MeshIO.hpp" +#include "zensim/math/bit/Bits.h" +#include "zensim/types/Property.h" +#include 
+#include +#include +#include +#include + +// #include "../geometry/kernel/tiled_vector_ops.hpp" +// #include "../geometry/kernel/topology.hpp" +// #include "../geometry/kernel/geo_math.hpp" + +#include "zensim/math/Rotation.hpp" +#include "zensim/math/matrix/QRSVD.hpp" + + +namespace zeno { + +struct MatchTransformation : zeno::INode { + + virtual void apply() override { + using namespace zs; + using vec3 = zs::vec; + using vec4 = zs::vec; + using mat3 = zs::vec; + + constexpr auto space = execspace_e::openmp; + auto ompPol = omp_exec(); + constexpr auto exec_tag = wrapv{}; + + auto rprim = get_input("refObj"); + auto tprim = get_input("targetObj"); + + auto shape_size = rprim->verts.size(); + + // compute the center of mass of rprim + auto rcm = vec3::zeros(); + auto tcm = vec3::zeros(); + // cmVec[0] = zs::vec::zeros(); + + const auto& rverts = rprim->verts; + const auto& tverts = tprim->verts; + + ompPol(zs::range(shape_size),[ + exec_tag = exec_tag, + &rverts,&tverts,&rcm,&tcm] (int vi) mutable { + for(int d = 0;d != 3;++d) { + atomic_add(exec_tag,&rcm[d],rverts[vi][d]); + atomic_add(exec_tag,&tcm[d],tverts[vi][d]); + } + }); + + rcm /= shape_size; + tcm /= shape_size; + + std::vector dAs(shape_size,mat3::zeros()); + ompPol(zs::range(shape_size),[ + &dAs,rcm,tcm,&rverts,&tverts] (int vi) mutable { + auto q = vec3::from_array(rverts[vi]) - rcm; + auto p = vec3::from_array(tverts[vi]) - tcm; + dAs[vi] = dyadic_prod(p,q); + }); + + auto A = mat3::zeros(); + ompPol(zs::range(shape_size * 9),[ + exec_tag = exec_tag, + &A, + &dAs] (int dof) mutable { + auto dAid = dof / 9; + auto Aoffset = dof % 9; + auto r = Aoffset / 3; + auto c = Aoffset % 3; + const auto& dA = dAs[dAid]; + atomic_add(exec_tag,&A[r][c],dA[r][c]); + }); + A /= shape_size; + + auto [R,S] = math::polar_decomposition(A); + + // R = R.transpose(); + + printf("R:\n%f\b%f\b%f\n%f\b%f\b%f\n%f\b%f\b%f\n", + (float)R(0,0),(float)R(0,1),(float)R(0,2), + (float)R(1,0),(float)R(1,1),(float)R(1,2), + 
(float)R(2,0),(float)R(2,1),(float)R(2,2)); + + auto b = tcm - R * rcm; + + auto q = vec4::zeros(); + auto m00 = R(0,0); + auto m01 = R(0,1); + auto m02 = R(0,2); + auto m10 = R(1,0); + auto m11 = R(1,1); + auto m12 = R(1,2); + auto m20 = R(2,0); + auto m21 = R(2,1); + auto m22 = R(2,2); + // float t{0}; + + // if (m22 < 0) { + // if (m00 > m11) { + // t = 1 + m00 -m11 -m22; + // q = vec4( t, m01+m10, m20+m02, m12-m21 ); + // } + // else { + // t = 1 -m00 + m11 -m22; + // q = vec4( m01+m10, t, m12+m21, m20-m02 ); + // } + // } + // else { + // if (m00 < -m11) { + // t = 1 -m00 -m11 + m22; + // q = vec4( m20+m02, m12+m21, t, m01-m10 ); + // } + // else { + // t = 1 + m00 + m11 + m22; + // q = vec4( m12-m21, m20-m02, m01-m10, t ); + // } + // } + // q *= 0.5 / zs::sqrt(t); + + auto trace = m00 + m11 + m22; + if (trace > 0.0f) + { + auto k = 0.5f / zs::sqrt(1.0f + trace); + q = vec4( k * (m12 - m21), k * (m20 - m02), k * (m01 - m10), 0.25f / k ); + } + else if ((m00 > m11) && (m00 > m22)) + { + auto k = 0.5f / zs::sqrt(1.0f + m00 - m11 - m22); + q = vec4( 0.25f / k, k * (m10 + m01), k * (m20 + m02), k * (m12 - m21) ); + } + else if (m11 > m22) + { + auto k = 0.5f / zs::sqrt(1.0f + m11 - m00 - m22); + q = vec4( k * (m10 + m01), 0.25f / k, k * (m21 + m12), k * (m20 - m02) ); + } + else + { + auto k = 0.5f / zs::sqrt(1.0f + m22 - m00 - m11); + q = vec4( k * (m20 + m02), k * (m21 + m12), 0.25f / k, k * (m01 - m10) ); + } + + // due to the column-major setting, need a quaternion negation here + q[0] = -q[0]; + q[1] = -q[1]; + q[2] = -q[2]; + + auto retq = std::make_shared(); + retq->set(zeno::vec4f(q[0],q[1],q[2],q[3])); + + auto retb = std::make_shared(); + retb->set(zeno::vec3f(b[0],b[1],b[2])); + + + set_output("quat",std::move(retq)); + set_output("trans",std::move(retb)); + } + +}; + +ZENDEFNODE(MatchTransformation,{ + {{"refObj"},{"targetObj"}}, + {"quat","trans"}, + {}, + {"Geometry"}, +}); + +}; \ No newline at end of file From 
44f334185b989253d83e93d43de273e9e3b5500a Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Tue, 23 Apr 2024 18:13:02 +0800 Subject: [PATCH 013/244] constraint updator --- .../CuLagrange/fem/FleshDynamicStepping.cu | 10 +- .../collision_energy/evaluate_collision.hpp | 6 +- .../geometry/BaryCentricInterpolator.cu | 12 +-- .../CuLagrange/geometry/HalfEdgeStructures.cu | 10 +- projects/CuLagrange/geometry/VectorField.cu | 8 +- .../geometry/kernel/bary_centric_weights.hpp | 2 +- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 85 ++++++++++------ projects/CuLagrange/pbd/ConstraintsSolver.cu | 47 +++++---- projects/CuLagrange/pbd/ConstraintsUpdator.cu | 97 +++++++++++++++++++ 9 files changed, 204 insertions(+), 73 deletions(-) create mode 100644 projects/CuLagrange/pbd/ConstraintsUpdator.cu diff --git a/projects/CuLagrange/fem/FleshDynamicStepping.cu b/projects/CuLagrange/fem/FleshDynamicStepping.cu index ca21872115..17ab53e908 100644 --- a/projects/CuLagrange/fem/FleshDynamicStepping.cu +++ b/projects/CuLagrange/fem/FleshDynamicStepping.cu @@ -1331,12 +1331,12 @@ struct FleshDynamicStepping : INode { {"inds",1}, {"w",4}, {"strength",1}, - {"cnorm",1}},0,zs::memsrc_e::device,0); + {"cnorm",1}},0,zs::memsrc_e::device); auto bverts = typename ZenoParticles::particles_t({ {"x",3}, {"intersect",1}, - {"strength",1}},0,zs::memsrc_e::device,0); + {"strength",1}},0,zs::memsrc_e::device); if(has_input("driven_boudary") && zsparticles->hasAuxData(driven_tag)){ auto zsbones = get_input("driven_boudary"); const auto& zsbones_verts = zsbones->getParticles(); @@ -1362,7 +1362,7 @@ struct FleshDynamicStepping : INode { TILEVEC_OPS::copy(cudaPol,inbbw,"strength",bbw,"strength"); TILEVEC_OPS::copy(cudaPol,inbbw,"cnorm",bbw,"cnorm"); } - // bverts = bverts.clone({zs::memsrc_e::device,0}); + // bverts = bverts.clone({zs::memsrc_e::device}); // std::cout << "bverts.size() = " << bverts.size() << std::endl; auto kverts = typename ZenoParticles::particles_t({ @@ -1372,10 +1372,10 @@ struct 
FleshDynamicStepping : INode { {"binderStiffness",1}, {planeConsIDTag,1}, {"nrm",3}, - {"area",1}},0,zs::memsrc_e::device,0); + {"area",1}},0,zs::memsrc_e::device); auto ktris = typename ZenoParticles::particles_t({ {"inds",3}, - {"nrm",3}},0,zs::memsrc_e::device,0); + {"nrm",3}},0,zs::memsrc_e::device); dtiles_t gia_res{points.get_allocator(),{ {"ring_mask",1}, diff --git a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp index da18c01346..ce3b61c534 100644 --- a/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/evaluate_collision.hpp @@ -761,7 +761,7 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, skip_rest = skip_too_close_pair_at_rest_configuration, use_collision_group = use_collision_group, eps = eps, - Xtag = Xtag, + XtagOffset = verts.getPropertyOffset(Xtag), xtag = xtag, verts = proxy({},verts), tris = proxy({},tris), @@ -847,10 +847,10 @@ void detect_self_imminent_PT_close_proximity(Pol& pol, #endif if(has_rest_shape && skip_rest) { - auto rp = verts.pack(dim_c<3>,Xtag,vi); + auto rp = verts.pack(dim_c<3>,XtagOffset,vi); vec3 rts[3] = {}; for(int i = 0;i != 3;++i) - rts[i] = verts.pack(dim_c<3>,Xtag,tri[i]); + rts[i] = verts.pack(dim_c<3>,XtagOffset,tri[i]); auto is_same_collision_group = true; if(use_collision_group && has_collision_group) diff --git a/projects/CuLagrange/geometry/BaryCentricInterpolator.cu b/projects/CuLagrange/geometry/BaryCentricInterpolator.cu index 49ea5f3bda..9531879874 100644 --- a/projects/CuLagrange/geometry/BaryCentricInterpolator.cu +++ b/projects/CuLagrange/geometry/BaryCentricInterpolator.cu @@ -95,7 +95,7 @@ struct ZSComputeRBFWeights : INode { rbf_weights = typename ZenoParticles::particles_t({ {"inds",1}, - {"w",1}},close_proximity.size(),zs::memsrc_e::device,0); + {"w",1}},close_proximity.size(),zs::memsrc_e::device); auto varience = get_input2("varience"); @@ -252,7 
+252,7 @@ struct ZSComputeSurfaceBaryCentricWeights : INode { sampler = typename ZenoParticles::particles_t({ {"inds",1}, - {"w",1}},csPT.size() * 3,zs::memsrc_e::device,0); + {"w",1}},csPT.size() * 3,zs::memsrc_e::device); zs::Vector nm_updated{dverts.get_allocator(),dverts.size()}; cudaPol(zs::range(nm_updated),[] ZS_LAMBDA(auto& cnt) mutable {cnt = 0;}); @@ -1174,14 +1174,14 @@ struct ZSComputeBaryCentricWeights : INode { {"inds",1}, {"w",4}, {"strength",1}, - {"cnorm",1}},everts.size(),zs::memsrc_e::device,0); + {"cnorm",1}},everts.size(),zs::memsrc_e::device); // auto topo_tag = tag + std::string("_topo"); // auto &bcw_topo = (*zsvolume)[topo_tag]; // auto e_dim = e_eles.getPropertySize("inds"); - // bcw_topo = typename ZenoParticles::particles_t({{"inds",e_dim}},e_eles.size(),zs::memsrc_e::device,0); + // bcw_topo = typename ZenoParticles::particles_t({{"inds",e_dim}},e_eles.size(),zs::memsrc_e::device); auto cudaExec = zs::cuda_exec(); @@ -1393,7 +1393,7 @@ struct ZSSampleEmbedVectorField : zeno::INode { if(!sampler->hasAuxData(tag)){ fmt::print("no specified bcw channel detected, create a new one...\n"); auto& sample_bcw = (*sampler)[tag]; - sample_bcw = typename ZenoParticles::particles_t({{"inds",1},{"w",4}},verts.size(),zs::memsrc_e::device,0); + sample_bcw = typename ZenoParticles::particles_t({{"inds",1},{"w",4}},verts.size(),zs::memsrc_e::device); } const auto& sample_bcw = (*sampler)[tag]; @@ -1464,7 +1464,7 @@ struct ZSSampleEmbedTagField : zeno::INode { if(!sampler->hasAuxData(tag)){ fmt::print("no specified bcw channel detected, create a new one...\n"); auto& sample_bcw = (*sampler)[tag]; - sample_bcw = typename ZenoParticles::particles_t({{"inds",1},{"w",4}},verts.size(),zs::memsrc_e::device,0); + sample_bcw = typename ZenoParticles::particles_t({{"inds",1},{"w",4}},verts.size(),zs::memsrc_e::device); } const auto& sample_bcw = (*sampler)[tag]; diff --git a/projects/CuLagrange/geometry/HalfEdgeStructures.cu 
b/projects/CuLagrange/geometry/HalfEdgeStructures.cu index e974b6d13b..b4a8878eee 100644 --- a/projects/CuLagrange/geometry/HalfEdgeStructures.cu +++ b/projects/CuLagrange/geometry/HalfEdgeStructures.cu @@ -41,7 +41,7 @@ namespace zeno { auto& halfEdge = (*zsparticles)[ZenoParticles::s_surfHalfEdgeTag]; halfEdge = typename ZenoParticles::particles_t({{"local_vertex_id",1},{"to_face",1},{"opposite_he",1},{"next_he",1}}, - tris.size() * 3,zs::memsrc_e::device,0); + tris.size() * 3,zs::memsrc_e::device); auto cudaPol = zs::cuda_exec(); @@ -186,7 +186,7 @@ namespace zeno { }); auto &surfEdges = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; - surfEdges = typename ZenoParticles::particles_t({{"inds", 2},{"he_inds",1}}, edgeSet.size(),zs::memsrc_e::device,0); + surfEdges = typename ZenoParticles::particles_t({{"inds", 2},{"he_inds",1}}, edgeSet.size(),zs::memsrc_e::device); cudaPol(zip(zs::range(edgeSet.size()),edgeSet._activeKeys),[ halfedges = proxy({},halfEdge), surfEdges = proxy({},surfEdges), @@ -208,7 +208,7 @@ namespace zeno { auto& boundaryHalfEdges = (*zsparticles)[ZenoParticles::s_surfBoundaryEdgeTag]; boundaryHalfEdges = typename ZenoParticles::particles_t({{"he_inds",1}}, - boundaryHalfEdgeSet.size(),zs::memsrc_e::device,0); + boundaryHalfEdgeSet.size(),zs::memsrc_e::device); cudaPol(zip(zs::range(boundaryHalfEdgeSet.size()),boundaryHalfEdgeSet._activeKeys),[ boundaryHalfEdges = boundaryHalfEdges.begin("he_inds",dim_c<1>,int_c)] ZS_LAMBDA(int id,const auto& key) mutable { @@ -243,7 +243,7 @@ namespace zeno { auto& halfFacet = (*zsparticles)[ZenoParticles::s_tetHalfFacetTag]; halfFacet = typename ZenoParticles::particles_t({{"opposite_hf",1},{"next_hf",1},{"to_tet",1},{"local_idx",1}}, - tets.size() * 4,zs::memsrc_e::device,0); + tets.size() * 4,zs::memsrc_e::device); build_tetrahedra_half_facet(cudaPol,tets,halfFacet); @@ -286,7 +286,7 @@ namespace zeno { }); auto &surfEdges = (*zsparticles)[ZenoParticles::s_surfEdgeTag]; - surfEdges = typename 
ZenoParticles::particles_t({{"inds", 2}}, edgeSet.size(),zs::memsrc_e::device,0); + surfEdges = typename ZenoParticles::particles_t({{"inds", 2}}, edgeSet.size(),zs::memsrc_e::device); cudaPol(zip(zs::range(edgeSet.size()),edgeSet._activeKeys),[ surfEdges = proxy({},surfEdges)] ZS_LAMBDA(auto ei,const auto& pair) mutable { surfEdges.tuple(dim_c<2>,"inds",ei) = pair.reinterpret_bits(float_c); diff --git a/projects/CuLagrange/geometry/VectorField.cu b/projects/CuLagrange/geometry/VectorField.cu index 228d50947f..dbe5a8ce14 100644 --- a/projects/CuLagrange/geometry/VectorField.cu +++ b/projects/CuLagrange/geometry/VectorField.cu @@ -185,8 +185,8 @@ struct ZSRetrieveVectorField : zeno::INode { std::vector tags{{"x",3},{"vec",3}}; - auto vec_buffer = typename ZenoParticles::particles_t(tags,on_elm ? eles.size() : verts.size(),zs::memsrc_e::device,0); - auto zsvec_buffer = zs::Vector(on_elm ? eles.size() : verts.size(),zs::memsrc_e::device,0); + auto vec_buffer = typename ZenoParticles::particles_t(tags,on_elm ? eles.size() : verts.size(),zs::memsrc_e::device); + auto zsvec_buffer = zs::Vector(on_elm ? 
eles.size() : verts.size(),zs::memsrc_e::device); // transfer the data from gpu to cpu constexpr auto cuda_space = execspace_e::cuda; auto cudaPol = cuda_exec(); @@ -862,8 +862,8 @@ struct ZSGaussianSampler : zeno::INode { } int dim = source_dim; - auto src = typename ZenoParticles::particles_t({{"x",3},{"attr",dim},{"inds",1}},0,zs::memsrc_e::device,0); - auto dst = typename ZenoParticles::particles_t({{"x",3},{"attr",dim},{"mark",1}},0,zs::memsrc_e::device,0); + auto src = typename ZenoParticles::particles_t({{"x",3},{"attr",dim},{"inds",1}},0,zs::memsrc_e::device); + auto dst = typename ZenoParticles::particles_t({{"x",3},{"attr",dim},{"mark",1}},0,zs::memsrc_e::device); int source_simplex_size = srcQuads.getPropertySize("inds"); if(srcType == "vert"){ diff --git a/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp b/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp index 005d1e04b1..40c452f0da 100644 --- a/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp +++ b/projects/CuLagrange/geometry/kernel/bary_centric_weights.hpp @@ -143,7 +143,7 @@ namespace zeno { const zs::vec& b2, T& toc, zs::vec& bary, - const T& eta = (T)0.1) { + const T& eta = (T)0.001) { auto v0 = b0 - a0; auto v1 = b1 - a1; diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index 250c6ad71a..5af91bbbf9 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -105,7 +105,7 @@ virtual void apply() override { {"lambda",1}, {"damping_coeff",1}, {"tclr",1} - }, 0, zs::memsrc_e::device,0); + }, 0, zs::memsrc_e::device); auto &eles = constraint->getQuadraturePoints(); constraint->setMeta(CONSTRAINT_TARGET,source.get()); @@ -1207,32 +1207,36 @@ virtual void apply() override { zs::bht binder_set{verts.get_allocator(),verts.size()}; binder_set.reset(cudaPol,true); + if(!verts.hasProperty("pinSuccess")) + verts.append_channels(cudaPol,{{"pinSuccess",1}}); + 
TILEVEC_OPS::fill(cudaPol,verts,"pinSuccess",0.f); + cudaPol(zs::range(verts.size()),[ - verts = proxy({},verts), - has_pin_group = has_pin_group, - eps = eps, - pin_group_name = zs::SmallString(pin_group_name), - ktris = proxy({},ktris), - kverts = proxy({},kverts), - binder_set = proxy(binder_set), - binder_buffer = proxy({},binder_buffer), - cell_buffer = proxy({},cell_buffer), - cellBvh = proxy(cellBvh)] ZS_LAMBDA(int vi) mutable { - auto p = verts.pack(dim_c<3>,"x",vi); - if(verts("minv",vi) < eps) - return; + verts = proxy({},verts), + has_pin_group = has_pin_group, + eps = eps, + pin_group_name = zs::SmallString(pin_group_name), + ktris = proxy({},ktris), + kverts = proxy({},kverts), + binder_set = proxy(binder_set), + binder_buffer = proxy({},binder_buffer), + cell_buffer = proxy({},cell_buffer), + cellBvh = proxy(cellBvh)] ZS_LAMBDA(int vi) mutable { + auto p = verts.pack(dim_c<3>,"x",vi); + if(verts("minv",vi) < eps) + return; + + if(has_pin_group && verts(pin_group_name,vi) < eps) { + // printf("ignore V[%d] excluded by pingroup\n",vi); + return; + } + auto bv = bv_t{p,p}; + int closest_kti = -1; + T closest_toc = std::numeric_limits::max(); + zs::vec closest_bary = {}; + T min_toc_dist = std::numeric_limits::max(); - if(has_pin_group && verts(pin_group_name,vi) < eps) { - // printf("ignore V[%d] excluded by pingroup\n",vi); - return; - } - auto bv = bv_t{p,p}; - int closest_kti = -1; - T closest_toc = std::numeric_limits::max(); - zs::vec closest_bary = {}; - T min_toc_dist = std::numeric_limits::max(); - - auto do_close_proximity_detection = [&](int kti) mutable { + auto do_close_proximity_detection = [&](int kti) mutable { auto ktri = ktris.pack(dim_c<3>,"inds",kti,int_c); vec3 as[3] = {}; vec3 bs[3] = {}; @@ -1244,7 +1248,7 @@ virtual void apply() override { zs::vec prism_bary{}; T toc{}; - if(!compute_vertex_prism_barycentric_weights(p,as[0],as[1],as[2],bs[0],bs[1],bs[2],toc,prism_bary,(T)0.1)) + 
if(!compute_vertex_prism_barycentric_weights(p,as[0],as[1],as[2],bs[0],bs[1],bs[2],toc,prism_bary,(T)0.001)) return; auto toc_dist = zs::abs(toc - (T)0.5); @@ -1254,16 +1258,39 @@ virtual void apply() override { closest_bary = prism_bary; closest_kti = kti; } - }; - cellBvh.iter_neighbors(bv,do_close_proximity_detection); - if(closest_kti >= 0) { + }; + cellBvh.iter_neighbors(bv,do_close_proximity_detection); + if(closest_kti >= 0) { auto id = binder_set.insert(vec2i{vi,closest_kti}); binder_buffer.tuple(dim_c<6>,"bary",id) = closest_bary; + verts("pinSuccess",vi) = 1.f; + + auto ktri = ktris.pack(dim_c<3>,"inds",closest_kti,int_c); + vec3 as[3] = {}; + vec3 bs[3] = {}; + + for(int i = 0;i != 3;++i) { + as[i] = cell_buffer.pack(dim_c<3>,"x",ktri[i]); + bs[i] = cell_buffer.pack(dim_c<3>,"v",ktri[i]) + as[i]; + } + + auto tp = vec3::zeros(); + for(int i = 0;i != 3;++i) { + tp += as[i] * closest_bary[i]; + tp += bs[i] * closest_bary[i + 3]; + } + + auto diffp = (p - tp).norm(); + if(diffp > 0.1) { + printf("too big prism and point difference : %d %d %f\n",vi,closest_kti,diffp); + } } }); eles.append_channels(cudaPol,{{"inds",2},{"bary",6}}); eles.resize(binder_set.size()); + + std::cout << "nunber of cell pin anchors : " << eles.size() << std::endl; cudaPol(zip(zs::range(binder_set.size()),binder_set._activeKeys),[ binder_buffer = proxy({},binder_buffer), diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index 850010a597..99001db012 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -764,7 +764,6 @@ struct XPBDSolveSmoothAll : INode { TILEVEC_OPS::fill(cudaPol,verts,"w",0); auto iter_id = get_input2("iter_id"); - for(auto& constraint_ptr : constraint_ptr_list) { auto category = constraint_ptr->readMeta(CONSTRAINT_KEY,wrapt{}); @@ -889,6 +888,10 @@ struct XPBDSolveSmoothAll : INode { } if(category == category_c::edge_length_constraint || category == 
category_c::dihedral_bending_constraint || category == category_c::long_range_attachment) { + // if(category == category_c::edge_length_constraint) + // std::cout << "solve edge length constraint" << std::endl; + // if(category == category_c::dihedral_bending_constraint) + // std::cout << "solve dihedral bending constraint" << std::endl; cudaPol(zs::range(cquads.size()),[ cquads = proxy({},cquads), dt = dt, @@ -1067,6 +1070,8 @@ struct XPBDSolveSmoothAll : INode { } if(category == category_c::vertex_pin_to_cell_constraint) { + // std::cout << "solve vertex cell pin constraint" << std::endl; + auto target = constraint_ptr->readMeta(CONSTRAINT_TARGET); const auto& kverts = target->getParticles(); const auto& ktris = target->getQuadraturePoints(); @@ -1102,7 +1107,6 @@ struct XPBDSolveSmoothAll : INode { ptagOffet = verts.getPropertyOffset(ptag), ktris = ktris.begin("inds",dim_c<3>,int_c), enable_sliding = enable_sliding, - // weight_sum = proxy(weight_sum), wOffset = verts.getPropertyOffset("w"), stiffnessOffset = cquads.getPropertyOffset("relative_stiffness"), verts = proxy({},verts)] ZS_LAMBDA(int ci) mutable { @@ -1121,8 +1125,6 @@ struct XPBDSolveSmoothAll : INode { bs[i] = cell_buffer.pack(dim_c<3>,"v",ktri[i]) + as[i]; } - - auto tp = vec3::zeros(); for(int i = 0;i != 3;++i) { tp += as[i] * bary[i]; @@ -1130,23 +1132,28 @@ struct XPBDSolveSmoothAll : INode { } auto dp = tp - verts.pack(dim_c<3>,ptagOffet,vi); + + // auto dpn = dp.norm(); + // if(dpn > 0.1) { + // printf("dp[%d,%d] : %f\n",vi,kti,(float)dpn); + // } - if(enable_sliding) { - auto avg_nrm = vec3::zeros(); - for(int i = 0;i != 3;++i) { - avg_nrm += cell_buffer.pack(dim_c<3>,"nrm",ktri[i]); - } - avg_nrm = avg_nrm.normalized(); - - auto dp_normal = dp.dot(avg_nrm) * avg_nrm; - auto dp_tangent = dp - dp_normal; - if(dp_tangent.norm() < static_cast(0.1)) - dp_tangent = vec3::zeros(); - else - dp_tangent *= static_cast(0.5); - // dp -= dp_tangent * 0.5; - dp = dp_tangent + dp_normal; - } + // 
if(enable_sliding) { + // auto avg_nrm = vec3::zeros(); + // for(int i = 0;i != 3;++i) { + // avg_nrm += cell_buffer.pack(dim_c<3>,"nrm",ktri[i]); + // } + // avg_nrm = avg_nrm.normalized(); + + // auto dp_normal = dp.dot(avg_nrm) * avg_nrm; + // auto dp_tangent = dp - dp_normal; + // if(dp_tangent.norm() < static_cast(0.1)) + // dp_tangent = vec3::zeros(); + // else + // dp_tangent *= static_cast(0.5); + // // dp -= dp_tangent * 0.5; + // dp = dp_tangent + dp_normal; + // } // atomic_add(exec_tag,&weight_sum[vi],w); atomic_add(exec_tag,&verts(wOffset,vi),w); diff --git a/projects/CuLagrange/pbd/ConstraintsUpdator.cu b/projects/CuLagrange/pbd/ConstraintsUpdator.cu new file mode 100644 index 0000000000..6b4a0622ba --- /dev/null +++ b/projects/CuLagrange/pbd/ConstraintsUpdator.cu @@ -0,0 +1,97 @@ +#include "Structures.hpp" +#include "zensim/Logger.hpp" +#include "zensim/cuda/execution/ExecutionPolicy.cuh" +#include "zensim/omp/execution/ExecutionPolicy.hpp" +#include "zensim/io/MeshIO.hpp" +#include "zensim/math/bit/Bits.h" +#include "zensim/types/Property.h" +#include +#include +#include +#include +#include +#include +#include "../../Utils.hpp" + +#include "constraint_function_kernel/constraint.cuh" +#include "../geometry/kernel/tiled_vector_ops.hpp" +#include "../geometry/kernel/topology.hpp" +#include "../geometry/kernel/geo_math.hpp" +#include "../geometry/kernel/bary_centric_weights.hpp" +#include "constraint_function_kernel/constraint_types.hpp" +#include "../fem/collision_energy/evaluate_collision.hpp" + + +namespace zeno { + +struct UpdateConstraintTarget : INode { + +virtual void apply() override { + using namespace zs; + using namespace PBD_CONSTRAINT; + + using vec2 = zs::vec; + using vec3 = zs::vec; + using vec4 = zs::vec; + using vec9 = zs::vec; + using mat3 = zs::vec; + using vec2i = zs::vec; + using vec3i = zs::vec; + using vec4i = zs::vec; + using mat4 = zs::vec; + + + constexpr auto space = execspace_e::cuda; + auto cudaPol = zs::cuda_exec(); + + 
auto source = get_input("source"); + auto constraint = get_input("constraint"); + + // auto target = get_input("target"); + + auto type = constraint->readMeta(CONSTRAINT_KEY,wrapt{}); + // auto do_frame_interpolation = get_input2("do_frame_interpolation"); + + if(type == category_c::vertex_pin_to_cell_constraint) { + std::cout << "update constraint " << type << std::endl; + auto target = get_input("target"); + // switch(type) { + // // case category_c::follow_animation_constraint : break; + // // case category_c::dcd_collision_constraint : break; + // case category_c::vertex_pin_to_cell_constraint || category_c::volume_pin_constraint : + auto ctarget = constraint->readMeta(CONSTRAINT_TARGET,zs::wrapt{}); + if(target->getParticles().size() != ctarget->getParticles().size()) { + std::cout << "the input update target and contraint target has different number of particles" << std::endl; + throw std::runtime_error("the input update target and constraint target has different number of particles"); + } + if(target->getQuadraturePoints().size() != ctarget->getQuadraturePoints().size()) { + std::cout << "the input update target and constraint target has different number of quadratures" << std::endl; + throw std::runtime_error("the input update target and constraint target has different number of quadratures"); + } + + const auto& kverts = target->getParticles(); + auto& ckverts = ctarget->getParticles(); + TILEVEC_OPS::copy(cudaPol,kverts,"x",ckverts,"x"); + TILEVEC_OPS::copy(cudaPol,kverts,"px",ckverts,"px"); + std::cout << "Update ckverts " << std::endl; + // break; + // } + + } + set_output("constraint",constraint); +} + +}; + +ZENDEFNODE(UpdateConstraintTarget, {{ + {"source"}, + {"target"}, + {"constraint"} +}, +{{"constraint"}}, +{ + // {"string","groupID",""}, +}, +{"PBD"}}); + +}; \ No newline at end of file From 95503c60451917377d4e7b40dbe0ae2ce7c2a8fb Mon Sep 17 00:00:00 2001 From: teachmain Date: Sun, 28 Apr 2024 16:50:17 +0800 Subject: [PATCH 014/244] double 
surface by default --- zenovis/xinxinoptix/DeflMatShader.cu | 19 ++-- zenovis/xinxinoptix/DisneyBSDF.h | 146 +++++++++++++-------------- zenovis/xinxinoptix/Light.h | 2 +- zenovis/xinxinoptix/TraceStuff.h | 1 + 4 files changed, 80 insertions(+), 88 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 9a0b7940f1..0beb82ba96 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -884,18 +884,18 @@ extern "C" __global__ void __closesthit__radiance() float3 frontPos, backPos; SelfIntersectionAvoidance::offsetSpawnPoint( frontPos, backPos, wldPos, prd->geometryNormal, wldOffset ); - shadowPRD.origin = dot(-ray_dir, wldNorm) > 0 ? frontPos : backPos; + shadowPRD.origin = dot(wi, vec3(wldNorm)) > 0 ? frontPos : backPos; //auto shadingP = rtgems::offset_ray(shadowPRD.origin + params.cam.eye, prd->geometryNormal); // world space - shadowPRD.origin = frontPos; - if(mats.subsurface>0 && (mats.thin>0.5 || mats.doubleSide>0.5) && istransmission){ - shadowPRD.origin = backPos; //rtgems::offset_ray(P, -prd->geometryNormal); - } + //shadowPRD.origin = frontPos; + //if(mats.subsurface>0 && (mats.thin>0.5 || mats.doubleSide>0.5) && istransmission){ + //shadowPRD.origin = backPos; //rtgems::offset_ray(P, -prd->geometryNormal); + //} - auto shadingP = rtgems::offset_ray(P + params.cam.eye, prd->geometryNormal); // world space - if(mats.subsurface>0 && (mats.thin>0.5 || mats.doubleSide>0.5) && istransmission){ - shadingP = rtgems::offset_ray(P + params.cam.eye, -prd->geometryNormal); - } + auto shadingP = rtgems::offset_ray(P + params.cam.eye, dot(wi, vec3(wldNorm)) > 0 ? 
wldNorm:-wldNorm); // world space + //if(mats.subsurface>0 && (mats.thin>0.5 || mats.doubleSide>0.5) && istransmission){ + //shadingP = rtgems::offset_ray(P + params.cam.eye, -prd->geometryNormal); + //} prd->radiance = {}; prd->direction = normalize(wi); @@ -909,6 +909,7 @@ extern "C" __global__ void __closesthit__radiance() } prd->lightmask = DefaultMatMask; + shadowPRD.ShadowNormal = dot(wi, vec3(prd->geometryNormal)) > 0 ? prd->geometryNormal:-prd->geometryNormal; DirectLighting(prd, shadowPRD, shadingP, ray_dir, evalBxDF, &taskAux, dummy_prt); if(mats.shadowReceiver > 0.5f) { diff --git a/zenovis/xinxinoptix/DisneyBSDF.h b/zenovis/xinxinoptix/DisneyBSDF.h index e0803d465c..678e2ec9bd 100644 --- a/zenovis/xinxinoptix/DisneyBSDF.h +++ b/zenovis/xinxinoptix/DisneyBSDF.h @@ -306,6 +306,27 @@ namespace DisneyBSDF{ return result; } + static __inline__ __device__ + vec3 EvaluateDiffuse(vec3 baseColor, float subsurface, float roughness, float sheen, vec3 Csheen, vec3 V, vec3 L, vec3 H, float &pdf){ + pdf = 0.0f; + if (L.z == 0.0f) + return vec3(0.0f); + + float LDotH = abs(dot(L, H)); + float F90 = 0.5f + 2.0f * LDotH * LDotH * roughness; + // Diffuse + float FL = BRDFBasics::SchlickWeight(abs(L.z)); + float FV = BRDFBasics::SchlickWeight(abs(V.z)); + float Fd = mix(1.0f,F90,FL) * mix(1.0f,F90,FV); + + + // Sheen + float FH = BRDFBasics::SchlickWeight(LDotH); + vec3 Fsheen = FH * sheen * Csheen; + + pdf =abs (L.z) * 1.0f / M_PIf; + return 1.0f / M_PIf * baseColor * (Fd + Fsheen); + } static __inline__ __device__ float3 EvaluateDisney2( @@ -351,7 +372,6 @@ namespace DisneyBSDF{ wm = wm.z<0.0f?-wm:wm; BRDFBasics::TintColors(mix(mat.basecolor, mat.sssColor, mat.subsurface), eta, mat.specularTint, mat.sheenTint, F0, Csheen, Cspec0); - Cspec0 = Cspec0; //material layer mix weight float dielectricWt = (1.0 - mat.metallic) * (1.0 - mat.specTrans); float metalWt = mat.metallic; @@ -411,8 +431,8 @@ namespace DisneyBSDF{ if(reflect){ if(diffPr > 0.0f){ - vec3 d = 
BRDFBasics::EvalDisneyDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen, - Csheen, wo, wi, wm, tmpPdf) * dielectricWt; + vec3 d = EvaluateDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; + //vec3 d = BRDFBasics::EvalDisneyDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; dterm = dterm + d; f = f + d; fPdf += tmpPdf * diffPr ; @@ -556,6 +576,16 @@ namespace DisneyBSDF{ return 1.0f / ( n * n) - (1.0f - c * c); } + static __inline__ __device__ + void SampleSpecular(vec3 wo, vec3& wi, float rough, float aniso, float r1, float r2){ + float ax, ay; + BRDFBasics::CalculateAnisotropicParams(rough,aniso,ax,ay); + vec3 vtmp = wo; + vtmp.z = abs(vtmp.z); + vec3 wm = BRDFBasics::SampleGGXVNDF(vtmp, ax, ay, r1, r2); + + wi = normalize(reflect(-wo, wm)); + } static __inline__ __device__ @@ -684,10 +714,6 @@ namespace DisneyBSDF{ isSS = false; tbn.inverse_transform(wi); wi = normalize(wi); - - if (dot(wi, N2) < 0) { - wi = normalize(wi - 1.01f * dot(wi, N2) * N2); - } } else{ //switch between scattering or diffuse reflection @@ -696,17 +722,14 @@ namespace DisneyBSDF{ { prd->fromDiff = true; wi = BRDFBasics::CosineSampleHemisphere(r1, r2); + if(wo.z<0.0f){ + wi.z = -wi.z; + } isSS = false; tbn.inverse_transform(wi); wi = normalize(wi); - - if(dot(wi, N2)<0) - { - wi = normalize(wi - 1.01f * dot(wi, N2) * N2); - } }else { - //go inside wi = -BRDFBasics::UniformSampleHemisphere(r1, r2); wi.z = min(-0.2f, wi.z); @@ -725,11 +748,6 @@ namespace DisneyBSDF{ tbn.inverse_transform(wi); wi = normalize(wi); - bool sameside2 = (dot(wi, N) * dot(wi, N2)) > 0.0f; - if (sameside == false) { - wi = normalize(wi - 1.01f * dot(wi, N2) * N2); - } - } } @@ -745,62 +763,45 @@ namespace DisneyBSDF{ else 
if(r3hit_type = SPECULAR_HIT; - float ax, ay; - BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); - - vec3 vtmp = wo; - vtmp.z = wo.z>0?vtmp.z:-vtmp.z; - vec3 wm = BRDFBasics::SampleGGXVNDF(vtmp, ax, ay, r1, r2); - - if (wm.z < 0.0) - wm.z = -wm.z; - - wm.z = wo.z>0? wm.z:-wm.z; - - wi = normalize(reflect(-wo, wm)); + SampleSpecular(wo,wi,mat.roughness,mat.anisotropic,r1,r2); tbn.inverse_transform(wi); wi = normalize(wi); - if(dot(wi, N2)<0) - { - wi = normalize(wi - 1.01f * dot(wi, N2) * N2); - } }else if(r30?true:false; - float ax, ay; - BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); - vec3 swo = wo.z>0?wo:-wo; - vec3 wm = BRDFBasics::SampleGGXVNDF(swo, ax, ay, r1, r2); - wm = wm.z<0?-wm:wm; - - wm = entering?wm:-wm; - - float F = BRDFBasics::DielectricFresnel(abs(dot(wm, wo)), entering?mat.ior:1.0f/mat.ior); - float p = rnd(seed); - if(p0?true:false; + float ax, ay; + BRDFBasics::CalculateAnisotropicParams(mat.roughness,mat.anisotropic,ax,ay); + vec3 swo = wo.z>0?wo:-wo; + vec3 wm = BRDFBasics::SampleGGXVNDF(swo, ax, ay, r1, r2); + wm = wm.z<0?-wm:wm; + + wm = entering?wm:-wm; + + float F = BRDFBasics::DielectricFresnel(abs(dot(wm, wo)), entering?mat.ior:1.0f/mat.ior); + float p = rnd(seed); + if(phit_type = SPECULAR_HIT; - float ax, ay; - BRDFBasics::CalculateAnisotropicParams(mat.clearcoatRoughness,0,ax,ay); - vec3 swo = wo.z>0?wo:-wo; - vec3 wm = BRDFBasics::SampleGGXVNDF(swo, ax, ay, r1, r2); - wm = wm.z<0?-wm:wm; - - - wi = normalize(reflect(-wo, wm)); + SampleSpecular(wo,wi,mat.clearcoatRoughness,0.0f,r1,r2); tbn.inverse_transform(wi); wi = normalize(wi); - if(dot(wi, N2)<0) - { - wi = normalize(wi - 1.01f * dot(wi, N2) * N2); - } } tbn.inverse_transform(wo); diff --git a/zenovis/xinxinoptix/Light.h b/zenovis/xinxinoptix/Light.h index 6554fe16de..fe7ea9770f 100644 --- a/zenovis/xinxinoptix/Light.h +++ b/zenovis/xinxinoptix/Light.h @@ -229,7 +229,7 @@ void DirectLighting(RadiancePRD *prd, ShadowPRD& 
shadowPRD, const float3& shadin UF = (UF - _SKY_PROB_) / lightPickProb; const Vector3f& SP = reinterpret_cast(shadingP); - const Vector3f& SN = reinterpret_cast(prd->geometryNormal); + const Vector3f& SN = reinterpret_cast(shadowPRD.ShadowNormal); auto pick = lightTree->sample(UF, SP, SN); if (pick.prob <= 0.0f) { return; } diff --git a/zenovis/xinxinoptix/TraceStuff.h b/zenovis/xinxinoptix/TraceStuff.h index c9adac0a91..3c5d4f475a 100644 --- a/zenovis/xinxinoptix/TraceStuff.h +++ b/zenovis/xinxinoptix/TraceStuff.h @@ -61,6 +61,7 @@ struct ShadowPRD { uint8_t nonThinTransHit; VolumePRD vol; + float3 ShadowNormal; float rndf() { return rnd(seed); From f0196f4628b1ff5f71915598fe0b01798bdc1004 Mon Sep 17 00:00:00 2001 From: teachmain Date: Mon, 29 Apr 2024 17:48:40 +0800 Subject: [PATCH 015/244] update --- zenovis/xinxinoptix/DisneyBRDF.h | 2 +- zenovis/xinxinoptix/DisneyBSDF.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/zenovis/xinxinoptix/DisneyBRDF.h b/zenovis/xinxinoptix/DisneyBRDF.h index 709dc54f15..f8ec3b41f6 100644 --- a/zenovis/xinxinoptix/DisneyBRDF.h +++ b/zenovis/xinxinoptix/DisneyBRDF.h @@ -415,7 +415,7 @@ vec3 EvalDisneyDiffuse(vec3 baseColor, float subsurface, float roughness, float float FH = SchlickWeight(LDotH); vec3 Fsheen = FH * sheen * Csheen; - pdf = L.z * 1.0f / M_PIf; + pdf = abs(L.z) * 1.0f / M_PIf; return 1.0f / M_PIf * baseColor * (Fd + Fretro) + Fsheen; } diff --git a/zenovis/xinxinoptix/DisneyBSDF.h b/zenovis/xinxinoptix/DisneyBSDF.h index 678e2ec9bd..99ffdad54f 100644 --- a/zenovis/xinxinoptix/DisneyBSDF.h +++ b/zenovis/xinxinoptix/DisneyBSDF.h @@ -431,8 +431,8 @@ namespace DisneyBSDF{ if(reflect){ if(diffPr > 0.0f){ - vec3 d = EvaluateDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; - //vec3 d = BRDFBasics::EvalDisneyDiffuse(thin? 
mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; + //vec3 d = EvaluateDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; + vec3 d = BRDFBasics::EvalDisneyDiffuse(thin? mat.basecolor : mix(mat.basecolor,mat.sssColor,mat.subsurface), mat.subsurface, mat.roughness, mat.sheen,Csheen, wo, wi, wm, tmpPdf) * dielectricWt; dterm = dterm + d; f = f + d; fPdf += tmpPdf * diffPr ; From a40b3ebe338d01b36c89f071fa8d73294b466e3a Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 29 Apr 2024 19:14:34 +0800 Subject: [PATCH 016/244] step 4 --- zenovis/src/bate/RenderEngineBate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/zenovis/src/bate/RenderEngineBate.cpp b/zenovis/src/bate/RenderEngineBate.cpp index 7fa2c4d43e..556ea28634 100644 --- a/zenovis/src/bate/RenderEngineBate.cpp +++ b/zenovis/src/bate/RenderEngineBate.cpp @@ -103,6 +103,7 @@ struct RenderEngineBate : RenderEngine { void cleanupWhenExit() override { released = true; scene->shaderMan = nullptr; + scene->drawOptions->handler = nullptr; vao = nullptr; graphicsMan = nullptr; hudGraphics.clear(); From ed17906143afcfa1e1a802df8ba4c3d119821ace Mon Sep 17 00:00:00 2001 From: zhuohy <1445643474@qq.com> Date: Mon, 6 May 2024 18:44:36 +0800 Subject: [PATCH 017/244] mark error when path is invalid --- ui/zenoedit/dialog/zeditparamlayoutdlg.cpp | 45 ++++++++++++++++++++++ ui/zenoedit/dialog/zeditparamlayoutdlg.h | 1 + ui/zenoedit/dialog/zeditparamlayoutdlg.ui | 31 +++++++++++++++ ui/zenoedit/panel/zenoproppanel.cpp | 4 ++ ui/zenomodel/customui/customuirw.cpp | 3 +- ui/zenoui/comctrl/gv/zenogvhelper.cpp | 5 +++ ui/zenoui/comctrl/gv/zenoparamwidget.cpp | 10 +++-- ui/zenoui/comctrl/gv/zgraphicstextitem.cpp | 32 +++++++++++++++ ui/zenoui/comctrl/gv/zgraphicstextitem.h | 2 + 
ui/zenoui/comctrl/gv/zitemfactory.cpp | 9 +++++ ui/zenoui/comctrl/zpathedit.cpp | 9 ++++- ui/zenoui/comctrl/zwidgetfactory.cpp | 9 +++++ 12 files changed, 154 insertions(+), 6 deletions(-) diff --git a/ui/zenoedit/dialog/zeditparamlayoutdlg.cpp b/ui/zenoedit/dialog/zeditparamlayoutdlg.cpp index ee5010a4da..c307e91fd9 100644 --- a/ui/zenoedit/dialog/zeditparamlayoutdlg.cpp +++ b/ui/zenoedit/dialog/zeditparamlayoutdlg.cpp @@ -16,6 +16,8 @@ #include "iotags.h" #include #include +#include "zenoapplication.h" +#include "zenomainwindow.h" static CONTROL_ITEM_INFO controlList[] = { {"Tab", CONTROL_NONE, "", ":/icons/parameter_control_tab.svg"}, @@ -188,6 +190,7 @@ ZEditParamLayoutDlg::ZEditParamLayoutDlg(QStandardItemModel* pModel, bool bNodeU connect(m_ui->editStep, SIGNAL(editingFinished()), this, SLOT(onStepEditFinished())); connect(m_ui->cbControl, SIGNAL(currentIndexChanged(int)), this, SLOT(onControlItemChanged(int))); connect(m_ui->cbTypes, SIGNAL(currentIndexChanged(int)), this, SLOT(onTypeItemChanged(int))); + connect(m_ui->m_pathFilterEdit, SIGNAL(editingFinished()), this, SLOT(onPathFilterFinished())); m_ui->itemsTable->setHorizontalHeaderLabels({ tr("Item Name") }); connect(m_ui->itemsTable, SIGNAL(cellChanged(int, int)), this, SLOT(onComboTableItemsCellChanged(int, int))); @@ -458,6 +461,9 @@ void ZEditParamLayoutDlg::onTreeCurrentChanged(const QModelIndex& current, const } return pCurrentItem->data(ROLE_PARAM_VALUE); }; + cbSets.cbSwitch = [=](bool bOn) { + zenoApp->getMainWindow()->setInDlgEventLoop(bOn); + }; QWidget *valueControl = zenoui::createWidget(deflVal, ctrl, dataType, cbSets, controlProperties); if (valueControl) { valueControl->setEnabled(bEditable); @@ -648,6 +654,19 @@ void ZEditParamLayoutDlg::switchStackProperties(int ctrl, VParamItem* pItem) m_ui->editStep->setText(QString::number(info.step)); m_ui->editMin->setText(QString::number(info.min)); m_ui->editMax->setText(QString::number(info.max)); + } else if (ctrl == CONTROL_READPATH || ctrl 
== CONTROL_WRITEPATH) + { + m_ui->stackProperties->setCurrentIndex(3); + QVariantMap pros = controlProperties.toMap(); + if (pros.find("filter") != pros.end()) + { + QString filter = pros["filter"].toString(); + m_ui->m_pathFilterEdit->setText(filter); + } + else + { + m_ui->m_pathFilterEdit->setText(""); + } } else { m_ui->stackProperties->setCurrentIndex(0); } @@ -836,6 +855,26 @@ void ZEditParamLayoutDlg::onMaxEditFinished() updateSliderInfo(); } +void ZEditParamLayoutDlg::onPathFilterFinished() +{ + QModelIndex layerIdx = m_ui->paramsView->currentIndex(); + if (!layerIdx.isValid() && layerIdx.data(ROLE_VPARAM_TYPE) != VPARAM_PARAM) + return; + + CONTROL_PROPERTIES properties = layerIdx.data(ROLE_VPARAM_CTRL_PROPERTIES).value(); + QString filter = m_ui->m_pathFilterEdit->text(); + properties["filter"] = filter; + proxyModelSetData(layerIdx, properties, ROLE_VPARAM_CTRL_PROPERTIES); + //update control. + QLayoutItem* pLayoutItem = m_ui->gridLayout->itemAtPosition(rowValueControl, 1); + if (pLayoutItem) { + QWidget* pControl = pLayoutItem->widget(); + if (pControl) { + pControl->setProperty("filter", filter); + } + } +} + void ZEditParamLayoutDlg::onControlItemChanged(int idx) { const QString& controlName = m_ui->cbControl->itemText(idx); @@ -863,6 +902,9 @@ void ZEditParamLayoutDlg::onControlItemChanged(int idx) value = UiHelper::initDefaultValue(dataType); QVariant controlProperties = layerIdx.data(ROLE_VPARAM_CTRL_PROPERTIES); cbSets.cbGetIndexData = [=]() -> QVariant { return UiHelper::initVariantByControl(ctrl); }; + cbSets.cbSwitch = [=](bool bOn) { + zenoApp->getMainWindow()->setInDlgEventLoop(bOn); + }; QWidget *valueControl = zenoui::createWidget(value, ctrl, dataType, cbSets, controlProperties); if (valueControl) { valueControl->setEnabled(m_pGraphsModel->IsSubGraphNode(m_nodeIdx)); @@ -898,6 +940,9 @@ void ZEditParamLayoutDlg::onTypeItemChanged(int idx) }; QVariant controlProperties = layerIdx.data(ROLE_VPARAM_CTRL_PROPERTIES); cbSets.cbGetIndexData = 
[=]() -> QVariant { return pItem->data(ROLE_PARAM_VALUE); }; + cbSets.cbSwitch = [=](bool bOn) { + zenoApp->getMainWindow()->setInDlgEventLoop(bOn); + }; QWidget *valueControl = zenoui::createWidget(pItem->data(ROLE_PARAM_VALUE), pItem->m_ctrl, dataType, cbSets, controlProperties); if (valueControl) { valueControl->setEnabled(m_pGraphsModel->IsSubGraphNode(m_nodeIdx)); diff --git a/ui/zenoedit/dialog/zeditparamlayoutdlg.h b/ui/zenoedit/dialog/zeditparamlayoutdlg.h index 0b6cef828f..becace4722 100644 --- a/ui/zenoedit/dialog/zeditparamlayoutdlg.h +++ b/ui/zenoedit/dialog/zeditparamlayoutdlg.h @@ -63,6 +63,7 @@ private slots: void onComboTableItemsCellChanged(int row, int column); void onProxyItemNameChanged(const QModelIndex& itemIdx, const QString& oldPath, const QString& newName); void onViewParamDataChanged(const QModelIndex &topLeft, const QModelIndex &bottomRight, const QVector &roles); + void onPathFilterFinished(); private: void initUI(); diff --git a/ui/zenoedit/dialog/zeditparamlayoutdlg.ui b/ui/zenoedit/dialog/zeditparamlayoutdlg.ui index 9b7d665e96..5522cfbfb5 100644 --- a/ui/zenoedit/dialog/zeditparamlayoutdlg.ui +++ b/ui/zenoedit/dialog/zeditparamlayoutdlg.ui @@ -469,6 +469,37 @@ + + + + + + + + Filter + + + + + + + + + + + + Qt::Vertical + + + + 20 + 162 + + + + + + diff --git a/ui/zenoedit/panel/zenoproppanel.cpp b/ui/zenoedit/panel/zenoproppanel.cpp index 3c9ab10f4e..75fff46908 100644 --- a/ui/zenoedit/panel/zenoproppanel.cpp +++ b/ui/zenoedit/panel/zenoproppanel.cpp @@ -753,6 +753,10 @@ void ZenoPropPanel::onViewParamDataChanged(const QModelIndex& topLeft, const QMo pSpinBox->setRange(info.min, info.max); } } + else if (value.type() == QMetaType::QVariantMap && value.toMap().contains("filter")) + { + ctrl.pControl->setProperty("filter", value.toMap()["filter"]); + } } else if (role == ROLE_VPARAM_TOOLTIP) { diff --git a/ui/zenomodel/customui/customuirw.cpp b/ui/zenomodel/customui/customuirw.cpp index b7684ddf9f..91b824fc22 100644 --- 
a/ui/zenomodel/customui/customuirw.cpp +++ b/ui/zenomodel/customui/customuirw.cpp @@ -171,7 +171,8 @@ namespace zenomodel ZASSERT_EXIT(paramVal.HasMember("control"), param); const rapidjson::Value& controlObj = paramVal["control"]; - if (controlObj.HasMember("items") || (controlObj.HasMember("step") && controlObj.HasMember("min") && controlObj.HasMember("max"))) + if (controlObj.HasMember("items") || controlObj.HasMember("filter") || + (controlObj.HasMember("step") && controlObj.HasMember("min") && controlObj.HasMember("max"))) { JsonHelper::importControl(controlObj, param.m_info.control, param.controlInfos); } diff --git a/ui/zenoui/comctrl/gv/zenogvhelper.cpp b/ui/zenoui/comctrl/gv/zenogvhelper.cpp index 651c19e2e7..45a80d892b 100644 --- a/ui/zenoui/comctrl/gv/zenogvhelper.cpp +++ b/ui/zenoui/comctrl/gv/zenogvhelper.cpp @@ -208,6 +208,11 @@ void ZenoGvHelper::setCtrlProperties(QGraphicsItem *item, const QVariant &value pSpinBox->setSliderInfo(info); } } + ZenoParamPathEdit* pEditItem = qgraphicsitem_cast(item); + if (pEditItem && map.contains("filter")) + { + pEditItem->setProperty("filter", map["filter"]); + } } } diff --git a/ui/zenoui/comctrl/gv/zenoparamwidget.cpp b/ui/zenoui/comctrl/gv/zenoparamwidget.cpp index ad4e9b822d..1a0e8619ac 100644 --- a/ui/zenoui/comctrl/gv/zenoparamwidget.cpp +++ b/ui/zenoui/comctrl/gv/zenoparamwidget.cpp @@ -314,11 +314,15 @@ void ZenoParamPathEdit::mousePressEvent(QGraphicsSceneMouseEvent *event) QDir dir = fileInfo.dir(); dirPath = dir.path(); } - + QString filter = this->property("filter").toString(); + if (filter.isEmpty()) + { + filter = "All Files(*);;"; + } if (m_control == CONTROL_READPATH) { - path = QFileDialog::getOpenFileName(nullptr, "File to Open", dirPath, "All Files(*);;"); + path = QFileDialog::getOpenFileName(nullptr, "File to Open", dirPath, filter); } else if (m_control == CONTROL_WRITEPATH) { - path = QFileDialog::getSaveFileName(nullptr, "Path to Save", dirPath, "All Files(*);;"); + path = 
QFileDialog::getSaveFileName(nullptr, "Path to Save", dirPath, filter); } else { path = QFileDialog::getExistingDirectory(nullptr, "Path to Save", ""); } diff --git a/ui/zenoui/comctrl/gv/zgraphicstextitem.cpp b/ui/zenoui/comctrl/gv/zgraphicstextitem.cpp index 1b37bf4d10..70a9096562 100644 --- a/ui/zenoui/comctrl/gv/zgraphicstextitem.cpp +++ b/ui/zenoui/comctrl/gv/zgraphicstextitem.cpp @@ -385,6 +385,7 @@ ZEditableTextItem::ZEditableTextItem(const QString &text, QGraphicsItem *parent) , m_bShowSlider(false) , m_pSlider(nullptr) , m_bValidating(false) + , m_validState(QValidator::Acceptable) { _base::setText(text); initUI(text); @@ -396,6 +397,7 @@ ZEditableTextItem::ZEditableTextItem(QGraphicsItem* parent) , m_bShowSlider(false) , m_pSlider(nullptr) , m_bValidating(false) + , m_validState(QValidator::Acceptable) { initUI(""); } @@ -422,6 +424,13 @@ void ZEditableTextItem::paint(QPainter *painter, const QStyleOptionGraphicsItem painter->setPen(pen); painter->drawRect(rc); } else { + if (m_validState != QValidator::Acceptable) + { + pen.setJoinStyle(Qt::MiterJoin); + pen.setColor(QColor(200, 84, 79)); + painter->setPen(pen); + painter->drawRect(rc); + } painter->fillRect(rc, col); } _base::paint(painter, option, widget); @@ -489,6 +498,11 @@ void ZEditableTextItem::onContentsChanged() else { m_acceptableText = editText; } + if (m_validState != ret) + { + m_validState = ret; + update(); + } iVal = 0; } } @@ -591,6 +605,24 @@ void ZEditableTextItem::keyReleaseEvent(QKeyEvent* event) return _base::keyReleaseEvent(event); } +bool ZEditableTextItem::event(QEvent * event) +{ + if (event->type() == QEvent::DynamicPropertyChange) + { + QDynamicPropertyChangeEvent* evt = static_cast(event); + if (evt->propertyName() == "filter") { + QString filter = property("filter").toString(); + if (!filter.isEmpty()) + { + QRegExp rx(filter); + rx.setPatternSyntax(QRegExp::Wildcard); + m_validator = new QRegExpValidator(rx, this); + } + } + } + return _base::event(event); +} + void 
ZEditableTextItem::focusInEvent(QFocusEvent* event) { _base::focusInEvent(event); diff --git a/ui/zenoui/comctrl/gv/zgraphicstextitem.h b/ui/zenoui/comctrl/gv/zgraphicstextitem.h index 20630f6956..94ba170c57 100644 --- a/ui/zenoui/comctrl/gv/zgraphicstextitem.h +++ b/ui/zenoui/comctrl/gv/zgraphicstextitem.h @@ -119,6 +119,7 @@ class ZEditableTextItem : public ZGraphicsLayoutItem void focusOutEvent(QFocusEvent* event) override; void keyPressEvent(QKeyEvent* event) override; void keyReleaseEvent(QKeyEvent* event) override; + bool event(QEvent* evt) override; private slots: void onContentsChanged(); @@ -134,6 +135,7 @@ private slots: bool m_bFocusIn; bool m_bValidating; bool m_bShowSlider; + QValidator::State m_validState; }; class ZenoSocketItem; diff --git a/ui/zenoui/comctrl/gv/zitemfactory.cpp b/ui/zenoui/comctrl/gv/zitemfactory.cpp index 1fe5804349..74e8b13d11 100644 --- a/ui/zenoui/comctrl/gv/zitemfactory.cpp +++ b/ui/zenoui/comctrl/gv/zitemfactory.cpp @@ -160,6 +160,15 @@ namespace zenoui pPathEditor->setData(GVKEY_SIZEHINT, ZenoStyle::dpiScaledSize(QSizeF(200, zenoui::g_ctrlHeight))); pPathEditor->setData(GVKEY_SIZEPOLICY, QSizePolicy(QSizePolicy::Expanding, QSizePolicy::Fixed)); pPathEditor->setData(GVKEY_TYPE, type); + if (controlProps.type() == QMetaType::QVariantMap) + { + QVariantMap props = controlProps.toMap(); + if (props.find("filter") != props.end()) + { + auto filter = props["filter"].toStringList(); + pPathEditor->setProperty("filter", filter); + } + } QObject::connect(pPathEditor, &ZenoParamPathEdit::editingFinished, [=]() { cbSet.cbEditFinished(pPathEditor->text()); diff --git a/ui/zenoui/comctrl/zpathedit.cpp b/ui/zenoui/comctrl/zpathedit.cpp index a8b45f6e9d..4bbae9e1d7 100644 --- a/ui/zenoui/comctrl/zpathedit.cpp +++ b/ui/zenoui/comctrl/zpathedit.cpp @@ -24,12 +24,17 @@ void ZPathEdit::initUI(const CALLBACK_SWITCH& cbSwitch) QObject::connect(this, &ZLineEdit::btnClicked, [=]() { int ctrl = this->property("control").toInt(); + QString filter = 
this->property("filter").toString(); + if (filter.isEmpty()) + { + filter = "All Files(*);;"; + } QString path; cbSwitch(true); if (ctrl == CONTROL_READPATH) { - path = QFileDialog::getOpenFileName(nullptr, "File to Open", "", "All Files(*);;"); + path = QFileDialog::getOpenFileName(nullptr, "File to Open", "", filter); } else if (ctrl == CONTROL_WRITEPATH) { - path = QFileDialog::getSaveFileName(nullptr, "Path to Save", "", "All Files(*);;"); + path = QFileDialog::getSaveFileName(nullptr, "Path to Save", "", filter); } else { path = QFileDialog::getExistingDirectory(nullptr, "Path to Save", ""); } diff --git a/ui/zenoui/comctrl/zwidgetfactory.cpp b/ui/zenoui/comctrl/zwidgetfactory.cpp index 37da1b7ef5..399ac78a9f 100644 --- a/ui/zenoui/comctrl/zwidgetfactory.cpp +++ b/ui/zenoui/comctrl/zwidgetfactory.cpp @@ -76,6 +76,15 @@ namespace zenoui ZPathEdit *pathLineEdit = new ZPathEdit(cbSet.cbSwitch,value.toString()); pathLineEdit->setFixedHeight(ZenoStyle::dpiScaled(zenoui::g_ctrlHeight)); pathLineEdit->setProperty("control", ctrl); + if (properties.type() == QMetaType::QVariantMap) + { + QVariantMap props = properties.toMap(); + if (props.find("filter") != props.end()) + { + auto filter = props["filter"].toStringList(); + pathLineEdit->setProperty("filter", filter); + } + } QObject::connect(pathLineEdit, &ZLineEdit::textEditFinished, [=]() { cbSet.cbEditFinished(pathLineEdit->text()); From 1e2ee53aa2c513493a927bf996b31c9341b81de1 Mon Sep 17 00:00:00 2001 From: luzh Date: Wed, 8 May 2024 16:14:29 +0800 Subject: [PATCH 018/244] update ver. 
--- ui/zenoedit/zenoedit.rc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/zenoedit/zenoedit.rc b/ui/zenoedit/zenoedit.rc index 3ee1d6adf7..30a237cd8b 100644 --- a/ui/zenoedit/zenoedit.rc +++ b/ui/zenoedit/zenoedit.rc @@ -48,8 +48,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,3,0,315 - PRODUCTVERSION 1,3,0,315 + FILEVERSION 1,3,0,501 + PRODUCTVERSION 1,3,0,501 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -66,12 +66,12 @@ BEGIN BEGIN VALUE "CompanyName", "ZENUSTECH" VALUE "FileDescription", "Zeno Editor" - VALUE "FileVersion", "1.3.0.315" + VALUE "FileVersion", "1.3.0.501" VALUE "InternalName", "zenoedit.rc" VALUE "LegalCopyright", "Copyright (C) 2023" VALUE "OriginalFilename", "zenoedit.rc" VALUE "ProductName", "Zeno" - VALUE "ProductVersion", "1.3.0.315" + VALUE "ProductVersion", "1.3.0.501" END END BLOCK "VarFileInfo" From 852743129680fd65a173a4a86e79ee8f2549bc32 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 11 May 2024 15:21:15 +0800 Subject: [PATCH 019/244] outputPoint --- projects/Alembic/WriteAlembic.cpp | 32 +++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/projects/Alembic/WriteAlembic.cpp b/projects/Alembic/WriteAlembic.cpp index cc44da2d02..268fedc56c 100644 --- a/projects/Alembic/WriteAlembic.cpp +++ b/projects/Alembic/WriteAlembic.cpp @@ -547,6 +547,7 @@ void prim_to_poly_if_only_vertex(PrimitiveObject* p) { struct WriteAlembic2 : INode { OArchive archive; OPolyMesh meshyObj; + OPoints pointsObj; std::string usedPath; std::map verts_attrs; std::map loops_attrs; @@ -582,7 +583,12 @@ struct WriteAlembic2 : INode { "None" ); real_frame_start = -1; - meshyObj = OPolyMesh( OObject( archive, 1 ), "mesh" ); + if (get_input2("outputPoint")) { + pointsObj = OPoints (OObject( archive, 1 ), "points"); + } + else { + meshyObj = OPolyMesh( OObject( archive, 1 ), "mesh" ); + } verts_attrs.clear(); loops_attrs.clear(); polys_attrs.clear(); @@ -602,7 
+608,7 @@ struct WriteAlembic2 : INode { if (flipFrontBack) { primFlipFaces(prim.get()); } - { + if (!get_input2("outputPoint")) { prim_to_poly_if_only_vertex(prim.get()); // Create a PolyMesh class. OPolyMeshSchema &mesh = meshyObj.getSchema(); @@ -683,6 +689,27 @@ struct WriteAlembic2 : INode { } } } + else { + OPointsSchema &points = pointsObj.getSchema(); + OCompoundProperty user = points.getUserProperties(); + write_user_data(user_attrs, "", prim, user, frameid, real_frame_start); + points.setTimeSampling(1); + OPointsSchema::Sample samp(V3fArraySample( ( const V3f * )prim->verts.data(), prim->verts.size() )); + std::vector ids(prim->verts.size()); + if (prim->verts.attr_is("id")) { + auto &ids_ = prim->verts.attr("id"); + for (auto i = 0; i < prim->verts.size(); i++) { + ids[i] = ids_[i]; + } + } + else { + std::iota(ids.begin(), ids.end(), 0); + } + samp.setIds(Alembic::Abc::UInt64ArraySample(ids.data(), ids.size())); + write_velocity(prim, samp); + write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, points, frameid, real_frame_start, prim_size_per_frame); + points.set( samp ); + } } }; @@ -695,6 +722,7 @@ ZENDEFNODE(WriteAlembic2, { {"int", "frame_end", "100"}, {"float", "fps", "25"}, {"bool", "flipFrontBack", "1"}, + {"bool", "outputPoint", "0"}, }, { }, From 1d9d333a3c2da5f0863adb4b0922bf7eda47bc1f Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 11 May 2024 22:36:33 +0800 Subject: [PATCH 020/244] fix empty prim --- projects/Alembic/ReadAlembic.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/Alembic/ReadAlembic.cpp b/projects/Alembic/ReadAlembic.cpp index 591c2aa5ad..9f2a75c8cf 100644 --- a/projects/Alembic/ReadAlembic.cpp +++ b/projects/Alembic/ReadAlembic.cpp @@ -1218,6 +1218,10 @@ ZENDEFNODE(ReadAlembic, { }); std::shared_ptr abc_split_by_name(std::shared_ptr prim, bool add_when_none) { + auto list = std::make_shared(); + if (prim->verts.size() == 0) { + return list; + } int 
faceset_count = prim->userData().get2("faceset_count"); if (add_when_none && faceset_count == 0) { auto name = prim->userData().get2("_abc_name"); @@ -1228,7 +1232,6 @@ std::shared_ptr abc_split_by_name(std::shared_ptr p for (auto f = 0; f < faceset_count; f++) { faceset_map[f] = {}; } - auto list = std::make_shared(); if (prim->polys.size()) { auto &faceset = prim->polys.add_attr("faceset"); for (auto j = 0; j < faceset.size(); j++) { From 6576b0bf1edbbdbe685e42ed3f695596ca12aa4c Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Tue, 14 May 2024 16:15:15 +0800 Subject: [PATCH 021/244] remove abused memory allocation --- .../CuLagrange/geometry/kernel/geo_math.hpp | 26 ++ .../geometry/kernel/intersection.hpp | 47 ++- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 160 +++++--- projects/CuLagrange/pbd/ConstraintsSolver.cu | 351 +++++++++++------- projects/CuLagrange/pbd/ConstraintsUpdator.cu | 13 +- .../constraint_types.hpp | 4 + 6 files changed, 400 insertions(+), 201 deletions(-) diff --git a/projects/CuLagrange/geometry/kernel/geo_math.hpp b/projects/CuLagrange/geometry/kernel/geo_math.hpp index 35196bd248..300d0693e2 100644 --- a/projects/CuLagrange/geometry/kernel/geo_math.hpp +++ b/projects/CuLagrange/geometry/kernel/geo_math.hpp @@ -285,6 +285,7 @@ namespace zeno { namespace LSL_GEO { /////////////////////////////////////////////////////////////////////// constexpr VECTOR3 get_vertex_triangle_barycentric_coordinates(const VECTOR3 vertices[4]) { +#if 1 const VECTOR3 v0 = vertices[1]; const VECTOR3 v1 = vertices[2]; const VECTOR3 v2 = vertices[3]; @@ -304,6 +305,31 @@ namespace zeno { namespace LSL_GEO { n.dot(nc) / n.l2NormSqr()); return barycentric; +#else + constexpr auto eps = 1e-6; + const auto& v1 = vertices[1]; + const auto& v2 = vertices[2]; + const auto& v3 = vertices[3]; + const auto& v4 = vertices[0]; + + auto x13 = v1 - v3; + auto x23 = v2 - v3; + auto x43 = v4 - v3; + auto A00 = x13.dot(x13); + auto A01 = x13.dot(x23); + auto A11 = x23.dot(x23); + 
auto b0 = x13.dot(x43); + auto b1 = x23.dot(x43); + auto detA = A00 * A11 - A01 * A01; + + VECTOR3 bary{}; + + bary[0] = ( A11 * b0 - A01 * b1) / detA; + bary[1] = (-A01 * b0 + A00 * b1) / detA; + bary[2] = 1 - bary[0] - bary[1]; + + return bary; +#endif } diff --git a/projects/CuLagrange/geometry/kernel/intersection.hpp b/projects/CuLagrange/geometry/kernel/intersection.hpp index 49ff89e373..296a7dedb2 100644 --- a/projects/CuLagrange/geometry/kernel/intersection.hpp +++ b/projects/CuLagrange/geometry/kernel/intersection.hpp @@ -321,7 +321,9 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, const float& thickness, bool use_barycentric_interpolator = false, bool skip_too_close_pair_at_rest_configuration = false, - bool use_collision_group = false) { + bool use_collision_group = false, + bool among_same_group = true, + bool among_different_group = true) { using namespace zs; using vec2i = zs::vec; using bv_t = typename ZenoParticles::lbvh_t::Box; @@ -348,6 +350,8 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, // nm_ints.setVal(0); #endif pol(zs::range(edges.size()),[ + among_different_group = among_different_group, + among_same_group = among_same_group, exec_tag = exec_tag, edges = edges.begin("inds", dim_c<2>, int_c), triNrmOffset = tris.getPropertyOffset("nrm"), @@ -451,6 +455,19 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, } } + if(hasCollisionGroup) { + for(int i = 0;i != 2;++i) { + for(int j = 0;j != 3;++j) { + if(zs::abs(verts(collisionGroupOffset,edge[i]) - verts(collisionGroupOffset,tri[j])) > 0.5) { + if(!among_different_group) + return; + if(!among_same_group) + return; + } + } + } + } + vec3 tvs[3] = {}; for(int i = 0;i != 3;++i) tvs[i] = verts.pack(dim_c<3>,xOffset,tri[i]); @@ -555,18 +572,20 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, typename BaryTileVec, typename TriBVH> void retrieve_intersection_with_edge_tri_pairs(Pol& pol, - const PosTileVec& verts, 
const zs::SmallString& xtag, + const PosTileVec& verts, const zs::SmallString& xtag,const zs::SmallString& collisionGroupName, const EdgeTileVec& edges, // const TriTileVec& tris, // const TriBVH& tri_bvh, - const PosTileVec& kverts, const zs::SmallString& kxtag, + const PosTileVec& kverts, const zs::SmallString& kxtag,const zs::SmallString& kCollisionGroupName, // const EdgeTileVec& kedges, const TriTileVec& ktris, const TriBVH& ktri_bvh, zs::bht& res, // zs::bht& cs_KET, BaryTileVec& bary_buffer, - bool use_barycentric_interpolator = false) { + bool use_barycentric_interpolator = false, + bool among_same_group = true, + bool among_different_group = true) { using namespace zs; using vec2i = zs::vec; using bv_t = typename ZenoParticles::lbvh_t::Box; @@ -583,10 +602,16 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, // timer.tick(); pol(zs::range(edges.size()),[ + among_same_group = among_same_group, + among_different_group = among_different_group, exec_tag = exec_tag, use_barycentric_interpolator = use_barycentric_interpolator, edges = edges.begin("inds", dim_c<2>, int_c), bary_buffer = proxy({},bary_buffer), + hasKCollisionGroup = kverts.hasProperty(kCollisionGroupName), + kCollisionGroupOffset = kverts.getPropertyOffset(kCollisionGroupName), + hasCollisionGroup = verts.hasProperty(collisionGroupName), + collisionGroupOffset = verts.getPropertyOffset(collisionGroupName), ktriNrmOffset = ktris.getPropertyOffset("nrm"), ktriDOffset = ktris.getPropertyOffset("d"), ktriIndsOffset = ktris.getPropertyOffset("inds"), @@ -643,6 +668,20 @@ int retrieve_intersection_tri_halfedge_info_of_two_meshes(Pol& pol, if(!is_dynamic_edge || !is_dynamic_ktri) return; + + if(hasCollisionGroup && hasKCollisionGroup) { + for(int i = 0;i != 2;++i) { + for(int j = 0;j != 3;++j) { + if(zs::abs(verts(collisionGroupOffset,edge[i]) - kverts(kCollisionGroupOffset,ktri[j])) > 0.5) { + if(!among_different_group) + return; + if(!among_same_group) + return; + } + } + } + } + 
vec3 ktvs[3] = {}; for(int i = 0;i != 3;++i) ktvs[i] = kverts.pack(dim_c<3>,kxOffset,ktri[i]); diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index 5af91bbbf9..b31cc03c6a 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -449,61 +449,96 @@ virtual void apply() override { if(type == "follow_animation_constraint") { constexpr auto eps = 1e-6; + auto use_hard_constraint = get_input2("use_hard_constraint"); constraint->setMeta(CONSTRAINT_KEY,category_c::follow_animation_constraint); + constraint->setMeta(PBD_USE_HARD_CONSTRAINT,use_hard_constraint); + if(!has_input("target")) { std::cout << "no target specify while adding follow animation constraint" << std::endl; throw std::runtime_error("no target specify while adding follow animation constraint"); } + + auto animaskGroupName = get_input2("group_name"); + if(!verts.hasProperty(animaskGroupName)) { + std::cout << "the zcloth should has \'ani_mask\' nodal attribute" << std::endl; + throw std::runtime_error("the zcloth should has \'ani_mask\' nodal attribute"); + } + auto target = get_input("target"); if(target->getParticles().size() != verts.size()) { std::cout << "the size of target and the cloth not match : " << target->getParticles().size() << "\t" << source->getParticles().size() << std::endl; throw std::runtime_error("the size of the target and source not matched"); } const auto& kverts = target->getParticles(); - if(!kverts.hasProperty("ani_mask")) { - std::cout << "the animation target should has \'ani_mask\' nodal attribute" << std::endl; - throw std::runtime_error("the animation target should has \'ani_mask\' nodal attribute"); - } - zs::Vector> point_topos{quads.get_allocator(),0}; - point_topos.resize(verts.size()); - cudaPol(zip(zs::range(point_topos.size()),point_topos),[] ZS_LAMBDA(const auto& id,auto& pi) mutable {pi = id;}); + zs::Vector> point_topos{verts.get_allocator(),0}; + 
zs:bht pin_point_set{verts.get_allocator(),verts.size()}; + pin_point_set.reset(cudaPol,true); + + cudaPol(zs::range(verts.size()),[ + verts = proxy({},verts), + gname = zs::SmallString(animaskGroupName), + pin_point_set = proxy(pin_point_set)] ZS_LAMBDA(int vi) mutable { + auto gtag = verts(gname,vi); + if(gtag > 1e-6) + pin_point_set.insert(vi); + }); + + point_topos.resize(pin_point_set.size()); + cudaPol(zip(zs::range(pin_point_set.size()),pin_point_set._activeKeys),[ + point_topos = proxy(point_topos) + ] ZS_LAMBDA(const auto& id,const auto& pvec) mutable { + point_topos[id] = pvec; + }); // std::cout << "nm binder point : " << point_topos.size() << std::endl; if(do_constraint_topological_coloring) { topological_coloring(cudaPol,point_topos,colors,false); sort_topology_by_coloring_tag(cudaPol,colors,reordered_map,color_offset); } - eles.resize(verts.size()); + eles.resize(point_topos.size()); // we need an extra 'inds' tag, in case the source and animation has different topo eles.append_channels(cudaPol,{{"inds",1},{"follow_weight",1}}); cudaPol(zs::range(eles.size()),[ - kverts = proxy({},kverts), + verts = proxy({},verts), + animaskOffset = verts.getPropertyOffset(animaskGroupName), do_constraint_topological_coloring = do_constraint_topological_coloring, + use_hard_constraint = use_hard_constraint, reordered_map = proxy(reordered_map), point_topos = proxy(point_topos), eles = proxy({},eles)] ZS_LAMBDA(int oei) mutable { auto ei = do_constraint_topological_coloring ? reordered_map[oei] : oei; auto pi = point_topos[ei][0]; - auto am = kverts("ani_mask",pi); + auto am = verts(animaskOffset,pi); am = am > 1 ? 1 : am; am = am < 0 ? 0 : am; - eles("follow_weight",oei) = 1 - am; + + if(use_hard_constraint) { + am = am > 0.5 ? 1 : 0; + } + eles("follow_weight",oei) = am; eles("inds",oei) = zs::reinterpret_bits(pi); + if(use_hard_constraint) + verts("minv",pi) = am > 0.5 ? 
0 : verts("minv",pi); }); // not sure about effect by increasing the nodal mass - cudaPol(zs::range(verts.size()),[ - verts = proxy({},verts), - kverts = proxy({},kverts)] ZS_LAMBDA(int vi) mutable { - verts("minv",vi) = (1 - kverts("ani_mask",vi)) * verts("minv",vi); - }); + // cudaPol(zs::range(eles.size()),[ + // verts = proxy({},verts), + // use_hard_constraint = use_hard_constraint, + // followWeightOffset = eles.getPropertyOffset("follow_weight"), + // eles = proxy({},eles)] ZS_LAMBDA(int ei) mutable { + // auto vi = reinterpret_bits(eles("inds",ei)); + // if(use_hard_constraint) + // verts("minv",vi) = eles(followWeightOffset,ci) > 0.5 ? 0 : verts("minv",vi); + // }); // TILEVEC_OPS::copy(cudaPol,eles,"inds",eles,"vis_inds"); constraint->setMeta(CONSTRAINT_TARGET,target.get()); } if(type == "reference_dcd_collision_constraint") { + constexpr auto exec_tag = wrapv{}; constexpr auto eps = 1e-6; constexpr auto MAX_IMMINENT_COLLISION_PAIRS = 200000; auto dcd_source_xtag = get_input2("dcd_source_xtag"); @@ -511,6 +546,21 @@ virtual void apply() override { eles.append_channels(cudaPol,{{"inds",4},{"bary",4},{"type",1}}); eles.resize(MAX_IMMINENT_COLLISION_PAIRS); + if(!source->hasAuxData(DCD_COUNTER_BUFFER)) { + (*source)[DCD_COUNTER_BUFFER] = dtiles_t{verts.get_allocator(),{ + {"cnt",1} + },verts.size()}; + } + if(!source->hasAuxData(COLLISION_BUFFER)) { + (*source)[COLLISION_BUFFER] = dtiles_t{verts.get_allocator(),{ + {"inds",4},{"bary",4},{"type",1} + },MAX_IMMINENT_COLLISION_PAIRS}; + } + auto& collision_buffer = (*source)[COLLISION_BUFFER]; + auto& collision_counter = (*source)[DCD_COUNTER_BUFFER]; + // cudaPol(zs::range(collision_counter),[] ZS_LAMBDA(auto& cnt) {cnt = 0;}); + TILEVEC_OPS::fill(cudaPol,collision_counter,"cnt",0.f); + auto among_same_group = get_input2("among_same_group"); auto among_different_groups = get_input2("among_different_groups"); @@ -634,8 +684,19 @@ virtual void apply() override { }); } - zs::bht 
csPT{verts.get_allocator(),(size_t)MAX_IMMINENT_COLLISION_PAIRS};csPT.reset(cudaPol,true); - zs::bht csEE{edges.get_allocator(),(size_t)MAX_IMMINENT_COLLISION_PAIRS};csEE.reset(cudaPol,true); + if(!source->hasMeta(COLLSIION_CSPT_SET)) { + source->setMeta(COLLSIION_CSPT_SET,zs::bht{verts.get_allocator(),(size_t)MAX_IMMINENT_COLLISION_PAIRS}); + } + auto&csPT = source->readMeta &>(COLLSIION_CSPT_SET); + csPT.reset(cudaPol,true); + + if(!source->hasMeta(COLLISION_CSEE_SET)) { + source->setMeta(COLLISION_CSEE_SET,zs::bht{verts.get_allocator(),(size_t)MAX_IMMINENT_COLLISION_PAIRS}); + } + auto&csEE = source->readMeta &>(COLLISION_CSEE_SET); + csEE.reset(cudaPol,true); + + // zs::bht csEE{edges.get_allocator(),(size_t)MAX_IMMINENT_COLLISION_PAIRS};csEE.reset(cudaPol,true); auto triBvh = bvh_t{}; @@ -647,7 +708,7 @@ virtual void apply() override { imminent_collision_thickness, 0, triBvh, - eles, + collision_buffer, csPT, true, true, @@ -665,34 +726,36 @@ virtual void apply() override { imminent_collision_thickness, csPT.size(), edgeBvh, - eles,csEE, + collision_buffer,csEE, true, true, group_strategy); + + // std::cout << "nm_imminent_csEE : " << csEE.size() << std::endl; - // std::cout << "csEE + csPT = " << csPT.size() + csEE.size() << std::endl; - if(!verts.hasProperty("dcd_collision_tag")) - verts.append_channels(cudaPol,{{"dcd_collision_tag",1}}); - cudaPol(zs::range(verts.size()),[ + auto nm_dcd_collisions = csPT.size() + csEE.size(); + eles.resize(nm_dcd_collisions); + TILEVEC_OPS::copy(cudaPol,collision_buffer,"inds",eles,"inds"); + TILEVEC_OPS::copy(cudaPol,collision_buffer,"bary",eles,"bary"); + TILEVEC_OPS::copy(cudaPol,collision_buffer,"type",eles,"type"); + + + cudaPol(zs::range(nm_dcd_collisions),[ + nm_verts = verts.size(), + exec_tag = exec_tag, verts = proxy({},verts), - vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - verts("dcd_collision_tag",vi) = vtemp("dcd_collision_tag",vi); + minvOffset = verts.getPropertyOffset("minv"), + indsOffset = 
eles.getPropertyOffset("inds"), + collision_counter = proxy({},collision_counter), + eles = proxy({},eles)] ZS_LAMBDA(const auto& ci) mutable { + auto inds = eles.pack(dim_c<4>,indsOffset,ci,int_c); + for(int i = 0;i != 4;++i) + if(inds[i] < nm_verts && verts(minvOffset,inds[i]) > 1e-5 && inds[i] > -1) { + atomic_add(exec_tag,&collision_counter("cnt",inds[i]),1.f); + } }); - if(has_input_collider) { - auto collider = get_input("target"); - auto& kverts = collider->getParticles(); - if(!kverts.hasProperty("dcd_collision_tag")) - kverts.append_channels(cudaPol,{{"dcd_collision_tag",1}}); - cudaPol(zs::range(kverts.size()),[ - kverts = proxy({},kverts), - voffset = verts.size(), - vtemp = proxy({},vtemp)] ZS_LAMBDA(int kvi) mutable { - kverts("dcd_collision_tag",kvi) = vtemp("dcd_collision_tag",kvi + voffset); - }); - } - - constraint->setMeta(NM_DCD_COLLISIONS,csEE.size() + csPT.size()); + constraint->setMeta(NM_DCD_COLLISIONS,(size_t)nm_dcd_collisions); constraint->setMeta(GLOBAL_DCD_THICKNESS,imminent_collision_thickness); } @@ -867,7 +930,6 @@ virtual void apply() override { proximity_buffer(typeOffset,id + buffer_offset) = zs::reinterpret_bits((int)1); proximity_buffer.tuple(dim_c<3>,hitPointOffset,id + buffer_offset) = kp; proximity_buffer.tuple(dim_c<3>,hitVelocityOffset,id + buffer_offset) = kv; - // proximity_buffer.tuple(dim_c<3>,hitNormalOffset,id) = hit_normal; }); @@ -916,12 +978,6 @@ virtual void apply() override { hit_point = bary[2] * kps[0] + bary[3] * kps[1]; hit_velocity = bary[2] * kvs[0] + bary[3] * kvs[1]; - // auto hit_normal = bary[0] * ps[0] + bary[1] * ps[1] + bary[2] * kps[0] + bary[3] * kps[1]; - // if(hit_normal.norm() > eps) - // hit_normal = hit_normal.normalized(); - // else - // hit_normal = (ps[1] - ps[0]).cross(kps[1] - kps[0]).normalized(); - proximity_buffer.tuple(dim_c<4>,baryOffset,id + buffer_offset) = bary; proximity_buffer.tuple(dim_c<4>,indsOffset,id + buffer_offset) = inds.reinterpret_bits(float_c); 
proximity_buffer(typeOffset,id + buffer_offset) = zs::reinterpret_bits((int)2); @@ -1136,7 +1192,7 @@ virtual void apply() override { auto ws = compute_vertex_tetrahedron_barycentric_weights(p,ktps[0],ktps[1],ktps[2],ktps[3]); T epsilon = zs::limits::epsilon(); - if(ws[0] > epsilon && ws[1] > epsilon && ws[2] > epsilon && ws[3] > epsilon){ + if(ws[0] > -epsilon && ws[1] > -epsilon && ws[2] > -epsilon && ws[3] > -epsilon){ embed_kti = kti; bary = ws; found = true; @@ -1157,6 +1213,8 @@ virtual void apply() override { } if(type == "point_cell_pin") { + auto use_hard_constraint = get_input2("use_hard_constraint"); + constexpr auto eps = 1e-6; constraint->setMeta(CONSTRAINT_KEY,category_c::vertex_pin_to_cell_constraint); @@ -1177,6 +1235,7 @@ virtual void apply() override { if(!target->hasAuxData(TARGET_CELL_BUFFER)) { (*target)[TARGET_CELL_BUFFER] = dtiles_t{kverts.get_allocator(),{ {"cx",3}, + // {"px",3}, {"x",3}, {"v",3}, {"nrm",3} @@ -1212,6 +1271,7 @@ virtual void apply() override { TILEVEC_OPS::fill(cudaPol,verts,"pinSuccess",0.f); cudaPol(zs::range(verts.size()),[ + use_hard_constraint = use_hard_constraint, verts = proxy({},verts), has_pin_group = has_pin_group, eps = eps, @@ -1223,7 +1283,7 @@ virtual void apply() override { cell_buffer = proxy({},cell_buffer), cellBvh = proxy(cellBvh)] ZS_LAMBDA(int vi) mutable { auto p = verts.pack(dim_c<3>,"x",vi); - if(verts("minv",vi) < eps) + if(verts("minv",vi) < eps && !use_hard_constraint) return; if(has_pin_group && verts(pin_group_name,vi) < eps) { @@ -1264,6 +1324,8 @@ virtual void apply() override { auto id = binder_set.insert(vec2i{vi,closest_kti}); binder_buffer.tuple(dim_c<6>,"bary",id) = closest_bary; verts("pinSuccess",vi) = 1.f; + if(use_hard_constraint) + verts("minv",vi) = 0; auto ktri = ktris.pack(dim_c<3>,"inds",closest_kti,int_c); vec3 as[3] = {}; diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index 99001db012..fe025ba193 100644 --- 
a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -338,16 +338,20 @@ struct XPBDSolveSmooth : INode { // auto all_constraints = RETRIEVE_OBJECT_PTRS(ZenoParticles, "all_constraints"); auto constraints = get_input("constraints"); - // auto ptag = get_param("ptag"); + auto dptag = get_input2("dptag"); + auto ptag = get_input2("ptag"); + auto pptag = get_input2("pptag"); auto relaxs = get_input2("relaxation_strength"); auto& verts = zsparticles->getParticles(); + if(!verts.hasProperty(dptag)) + verts.append_channels(cudaPol,{{dptag,3}}); auto nm_smooth_iters = get_input2("nm_smooth_iters"); - zs::Vector dp_buffer{verts.get_allocator(),verts.size() * 3}; - cudaPol(zs::range(dp_buffer),[]ZS_LAMBDA(auto& v) {v = 0;}); - zs::Vector dp_count{verts.get_allocator(),verts.size()}; - cudaPol(zs::range(dp_count),[]ZS_LAMBDA(auto& c) {c = 0;}); + // zs::Vector dp_buffer{verts.get_allocator(),verts.size() * 3}; + // cudaPol(zs::range(dp_buffer),[]ZS_LAMBDA(auto& v) {v = 0;}); + // zs::Vector dp_count{verts.get_allocator(),verts.size()}; + // cudaPol(zs::range(dp_count),[]ZS_LAMBDA(auto& c) {c = 0;}); @@ -373,91 +377,97 @@ struct XPBDSolveSmooth : INode { auto pw = (float)(substep_id) / (float)nm_substeps; auto nm_verts = verts.size(); - auto nm_tris = tris.size(); - auto nm_edges = edges.size(); - - if(has_input_collider) { - auto collider = constraints->readMeta(CONSTRAINT_TARGET,zs::wrapt{}); - nm_verts += collider->getParticles().size(); - nm_edges += (*collider)[ZenoParticles::s_surfEdgeTag].size(); - nm_tris += collider->getQuadraturePoints().size(); - } - - dtiles_t vtemp{verts.get_allocator(),{ - {"x",3}, - {"v",3}, - {"minv",1}, - {"m",1}, - // {"collision_cancel",1} - },nm_verts}; + // auto nm_tris = tris.size(); + // auto nm_edges = edges.size(); + + // if(has_input_collider) { + // auto collider = constraints->readMeta(CONSTRAINT_TARGET,zs::wrapt{}); + // nm_verts += collider->getParticles().size(); + // nm_edges += 
(*collider)[ZenoParticles::s_surfEdgeTag].size(); + // nm_tris += collider->getQuadraturePoints().size(); + // } + + // dtiles_t vtemp{verts.get_allocator(),{ + // {"x",3}, + // {"v",3}, + // {"minv",1}, + // {"m",1}, + // // {"collision_cancel",1} + // },nm_verts}; + + + + // TILEVEC_OPS::copy<3>(cudaPol,verts,pptag,vtemp,"x"); + // TILEVEC_OPS::copy(cudaPol,verts,"minv",vtemp,"minv"); + // TILEVEC_OPS::copy(cudaPol,verts,"m",vtemp,"m"); + + // cudaPol(zs::range(verts.size()),[ + // vtemp = proxy({},vtemp), + // pptag = zs::SmallString(pptag), + // verts = proxy({},verts)] ZS_LAMBDA(int vi) mutable { + // vtemp.tuple(dim_c<3>,"v",vi) = verts.pack(dim_c<3>,"x",vi) - verts.pack(dim_c<3>,pptag,vi); + // }); + + // if(has_input_collider) { + // auto boundary_velocity_scale = get_input2("boundary_velocity_scale"); + + auto collider = constraints->readMeta(CONSTRAINT_TARGET,zs::wrapt{}); + const auto& kverts = collider->getParticles(); + const auto& kedges = (*collider)[ZenoParticles::s_surfEdgeTag]; + const auto& ktris = collider->getQuadraturePoints(); + + // auto voffset = verts.size(); + // cudaPol(zs::range(kverts.size()),[ + // kverts = proxy({},kverts), + // voffset = voffset, + // pw = pw, + // boundary_velocity_scale = boundary_velocity_scale, + // w = w, + // nm_substeps = nm_substeps, + // // hasKCollisionCancel = kverts.hasProperty("collision_cancel"), + // // kCollisionCancelOffset = kverts.getPropertyOffset("collision_cancel"), + // vtemp = proxy({},vtemp)] ZS_LAMBDA(int kvi) mutable { + // auto pre_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - pw) + kverts.pack(dim_c<3>,"x",kvi) * pw; + // auto cur_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - w) + kverts.pack(dim_c<3>,"x",kvi) * w; + // vtemp.tuple(dim_c<3>,"x",voffset + kvi) = pre_kvert; + // vtemp("minv",voffset + kvi) = 0; + // vtemp("m",voffset + kvi) = (T)1000; + // vtemp.tuple(dim_c<3>,"v",voffset + kvi) = (cur_kvert - pre_kvert) * boundary_velocity_scale; + // // if(hasKCollisionCancel) + // // 
vtemp("collision_cancel",voffset + kvi) = kverts("collision_cancel",kvi); + // }); + // } - auto pptag = get_input2("pptag"); - - TILEVEC_OPS::copy<3>(cudaPol,verts,pptag,vtemp,"x"); - TILEVEC_OPS::copy(cudaPol,verts,"minv",vtemp,"minv"); - TILEVEC_OPS::copy(cudaPol,verts,"m",vtemp,"m"); - // TILEVEC_OPS::fill(cudaPol,vtemp,"collision_cancel",0); - // if(verts.hasProperty("collision_cancel")) - // TILEVEC_OPS::copy(cudaPol,verts,"collision_cancel",vtemp,"collision_cancel"); - // else - // TILEVEC_OPS::fill(cudaPol,vtemp,"collision_cancel",0); - cudaPol(zs::range(verts.size()),[ - vtemp = proxy({},vtemp), - pptag = zs::SmallString(pptag), - verts = proxy({},verts)] ZS_LAMBDA(int vi) mutable { - vtemp.tuple(dim_c<3>,"v",vi) = verts.pack(dim_c<3>,"x",vi) - verts.pack(dim_c<3>,pptag,vi); - }); + auto add_repulsion_force = get_input2("add_repulsion_force"); - if(has_input_collider) { - auto boundary_velocity_scale = get_input2("boundary_velocity_scale"); + const auto& dp_count = (*zsparticles)[DCD_COUNTER_BUFFER]; - auto collider = constraints->readMeta(CONSTRAINT_TARGET,zs::wrapt{}); - const auto& kverts = collider->getParticles(); - const auto& kedges = (*collider)[ZenoParticles::s_surfEdgeTag]; - const auto& ktris = collider->getQuadraturePoints(); - - auto voffset = verts.size(); - cudaPol(zs::range(kverts.size()),[ - kverts = proxy({},kverts), - voffset = voffset, - pw = pw, - boundary_velocity_scale = boundary_velocity_scale, - w = w, - nm_substeps = nm_substeps, - // hasKCollisionCancel = kverts.hasProperty("collision_cancel"), - // kCollisionCancelOffset = kverts.getPropertyOffset("collision_cancel"), - vtemp = proxy({},vtemp)] ZS_LAMBDA(int kvi) mutable { - auto pre_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - pw) + kverts.pack(dim_c<3>,"x",kvi) * pw; - auto cur_kvert = kverts.pack(dim_c<3>,"px",kvi) * (1 - w) + kverts.pack(dim_c<3>,"x",kvi) * w; - vtemp.tuple(dim_c<3>,"x",voffset + kvi) = pre_kvert; - vtemp("minv",voffset + kvi) = 0; - vtemp("m",voffset + 
kvi) = (T)1000; - vtemp.tuple(dim_c<3>,"v",voffset + kvi) = (cur_kvert - pre_kvert) * boundary_velocity_scale; - // if(hasKCollisionCancel) - // vtemp("collision_cancel",voffset + kvi) = kverts("collision_cancel",kvi); - }); - } - - auto add_repulsion_force = get_input2("add_repulsion_force"); + // std::cout << "nm_dcd_collisions : " << nm_dcd_collisions << std::endl; for(auto iter = 0;iter != nm_smooth_iters;++iter) { - cudaPol(zs::range(verts.size()),[ - dp_buffer = proxy(dp_buffer), - dp_count = proxy(dp_count)] ZS_LAMBDA(int vi) mutable { - for(int d = 0;d != 3;++d) - dp_buffer[vi * 3 + d] = 0; - dp_count[vi] = 0; - }); + + // cudaPol(zs::range(verts.size()),[ + // dp_buffer = proxy(dp_buffer)] ZS_LAMBDA(int vi) mutable { + // for(int d = 0;d != 3;++d) + // dp_buffer[vi * 3 + d] = 0; + // // dp_count[vi] = 0; + // }); + TILEVEC_OPS::fill(cudaPol,verts,dptag,(T)0); cudaPol(zs::range(nm_dcd_collisions),[ cquads = proxy({},cquads), - vtemp = proxy({},vtemp), + verts = proxy({},verts), + dptagOffset = verts.getPropertyOffset(dptag), + ptagOffset = verts.getPropertyOffset(ptag), + pptagOffset = verts.getPropertyOffset(pptag), + kverts = proxy({},kverts), exec_tag = exec_tag, eps = eps, + pw = pw, + w = w, + nm_verts = nm_verts, add_repulsion_force = add_repulsion_force, - imminent_thickness = imminent_thickness, - dp_buffer = proxy(dp_buffer), - dp_count = proxy(dp_count)] ZS_LAMBDA(int ci) mutable { + imminent_thickness = imminent_thickness] ZS_LAMBDA(int ci) mutable { auto inds = cquads.pack(dim_c<4>,"inds",ci,int_c); auto bary = cquads.pack(dim_c<4>,"bary",ci); auto type = zs::reinterpret_bits(cquads("type",ci)); @@ -468,10 +478,21 @@ struct XPBDSolveSmooth : INode { vec4 ms{}; for(int i = 0;i != 4;++i) { - ps[i] = vtemp.pack(dim_c<3>,"x",inds[i]); - vs[i] = vtemp.pack(dim_c<3>,"v",inds[i]); - minvs[i] = vtemp("minv",inds[i]); - ms[i] = vtemp("m",inds[i]); + if(inds[i] < nm_verts) { + ps[i] = verts.pack(dim_c<3>,pptagOffset,inds[i]); + vs[i] = 
verts.pack(dim_c<3>,ptagOffset,inds[i]) - verts.pack(dim_c<3>,pptagOffset,inds[i]); + minvs[i] = verts("minv",inds[i]); + ms[i] = verts("m",inds[i]); + } else { + auto kp = kverts.pack(dim_c<3>,"x",(size_t)(inds[i] - nm_verts)); + auto kpp = kverts.pack(dim_c<3>,"px",(size_t)(inds[i] - nm_verts)); + auto pre_kvert = kpp * (1 - pw) + kp * pw; + auto cur_kvert = kpp * (1 - w) + kp * w; + ps[i] = pre_kvert; + vs[i] = cur_kvert - pre_kvert; + minvs[i] = (T)0; + ms[i] = (T)1000; + } } vec3 imps[4] = {}; @@ -486,43 +507,51 @@ struct XPBDSolveSmooth : INode { add_repulsion_force)) return; for(int i = 0;i != 4;++i) { - if(minvs[i] < eps) + if(minvs[i] < eps || inds[i] >= nm_verts) continue; + - if(isnan(imps[i].norm())) { - printf("nan imps detected : %f %f %f %f %f %f %f\nvs : %d %d %d %d\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", - (float)imps[i][0],(float)imps[i][1],(float)imps[i][2], - (float)bary[0],(float)bary[1],(float)bary[2],(float)bary[3], - inds[0],inds[1],inds[2],inds[3], - (float)ps[0][0],(float)ps[0][1],(float)ps[0][2], - (float)ps[1][0],(float)ps[1][1],(float)ps[1][2], - (float)ps[2][0],(float)ps[2][1],(float)ps[2][2], - (float)ps[3][0],(float)ps[3][1],(float)ps[3][2]); - return; - } - atomic_add(exec_tag,&dp_count[inds[i]],(int)1); + // if(isnan(imps[i].norm())) { + // printf("nan imps detected : %f %f %f %f %f %f %f\nvs : %d %d %d %d\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n%f\t%f\t%f\n", + // (float)imps[i][0],(float)imps[i][1],(float)imps[i][2], + // (float)bary[0],(float)bary[1],(float)bary[2],(float)bary[3], + // inds[0],inds[1],inds[2],inds[3], + // (float)ps[0][0],(float)ps[0][1],(float)ps[0][2], + // (float)ps[1][0],(float)ps[1][1],(float)ps[1][2], + // (float)ps[2][0],(float)ps[2][1],(float)ps[2][2], + // (float)ps[3][0],(float)ps[3][1],(float)ps[3][2]); + // return; + // } + // atomic_add(exec_tag,&dp_count[inds[i]],(int)1); + // printf("imps : %f\n",(float)imps[i].norm()); for(int d = 0;d != 3;++d) - atomic_add(exec_tag,&dp_buffer[inds[i] * 
3 + d],imps[i][d]); + atomic_add(exec_tag,&verts(dptagOffset + d,inds[i]),imps[i][d]); } }); + // auto ndp = TILEVEC_OPS::dot<3>(cudaPol,verts,dptag,dptag); + // std::cout << "ndp : " << ndp << std::endl; + cudaPol(zs::range(verts.size()),[ - vtemp = proxy({},vtemp),relaxs = relaxs, - dp_count = proxy(dp_count), - dp_buffer = proxy(dp_buffer)] ZS_LAMBDA(int vi) mutable { - if(dp_count[vi] > 0) { - auto dp = relaxs * vec3{dp_buffer[vi * 3 + 0],dp_buffer[vi * 3 + 1],dp_buffer[vi * 3 + 2]}; - vtemp.tuple(dim_c<3>,"v",vi) = vtemp.pack(dim_c<3>,"v",vi) + dp / (T)dp_count[vi]; + verts = proxy({},verts), + relaxs = relaxs, + dp_count = proxy({},dp_count), + dptagOffset = verts.getPropertyOffset(dptag), + ptagOffset = verts.getPropertyOffset(ptag)] ZS_LAMBDA(int vi) mutable { + if(dp_count("cnt",vi) > 0.5) { + // auto dp = relaxs * vec3{dp_buffer[vi * 3 + 0],dp_buffer[vi * 3 + 1],dp_buffer[vi * 3 + 2]}; + auto dp = verts.pack(dim_c<3>,dptagOffset,vi) * relaxs; + // printf("update %d : %f %f\n",vi,(float)dp.norm(),dp_count("cnt",vi)); + verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + dp / (T)dp_count("cnt",vi); } }); - } - cudaPol(zs::range(verts.size()),[ - verts = proxy({},verts), - vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { - verts.tuple(dim_c<3>,"x",vi) = vtemp.pack(dim_c<3>,"x",vi) + vtemp.pack(dim_c<3>,"v",vi); - }); + // cudaPol(zs::range(verts.size()),[ + // verts = proxy({},verts), + // vtemp = proxy({},vtemp)] ZS_LAMBDA(int vi) mutable { + // verts.tuple(dim_c<3>,"x",vi) = vtemp.pack(dim_c<3>,"x",vi) + vtemp.pack(dim_c<3>,"v",vi); + // }); } @@ -538,7 +567,9 @@ ZENDEFNODE(XPBDSolveSmooth, {{{"zsparticles"}, {"int","substep_id","0"}, {"bool","add_repulsion_force","0"}, {"float","boundary_velocity_scale","1"}, - {"string","pptag","px"} + {"string","ptag","x"}, + {"string","pptag","px"}, + {"string","dptag","dx"} }, {{"zsparticles"}}, {}, @@ -760,8 +791,10 @@ struct XPBDSolveSmoothAll : INode { if(!verts.hasProperty("w")) { 
verts.append_channels(cudaPol,{{"w",1}}); } - TILEVEC_OPS::fill(cudaPol,verts,"w",0); + if(!verts.hasProperty(dptag)) + verts.append_channels(cudaPol,{{dptag,3}}); + TILEVEC_OPS::fill(cudaPol,verts,dptag,0); auto iter_id = get_input2("iter_id"); @@ -773,6 +806,8 @@ struct XPBDSolveSmoothAll : INode { if(constraint_ptr->userData().has("stride")) { auto stride = objectToLiterial(constraint_ptr->userData().get("stride")); // std::cout << "find constraint with stride = " << stride << std::endl; + // if(stride <= 0 && iter_id != 0) + // continue; if(iter_id % stride != 0) { // std::cout << "skip constraint solving due to stride-skipping" << std::endl; continue; @@ -825,6 +860,7 @@ struct XPBDSolveSmoothAll : INode { cudaPol(zs::range(shape_size),[ offset = shape_matching_offsets.getVal(shape_id), cquads = proxy({},cquads), + indsOffset = cquads.getPropertyOffset("inds"), verts = proxy({},verts), XtagOffset = verts.getPropertyOffset("X"), minvOffset = verts.getPropertyOffset("minv"), @@ -832,7 +868,7 @@ struct XPBDSolveSmoothAll : INode { restCM = restCM, cm = cm, dAs = proxy(dAs)] ZS_LAMBDA(int ci) mutable { - auto vi = zs::reinterpret_bits(cquads("inds",ci + offset)); + auto vi = zs::reinterpret_bits(cquads(indsOffset,ci + offset)); auto q = verts.pack(dim_c<3>,XtagOffset,vi) - restCM; auto p = verts.pack(dim_c<3>,ptagOffset,vi) - cm; auto w = static_cast(1.0) / (verts(minvOffset,vi) + static_cast(1e-6)); @@ -946,7 +982,7 @@ struct XPBDSolveSmoothAll : INode { // atomic_add(exec_tag,&weight_sum[edge[i]],w); atomic_add(exec_tag,&verts(wOffset,edge[i]),w); for(int d = 0;d != 3;++d) - atomic_add(exec_tag,&verts(dptagOffset + d,edge[i]),dp[i][d] * w); + atomic_add(exec_tag,&verts(dptagOffset + d,edge[i]),dp[i][d]); } } @@ -981,7 +1017,7 @@ struct XPBDSolveSmoothAll : INode { return; for(int i = 0;i != 4;++i) { if(isnan(dp[i].norm())) - printf("nan dp[%d] detected at stretch\n",i); + printf("nan dp[%d] detected at bending\n",i); // 
atomic_add(exec_tag,&weight_sum[quad[i]],w); atomic_add(exec_tag,&verts(wOffset,quad[i]),w); for(int d = 0;d != 3;++d) @@ -1104,7 +1140,7 @@ struct XPBDSolveSmoothAll : INode { cquads = proxy({},cquads), cell_buffer = proxy({},cell_buffer), dptagOffset = verts.getPropertyOffset(dptag), - ptagOffet = verts.getPropertyOffset(ptag), + ptagOffset = verts.getPropertyOffset(ptag), ktris = ktris.begin("inds",dim_c<3>,int_c), enable_sliding = enable_sliding, wOffset = verts.getPropertyOffset("w"), @@ -1131,50 +1167,77 @@ struct XPBDSolveSmoothAll : INode { tp += bs[i] * bary[i + 3]; } - auto dp = tp - verts.pack(dim_c<3>,ptagOffet,vi); + auto dp = tp - verts.pack(dim_c<3>,ptagOffset,vi); + - // auto dpn = dp.norm(); - // if(dpn > 0.1) { - // printf("dp[%d,%d] : %f\n",vi,kti,(float)dpn); - // } - - // if(enable_sliding) { - // auto avg_nrm = vec3::zeros(); - // for(int i = 0;i != 3;++i) { - // avg_nrm += cell_buffer.pack(dim_c<3>,"nrm",ktri[i]); - // } - // avg_nrm = avg_nrm.normalized(); - - // auto dp_normal = dp.dot(avg_nrm) * avg_nrm; - // auto dp_tangent = dp - dp_normal; - // if(dp_tangent.norm() < static_cast(0.1)) - // dp_tangent = vec3::zeros(); - // else - // dp_tangent *= static_cast(0.5); - // // dp -= dp_tangent * 0.5; - // dp = dp_tangent + dp_normal; - // } - - // atomic_add(exec_tag,&weight_sum[vi],w); - atomic_add(exec_tag,&verts(wOffset,vi),w); for(int d = 0;d != 3;++d){ - atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[d] * w); + atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[d]); + } + }); + } + + if(category == category_c::follow_animation_constraint) { + auto use_hard_constraint = constraint_ptr->readMeta(PBD_USE_HARD_CONSTRAINT); + if(use_hard_constraint && iter_id > 0) + continue; + + auto animation = constraint_ptr->readMeta(CONSTRAINT_TARGET); + const auto& averts = animation->getParticles(); + auto substep_id = get_input2("substep_id"); + auto nm_substeps = get_input2("nm_substeps"); + auto anim_w = (float)(substep_id + 1) / 
(float)nm_substeps; + + cudaPol(zs::range(cquads.size()),[ + cquads = proxy({},cquads), + verts = proxy({},verts), + alpha = anim_w, + wOffset = verts.getPropertyOffset("w"), + averts = proxy({},averts), + aPtagOffset = averts.getPropertyOffset("x"), + dptagOffset = verts.getPropertyOffset(dptag), + apPtagOffset = averts.getPropertyOffset("px"), + use_hard_constraint = use_hard_constraint, + ptagOffset = verts.getPropertyOffset(ptag), + followWeightOffset = cquads.getPropertyOffset("follow_weight"), + indsOffset = cquads.getPropertyOffset("inds")] ZS_LAMBDA(int ei) mutable { + auto vi = zs::reinterpret_bits(cquads(indsOffset,ei)); + auto w = cquads(followWeightOffset,ei); + auto p = verts.pack(dim_c<3>,ptagOffset,vi); + auto tp = averts.pack(dim_c<3>,apPtagOffset,vi) * (1.f - alpha) + averts.pack(dim_c<3>,aPtagOffset,vi) * alpha; + if(use_hard_constraint) + verts.tuple(dim_c<3>,ptagOffset,vi) = tp; + else { + auto bp = tp * w + p * (1.f - w); + auto dp = bp - p; + atomic_add(exec_tag,&verts(wOffset,vi),w); + for(int d = 0;d != 3;++d){ + atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[d] * w); + } } }); } if(category == category_c::volume_pin_constraint) { + // std::cout << "solve volume pin constraint " << std::endl; + auto use_hard_constraint = constraint_ptr->readMeta(PBD_USE_HARD_CONSTRAINT); + if(use_hard_constraint && iter_id > 0) + continue; + + auto embed_volume = constraint_ptr->readMeta(CONSTRAINT_TARGET); const auto& vverts = embed_volume->getParticles(); - const auto vtets = embed_volume->getQuadraturePoints(); + const auto& vtets = embed_volume->getQuadraturePoints(); - auto use_hard_constraint = constraint_ptr->readMeta(PBD_USE_HARD_CONSTRAINT); auto substep_id = get_input2("substep_id"); auto nm_substeps = get_input2("nm_substeps"); auto volume_anim_w = (float)(substep_id + 1) / (float)nm_substeps; // auto pw = (float)(substep_id) / (float)nm_substeps; + if(!vverts.hasProperty("px")) { + throw std::runtime_error("the vverts has no px channel"); + } 
+ cudaPol(zs::range(cquads.size()),[ cquads = proxy({},cquads), verts = proxy({},verts), @@ -1397,11 +1460,13 @@ struct XPBDSolveSmoothAll : INode { verts = proxy({},verts), eps = eps, dptagOffset = verts.getPropertyOffset(dptag), + ptagOffset = verts.getPropertyOffset(ptag), wOffset = verts.getPropertyOffset("w")] ZS_LAMBDA(int vi) mutable { if(verts(wOffset,vi) > eps) verts.tuple(dim_c<3>,dptagOffset,vi) = verts.pack(dim_c<3>,dptagOffset,vi) / verts(wOffset,vi); else verts.tuple(dim_c<3>,dptagOffset,vi) = vec3::zeros(); + verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + verts.pack(dim_c<3>,dptagOffset,vi); }); set_output("zsparticles",get_input("zsparticles")); diff --git a/projects/CuLagrange/pbd/ConstraintsUpdator.cu b/projects/CuLagrange/pbd/ConstraintsUpdator.cu index 6b4a0622ba..95c81a1efd 100644 --- a/projects/CuLagrange/pbd/ConstraintsUpdator.cu +++ b/projects/CuLagrange/pbd/ConstraintsUpdator.cu @@ -52,7 +52,7 @@ virtual void apply() override { auto type = constraint->readMeta(CONSTRAINT_KEY,wrapt{}); // auto do_frame_interpolation = get_input2("do_frame_interpolation"); - if(type == category_c::vertex_pin_to_cell_constraint) { + // if(type == category_c::vertex_pin_to_cell_constraint || type == category_c) { std::cout << "update constraint " << type << std::endl; auto target = get_input("target"); // switch(type) { @@ -71,14 +71,17 @@ virtual void apply() override { const auto& kverts = target->getParticles(); auto& ckverts = ctarget->getParticles(); + if(!ckverts.hasProperty("px")) { + ckverts.append_channels(cudaPol,{{"px",3}}); + } + TILEVEC_OPS::copy(cudaPol,ckverts,"x",ckverts,"px"); TILEVEC_OPS::copy(cudaPol,kverts,"x",ckverts,"x"); - TILEVEC_OPS::copy(cudaPol,kverts,"px",ckverts,"px"); - std::cout << "Update ckverts " << std::endl; // break; // } - } + // } set_output("constraint",constraint); + set_output("source",source); } }; @@ -88,7 +91,7 @@ ZENDEFNODE(UpdateConstraintTarget, {{ {"target"}, {"constraint"} }, 
-{{"constraint"}}, +{{"source"},{"constraint"}}, { // {"string","groupID",""}, }, diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp index d2e4cb24fd..c828e22bf7 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp @@ -7,6 +7,10 @@ constexpr auto CONSTRAINT_TARGET = "XPBD_CONSTRAINT_TARGET"; constexpr auto CONSTRAINT_COLOR_OFFSET = "XPBD_CONSTRAINT_OFFSET"; constexpr auto NM_DCD_COLLISIONS = "NM_DCD_COLLISIONS"; +constexpr auto DCD_COUNTER_BUFFER = "DCD_COUNTER_BUFFER"; +constexpr auto COLLISION_BUFFER = "COLLLISION_BUFFER"; +constexpr auto COLLISION_CSEE_SET = "COLLISION_CSEE_SET"; +constexpr auto COLLSIION_CSPT_SET = "COLLISION_CSPT_SET"; constexpr auto GLOBAL_DCD_THICKNESS = "GLOBAL_DCD_THICKNESS"; constexpr auto ENABLE_SLIDING = "ENABLE_SLIDING"; From 446d8a66ab62d5db2bd7453d017cf89017079966 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Tue, 14 May 2024 16:15:32 +0800 Subject: [PATCH 022/244] detangle support group strategy --- projects/CuLagrange/geometry/Detangle.cu | 41 ++++++++++++++++++------ 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/projects/CuLagrange/geometry/Detangle.cu b/projects/CuLagrange/geometry/Detangle.cu index 514d9c7794..c47890ed8f 100644 --- a/projects/CuLagrange/geometry/Detangle.cu +++ b/projects/CuLagrange/geometry/Detangle.cu @@ -111,6 +111,7 @@ struct Detangle2 : zeno::INode { dtiles_t kvtemp{verts.get_allocator(),{ {"x",3}, {"collision_cancel",1}, + {"collision_group",1} },0}; dtiles_t kttemp{tris.get_allocator(),{ {"inds",3}, @@ -129,6 +130,9 @@ struct Detangle2 : zeno::INode { auto detangle_with_boundary = get_input2("detangle_with_boundary"); auto do_self_detangle = get_input2("do_self_detangle"); + auto among_same_group = get_input2("among_same_group"); + auto among_different_group = 
get_input2("among_different_group"); + if(has_input("kboundary") && detangle_with_boundary) { auto kboundary = get_input("kboundary"); auto substep_id = get_input2("substep_id"); @@ -149,6 +153,8 @@ struct Detangle2 : zeno::INode { kxtag = zs::SmallString(kxtag), kpxtag = zs::SmallString(kpxtag), kverts = proxy({},kverts), + has_collision_group = kverts.hasProperty(collision_group_name), + collision_group_name = zs::SmallString(collision_group_name), hasKCollisionCancel = kverts.hasProperty("colllision_cancel"), kvtemp = proxy({},kvtemp)] ZS_LAMBDA(int kvi) mutable { auto kvert = kverts.pack(dim_c<3>,kpxtag,kvi) * (1 - alpha) + kverts.pack(dim_c<3>,kxtag,kvi) * alpha; @@ -157,6 +163,11 @@ struct Detangle2 : zeno::INode { kvtemp("collision_cancel",kvi) = kverts("collision_cancel",kvi); else kvtemp("collision_cancel",kvi) = 0; + if(has_collision_group) { + kvtemp("collision_group",kvi) = kverts(collision_group_name,kvi); + }else { + kvtemp("collision_group",kvi) = -1.0f; + } }); kttemp.resize(ktris.size()); TILEVEC_OPS::copy<3>(cudaExec,ktris,"inds",kttemp,"inds"); @@ -184,6 +195,8 @@ struct Detangle2 : zeno::INode { auto kbvs = retrieve_bounding_volumes(cudaExec,kvtemp,ktris,wrapv<3>{},(T)0,"x"); ktri_bvh.build(cudaExec,kbvs); + + // std::cout << "detangle build bvh : " << kbvs.size() << std::endl; } int nm_intersections = 0; @@ -275,14 +288,18 @@ struct Detangle2 : zeno::INode { // std::cout << "retrive_intersections_between_edges_and_ktris" << std::endl; if(do_proximity_detection) { retrieve_intersection_with_edge_tri_pairs(cudaExec, - verts,xtag, + verts,xtag,collision_group_name, edges, - kvtemp,"x", + kvtemp,"x","collision_group", kttemp, ktri_bvh, csEKT, icm_grad, - false); + false, + among_same_group, + among_different_group); + + // std::cout << "do EKT intersection detection : " << csEKT.size() << "\t" << kvtemp.size() << "\t" << kttemp.size() << std::endl; } if(iter == 0) nm_kinematic_intersection += csEKT.size(); @@ -414,14 +431,16 @@ struct Detangle2 : 
zeno::INode { if(do_proximity_detection) { retrieve_intersection_with_edge_tri_pairs(cudaExec, - kvtemp,"x", + kvtemp,"x","collision_group", kedges, - verts,xtag, + verts,xtag,collision_group_name, tris, tri_bvh, csKET, icm_grad, - false); + false, + among_same_group, + among_different_group); } #ifdef TIMING_DETANGLE timer.tock("retrieve_intersection_with_KET_pairs"); @@ -429,7 +448,7 @@ struct Detangle2 : zeno::INode { // nm_intersections += csET.size(); - // std::cout << "finish retrive_intersections_between_kedges_and_tris" << std::endl; + // std::cout << "do TKE intersection detection" << csKET.size() << "\t" << kvtemp.size() << "\t" << kedges.size() << std::endl; if(csKET.size() > 0) has_kine_intersection = true; @@ -564,7 +583,9 @@ struct Detangle2 : zeno::INode { skip_distance, false, skip_too_close_pair_at_rest_shape, - use_collision_group); + use_collision_group, + among_same_group, + among_different_group); } // std::cout << "nm_self_intersections_ET : " << csET.size() << std::endl; @@ -802,7 +823,9 @@ ZENDEFNODE(Detangle2, { {"bool","enforce_self_intersection_normal","0"}, {"bool","detangle_with_boundary","1"}, {"bool","do_self_detangle","1"}, - {"bool","skip_animation_intersection","1"} + {"bool","skip_animation_intersection","1"}, + {"bool","among_same_group","1"}, + {"bool","among_different_group","1"} }, { {"zsparticles"} From 6231184d0aa82cc2cd952fecaa11ec533601684f Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Tue, 14 May 2024 16:16:21 +0800 Subject: [PATCH 023/244] PrimitiveAttrPicker adjust output order --- zeno/src/nodes/prim/PrimitiveAttrPicker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zeno/src/nodes/prim/PrimitiveAttrPicker.cpp b/zeno/src/nodes/prim/PrimitiveAttrPicker.cpp index 9ce12642cd..73cc0dad77 100644 --- a/zeno/src/nodes/prim/PrimitiveAttrPicker.cpp +++ b/zeno/src/nodes/prim/PrimitiveAttrPicker.cpp @@ -64,8 +64,8 @@ ZENDEFNODE(PrimitiveAttrPicker, { }, // outputs { - {"list"}, - {"PrimitiveObject", 
"outPrim"} + {"PrimitiveObject", "outPrim"}, + {"list"} }, // params {{"string", "selected", ""}}, From 3e1ccc52cdb7a7875f00d8a1dec5a7e6d8f5407a Mon Sep 17 00:00:00 2001 From: zhuohy <1445643474@qq.com> Date: Tue, 14 May 2024 16:41:57 +0800 Subject: [PATCH 024/244] add cache shortcut --- ui/zenoedit/nodesys/zenosubgraphscene.cpp | 22 ++++++++++---------- ui/zenoedit/nodesys/zenosubgraphscene.h | 6 +----- ui/zenoedit/settings/zenosettingsmanager.cpp | 1 + ui/zenoedit/settings/zsettings.h | 1 + 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/ui/zenoedit/nodesys/zenosubgraphscene.cpp b/ui/zenoedit/nodesys/zenosubgraphscene.cpp index 42a84b2c38..6e2b33ce3e 100644 --- a/ui/zenoedit/nodesys/zenosubgraphscene.cpp +++ b/ui/zenoedit/nodesys/zenosubgraphscene.cpp @@ -42,9 +42,6 @@ ZenoSubGraphScene::ZenoSubGraphScene(QObject *parent) : QGraphicsScene(parent) , m_tempLink(nullptr) - , m_bOnceOn(false) - , m_bBypassOn(false) - , m_bViewOn(false) { ZtfUtil &inst = ZtfUtil::GetInstance(); m_nodeParams = inst.toUtilParam(inst.loadZtf(":/templates/node-example.xml")); @@ -1228,21 +1225,24 @@ void ZenoSubGraphScene::keyPressEvent(QKeyEvent* event) } else if (!event->isAccepted() && uKey == ZenoSettingsManager::GetInstance().getShortCut(ShortCut_Once)) { - updateNodeStatus(m_bOnceOn, OPT_ONCE); + updateNodeStatus(OPT_ONCE); } else if (!event->isAccepted() && uKey == ZenoSettingsManager::GetInstance().getShortCut(ShortCut_Bypass)) { - updateNodeStatus(m_bBypassOn, OPT_MUTE); + updateNodeStatus(OPT_MUTE); } else if (!event->isAccepted() && uKey == ZenoSettingsManager::GetInstance().getShortCut(ShortCut_View)) { - updateNodeStatus(m_bViewOn, OPT_VIEW); + updateNodeStatus(OPT_VIEW); + } + else if (!event->isAccepted() && uKey == ZenoSettingsManager::GetInstance().getShortCut(ShortCut_Cache)) + { + updateNodeStatus(OPT_CACHE); } } -void ZenoSubGraphScene::updateNodeStatus(bool &bOn, int option) +void ZenoSubGraphScene::updateNodeStatus(int option) { - bOn = !bOn; for 
(const QModelIndex &idx : selectNodesIndice()) { IGraphsModel *pGraphsModel = zenoApp->graphsManagment()->currentModel(); @@ -1250,10 +1250,10 @@ void ZenoSubGraphScene::updateNodeStatus(bool &bOn, int option) STATUS_UPDATE_INFO info; int options = idx.data(ROLE_OPTIONS).toInt(); info.oldValue = options; - if (bOn) - options |= option; - else + if (options & option) options &= (~option); + else + options |= option; info.role = ROLE_OPTIONS; info.newValue = options; pGraphsModel->updateNodeStatus(idx.data(ROLE_OBJID).toString(), info, m_subgIdx); diff --git a/ui/zenoedit/nodesys/zenosubgraphscene.h b/ui/zenoedit/nodesys/zenosubgraphscene.h index 8b877ac6e1..b44e3c1668 100644 --- a/ui/zenoedit/nodesys/zenosubgraphscene.h +++ b/ui/zenoedit/nodesys/zenosubgraphscene.h @@ -75,7 +75,7 @@ private slots: void onTempLinkClosed(); ZenoNode* createNode(const QModelIndex& idx, const NodeUtilParam& params); void initLink(const QModelIndex& linkIdx); - void updateNodeStatus(bool &bOn, int option); + void updateNodeStatus(int option); NodeUtilParam m_nodeParams; QPersistentModelIndex m_subgIdx; //index to the subgraphmodel or node in "graphsModel" @@ -85,10 +85,6 @@ private slots: ZenoTempLink* m_tempLink; QVector> m_selChanges; - - bool m_bOnceOn; - bool m_bBypassOn; - bool m_bViewOn; }; #endif diff --git a/ui/zenoedit/settings/zenosettingsmanager.cpp b/ui/zenoedit/settings/zenosettingsmanager.cpp index 11a2753453..2a3d70d846 100644 --- a/ui/zenoedit/settings/zenosettingsmanager.cpp +++ b/ui/zenoedit/settings/zenosettingsmanager.cpp @@ -214,6 +214,7 @@ QVector ZenoSettingsManager::getDefaultShortCutInfo(int style) {ShortCut_View, QObject::tr("View"), "V"}, {ShortCut_Once, QObject::tr("Once"), "C"}, {ShortCut_Bypass, QObject::tr("Bypass"), "B"}, + {ShortCut_Cache, QObject::tr("Cache"), "X"}, {ShortCut_FloatPanel, QObject::tr("Float Panel"), "P"}, {ShortCut_CoordSys, QObject::tr("CoordSys"), "M"}, {ShortCut_InitHandler, QObject::tr("Init Handler"), "Backspace"}, diff --git 
a/ui/zenoedit/settings/zsettings.h b/ui/zenoedit/settings/zsettings.h index 3192e99576..42682047ff 100644 --- a/ui/zenoedit/settings/zsettings.h +++ b/ui/zenoedit/settings/zsettings.h @@ -63,6 +63,7 @@ const char *const ShortCut_SelectAllNodes = "Select All Nodes"; const char *const ShortCut_View = "View"; const char *const ShortCut_Bypass = "Bypass"; const char *const ShortCut_Once = "Once"; +const char* const ShortCut_Cache = "Cache"; const char *const ShortCut_MovingView = "Moving View"; const char *const ShortCut_RotatingView = "Rotating View"; const char *const ShortCut_ScalingView = "Scaling View"; From d2ad9ed43d33a7a0eccc44c4a31cf81c3ef0e7b6 Mon Sep 17 00:00:00 2001 From: luzh Date: Wed, 15 May 2024 09:20:31 +0800 Subject: [PATCH 025/244] update ver. --- ui/zenoedit/zenoedit.rc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/zenoedit/zenoedit.rc b/ui/zenoedit/zenoedit.rc index 30a237cd8b..1b36cb2d25 100644 --- a/ui/zenoedit/zenoedit.rc +++ b/ui/zenoedit/zenoedit.rc @@ -48,8 +48,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,3,0,501 - PRODUCTVERSION 1,3,0,501 + FILEVERSION 1,3,0,515 + PRODUCTVERSION 1,3,0,515 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -66,12 +66,12 @@ BEGIN BEGIN VALUE "CompanyName", "ZENUSTECH" VALUE "FileDescription", "Zeno Editor" - VALUE "FileVersion", "1.3.0.501" + VALUE "FileVersion", "1.3.0.515" VALUE "InternalName", "zenoedit.rc" VALUE "LegalCopyright", "Copyright (C) 2023" VALUE "OriginalFilename", "zenoedit.rc" VALUE "ProductName", "Zeno" - VALUE "ProductVersion", "1.3.0.501" + VALUE "ProductVersion", "1.3.0.515" END END BLOCK "VarFileInfo" From 06791fc6d0fe5d3e7655d068a9112ceb9519d3cb Mon Sep 17 00:00:00 2001 From: luzh Date: Thu, 16 May 2024 16:16:35 +0800 Subject: [PATCH 026/244] just remove all cache flags. 
--- ui/zenoedit/nodesys/zenosubgraphscene.cpp | 18 +++++++++++++++++- ui/zenoedit/nodesys/zenosubgraphscene.h | 1 + ui/zenoedit/zenoedit.rc | 8 ++++---- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ui/zenoedit/nodesys/zenosubgraphscene.cpp b/ui/zenoedit/nodesys/zenosubgraphscene.cpp index 6e2b33ce3e..35f3089b2d 100644 --- a/ui/zenoedit/nodesys/zenosubgraphscene.cpp +++ b/ui/zenoedit/nodesys/zenosubgraphscene.cpp @@ -1237,7 +1237,23 @@ void ZenoSubGraphScene::keyPressEvent(QKeyEvent* event) } else if (!event->isAccepted() && uKey == ZenoSettingsManager::GetInstance().getShortCut(ShortCut_Cache)) { - updateNodeStatus(OPT_CACHE); + removeNodeCache(); + } +} + +void ZenoSubGraphScene::removeNodeCache() +{ + for (const QModelIndex& idx : selectNodesIndice()) + { + IGraphsModel* pGraphsModel = zenoApp->graphsManagment()->currentModel(); + ZASSERT_EXIT(pGraphsModel); + STATUS_UPDATE_INFO info; + int options = idx.data(ROLE_OPTIONS).toInt(); + info.oldValue = options; + options &= (~(int)OPT_CACHE); + info.role = ROLE_OPTIONS; + info.newValue = options; + pGraphsModel->updateNodeStatus(idx.data(ROLE_OBJID).toString(), info, m_subgIdx); } } diff --git a/ui/zenoedit/nodesys/zenosubgraphscene.h b/ui/zenoedit/nodesys/zenosubgraphscene.h index b44e3c1668..a1e6451c91 100644 --- a/ui/zenoedit/nodesys/zenosubgraphscene.h +++ b/ui/zenoedit/nodesys/zenosubgraphscene.h @@ -76,6 +76,7 @@ private slots: ZenoNode* createNode(const QModelIndex& idx, const NodeUtilParam& params); void initLink(const QModelIndex& linkIdx); void updateNodeStatus(int option); + void removeNodeCache(); NodeUtilParam m_nodeParams; QPersistentModelIndex m_subgIdx; //index to the subgraphmodel or node in "graphsModel" diff --git a/ui/zenoedit/zenoedit.rc b/ui/zenoedit/zenoedit.rc index 1b36cb2d25..aa4b6b811a 100644 --- a/ui/zenoedit/zenoedit.rc +++ b/ui/zenoedit/zenoedit.rc @@ -48,8 +48,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,3,0,515 - PRODUCTVERSION 1,3,0,515 + FILEVERSION 
1,3,1,515 + PRODUCTVERSION 1,3,1,515 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -66,12 +66,12 @@ BEGIN BEGIN VALUE "CompanyName", "ZENUSTECH" VALUE "FileDescription", "Zeno Editor" - VALUE "FileVersion", "1.3.0.515" + VALUE "FileVersion", "1.3.1.515" VALUE "InternalName", "zenoedit.rc" VALUE "LegalCopyright", "Copyright (C) 2023" VALUE "OriginalFilename", "zenoedit.rc" VALUE "ProductName", "Zeno" - VALUE "ProductVersion", "1.3.0.515" + VALUE "ProductVersion", "1.3.1.515" END END BLOCK "VarFileInfo" From 40ee0ac9372888899c9a8d227411407f023f5948 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 16 May 2024 18:38:57 +0800 Subject: [PATCH 027/244] improve-clone --- zeno/src/nodes/PortalNodes.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/zeno/src/nodes/PortalNodes.cpp b/zeno/src/nodes/PortalNodes.cpp index 79adf7787e..b447e09575 100644 --- a/zeno/src/nodes/PortalNodes.cpp +++ b/zeno/src/nodes/PortalNodes.cpp @@ -74,12 +74,16 @@ struct Clone : zeno::INode { return; } set_output("newObject", std::move(newobj)); + set_output("origin", obj); } }; ZENDEFNODE(Clone, { {"object"}, - {"newObject"}, + { + "newObject", + "origin", + }, {}, {"lifecycle"}, }); From e978bcea914feb6a6657699798beac4ad7a35bcf Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 21 May 2024 20:41:29 +0800 Subject: [PATCH 028/244] for zpc upd --- projects/CUDA/remesh/simplification.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/CUDA/remesh/simplification.cpp b/projects/CUDA/remesh/simplification.cpp index 76318867eb..f4773d2db8 100644 --- a/projects/CUDA/remesh/simplification.cpp +++ b/projects/CUDA/remesh/simplification.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "zensim/container/Bht.hpp" #include "zensim/omp/execution/ExecutionPolicy.hpp" From c33ca04143cabb6c402754c92df34750e5a0b033 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 23 May 2024 01:51:29 +0800 Subject: [PATCH 029/244] 
fbx-improve --- projects/FBX/FBXSDK.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 9e60135789..1cee6f6b68 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -9,8 +9,6 @@ #include #include -#ifdef ZENO_FBXSDK -#include #include "zeno/utils/log.h" #include #include "zeno/types/PrimitiveObject.h" @@ -19,6 +17,9 @@ #include #include +#ifdef ZENO_FBXSDK +#include + namespace FBX{ void GetChildNodePathRecursive(FbxNode* node, std::string& path) { if (node->GetParent()) { @@ -1089,6 +1090,9 @@ ZENDEFNODE(NewFBXImportCamera, { {}, {"primitive"}, }); +} +#endif +namespace zeno { struct NewFBXBoneDeform : INode { std::vector getBoneNames(PrimitiveObject *prim) { auto boneName_count = prim->userData().get2("boneName_count"); @@ -1173,7 +1177,7 @@ struct NewFBXBoneDeform : INode { auto &bi = prim->verts.add_attr("boneName"); auto &bw = prim->verts.add_attr("boneWeight"); size_t vert_count = prim->verts.size(); - #pragma omp parallel for +#pragma omp parallel for for (auto i = 0; i < vert_count; i++) { auto opos = prim->verts[i]; vec3f pos = {}; @@ -1302,5 +1306,4 @@ ZENDEFNODE(BoneTransformView, { {}, {"debug"}, }); -} -#endif \ No newline at end of file +} \ No newline at end of file From 4e91975cad77145a0fb40644a0548ad1be574b5e Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 23 May 2024 20:41:15 +0800 Subject: [PATCH 030/244] improve-abc-to-maya --- projects/Alembic/WriteAlembic.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/projects/Alembic/WriteAlembic.cpp b/projects/Alembic/WriteAlembic.cpp index 268fedc56c..8f6ec7220a 100644 --- a/projects/Alembic/WriteAlembic.cpp +++ b/projects/Alembic/WriteAlembic.cpp @@ -674,7 +674,9 @@ struct WriteAlembic2 : INode { uvsamp); write_velocity(prim, mesh_samp); write_normal(prim, mesh_samp); - write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, 
mesh, frameid, real_frame_start, prim_size_per_frame); + if (get_input2("outputToMaya") == false) { + write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, mesh, frameid, real_frame_start, prim_size_per_frame); + } mesh.set( mesh_samp ); } else { @@ -684,7 +686,9 @@ struct WriteAlembic2 : INode { Int32ArraySample( vertex_count_per_face.data(), vertex_count_per_face.size() )); write_velocity(prim, mesh_samp); write_normal(prim, mesh_samp); - write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, mesh, frameid, real_frame_start, prim_size_per_frame); + if (get_input2("outputToMaya") == false) { + write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, mesh, frameid, real_frame_start, prim_size_per_frame); + } mesh.set( mesh_samp ); } } @@ -707,7 +711,9 @@ struct WriteAlembic2 : INode { } samp.setIds(Alembic::Abc::UInt64ArraySample(ids.data(), ids.size())); write_velocity(prim, samp); - write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, points, frameid, real_frame_start, prim_size_per_frame); + if (get_input2("outputToMaya") == false) { + write_attrs(verts_attrs, loops_attrs, polys_attrs, "", prim, points, frameid, real_frame_start, prim_size_per_frame); + } points.set( samp ); } } @@ -723,6 +729,7 @@ ZENDEFNODE(WriteAlembic2, { {"float", "fps", "25"}, {"bool", "flipFrontBack", "1"}, {"bool", "outputPoint", "0"}, + {"bool", "outputToMaya", "0"}, }, { }, @@ -944,7 +951,9 @@ struct WriteAlembicPrims : INode { uvsamp); write_velocity(prim, mesh_samp); write_normal(prim, mesh_samp); - write_attrs(verts_attrs, loops_attrs, polys_attrs, path, prim, mesh, frameid, real_frame_start, prim_size_per_frame[path]); + if (get_input2("outputToMaya") == false) { + write_attrs(verts_attrs, loops_attrs, polys_attrs, path, prim, mesh, frameid, real_frame_start, prim_size_per_frame[path]); + } mesh.set( mesh_samp ); } else { @@ -954,7 +963,9 @@ struct WriteAlembicPrims : INode { Int32ArraySample( vertex_count_per_face.data(), vertex_count_per_face.size() )); 
write_velocity(prim, mesh_samp); write_normal(prim, mesh_samp); - write_attrs(verts_attrs, loops_attrs, polys_attrs, path, prim, mesh, frameid, real_frame_start, prim_size_per_frame[path]); + if (get_input2("outputToMaya") == false) { + write_attrs(verts_attrs, loops_attrs, polys_attrs, path, prim, mesh, frameid, real_frame_start, prim_size_per_frame[path]); + } mesh.set( mesh_samp ); } } @@ -973,6 +984,7 @@ ZENDEFNODE(WriteAlembicPrims, { {"int", "frame_end", "100"}, {"float", "fps", "25"}, {"bool", "flipFrontBack", "1"}, + {"bool", "outputToMaya", "0"}, }, { }, From 13ee2edad9d52ea79c495b99e93a72c55e64f618 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 24 May 2024 20:39:43 +0800 Subject: [PATCH 031/244] vertex pin constraint --- projects/CuLagrange/pbd/ConstraintsSolver.cu | 123 +++++++++++++++---- 1 file changed, 102 insertions(+), 21 deletions(-) diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index fe025ba193..549fb12c49 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -256,8 +256,8 @@ struct XPBDSolve : INode { minv[i] = verts("minv",quad[i]); } - auto ra = cquads("ra",coffset + gi); - auto ras = cquads("sign",coffset + gi); + auto ra = cquads("r",coffset + gi); + // auto ras = cquads("sign",coffset + gi); vec3 dp[4] = {}; // if(!CONSTRAINT::solve_DihedralConstraint( // p[0],minv[0], @@ -278,7 +278,7 @@ struct XPBDSolve : INode { p[3],minv[3], pp[0],pp[1],pp[2],pp[3], ra, - ras, + // ras, alpha, dt, kd, @@ -935,8 +935,8 @@ struct XPBDSolveSmoothAll : INode { affiliationOffset = cquads.getPropertyOffset("xpbd_affiliation"), dampingOffset = cquads.getPropertyOffset("damping_coeff"), indsOffset = cquads.getPropertyOffset("inds"), - // rOffset = cquads.getPropertyOffset("r"), - restScaleOffset = cquads.getPropertyOffset("rest_scale"), + rOffset = cquads.getPropertyOffset("r"), + // restScaleOffset = cquads.getPropertyOffset("rest_scale"), 
// weight_sum = proxy(weight_sum), ptagOffset = verts.getPropertyOffset(ptag), pptagOffset = verts.getPropertyOffset(pptag), @@ -957,12 +957,13 @@ struct XPBDSolveSmoothAll : INode { auto pp1 = verts.pack(dim_c<3>,pptagOffset,edge[1]); auto minv0 = verts(minvOffset,edge[0]); auto minv1 = verts(minvOffset,edge[1]); - auto rest_scale = cquads(restScaleOffset,ci); - auto r = cquads("r",ci) * rest_scale; + // auto rest_scale = cquads(restScaleOffset,ci); + // auto r = cquads("r",ci) * rest_scale; + auto r = cquads(rOffset,ci); vec3 dp[2] = {}; auto lambda = (T)0; - bool do_stretch_resistence_only = category == category_c::long_range_attachment; + bool do_stretch_resistence_only = (category == category_c::long_range_attachment); if(!CONSTRAINT::solve_DistanceConstraint( p0,minv0, p1,minv1, @@ -977,8 +978,8 @@ struct XPBDSolveSmoothAll : INode { return; // printf("smooth stretch update : %f %f\n",(float)dp[0].norm(),(float)dp[1].norm()); for(int i = 0;i != 2;++i) { - if(isnan(dp[i].norm())) - printf("nan dp[%d] detected at bending\n",i); + // if(isnan(dp[i].norm())) + // printf("nan dp[%d] detected at bending\n",i); // atomic_add(exec_tag,&weight_sum[edge[i]],w); atomic_add(exec_tag,&verts(wOffset,edge[i]),w); for(int d = 0;d != 3;++d) @@ -996,9 +997,10 @@ struct XPBDSolveSmoothAll : INode { pp[i] = verts.pack(dim_c<3>,pptagOffset,quad[i]); minv[i] = verts(minvOffset,quad[i]); } - auto rest_scale = cquads(restScaleOffset,ci); - auto ra = cquads("ra",ci) * rest_scale; - auto ras = cquads("sign",ci); + // auto rest_scale = cquads(restScaleOffset,ci); + // auto ra = cquads("ra",ci) * rest_scale; + // auto ras = cquads("sign",ci); + auto ra = cquads(rOffset,ci); vec3 dp[4] = {}; auto lambda = (T)0; if(!CONSTRAINT::solve_DihedralConstraint( @@ -1008,7 +1010,7 @@ struct XPBDSolveSmoothAll : INode { p[3],minv[3], pp[0],pp[1],pp[2],pp[3], ra, - ras, + // ras, aff, dt, kd, @@ -1016,8 +1018,8 @@ struct XPBDSolveSmoothAll : INode { dp[0],dp[1],dp[2],dp[3])) return; for(int i = 0;i 
!= 4;++i) { - if(isnan(dp[i].norm())) - printf("nan dp[%d] detected at bending\n",i); + // if(isnan(dp[i].norm())) + // printf("nan dp[%d] detected at bending\n",i); // atomic_add(exec_tag,&weight_sum[quad[i]],w); atomic_add(exec_tag,&verts(wOffset,quad[i]),w); for(int d = 0;d != 3;++d) @@ -1026,6 +1028,81 @@ struct XPBDSolveSmoothAll : INode { } }); } + + if(category == category_c::vertex_pin_to_vertex_constraint) { + auto target = constraint_ptr->readMeta(CONSTRAINT_TARGET); + const auto& kverts = target->getParticles(); + + auto substep_id = get_input2("substep_id"); + auto nm_substeps = get_input2("nm_substeps"); + auto anim_w = (float)(substep_id + 1) / (float)nm_substeps; + auto anim_pw = (float)(substep_id) / (float)nm_substeps; + + cudaPol(zs::range(cquads.size()),[ + cquads = proxy({},cquads), + stiffnessOffset = cquads.getPropertyOffset("relative_stiffness"), + affiliationOffset = cquads.getPropertyOffset("xpbd_affiliation"), + dampingOffset = cquads.getPropertyOffset("damping_coeff"), + cquadsIndsOffset = cquads.getPropertyOffset("inds"), + cquadsROffset = cquads.getPropertyOffset("r"), + dt = dt, + anim_w = anim_w, + anim_pw = anim_pw, + verts = view(verts), + ptagOffset = verts.getPropertyOffset(ptag), + pptagOffset = verts.getPropertyOffset(pptag), + dptagOffset = verts.getPropertyOffset(dptag), + minvOffset = verts.getPropertyOffset("minv"), + wOffset = verts.getPropertyOffset("w"), + kverts = proxy({},kverts), + kptagOffset = kverts.getPropertyOffset("x"), + kpptagOffset = kverts.getPropertyOffset("px")] ZS_LAMBDA(int ei) mutable { + auto w = cquads(stiffnessOffset,ei); + auto inds = cquads.pack(dim_c<2>,cquadsIndsOffset,ei,int_c); + auto r = cquads(cquadsROffset,ei); + auto aff = cquads(affiliationOffset,ei); + auto kd = cquads(dampingOffset,ei); + + auto vi = inds[0]; + auto kvi = inds[1]; + if(kvi < 0) + return; + + auto p = verts.pack(dim_c<3>,ptagOffset,vi); + auto pp = verts.pack(dim_c<3>,pptagOffset,vi); + auto kp = anim_w * 
kverts.pack(dim_c<3>,kptagOffset,kvi) + (1.f - anim_w) * kverts.pack(dim_c<3>,kpptagOffset,kvi); + auto kpp = anim_pw * kverts.pack(dim_c<3>,kptagOffset,kvi) + (1.f - anim_pw) * kverts.pack(dim_c<3>,kpptagOffset,kvi); + + auto minv = verts(minvOffset,vi); + if(minv < 1e-6) + return; + auto kminv = 0.f; + + auto lambda = (T)0.0; + bool do_stretch_resistence_only = true; + vec3 dp[2] = {}; + + if(!CONSTRAINT::solve_DistanceConstraint( + p,minv, + kp,kminv, + pp,kpp, + r, + aff, + kd, + dt, + lambda, + dp[0],dp[1], + do_stretch_resistence_only)) + return; + + // atomic_add(exec_tag,&verts(wOffset,vi),w); + verts(wOffset,vi) += w; + verts.tuple(dim_c<3>,dptagOffset,vi) = verts.pack(dim_c<3>,dptagOffset,vi) + dp[0]; + // for(int d = 0;d != 3;++d) + // atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[0][d]); + }); + } + if(category == category_c::self_dcd_collision_constraint) { const auto& tris = zsparticles->getQuadraturePoints(); @@ -1141,24 +1218,28 @@ struct XPBDSolveSmoothAll : INode { cell_buffer = proxy({},cell_buffer), dptagOffset = verts.getPropertyOffset(dptag), ptagOffset = verts.getPropertyOffset(ptag), + cquadsIndsOffset = cquads.getPropertyOffset("inds"), + cquadsBaryOffset = cquads.getPropertyOffset("bary"), + cellBufferXOffset = cell_buffer.getPropertyOffset("x"), + cellBufferVOffset = cell_buffer.getPropertyOffset("v"), ktris = ktris.begin("inds",dim_c<3>,int_c), enable_sliding = enable_sliding, wOffset = verts.getPropertyOffset("w"), stiffnessOffset = cquads.getPropertyOffset("relative_stiffness"), verts = proxy({},verts)] ZS_LAMBDA(int ci) mutable { auto w = cquads(stiffnessOffset,ci); - auto pair = cquads.pack(dim_c<2>,"inds",ci,int_c); + auto pair = cquads.pack(dim_c<2>,cquadsIndsOffset,ci,int_c); auto vi = pair[0]; auto kti = pair[1]; auto ktri = ktris[kti]; - auto bary = cquads.pack(dim_c<6>,"bary",ci); + auto bary = cquads.pack(dim_c<6>,cquadsBaryOffset,ci); vec3 as[3] = {}; vec3 bs[3] = {}; for(int i = 0;i != 3;++i) { - as[i] = 
cell_buffer.pack(dim_c<3>,"x",ktri[i]); - bs[i] = cell_buffer.pack(dim_c<3>,"v",ktri[i]) + as[i]; + as[i] = cell_buffer.pack(dim_c<3>,cellBufferXOffset,ktri[i]); + bs[i] = cell_buffer.pack(dim_c<3>,cellBufferVOffset,ktri[i]) + as[i]; } auto tp = vec3::zeros(); @@ -1466,7 +1547,7 @@ struct XPBDSolveSmoothAll : INode { verts.tuple(dim_c<3>,dptagOffset,vi) = verts.pack(dim_c<3>,dptagOffset,vi) / verts(wOffset,vi); else verts.tuple(dim_c<3>,dptagOffset,vi) = vec3::zeros(); - verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + verts.pack(dim_c<3>,dptagOffset,vi); + verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + 2.f * verts.pack(dim_c<3>,dptagOffset,vi); }); set_output("zsparticles",get_input("zsparticles")); From 6afad62772db98d790ff203349669221e499c220 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 24 May 2024 20:40:13 +0800 Subject: [PATCH 032/244] vertex pin constraint builder --- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 110 +++++++++++++++++- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index b31cc03c6a..f021e9a35d 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -248,12 +248,109 @@ virtual void apply() override { // std::cout << "shapeMatching::finish set aux data" << std::endl; } + if(type == "point_point_pin") { + // auto use_hard_constraint = get_input2("use") + constexpr auto eps = 1e-6; + constraint->setMeta(CONSTRAINT_KEY,category_c::vertex_pin_to_vertex_constraint); + + auto target = get_input("target"); + const auto& kverts = target->getParticles(); + constraint->setMeta(CONSTRAINT_TARGET,target.get()); + + auto search_radii = get_input2("search_radii"); + auto thickness = get_input2("thickness"); + + auto pin_group_name = get_input2("group_name"); + if(!verts.hasProperty("pinSuccess")) + 
verts.append_channels(cudaPol,{{"pinSuccess",1}}); + TILEVEC_OPS::fill(cudaPol,verts,"pinSuccess",0.f); + + zs::Vector> point_topos{verts.get_allocator(),0}; + if(verts.hasProperty(pin_group_name)) { + // std::cout << "binder name : " << pin_group_name << std::endl; + zs::bht pin_point_set{verts.get_allocator(),verts.size()}; + pin_point_set.reset(cudaPol,true); + + cudaPol(zs::range(verts.size()),[ + verts = proxy({},verts), + eps = eps, + gname = zs::SmallString(pin_group_name), + pin_point_set = proxy(pin_point_set)] ZS_LAMBDA(int vi) mutable { + auto gtag = verts(gname,vi); + if(gtag > eps) + pin_point_set.insert(vi); + }); + point_topos.resize(pin_point_set.size()); + cudaPol(zip(zs::range(pin_point_set.size()),pin_point_set._activeKeys),[ + point_topos = proxy(point_topos)] ZS_LAMBDA(auto id,const auto& pvec) mutable { + point_topos[id] = pvec[0]; + }); + }else { + point_topos.resize(verts.size()); + cudaPol(zip(zs::range(point_topos.size()),point_topos),[] ZS_LAMBDA(const auto& id,auto& pi) mutable {pi = id;}); + } + + if(do_constraint_topological_coloring) { + topological_coloring(cudaPol,point_topos,colors,false); + sort_topology_by_coloring_tag(cudaPol,colors,reordered_map,color_offset); + } + + eles.append_channels(cudaPol,{{"inds",2},{"r",1}}); + eles.resize(point_topos.size()); + TILEVEC_OPS::fill(cudaPol,eles,"inds",zs::reinterpret_bits((int)-1)); + TILEVEC_OPS::fill(cudaPol,eles,"r",0.f); + + auto kpBvh = bvh_t{}; + auto kpBvs = retrieve_bounding_volumes(cudaPol,kverts,search_radii / 2.0f,"x"); + kpBvh.build(cudaPol,kpBvs); + + cudaPol(zs::range(point_topos.size()),[ + point_topos = proxy(point_topos), + do_constraint_topological_coloring = do_constraint_topological_coloring, + reordered_map = proxy(reordered_map), + verts = proxy({},verts), + kpBvh = proxy(kpBvh), + search_radii = search_radii, + thickness = thickness, + // use_hard_constraint = use_hard_constraint, + eles = proxy({},eles), + kverts = proxy({},kverts)] ZS_LAMBDA(int oei) mutable { + 
auto ei = do_constraint_topological_coloring ? reordered_map[oei] : oei; + auto pi = point_topos[ei][0]; + + auto p = verts.pack(dim_c<3>,"x",pi); + auto bv = bv_t{get_bounding_box(p - search_radii / 2.f,p + search_radii / 2.f)}; + + int closest_kvi = -1; + auto closest_distance = std::numeric_limits::max(); + auto find_closest_point = [&](int kvi) mutable { + auto kp = kverts.pack(dim_c<3>,"x",kvi); + auto dist = (kp - p).norm(); + if(dist < closest_distance) { + closest_kvi = kvi; + closest_distance = dist; + } + }; + kpBvh.iter_neighbors(bv,find_closest_point); + eles.tuple(dim_c<2>,"inds",oei) = vec2i{pi,closest_kvi}.reinterpret_bits(float_c); + eles("r",oei) = thickness + closest_distance; + if(closest_kvi >= 0) { + // printf("vertex2vertex_pin : %d %d\n",pi,closest_kvi); + verts("pinSuccess",pi) = 1.f; + } + }); + } + if(type == "lra_stretch") { constraint->setMeta(CONSTRAINT_KEY,category_c::long_range_attachment); + auto radii = get_input2("thickness"); auto attach_group_name = get_input2("group_name"); auto has_group = verts.hasProperty(attach_group_name); + // constexpr auto eps = 1e-6; + // auto attach2target = get_input("target"); + if(!has_group) { std::cout << "the input vertices has no specified group tag : " << attach_group_name << std::endl; throw std::runtime_error("the input vertices has no specified LRA group"); @@ -410,10 +507,10 @@ virtual void apply() override { sort_topology_by_coloring_tag(cudaPol,colors,reordered_map,color_offset); } // std::cout << "quads.size() = " << quads.size() << "\t" << "edge_topos.size() = " << edge_topos.size() << std::endl; - eles.append_channels(cudaPol,{{"inds",2},{"r",1},{"rest_scale",1}}); + eles.append_channels(cudaPol,{{"inds",2},{"r",1}}); auto rest_scale = get_input2("rest_scale"); - TILEVEC_OPS::fill(cudaPol,eles,"rest_scale",rest_scale); + // TILEVEC_OPS::fill(cudaPol,eles,"rest_scale",rest_scale); cudaPol(zs::range(eles.size()),[ has_group = has_group, @@ -1534,8 +1631,8 @@ virtual void apply() override 
{ } // std::cout << "quads.size() = " << quads.size() << "\t" << "edge_topos.size() = " << edge_topos.size() << std::endl; - eles.append_channels(cudaPol,{{"inds",4},{"ra",1},{"sign",1},{"rest_scale",1}}); - TILEVEC_OPS::fill(cudaPol,eles,"rest_scale",rest_scale); + eles.append_channels(cudaPol,{{"inds",4},{"r",1}}); + // TILEVEC_OPS::fill(cudaPol,eles,"rest_scale",rest_scale); cudaPol(zs::range(eles.size()),[ eles = proxy({},eles), @@ -1554,8 +1651,8 @@ virtual void apply() override { float alpha{}; float alpha_sign{}; CONSTRAINT::init_DihedralBendingConstraint(x[0],x[1],x[2],x[3],rest_scale,alpha,alpha_sign); - eles("ra",oei) = alpha; - eles("sign",oei) = alpha_sign; + eles("r",oei) = alpha * rest_scale; + // eles("sign",oei) = alpha_sign; // auto topo = bd_topos[ei]; // zs::vec vis_inds { @@ -1637,6 +1734,7 @@ ZENDEFNODE(MakeSurfaceConstraintTopology, {{ {"string","dcd_source_xtag","px"}, {"string","dcd_collider_xtag","x"}, {"string","dcd_collider_pxtag","px"}, + {"float","search_radii","0.0"}, {"float","toc","0"}, {"bool","add_dcd_repulsion_force","1"}, {"float","relative_stiffness","1.0"}, From e4c9f12c970ec00551142258e5d30c67683f9f93 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 24 May 2024 20:40:58 +0800 Subject: [PATCH 033/244] save memory for stretch and bending constraint --- .../CuLagrange/pbd/constraint_function_kernel/constraint.cuh | 2 +- .../pbd/constraint_function_kernel/constraint_types.hpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh index 3fad41cee4..9ec41df4d7 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh @@ -389,7 +389,7 @@ namespace zeno { namespace CONSTRAINT { const VECTOR3d& pp2, const VECTOR3d& pp3, const SCALER& restAngle, - const SCALER& restAngleSign, + // const 
SCALER& restAngleSign, const SCALER& xpbd_affliation, const SCALER& dt, const SCALER& kdamp_ratio, diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp index c828e22bf7..5874bd63b0 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp @@ -5,6 +5,10 @@ namespace zeno { namespace PBD_CONSTRAINT { constexpr auto CONSTRAINT_KEY = "XPBD_CONSTRAINT"; constexpr auto CONSTRAINT_TARGET = "XPBD_CONSTRAINT_TARGET"; constexpr auto CONSTRAINT_COLOR_OFFSET = "XPBD_CONSTRAINT_OFFSET"; +constexpr auto CONSTRAINT_UPDATE_INDEX_BUFFER = "CONSTRAINT_UPDATE_INDEX_BUFFER"; +constexpr auto CONSTRAINT_UPDATE_OFFSETS = "CONSTRAINT_UPDATE_OFFSETS"; +constexpr auto CONSTRAINT_UPDATE_BUFFER = "CONSTRAINT_Update_BUFFER"; + constexpr auto NM_DCD_COLLISIONS = "NM_DCD_COLLISIONS"; constexpr auto DCD_COUNTER_BUFFER = "DCD_COUNTER_BUFFER"; @@ -34,6 +38,7 @@ constexpr auto SHAPE_MATCHING_MATRIX_BUFFER = "SHAPE_MATCHING_MATRIX_BUFFER"; enum category_c : int { shape_matching_constraint, long_range_attachment, + vertex_pin_to_vertex_constraint, edge_length_constraint, isometric_bending_constraint, dihedral_bending_constraint, From 4894815d6191232cde9da1e48228a74ae6a80327 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 24 May 2024 20:41:47 +0800 Subject: [PATCH 034/244] detangle increase intersection storage size --- projects/CuLagrange/geometry/Detangle.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/CuLagrange/geometry/Detangle.cu b/projects/CuLagrange/geometry/Detangle.cu index c47890ed8f..79b7cf9a74 100644 --- a/projects/CuLagrange/geometry/Detangle.cu +++ b/projects/CuLagrange/geometry/Detangle.cu @@ -42,7 +42,7 @@ struct Detangle2 : zeno::INode { constexpr auto DETANGLE_CS_KET_BUFFER_KEY = "DETANGLE_CS_KET_BUFFER_KEY"; constexpr auto 
DETANGLE_TRI_BVH_BUFFER_KEY = "DETANGLE_TRI_BVH_BUFFER_KEY"; constexpr auto DETANGLE_ICM_GRADIENT_BUFFER_KEY = "DETANGLE_ICM_GRADIENT_BUFFER_KEY"; - constexpr auto DEFAULT_MAX_DETANGLE_INTERSECTION_PAIR = 10000; + constexpr auto DEFAULT_MAX_DETANGLE_INTERSECTION_PAIR = 100000; auto zsparticles = get_input("zsparticles"); auto& verts = zsparticles->getParticles(); @@ -751,6 +751,7 @@ struct Detangle2 : zeno::INode { std::cout << "nm_kin_intersections : " << nm_kinematic_intersection << std::endl; } + auto gradInfNorm = TILEVEC_OPS::inf_norm<3>(cudaExec,verts,"grad"); if(gradInfNorm < 1e-3) break; From 4773fbd4acbe216d8ef6b96b328497a3ffbe4ab0 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 29 May 2024 18:01:02 +0800 Subject: [PATCH 035/244] fix-selectmode-when-delete --- ui/zenoedit/nodesys/zenosubgraphscene.cpp | 4 ++++ ui/zenoedit/viewport/cameracontrol.cpp | 4 ++-- ui/zenoedit/viewport/displaywidget.cpp | 12 ++++++++---- ui/zenoedit/viewportinteraction/picker.cpp | 16 ++++++++-------- zenovis/include/zenovis/Scene.h | 5 ++++- zenovis/src/Scene.cpp | 8 ++++++++ zenovis/src/bate/FrameBufferPicker.cpp | 12 ++++++------ zenovis/src/bate/HudGraphicPrimHighlight.cpp | 8 ++++---- 8 files changed, 44 insertions(+), 25 deletions(-) diff --git a/ui/zenoedit/nodesys/zenosubgraphscene.cpp b/ui/zenoedit/nodesys/zenosubgraphscene.cpp index 35f3089b2d..789d3c4eb4 100644 --- a/ui/zenoedit/nodesys/zenosubgraphscene.cpp +++ b/ui/zenoedit/nodesys/zenosubgraphscene.cpp @@ -48,6 +48,9 @@ ZenoSubGraphScene::ZenoSubGraphScene(QObject *parent) // bsp tree index causes crash when removeItem and delete item. for safety, disable it. 
// https://stackoverflow.com/questions/38458830/crash-after-qgraphicssceneremoveitem-with-custom-item-class setItemIndexMethod(QGraphicsScene::NoIndex); + connect(this, &ZenoSubGraphScene::selectionChanged, this, [=]() { + afterSelectionChanged(); + }); } ZenoSubGraphScene::~ZenoSubGraphScene() @@ -1015,6 +1018,7 @@ void ZenoSubGraphScene::onRowsAboutToBeRemoved(const QModelIndex& subgIdx, const QString id = idx.data(ROLE_OBJID).toString(); ZASSERT_EXIT(m_nodes.find(id) != m_nodes.end()); ZenoNode* pNode = m_nodes[id]; + pNode->setSelected(false); if (qobject_cast(pNode)) { GroupNode *pBlackboard = qobject_cast(pNode); diff --git a/ui/zenoedit/viewport/cameracontrol.cpp b/ui/zenoedit/viewport/cameracontrol.cpp index 2978c63ba2..3ee954ef05 100644 --- a/ui/zenoedit/viewport/cameracontrol.cpp +++ b/ui/zenoedit/viewport/cameracontrol.cpp @@ -668,7 +668,7 @@ void CameraControl::fakeMouseReleaseEvent(QMouseEvent *event) { } else { m_picker->pick(releasePos.x(), releasePos.y()); m_picker->sync_to_scene(); - if (scene->select_mode == zenovis::PICK_MODE::PICK_OBJECT) + if (scene->get_select_mode() == zenovis::PICK_MODE::PICK_OBJECT) onPrimSelected(); m_transformer->clear(); m_transformer->addObject(m_picker->get_picked_prims()); @@ -703,7 +703,7 @@ void CameraControl::fakeMouseReleaseEvent(QMouseEvent *event) { m_picker->pick(x0, y0, x1, y1, mode); m_picker->sync_to_scene(); - if (scene->select_mode == zenovis::PICK_MODE::PICK_OBJECT) + if (scene->get_select_mode() == zenovis::PICK_MODE::PICK_OBJECT) onPrimSelected(); m_transformer->clear(); m_transformer->addObject(m_picker->get_picked_prims()); diff --git a/ui/zenoedit/viewport/displaywidget.cpp b/ui/zenoedit/viewport/displaywidget.cpp index 920d3dfe8d..5a01efa4ef 100644 --- a/ui/zenoedit/viewport/displaywidget.cpp +++ b/ui/zenoedit/viewport/displaywidget.cpp @@ -1337,11 +1337,11 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex // read selected mode auto select_mode_str = 
zeno::NodeSyncMgr::GetInstance().getInputValString(nodes[0], "mode"); if (select_mode_str == "triangle") - scene->select_mode = zenovis::PICK_MODE::PICK_MESH; + scene->set_select_mode(zenovis::PICK_MODE::PICK_MESH); else if (select_mode_str == "line") - scene->select_mode = zenovis::PICK_MODE::PICK_LINE; + scene->set_select_mode(zenovis::PICK_MODE::PICK_LINE); else - scene->select_mode = zenovis::PICK_MODE::PICK_VERTEX; + scene->set_select_mode(zenovis::PICK_MODE::PICK_VERTEX); // read selected elements string node_context; auto node_selected_str = zeno::NodeSyncMgr::GetInstance().getParamValString(nodes[0], "selected"); @@ -1353,7 +1353,7 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex node_context += prim_name + ":" + e.toStdString() + " "; if (picker) - picker->load_from_str(node_context, scene->select_mode, zeno::SELECTION_MODE::NORMAL); + picker->load_from_str(node_context, scene->get_select_mode(), zeno::SELECTION_MODE::NORMAL); } if (picker) { picker->sync_to_scene(); @@ -1365,6 +1365,10 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex picker->sync_to_scene(); picker->focus(""); picker->set_picked_elems_callback({}); + { + picker->clear(); + scene->set_select_mode(zenovis::PICK_MODE::PICK_OBJECT); + } } } zenoApp->getMainWindow()->updateViewport(); diff --git a/ui/zenoedit/viewportinteraction/picker.cpp b/ui/zenoedit/viewportinteraction/picker.cpp index 2f9714d141..43ded13b37 100644 --- a/ui/zenoedit/viewportinteraction/picker.cpp +++ b/ui/zenoedit/viewportinteraction/picker.cpp @@ -108,7 +108,7 @@ void Picker::pick(int x, int y) { // scene->select_mode = zenovis::PICK_MODE::PICK_MESH; auto selected = picker->getPicked(x, y); - if (scene->select_mode == zenovis::PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == zenovis::PICK_MODE::PICK_OBJECT) { if (selected.empty()) { selected_prims.clear(); return; @@ -150,7 +150,7 @@ void Picker::pick(int x0, int y0, int x1, int y1, 
SELECTION_MODE mode) { ZASSERT_EXIT(scene); auto selected = picker->getPicked(x0, y0, x1, y1); // qDebug() << "pick: " << selected.c_str(); - if (scene->select_mode == zenovis::PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == zenovis::PICK_MODE::PICK_OBJECT) { if (selected.empty()) { selected_prims.clear(); return; @@ -158,7 +158,7 @@ void Picker::pick(int x0, int y0, int x1, int y1, SELECTION_MODE mode) { load_from_str(selected, zenovis::PICK_MODE::PICK_OBJECT, SELECTION_MODE::NORMAL); } else { - load_from_str(selected, scene->select_mode, mode); + load_from_str(selected, scene->get_select_mode(), mode); if (picked_elems_callback) picked_elems_callback(selected_elements); } } @@ -177,10 +177,10 @@ string Picker::just_pick_prim(int x, int y) { auto scene = this->scene(); ZASSERT_EXIT(scene, ""); - auto store_mode = scene->select_mode; - scene->select_mode = zenovis::PICK_MODE::PICK_OBJECT; + auto store_mode = scene->get_select_mode(); + scene->set_select_mode(zenovis::PICK_MODE::PICK_OBJECT); auto res = picker->getPicked(x, y); - scene->select_mode = store_mode; + scene->set_select_mode(store_mode); return res; } @@ -252,7 +252,7 @@ void Picker::save_context() { auto scene = this->scene(); ZASSERT_EXIT(scene); - select_mode_context = scene->select_mode; + select_mode_context = scene->get_select_mode(); selected_prims_context = std::move(selected_prims); selected_elements_context = std::move(selected_elements); } @@ -263,7 +263,7 @@ void Picker::load_context() { auto scene = this->scene(); ZASSERT_EXIT(scene); - scene->select_mode = select_mode_context; + scene->set_select_mode(select_mode_context); selected_prims = std::move(selected_prims_context); selected_elements = std::move(selected_elements_context); select_mode_context = zenovis::PICK_MODE::PICK_NONE; diff --git a/zenovis/include/zenovis/Scene.h b/zenovis/include/zenovis/Scene.h index 2d379bc035..45f358bd0c 100644 --- a/zenovis/include/zenovis/Scene.h +++ b/zenovis/include/zenovis/Scene.h @@ -30,7 
+30,6 @@ enum class PICK_MODE { struct Scene : zeno::disable_copy { std::optional select_box = {}; std::unordered_set selected = {}; - PICK_MODE select_mode = PICK_MODE::PICK_OBJECT; std::unordered_map> selected_elements = {}; std::unique_ptr camera; std::unique_ptr drawOptions; @@ -50,6 +49,10 @@ struct Scene : zeno::disable_copy { bool cameraFocusOnNode(std::string const &nodeid, zeno::vec3f ¢er, float &radius); static void loadGLAPI(void *procaddr); void* getOptixImg(int &w, int &h); + void set_select_mode(PICK_MODE _select_mode); + PICK_MODE get_select_mode(); +private: + PICK_MODE select_mode = PICK_MODE::PICK_OBJECT; }; } // namespace zenovis diff --git a/zenovis/src/Scene.cpp b/zenovis/src/Scene.cpp index 11df211415..c54654e662 100644 --- a/zenovis/src/Scene.cpp +++ b/zenovis/src/Scene.cpp @@ -15,6 +15,7 @@ #ifdef ZENO_ENABLE_OPTIX #include "../xinxinoptix/xinxinoptixapi.h" #endif +//#include #include #include @@ -113,6 +114,13 @@ bool Scene::loadFrameObjects(int frameid) { return inserted; } +void Scene::set_select_mode(PICK_MODE _select_mode) { +// zeno::log_info("{} -> {}", magic_enum::enum_name(select_mode), magic_enum::enum_name(_select_mode)); + select_mode = _select_mode; +} +PICK_MODE Scene::get_select_mode() { + return select_mode; +} void Scene::draw(bool record) { if (renderMan->getDefaultEngineName() != "optx") { diff --git a/zenovis/src/bate/FrameBufferPicker.cpp b/zenovis/src/bate/FrameBufferPicker.cpp index c414589810..c0e8ffafc3 100644 --- a/zenovis/src/bate/FrameBufferPicker.cpp +++ b/zenovis/src/bate/FrameBufferPicker.cpp @@ -309,7 +309,7 @@ struct FrameBufferPicker : IPicker { vbo->attribute(0, sizeof(float) * 0, sizeof(float) * 3, GL_FLOAT, 3); bool pick_particle = false; - if (scene->select_mode == PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == PICK_MODE::PICK_OBJECT) { pick_particle = prim->tris->empty() && prim->quads->empty() && prim->polys->empty() && prim->loops->empty(); CHECK_GL(glEnable(GL_DEPTH_TEST)); 
CHECK_GL(glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE)); @@ -343,7 +343,7 @@ struct FrameBufferPicker : IPicker { CHECK_GL(glDisable(GL_DEPTH_TEST)); } - if (scene->select_mode == PICK_MODE::PICK_VERTEX || pick_particle) { + if (scene->get_select_mode() == PICK_MODE::PICK_VERTEX || pick_particle) { // ----- enable depth test ----- CHECK_GL(glEnable(GL_DEPTH_TEST)); CHECK_GL(glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE)); @@ -387,7 +387,7 @@ struct FrameBufferPicker : IPicker { CHECK_GL(glDisable(GL_DEPTH_TEST)); } - if (scene->select_mode == PICK_MODE::PICK_LINE) { + if (scene->get_select_mode() == PICK_MODE::PICK_LINE) { // ----- enable depth test ----- CHECK_GL(glEnable(GL_DEPTH_TEST)); CHECK_GL(glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE)); @@ -454,7 +454,7 @@ struct FrameBufferPicker : IPicker { CHECK_GL(glDisable(GL_DEPTH_TEST)); } - if (scene->select_mode == PICK_MODE::PICK_MESH) { + if (scene->get_select_mode() == PICK_MODE::PICK_MESH) { // ----- enable depth test ----- CHECK_GL(glEnable(GL_DEPTH_TEST)); CHECK_GL(glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE)); @@ -541,7 +541,7 @@ struct FrameBufferPicker : IPicker { fbo->unbind(); string result; - if (scene->select_mode == PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == PICK_MODE::PICK_OBJECT) { if (!pixel.has_object() || !id_table.count(pixel.obj_id)) return ""; result = id_table[pixel.obj_id]; } @@ -594,7 +594,7 @@ struct FrameBufferPicker : IPicker { fbo->unbind(); string result; - if (scene->select_mode == PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == PICK_MODE::PICK_OBJECT) { unordered_set selected_obj; // fetch selected objects' ids for (int i = 0; i < pixel_count; i++) { diff --git a/zenovis/src/bate/HudGraphicPrimHighlight.cpp b/zenovis/src/bate/HudGraphicPrimHighlight.cpp index 833dcc5028..1e6cbee273 100644 --- a/zenovis/src/bate/HudGraphicPrimHighlight.cpp +++ b/zenovis/src/bate/HudGraphicPrimHighlight.cpp @@ -75,7 +75,7 @@ struct PrimitiveHighlight : IGraphicDraw { 
CHECK_GL(glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE)); glDepthFunc(GL_GREATER); CHECK_GL(glClearDepth(0.0)); - if (scene->select_mode == PICK_MODE::PICK_OBJECT) { + if (scene->get_select_mode() == PICK_MODE::PICK_OBJECT) { for (const auto &prim_id : scene->selected) { // ----- get primitive ----- PrimitiveObject *prim = nullptr; @@ -143,7 +143,7 @@ struct PrimitiveHighlight : IGraphicDraw { vbo->attribute(0, sizeof(float) * 0, sizeof(float) * 3, GL_FLOAT, 3); // ----- draw selected vertices ----- - if (scene->select_mode == PICK_MODE::PICK_VERTEX) { + if (scene->get_select_mode() == PICK_MODE::PICK_VERTEX) { // prepare indices CHECK_GL(glEnable(GL_PROGRAM_POINT_SIZE)); vector ind(selected_count); @@ -159,7 +159,7 @@ struct PrimitiveHighlight : IGraphicDraw { } // ----- draw selected edges ----- - if (scene->select_mode == PICK_MODE::PICK_LINE) { + if (scene->get_select_mode() == PICK_MODE::PICK_LINE) { if (prim->lines->empty()) return; // prepare indices vector ind(selected_count); @@ -175,7 +175,7 @@ struct PrimitiveHighlight : IGraphicDraw { } // ----- draw selected meshes ----- - if (scene->select_mode == PICK_MODE::PICK_MESH) { + if (scene->get_select_mode() == PICK_MODE::PICK_MESH) { // prepare indices vector ind(selected_count); int i = 0; From dc45fd5561af51664fac6419c15db8310e1ffff3 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Thu, 30 May 2024 05:59:47 +0800 Subject: [PATCH 036/244] fix surface cell pin --- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 3 ++- projects/CuLagrange/pbd/ConstraintsSolver.cu | 18 +++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index f021e9a35d..d3f86b7f60 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -1314,6 +1314,7 @@ virtual void apply() override { constexpr auto eps = 1e-6; 
constraint->setMeta(CONSTRAINT_KEY,category_c::vertex_pin_to_cell_constraint); + constraint->setMeta(PBD_USE_HARD_CONSTRAINT,use_hard_constraint); auto target = get_input("target"); @@ -1405,7 +1406,7 @@ virtual void apply() override { zs::vec prism_bary{}; T toc{}; - if(!compute_vertex_prism_barycentric_weights(p,as[0],as[1],as[2],bs[0],bs[1],bs[2],toc,prism_bary,(T)0.001)) + if(!compute_vertex_prism_barycentric_weights(p,as[0],as[1],as[2],bs[0],bs[1],bs[2],toc,prism_bary,(T)0.0001)) return; auto toc_dist = zs::abs(toc - (T)0.5); diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index 549fb12c49..d91af67cb1 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -1184,6 +1184,10 @@ struct XPBDSolveSmoothAll : INode { if(category == category_c::vertex_pin_to_cell_constraint) { // std::cout << "solve vertex cell pin constraint" << std::endl; + auto use_hard_constraint = constraint_ptr->readMeta(PBD_USE_HARD_CONSTRAINT); + if(use_hard_constraint && iter_id > 0) + continue; + auto target = constraint_ptr->readMeta(CONSTRAINT_TARGET); const auto& kverts = target->getParticles(); @@ -1214,6 +1218,7 @@ struct XPBDSolveSmoothAll : INode { thickness); cudaPol(zs::range(cquads.size()),[ + use_hard_constraint = use_hard_constraint, cquads = proxy({},cquads), cell_buffer = proxy({},cell_buffer), dptagOffset = verts.getPropertyOffset(dptag), @@ -1248,11 +1253,14 @@ struct XPBDSolveSmoothAll : INode { tp += bs[i] * bary[i + 3]; } - auto dp = tp - verts.pack(dim_c<3>,ptagOffset,vi); - - - for(int d = 0;d != 3;++d){ - atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[d]); + if(use_hard_constraint) { + verts.tuple(dim_c<3>,ptagOffset,vi) = tp; + } else { + auto dp = (tp - verts.pack(dim_c<3>,ptagOffset,vi)) * w; + atomic_add(exec_tag,&verts(wOffset,vi),w); + for(int d = 0;d != 3;++d){ + atomic_add(exec_tag,&verts(dptagOffset + d,vi),dp[d]); + } } }); } From 
8128b73d88541535bcb4d418c7d6e64fa6059455 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Thu, 30 May 2024 06:00:00 +0800 Subject: [PATCH 037/244] constraint update support weight update --- projects/CuLagrange/pbd/ConstraintsUpdator.cu | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/projects/CuLagrange/pbd/ConstraintsUpdator.cu b/projects/CuLagrange/pbd/ConstraintsUpdator.cu index 95c81a1efd..2cf732504f 100644 --- a/projects/CuLagrange/pbd/ConstraintsUpdator.cu +++ b/projects/CuLagrange/pbd/ConstraintsUpdator.cu @@ -47,6 +47,14 @@ virtual void apply() override { auto source = get_input("source"); auto constraint = get_input("constraint"); + auto update_weight = get_input2("update_weight"); + auto new_uniform_weight = get_input2("new_uniform_weight"); + + if(update_weight) { + auto& cquads = constraint->getQuadraturePoints(); + TILEVEC_OPS::fill(cudaPol,cquads,"w",new_uniform_weight); + } + // auto target = get_input("target"); auto type = constraint->readMeta(CONSTRAINT_KEY,wrapt{}); @@ -89,7 +97,9 @@ virtual void apply() override { ZENDEFNODE(UpdateConstraintTarget, {{ {"source"}, {"target"}, - {"constraint"} + {"constraint"}, + {"bool","update_weight","0"}, + {"float","new_uniform_weight","1.0"} }, {{"source"},{"constraint"}}, { From f989144d07e79b31cf9469cb09221c13cd39de1f Mon Sep 17 00:00:00 2001 From: littlemine Date: Thu, 30 May 2024 18:14:47 +0800 Subject: [PATCH 038/244] p2p attrib assign --- projects/CUDA/utils/Primitives.cpp | 48 ++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/projects/CUDA/utils/Primitives.cpp b/projects/CUDA/utils/Primitives.cpp index a795fdb7fb..f5d60cb5e6 100644 --- a/projects/CUDA/utils/Primitives.cpp +++ b/projects/CUDA/utils/Primitives.cpp @@ -4633,6 +4633,54 @@ ZENDEFNODE(AdvanceFrame, { {"zs_geom"}, }); +struct PrimAssignRefAttrib : INode { + virtual void apply() override { + auto points = get_input("prim"); + auto prim = get_input("ref_prim"); + auto idTag = 
get_input2("pointIdTag"); + auto tag = get_input2("attribTag"); + + auto pointIndex = points->attr(idTag); + + auto assignAttrib = [&pointIndex](auto &dstAttrib, const auto &srcAttrib) { + if constexpr (zs::is_same_v) { + #pragma omp parallel for + for (auto index = 0; index < dstAttrib.size(); ++index) { + dstAttrib[index] = srcAttrib[(int)pointIndex[index]]; + } + } else + throw std::runtime_error( + fmt::format("destination attrib [{}], source attrib [{}]\n", + zs::get_var_type_str(dstAttrib), zs::get_var_type_str(srcAttrib))); + }; + + if (tag == "pos") { + assignAttrib(points->verts.values, prim->verts.values); + } else { + zs::match([&verts = points->verts, &tag](const auto &src) { + verts.add_attr(tag); + })(prim->verts.attr(tag)); + zs::match([&assignAttrib](auto &dst, const auto &src) { + assignAttrib(dst, src); + })(points->verts.attr(tag), prim->verts.attr(tag)); + } + + set_output("prim", get_input("prim")); + } +}; + +ZENDEFNODE(PrimAssignRefAttrib, { + { + "prim", + "ref_prim", + {"string", "pointIdTag", "bvh_id"}, + {"string", "attribTag"}, + }, + {"prim"}, + {}, + {"primitive"}, + }); + struct RemovePrimitiveTopo : INode { void apply() override { auto prim = get_input2("prim"); From 56ac1783c7411174ce6d6855977fd31cede0afe5 Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 31 May 2024 21:46:41 +0800 Subject: [PATCH 039/244] Sky Composer --- ui/zenoedit/launch/serialize.cpp | 2 +- zeno/include/zeno/types/LightObject.h | 15 + zeno/include/zeno/utils/pfm.h | 27 + zeno/src/extra/GlobalComm.cpp | 2 +- zeno/src/nodes/ProcedrualSkyNode.cpp | 234 ++++++++ zenovis/CMakeLists.txt | 2 - zenovis/include/zenovis/RenderEngine.h | 2 +- zenovis/src/Scene.cpp | 2 +- zenovis/src/bate/RenderEngineBate.cpp | 2 +- zenovis/src/optx/RenderEngineOptx.cpp | 86 ++- zenovis/src/zhxx/RenderEngineZhxx.cpp | 2 +- zenovis/xinxinoptix/CMakeLists.txt | 7 +- zenovis/xinxinoptix/Light.h | 151 +++-- zenovis/xinxinoptix/OptiXStuff.h | 133 +++-- zenovis/xinxinoptix/PTKernel.cu | 2 +- 
zenovis/xinxinoptix/Portal.h | 716 ++++++++++++++++++++++++ zenovis/xinxinoptix/Sampling.h | 23 + zenovis/xinxinoptix/Shape.h | 16 - zenovis/xinxinoptix/optixPathTracer.cpp | 233 ++++---- zenovis/xinxinoptix/optixPathTracer.h | 3 + zenovis/xinxinoptix/proceduralSky.h | 29 +- zenovis/xinxinoptix/xinxinoptixapi.h | 10 +- zenovis/xinxinoptix/zxxglslvec.h | 7 + 23 files changed, 1448 insertions(+), 258 deletions(-) create mode 100644 zeno/include/zeno/utils/pfm.h create mode 100644 zenovis/xinxinoptix/Portal.h diff --git a/ui/zenoedit/launch/serialize.cpp b/ui/zenoedit/launch/serialize.cpp index 27a8877b5c..5493f8e91c 100644 --- a/ui/zenoedit/launch/serialize.cpp +++ b/ui/zenoedit/launch/serialize.cpp @@ -13,7 +13,7 @@ using namespace JsonHelper; QSet lightCameraNodes({ "CameraEval", "CameraNode", "CihouMayaCameraFov", "ExtractCameraData", "GetAlembicCamera","MakeCamera", - "LightNode", "BindLight", "ProceduralSky", "HDRSky", + "LightNode", "BindLight", "ProceduralSky", "HDRSky", "SkyComposer" }); std::set matNodeNames = {"ShaderFinalize", "ShaderVolume", "ShaderVolumeHomogeneous"}; diff --git a/zeno/include/zeno/types/LightObject.h b/zeno/include/zeno/types/LightObject.h index c6c7f7321e..374ceece56 100644 --- a/zeno/include/zeno/types/LightObject.h +++ b/zeno/include/zeno/types/LightObject.h @@ -5,6 +5,12 @@ #include #include +#else + +#ifndef vec3f +#define vec3f vec3 +#endif + #endif namespace zeno { @@ -23,6 +29,15 @@ namespace zeno { LightConfigDoubleside = 2u }; + struct DistantLightData { + vec3f direction; + float angle; + vec3f color; + float intensity; + + DistantLightData() = default; + }; + #ifndef __CUDACC_RTC__ struct LightData { diff --git a/zeno/include/zeno/utils/pfm.h b/zeno/include/zeno/utils/pfm.h new file mode 100644 index 0000000000..7efe11a952 --- /dev/null +++ b/zeno/include/zeno/utils/pfm.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include + +#include +#include + +namespace zeno { + +static void write_pfm(const char* path, int w, 
int h, const float *pixel, bool mono=false) { + std::string header = zeno::format("PF\n{} {}\n-1.0\n", w, h); + char channel = 3; + + if (mono) { + header = zeno::format("Pf\n{} {}\n-1.0\n", w, h); + channel = 1; + } + + std::vector data(header.size() + w * h * sizeof(float) * channel); + memcpy(data.data(), header.data(), header.size()); + memcpy(data.data() + header.size(), pixel, w * h * sizeof(float) * channel); + zeno::file_put_binary(data, path); +} + +} \ No newline at end of file diff --git a/zeno/src/extra/GlobalComm.cpp b/zeno/src/extra/GlobalComm.cpp index ec3b8b6eb6..b09c34f9be 100644 --- a/zeno/src/extra/GlobalComm.cpp +++ b/zeno/src/extra/GlobalComm.cpp @@ -21,7 +21,7 @@ namespace zeno { std::vector cachepath(3); std::unordered_set lightCameraNodes({ "CameraEval", "CameraNode", "CihouMayaCameraFov", "ExtractCameraData", "GetAlembicCamera","MakeCamera", - "LightNode", "BindLight", "ProceduralSky", "HDRSky", + "LightNode", "BindLight", "ProceduralSky", "HDRSky", "SkyComposer" }); std::set matNodeNames = {"ShaderFinalize", "ShaderVolume", "ShaderVolumeHomogeneous"}; diff --git a/zeno/src/nodes/ProcedrualSkyNode.cpp b/zeno/src/nodes/ProcedrualSkyNode.cpp index e6ad587fd4..c96a0927a9 100644 --- a/zeno/src/nodes/ProcedrualSkyNode.cpp +++ b/zeno/src/nodes/ProcedrualSkyNode.cpp @@ -2,6 +2,12 @@ #include #include #include +#include +#include + +#include +#include + namespace zeno { struct ProceduralSky : INode { @@ -78,6 +84,234 @@ ZENDEFNODE(HDRSky, { {"shader"}, }); +struct DistantLightWrapper : IObject{ + DistantLightData data; +}; + +struct DistantLight : INode { + + virtual void apply() override { + auto dir2 = get_input2("Lat-Lon"); + // dir2[0] = fmod(dir2[0], 180.f); + // dir2[1] = fmod(dir2[1], 180.f); + dir2 *= M_PIf / 180.f; + + zeno::vec3f dir3; + dir3[1] = std::sin(dir2[0]); + + dir3[2] = std::cos(dir2[0]) * std::cos(dir2[1]); + dir3[0] = std::cos(dir2[0]) * std::sin(dir2[1]); + + //dir3 = zeno::normalize(dir3); + + auto angleExtent = 
get_input2("angleExtent"); + angleExtent = zeno::clamp(angleExtent, 0.0f, 60.0f); + + auto color = get_input2("color"); + auto intensity = get_input2("intensity"); + intensity = fmaxf(0.0, intensity); + + auto result = std::make_shared(); + result->data.direction = dir3; + result->data.angle = angleExtent; + result->data.color = color; + result->data.intensity = intensity; + set_output2("out", std::move(result) ); + } +}; + +ZENDEFNODE(DistantLight, { + { + {"vec2f", "Lat-Lon", "45, 90"}, + {"float", "angleExtent", "0.5"}, + {"colorvec3f", "color", "1,1,1"}, + {"float", "intensity", "1"} + }, + { + {"out"}, + }, + { + }, + {"shader"}, +}); + +struct PortalLight : INode { + virtual void apply() override { + + auto pos = get_input2("pos"); + auto scale = get_input2("scale"); + auto rotate = get_input2("rotate"); + auto size = get_input2("size"); + size = std::max(size, 180); + + scale = 0.5f * abs(scale); + + auto order = get_input2("EulerRotationOrder:"); + auto orderTyped = magic_enum::enum_cast(order).value_or(EulerAngle::RotationOrder::XYZ); + + auto measure = get_input2("EulerAngleMeasure:"); + auto measureTyped = magic_enum::enum_cast(measure).value_or(EulerAngle::Measure::Radians); + + glm::vec3 eularAngleXYZ = glm::vec3(rotate[0], rotate[1], rotate[2]); + glm::mat4 rotation = EulerAngle::rotate(orderTyped, measureTyped, eularAngleXYZ); + + glm::mat4 transform(1.0f); + + transform = glm::translate(transform, glm::vec3(pos[0], pos[1], pos[2])); + transform = transform * rotation; + transform = glm::scale(transform, glm::vec3(scale[0], 0.5 * (scale[0] + scale[1]), scale[1])); + + auto prim = std::make_shared(); + prim->verts->resize(8); + + prim->verts[0] = zeno::vec3f(-1, 0, -1); + prim->verts[1] = zeno::vec3f(+1, 0, -1); + prim->verts[2] = zeno::vec3f(+1, 0, +1); + prim->verts[3] = zeno::vec3f(-1, 0, +1); + + prim->verts[4] = zeno::vec3f(0, 0, 0); + prim->verts[5] = zeno::vec3f(0.5, 0, 0); + prim->verts[6] = zeno::vec3f(0, 0.5, 0); + prim->verts[7] = 
zeno::vec3f(0, 0, 0.5); + + for (size_t i=0; iverts->size(); ++i) { + auto& ele = prim->verts[i]; + auto ttt = transform * glm::vec4(ele[0], ele[1], ele[2], 1.0f); + prim->verts[i] = zeno::vec3f(ttt.x, ttt.y, ttt.z); + } + + //prim->lines.attrs.clear(); + prim->lines->resize(8); + prim->lines[0] = {0, 1}; + prim->lines[1] = {1, 2}; + prim->lines[2] = {2, 3}; + prim->lines[3] = {3, 0}; + + prim->lines[4] = {4, 5}; + prim->lines[5] = {4, 6}; + prim->lines[6] = {4, 7}; + + auto& color = prim->verts.add_attr("clr"); + color.resize(8); + color[0] = {1,1,1}; + color[1] = {1,1,1}; + color[2] = {1,1,1}; + color[3] = {1,1,1}; + + color[4] = {1, 1, 1}; + color[5] = {1, 0, 0}; + color[6] = {0, 1, 0}; + color[7] = {0, 0, 1}; + //prim->lines.update(); + prim->userData().set2("size", size); + set_output2("out", std::move(prim)); + } +}; + +ZENDEFNODE(PortalLight, { + { + {"vec3f", "pos", "0,0,0"}, + {"vec2f", "scale", "1, 1"}, + {"vec3f", "rotate", "0,0,0"}, + {"int", "size", "180"} + }, + { + {"out"}, + }, + { + {"enum " + EulerAngle::RotationOrderListString(), "EulerRotationOrder", "XYZ"}, + {"enum " + EulerAngle::MeasureListString(), "EulerAngleMeasure", "Degree"} + }, + {"shader"}, +}); + +struct SkyComposer : INode { + virtual void apply() override { + + auto prim = std::make_shared(); + + if (has_input("dlights")) { + auto dlights = get_input("dlights")->get(); + if (dlights.empty()) { + throw zeno::makeError("Bad input for dlights"); + } + + prim->verts->resize(dlights.size()); + auto& attr_rad = prim->verts.add_attr("rad"); + auto& attr_angle = prim->verts.add_attr("angle"); + auto& attr_color = prim->verts.add_attr("color"); + auto& attr_inten = prim->verts.add_attr("inten"); + + unsigned i = 0; + for (const auto& dlight : dlights) { + + prim->verts[i] = dlight->data.direction; + attr_rad[i] = 0.0f; + attr_angle[i] = dlight->data.angle; + attr_color[i] = dlight->data.color; + attr_inten[i] = dlight->data.intensity; + + ++i; + } + } + + if (has_input("portals")) { + auto 
portals = get_input("portals")->get(); + if (portals.empty()) { + throw zeno::makeError("Bad input for portals"); + } + + using json = nlohmann::json; + std::vector raw(4 * portals.size()); + std::vector psizes(portals.size()); + + for (size_t i=0; iverts[0]; + auto p1 = rect->verts[1]; + auto p2 = rect->verts[2]; + auto p3 = rect->verts[3]; + + /* p0 --- p1 */ + /* --------- */ + /* p3 --- p2 */ + + raw[4 * i + 0] = p0; + raw[4 * i + 1] = p1; + raw[4 * i + 2] = p2; + raw[4 * i + 3] = p3; + + auto psize = rect->userData().get2("size"); + psizes[i] = psize; + } + + json aux(raw); + prim->userData().set2("portals", std::move(aux.dump())); + prim->userData().set2("psizes", json(psizes).dump()); + } + + prim->userData().set2("SkyComposer", std::move(1)); + prim->userData().set2("isRealTimeObject", std::move(1)); + set_output2("out", std::move(prim)); + } +}; + +ZENDEFNODE(SkyComposer, { + { + + {"list", "dlights"}, + {"list", "portals"} + }, + { + {"out"}, + }, + { + {"enum SphereUnbounded", "proxy", "SphereUnbounded"}, + }, + {"shader"}, +}); + vec3f colorTemperatureToRGB(float temperatureInKelvins) { vec3f retColor; diff --git a/zenovis/CMakeLists.txt b/zenovis/CMakeLists.txt index 7389441083..f7fd3cd746 100644 --- a/zenovis/CMakeLists.txt +++ b/zenovis/CMakeLists.txt @@ -2,8 +2,6 @@ file(GLOB_RECURSE source CONFIGURE_DEPENDS include/*.h src/*.cpp) file(GLOB_RECURSE glad_source CONFIGURE_DEPENDS glad/include/*.h glad/src/*.c) file(GLOB_RECURSE stbi_source CONFIGURE_DEPENDS stbi/include/*.h stbi/src/*.cpp stbi/src/*.c) -#OPTION(OPTIX_USE_20XX "turn on if on a 20xx gpu" OFF) - add_library(zenovis OBJECT ${source} ${glad_source} ${stbi_source}) target_link_libraries(zenovis PRIVATE ${CMAKE_DL_LIBS}) target_link_libraries(zenovis PUBLIC zeno) diff --git a/zenovis/include/zenovis/RenderEngine.h b/zenovis/include/zenovis/RenderEngine.h index a5c0ad58b9..4b275d16b4 100644 --- a/zenovis/include/zenovis/RenderEngine.h +++ b/zenovis/include/zenovis/RenderEngine.h @@ -15,7 
+15,7 @@ struct Scene; struct RenderEngine { virtual void draw(bool record) = 0; virtual void update() = 0; - virtual void cleanupOptix() = 0; + virtual void cleanupAssets() = 0; virtual ~RenderEngine() = default; }; diff --git a/zenovis/src/Scene.cpp b/zenovis/src/Scene.cpp index 347814a3ae..e67af2ea14 100644 --- a/zenovis/src/Scene.cpp +++ b/zenovis/src/Scene.cpp @@ -59,7 +59,7 @@ void Scene::cleanUpScene() RenderEngine* pEngine = renderMan->getEngine(); if (pEngine) { pEngine->update(); - pEngine->cleanupOptix(); + pEngine->cleanupAssets(); } } diff --git a/zenovis/src/bate/RenderEngineBate.cpp b/zenovis/src/bate/RenderEngineBate.cpp index 2a67d741ac..c4fc891593 100644 --- a/zenovis/src/bate/RenderEngineBate.cpp +++ b/zenovis/src/bate/RenderEngineBate.cpp @@ -92,7 +92,7 @@ struct RenderEngineBate : RenderEngine { } } - void cleanupOptix() override { + void cleanupAssets() override { } }; diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 4e8542328c..049e550789 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -32,6 +32,7 @@ #include "../../xinxinoptix/OptiXStuff.h" #include #include +#include #include #include @@ -694,14 +695,88 @@ struct GraphicsManager { float evnTexStrength = prim_in->userData().get2("evnTexStrength"); bool enableHdr = prim_in->userData().get2("enable"); if (!path.empty()) { - if (OptixUtil::sky_tex.has_value() && OptixUtil::sky_tex.value() != path) { + if (OptixUtil::sky_tex.has_value() && OptixUtil::sky_tex.value() != path + && OptixUtil::sky_tex.value() != OptixUtil::default_sky_tex ) { OptixUtil::removeTexture(OptixUtil::sky_tex.value()); } + OptixUtil::sky_tex = path; OptixUtil::addTexture(path); + } else { + OptixUtil::sky_tex = OptixUtil::default_sky_tex; } + xinxinoptix::update_hdr_sky(evnTexRotation, evnTex3DRotation, evnTexStrength); xinxinoptix::using_hdr_sky(enableHdr); + + if (OptixUtil::portal_delayed.has_value()) { + 
OptixUtil::portal_delayed.value()(); + //OptixUtil::portal_delayed.reset(); + } + } + else if (prim_in->userData().has("SkyComposer")) { + + auto& attr_dir = prim_in->verts; + + std::vector dlights; + dlights.reserve(attr_dir->size()); + + if (attr_dir->size()) { + + auto& attr_angle = attr_dir.attr("angle"); + auto& attr_color = attr_dir.attr("color"); + auto& attr_inten = attr_dir.attr("inten"); + + for (size_t i=0; isize(); ++i) { + + auto& dld = dlights.emplace_back(); + dld.direction = attr_dir[i]; + dld.angle = attr_angle[i]; + dld.color = attr_color[i]; + dld.intensity = attr_inten[i]; + } + } + xinxinoptix::updateDistantLights(dlights); + + if(prim_in->userData().has("portals")) { + + auto portals_string = prim_in->userData().get2("portals"); + auto portals_json = nlohmann::json::parse(portals_string); + + auto ps_string = prim_in->userData().get2("psizes"); + auto ps_json = nlohmann::json::parse(ps_string); + + std::vector portals {}; + + if (portals_json.is_array() && portals_json.size()%4 == 0) { + + portals.reserve(portals_json.size()/4); + + auto pack = [&portals_json](size_t i) { + auto x = portals_json[i][0].template get(); + auto y = portals_json[i][1].template get(); + auto z = portals_json[i][2].template get(); + return zeno::vec3f(x, y, z); + }; + + for (size_t i=0; i(); + portals.push_back({v0, v1, v2, v3, psize}); + } + } + + if (OptixUtil::sky_tex.has_value()) { + xinxinoptix::updatePortalLights(portals); + } + OptixUtil::portal_delayed = [=]() { + xinxinoptix::updatePortalLights(portals); + }; + } //portals } } return sky_found; @@ -1160,6 +1235,9 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { if (OptixUtil::sky_tex.has_value() && tex == OptixUtil::sky_tex.value()) { continue; } + if (tex == OptixUtil::default_sky_tex) { + continue; + } needToRemoveTexPaths.emplace_back(tex); } for (const auto& need_remove_tex: needToRemoveTexPaths) { @@ -1389,11 +1467,11 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } 
~RenderEngineOptx() override { - xinxinoptix::optixcleanup(); + xinxinoptix::optixDestroy(); } - void cleanupOptix() override { - + void cleanupAssets() override { + xinxinoptix::optixCleanup(); } }; diff --git a/zenovis/src/zhxx/RenderEngineZhxx.cpp b/zenovis/src/zhxx/RenderEngineZhxx.cpp index 8ee8ed1621..1dfe495fe7 100644 --- a/zenovis/src/zhxx/RenderEngineZhxx.cpp +++ b/zenovis/src/zhxx/RenderEngineZhxx.cpp @@ -123,7 +123,7 @@ struct RenderEngineZhxx : RenderEngine, zeno::disable_copy { zenvis::finalize(); } - void cleanupOptix() override { + void cleanupAssets() override { } }; diff --git a/zenovis/xinxinoptix/CMakeLists.txt b/zenovis/xinxinoptix/CMakeLists.txt index 3f558d2378..43e552e726 100644 --- a/zenovis/xinxinoptix/CMakeLists.txt +++ b/zenovis/xinxinoptix/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(zenovis PRIVATE volume/optixVolume.h optixSphere.cpp optixSphere.h + Portal.h Shape.h XAS.h @@ -36,10 +37,6 @@ target_sources(zenovis PRIVATE SDK/sutil/Preprocessor.h SDK/sutil/WorkDistribution.h ) -#if (OPTIX_USE_20XX) - #target_compile_definitions(zenovis PRIVATE -DUSING_20XX) -#endif() - find_package(CUDAToolkit REQUIRED COMPONENTS cudart nvrtc REQUIRED) target_link_libraries(zenovis PRIVATE CUDA::cudart CUDA::nvrtc) @@ -133,6 +130,8 @@ set(FILE_LIST ${CMAKE_CURRENT_SOURCE_DIR}/@LightBounds.h ${CMAKE_CURRENT_SOURCE_DIR}/@LightTree.h + ${CMAKE_CURRENT_SOURCE_DIR}/@Portal.h + ${CMAKE_CURRENT_SOURCE_DIR}/@Sampling.h ${CMAKE_CURRENT_SOURCE_DIR}/@Shape.h ${CMAKE_CURRENT_SOURCE_DIR}/@Light.cu diff --git a/zenovis/xinxinoptix/Light.h b/zenovis/xinxinoptix/Light.h index 6554fe16de..97bdd8697d 100644 --- a/zenovis/xinxinoptix/Light.h +++ b/zenovis/xinxinoptix/Light.h @@ -7,6 +7,8 @@ // #include "DisneyBSDF.h" #include "proceduralSky.h" +#include "Portal.h" + static __inline__ __device__ vec3 ImportanceSampleEnv(float* env_cdf, int* env_start, int nx, int ny, float p, float &pdf) { @@ -210,8 +212,7 @@ static __forceinline__ __device__ void 
DirectLighting(RadiancePRD *prd, ShadowPRD& shadowPRD, const float3& shadingP, const float3& ray_dir, TypeEvalBxDF& evalBxDF, TypeAux* taskAux=nullptr, float3* RadianceWithoutShadow=nullptr) { - const float3 wo = normalize(-ray_dir); - float3 light_attenuation = vec3(1.0f); + const float3 wo = normalize(-ray_dir); const float _SKY_PROB_ = params.skyLightProbablity(); @@ -415,7 +416,7 @@ void DirectLighting(RadiancePRD *prd, ShadowPRD& shadowPRD, const float3& shadin shadowPRD.maxDistance = lsr.dist; traceOcclusion(params.handle, shadowPRD.origin, lsr.dir, 0, lsr.dist, &shadowPRD); - light_attenuation = shadowPRD.attanuation; + auto light_attenuation = shadowPRD.attanuation; if (nullptr==RadianceWithoutShadow && lengthSquared(light_attenuation) == 0.0f) return; @@ -446,69 +447,125 @@ void DirectLighting(RadiancePRD *prd, ShadowPRD& shadowPRD, const float3& shadin } else { - float env_weight_sum = 1e-8f; - int NSamples = prd->depth<=2?1:1;//16 / pow(4.0f, (float)prd->depth-1); - for(int samples=0;samplesseed), envpdf) - : vec3(params.sunLightDirX, params.sunLightDirY, params.sunLightDirZ); - auto sun_dir = BRDFBasics::halfPlaneSample(prd->seed, sunLightDir, - params.sunSoftness * 0.0f); //perturb the sun to have some softness - sun_dir = hasenv ? 
normalize(sunLightDir):normalize(sun_dir); - - float tmpPdf; - auto illum = float3(envSky(sun_dir, sunLightDir, make_float3(0., 0., 1.), - 40, // be careful - .45, 15., 1.030725f * 0.3f, params.elapsedTime, tmpPdf)); - if(tmpPdf <= 0.0f) { return; } + auto shadeTask = [&](float3 sampleDir, float samplePDF, float3 illum, const bool mis) {\ - auto Ldir = sun_dir; - if (envpdf < __FLT_DENORM_MIN__) { - return; - } - - shadowPRD.maxDistance = 1e16f; - traceOcclusion(params.handle, shadowPRD.origin, sun_dir, - 1e-5f, // tmin - 1e16f, // tmax, + shadowPRD.attanuation = vec3(1.0); + shadowPRD.maxDistance = FLT_MAX; + traceOcclusion(params.handle, shadowPRD.origin, sampleDir, + 0, // tmin + FLT_MAX, // tmax, &shadowPRD); - light_attenuation = shadowPRD.attanuation; - if (nullptr==RadianceWithoutShadow && lengthSquared(light_attenuation) == 0.0f) return; + if (nullptr==RadianceWithoutShadow && lengthSquared(shadowPRD.attanuation) == 0.0f) return; - auto inverseProb = 1.0f/_SKY_PROB_; - auto bxdf_value = evalBxDF(sun_dir, wo, scatterPDF); + auto bxdf_value = evalBxDF(sampleDir, wo, scatterPDF); - float tmp = 1.0f; + float tmp = 1.0f / samplePDF; - if constexpr(_MIS_) { - float misWeight = BRDFBasics::PowerHeuristic(tmpPdf, scatterPDF); + if (mis) { + float misWeight = BRDFBasics::PowerHeuristic(samplePDF, scatterPDF); misWeight = misWeight>0.0f?misWeight:1.0f; misWeight = scatterPDF>1e-5f?misWeight:0.0f; - misWeight = tmpPdf>1e-5f?misWeight:0.0f; + misWeight = samplePDF>1e-5f?misWeight:0.0f; - tmp = (1.0f / NSamples) * misWeight * inverseProb / tmpPdf; - } else { - tmp = (1.0f / NSamples) * inverseProb / tmpPdf; - } + tmp *= misWeight; + } float3 radianceNoShadow = illum * tmp * bxdf_value; if (nullptr != RadianceWithoutShadow) { - *RadianceWithoutShadow = radianceNoShadow; + *RadianceWithoutShadow += radianceNoShadow; } if constexpr (!detail::is_void::value) { - (*taskAux)(illum * tmp * light_attenuation); + (*taskAux)(illum * tmp * shadowPRD.attanuation); }// TypeAux - 
prd->radiance += radianceNoShadow * light_attenuation; // with shadow + prd->radiance += radianceNoShadow * shadowPRD.attanuation; // with shadow + }; // shadeTask + + UF = UF / _SKY_PROB_; + UF = clamp(UF, 0.0f, 1.0f); + + auto binsearch = [&](float* cdf, uint min, uint max) { + //auto idx = min; + while(min < max) { + auto _idx_ = (min + max) / 2; + auto _cdf_ = cdf[_idx_]; + + if (_cdf_ > UF) { + max = _idx_; continue; //include + } + if (_cdf_ < UF) { + min = _idx_+1; continue; + } + min = _idx_; break; + } + return min; + }; + + auto dlights = reinterpret_cast(params.dlights_ptr); + + if (nullptr != dlights && dlights->COUNT()) { + + auto idx = binsearch(dlights->cdf, 0, dlights->COUNT()); + auto& dlight = dlights->list[idx]; + auto dlight_dir = reinterpret_cast(dlight.direction); + + auto sample_dir = BRDFBasics::halfPlaneSample(prd->seed, dlight_dir, dlight.angle/180.0f); + auto sample_prob = _SKY_PROB_ / dlights->COUNT(); + + if (dlight.intensity > 0) { + auto ccc = dlight.color * dlight.intensity; + auto illum = reinterpret_cast(ccc); + shadeTask(sample_dir, sample_prob, illum, false); + } + } + + auto plights = reinterpret_cast(params.plights_ptr); + + if (plights != nullptr && plights->COUNT()) { + + uint idx = binsearch(plights->cdf, 0, plights->COUNT()); + auto plight = &plights->list[idx]; + + LightSampleRecord lsr; lsr.PDF = 0.0f; + float2 uu = { prd->rndf(), prd->rndf() }; + float3 color {}; + + plight->sample(lsr, reinterpret_cast(shadingP), uu, color); + + lsr.PDF *= plights->pdf[idx] * _SKY_PROB_; + if (lsr.PDF > 0) { + //auto suv = sphereUV(lsr.dir, true); + //color = (vec3)texture2D(params.sky_texture, vec2(suv.x, suv.y)); + shadeTask(lsr.dir, lsr.PDF, color * params.sky_strength, false); + } + return; + } + + { // SKY + bool hasenv = params.skynx | params.skyny; + hasenv = params.usingHdrSky && hasenv; + float envpdf = 1.0f; + + vec3 sunLightDir = vec3(params.sunLightDirX, params.sunLightDirY, params.sunLightDirZ); + + vec3 sample_dir = 
hasenv? ImportanceSampleEnv(params.skycdf, params.sky_start, + params.skynx, params.skyny, rnd(prd->seed), envpdf) + : BRDFBasics::halfPlaneSample(prd->seed, sunLightDir, + params.sunSoftness * 0.0f); + sample_dir = normalize(sample_dir); + + float samplePDF; + float3 illum = envSky(sample_dir, sunLightDir, make_float3(0., 0., 1.), + 40, // be careful + .45, 15., 1.030725f * 0.3f, params.elapsedTime, samplePDF);\ + samplePDF *= _SKY_PROB_; + if(samplePDF <= 0.0f) { return; } + + shadeTask(sample_dir, samplePDF, illum, true); } } }; \ No newline at end of file diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index c1cbaab36b..8c7d168f2f 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -336,9 +336,21 @@ inline void createRTProgramGroups(OptixDeviceContext &context, OptixModule &_mod } struct cuTexture{ std::string md5; - cudaArray_t gpuImageArray; - cudaTextureObject_t texture; - cuTexture(){gpuImageArray = nullptr;texture=0;} + + cudaArray_t gpuImageArray = nullptr; + cudaTextureObject_t texture = 0llu; + + uint32_t width, height; + float average = 0.0f; + + std::vector cdf; + std::vector pdf; + std::vector start; + + std::vector rawData; + + cuTexture() {} + cuTexture(uint32_t w, uint32_t h) : width(w), height(h) {} ~cuTexture() { if(gpuImageArray!=nullptr) @@ -355,7 +367,7 @@ inline sutil::Texture loadCubeMap(const std::string& ppm_filename) } inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, int ny, int nc) { - auto texture = std::make_shared(); + auto texture = std::make_shared(nx, ny); std::vector data; data.resize(nx*ny); for(int j=0;j makeCudaTexture(unsigned char* img, int nx, in } inline std::shared_ptr makeCudaTexture(float* img, int nx, int ny, int nc) { - auto texture = std::make_shared(); + auto texture = std::make_shared(nx, ny); std::vector data; data.resize(nx*ny); for(int j=0;j loadIES(const std::string& path, float& coneAngle) return iesData; } + inline 
std::map> g_tex; inline std::map g_tex_last_write_time; inline std::map md5_path_mapping; inline std::optional sky_tex; -inline std::map sky_nx_map; -inline std::map sky_ny_map; -inline std::map sky_avg_map; +inline std::string default_sky_tex; +inline std::optional> portal_delayed; struct WrapperIES { raii ptr; @@ -628,18 +640,19 @@ struct WrapperIES { inline std::map g_ies; -inline std::map> sky_cdf_map; -inline std::map> sky_pdf_map; -inline std::map> sky_start_map; - -template -inline void calc_sky_cdf_map(int nx, int ny, int nc, T *img) { - auto &sky_nx = sky_nx_map[sky_tex.value()]; - auto &sky_ny = sky_ny_map[sky_tex.value()]; - auto &sky_cdf = sky_cdf_map[sky_tex.value()]; - auto &sky_pdf = sky_pdf_map[sky_tex.value()]; - auto &sky_start = sky_start_map[sky_tex.value()]; - auto &sky_avg = sky_avg_map[sky_tex.value()]; +inline void calc_sky_cdf_map(int nx, int ny, int nc, std::function& look) { + + auto& tex = g_tex[sky_tex.value()]; + + auto &sky_nx = tex->width; + auto &sky_ny = tex->height; + + auto &sky_avg = tex->average; + + auto &sky_cdf = tex->cdf; + auto &sky_pdf = tex->pdf; + auto &sky_start = tex->start; + sky_nx = nx; sky_ny = ny; //we need to recompute cdf @@ -658,7 +671,7 @@ inline void calc_sky_cdf_map(int nx, int ny, int nc, T *img) { size_t idx2 = jj*nx*nc + ii*nc; size_t idx = jj*nx + ii; float illum = 0.0f; - auto color = zeno::vec3f(img[idx2+0], img[idx2+1], img[idx2+2]); + auto color = zeno::vec3f(look(idx2+0), look(idx2+1), look(idx2+2)); illum = zeno::dot(color, zeno::vec3f(0.33333333f,0.33333333f, 0.33333333f)); //illum = illum > 0.5? 
illum : 0.0f; illum = abs(illum) * sinf(3.1415926f*((float)jj + 0.5f)/(float)ny); @@ -687,6 +700,7 @@ inline void calc_sky_cdf_map(int nx, int ny, int nc, T *img) { } } } + static std::string calculateMD5(const std::vector& input) { CryptoPP::byte digest[CryptoPP::Weak::MD5::DIGESTSIZE]; CryptoPP::Weak::MD5().CalculateDigest(digest, (const CryptoPP::byte*)input.data(), input.size()); @@ -724,9 +738,13 @@ inline void addTexture(std::string path) else { md5_path_mapping[md5Hash] = path; } + int nx, ny, nc; stbi_set_flip_vertically_on_load(true); + std::function lookupTexture = [](uint32_t x) {return 0.0f;}; + std::function cleanupTexture = [](){}; + if (zeno::ends_with(path, ".exr", false)) { float* rgba; const char* err; @@ -748,12 +766,15 @@ inline void addTexture(std::string path) } } assert(rgba); - if(sky_tex.value() == path)//if this is a loading of a sky texture - { - calc_sky_cdf_map(nx, ny, nc, rgba); - } + g_tex[path] = makeCudaTexture(rgba, nx, ny, nc); - free(rgba); + + lookupTexture = [&](uint32_t idx) { + return rgba[idx]; + }; + cleanupTexture = [&]() { + free(rgba); + }; } else if (zeno::ends_with(path, ".ies", false)) { float coneAngle; @@ -783,13 +804,11 @@ inline void addTexture(std::string path) g_tex[path] = std::make_shared(); return; } - int nx = std::max(img->userData().get2("w"), 1); - int ny = std::max(img->userData().get2("h"), 1); + nx = std::max(img->userData().get2("w"), 1); + ny = std::max(img->userData().get2("h"), 1); int channels = std::max(img->userData().get2("channels"), 3); - if(sky_tex.value() == path)//if this is a loading of a sky texture - { - calc_sky_cdf_map(nx, ny, 3, (float *)img->verts.data()); - } + nc = 3; + if (channels == 3) { std::vector ucdata; ucdata.resize(img->verts.size()*3); @@ -811,6 +830,11 @@ inline void addTexture(std::string path) } g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4); } + + lookupTexture = [img](uint32_t idx) { + auto ptr = (float*)img->verts->data(); + return ptr[idx]; 
+ }; } else if (stbi_is_hdr(native_path.c_str())) { float *img = stbi_loadf(native_path.c_str(), &nx, &ny, &nc, 0); @@ -822,12 +846,15 @@ inline void addTexture(std::string path) nx = std::max(nx, 1); ny = std::max(ny, 1); assert(img); - if(sky_tex.value() == path)//if this is a loading of a sky texture - { - calc_sky_cdf_map(nx, ny, nc, img); - } + g_tex[path] = makeCudaTexture(img, nx, ny, nc); - stbi_image_free(img); + + lookupTexture = [&](uint32_t idx) { + return img[idx]; + }; + cleanupTexture = [&]() { + stbi_image_free(img); + }; } else { unsigned char *img = stbi_load(native_path.c_str(), &nx, &ny, &nc, 0); @@ -839,15 +866,31 @@ inline void addTexture(std::string path) nx = std::max(nx, 1); ny = std::max(ny, 1); assert(img); - if(sky_tex.value() == path)//if this is a loading of a sky texture - { - calc_sky_cdf_map(nx, ny, nc, img); - } + g_tex[path] = makeCudaTexture(img, nx, ny, nc); - stbi_image_free(img); + + lookupTexture = [&](uint32_t idx) { + return (float)img[idx] / 255; + }; + cleanupTexture = [&]() { + stbi_image_free(img); + }; } g_tex[path]->md5 = md5Hash; + if(sky_tex.value() == path) + { + calc_sky_cdf_map(nx, ny, nc, lookupTexture); + auto& tex = g_tex[sky_tex.value()]; + auto float_count = nx * ny * nc; + tex->rawData.resize(float_count); + + for (size_t i=0; irawData.at(i) = lookupTexture(i); + } + } + cleanupTexture(); + for (auto i = g_tex.begin(); i != g_tex.end(); i++) { zeno::log_info("-{}", i->first); } @@ -863,12 +906,6 @@ inline void removeTexture(std::string path) { zeno::log_error("removeTexture: {} not exists!", path); } g_tex.erase(path); - sky_nx_map.erase(path); - sky_ny_map.erase(path); - sky_cdf_map.erase(path); - sky_pdf_map.erase(path); - sky_start_map.erase(path); - sky_avg_map.erase(path); g_tex_last_write_time.erase(path); } } diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 75dfd235fb..e5ee736af3 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ 
-306,7 +306,7 @@ extern "C" __global__ void __raygen__rg() //if(prd.depth>prd.max_depth) { float RRprob = max(max(prd.attenuation.x, prd.attenuation.y), prd.attenuation.z); - if(rnd(prd.seed) > RRprob || prd.depth > prd.max_depth*2) { + if(rnd(prd.seed) > RRprob || prd.depth > prd.max_depth) { prd.done=true; } else { prd.attenuation = prd.attenuation / RRprob; diff --git a/zenovis/xinxinoptix/Portal.h b/zenovis/xinxinoptix/Portal.h new file mode 100644 index 0000000000..fc2125fcec --- /dev/null +++ b/zenovis/xinxinoptix/Portal.h @@ -0,0 +1,716 @@ +#pragma once + +#ifndef __CUDACC_RTC__ +#include +#include +#include +#include +#include +#include + +#include +#include + +#endif + +#include +#include + +struct Bounds2f { + Vector2f pMin = Vector2f {FLT_MAX, FLT_MAX}; + Vector2f pMax = -Vector2f {FLT_MAX, FLT_MAX}; + + bool contains(Vector2f p) { + if (p[0] < pMin[0] || p[1] < pMin[1]) + return false; + if (p[0] > pMax[0] || p[1] > pMax[1]) + return false; + + return true; + } + + float area() { + auto delta = pMax - pMin; + return delta[0] * delta[1]; + } +}; + +namespace xx { + +template +struct Array2D { + +#ifndef __CUDACC_RTC__ + std::vector data; + xinxinoptix::raii buffer; +#else + T* data; +#endif + uint32_t _x, _y; + + uint32_t XSize() const { return _x; } + uint32_t YSize() const { return _y; } + + Array2D() = default; + +#ifndef __CUDACC_RTC__ + + Array2D(uint32_t x, uint32_t y) { + _x = x; _y = y; + data.resize(_x * _y, {}); + } + + auto upload() { + + size_t byte_size = sizeof(T) * data.size(); + + buffer.resize( byte_size ); + cudaMemcpy((void*)buffer.handle, data.data(), byte_size, cudaMemcpyHostToDevice); + + struct Dummy { + void* ptr; + uint32_t _x, _y; + }; + + return Dummy { + (void*)buffer.handle, _x, _y + }; + } + +#endif + + T &operator()(uint32_t x, uint32_t y) { + size_t idx = x + y * _x; + return data[idx]; + } + + const T &operator()(uint32_t x, uint32_t y) const { + size_t idx = x + y * _x; + return data[idx]; + } + + T lookUV(float2 uv) 
const { + + auto xf = uv.x * _x; + auto yf = uv.y * _y; + xf -= 0.5f; yf -= 0.5f; + + auto xi = (int)floor(xf); + auto yi = (int)floor(yf); + + auto dx = xf - xi; + auto dy = yf - yi; + + auto v00 = lookUp( xi, yi); + auto v10 = lookUp(1+xi, yi); + auto v01 = lookUp( xi, 1+yi); + auto v11 = lookUp(1+xi, 1+yi); + + return + + (v00 * (1-dx) + v10 * dx) * (1-dy) + + + (v01 * (1-dx) + v11 * dx) * dy; + } + + T lookUp(int x, int y) const { + + if (x<0 || x >= _x) return {}; + if (y<0 || y >= _y) return {}; + size_t idx = x + y * _x; + return data[idx]; + } +}; + +}; + +struct SummedAreaTable { + public: + // SummedAreaTable Public Methods + SummedAreaTable() = default; + +#ifndef __CUDACC_RTC__ + SummedAreaTable(const xx::Array2D &values) + : sum(values.XSize(), values.YSize()) { + + sum(0, 0) = values(0, 0); + // Compute sums along first row and column + for (int x = 1; x < sum.XSize(); ++x) + sum(x, 0) = values(x, 0) + sum(x - 1, 0); + for (int y = 1; y < sum.YSize(); ++y) + sum(0, y) = values(0, y) + sum(0, y - 1); + + // Compute sums for the remainder of the entries + for (int y = 1; y < sum.YSize(); ++y) + for (int x = 1; x < sum.XSize(); ++x) + sum(x, y) = (values(x, y) + sum(x - 1, y) + sum(x, y - 1) - sum(x - 1, y - 1)); + } + + auto upload() { + return sum.upload(); + } + +#endif + + float Integral(Bounds2f extent) const { + double s = ((double)Lookup(extent.pMax[0], extent.pMax[1]) - (double)Lookup(extent.pMin[0], extent.pMax[1])) + + + ((double)Lookup(extent.pMin[0], extent.pMin[1]) - (double)Lookup(extent.pMax[0], extent.pMin[1])); + return fmaxf(s / (sum.XSize() * sum.YSize()), 0); + } + + private: + // SummedAreaTable Private Methods + float Lookup(float x, float y) const { + // Rescale $(x,y)$ to table resolution and compute integer coordinates + x = x * sum.XSize(); + y = y * sum.YSize(); + + x = x - 0.5f; + y = y - 0.5f; + + int x0 = (int)x; + int y0 = (int)y; + + float dx = x - int(x); + float dy = y - int(y); + + // Bilinearly interpolate between 
surrounding table values + float v00 = LookupInt(x0, y0), v10 = LookupInt(x0 + 1, y0); + float v01 = LookupInt(x0, y0 + 1), v11 = LookupInt(x0 + 1, y0 + 1); + + return (1 - dx) * ( (1 - dy) * v00 + dy * v01 ) + + + dx * ( (1 - dy) * v10 + dy * v11 ); + } + + float LookupInt(int x, int y) const { + // Return zero at lower boundaries + if (x <= 0 || y <= 0) + return 0; + + // Reindex $(x,y)$ and return actual stored value + x = min(x-1, (int)sum.XSize() - 1); + y = min(y-1, (int)sum.YSize() - 1); + return sum(x, y); + } + + // SummedAreaTable Private Members + xx::Array2D sum; +}; + +template +inline T BiLinear(T* data, uint width, uint height, float2 pos) { + + pos -= {0.5f, 0.5f}; + + auto lowX = (int)std::floor(pos.x), highX = lowX+1; + auto lowY = (int)std::floor(pos.y), highY = lowY+1; + + auto ratioX = pos.x - lowX; + auto ratioY = pos.y - lowY; + + auto lookUp = [&](int x, int y) { + if (x < 0 || x >= width ) return T{}; + if (y < 0 || y >= height) return T{}; + + return *(data + (y * width + x)); + }; + + auto v00 = lookUp(lowX, lowY); + auto v10 = lookUp(highX,lowY); + auto vv0 = v00 * (1-ratioX) + v10 * ratioX; + + auto v01 = lookUp(lowX, highY); + auto v11 = lookUp(highX,highY); + auto vv1 = v01 * (1-ratioX) + v11 * ratioX; + + return vv0 * (1-ratioY) + vv1 * ratioY; +} + +struct Portal { + Vector3f p0, p1, p2, p3; + uint32_t psize; +}; + +struct PortalLight { + + xx::Array2D image {}; + xx::Array2D dist {}; + SummedAreaTable sat; + + Portal portal; + Vector3f X,Y,Z; + //PortalLight() = default; + +#ifndef __CUDACC_RTC__ + + auto pack() { + + auto image_dummy = image.upload(); + auto dist_dummy = dist.upload(); + auto sat_dummy = sat.upload(); + + struct Dummy { + + typeof(image_dummy) image; + typeof(dist_dummy) dist; + typeof(sat_dummy) sat; + Portal portal; + Vector3f X,Y,Z; + }; + + return Dummy { image_dummy, dist_dummy, sat_dummy, portal, X, Y, Z }; + } + + PortalLight(const Portal& por, float3* texture, uint tex_width, uint tex_height, glm::mat4* 
rotate=nullptr) : portal(por) { + + Vector3f p01 = normalize(portal.p1 - portal.p0); + Vector3f p12 = normalize(portal.p2 - portal.p1); + Vector3f p32 = normalize(portal.p2 - portal.p3); + Vector3f p03 = normalize(portal.p3 - portal.p0); + // Do opposite edges have the same direction? + if (std::abs(dot(p01, p32) - 1) > .001 || std::abs(dot(p12, p03) - 1) > .001) + throw std::runtime_error("Infinite light portal isn't a planar quadrilateral"); + + // Sides perpendicular? + if (std::abs(dot(p01, p12)) > .001 || std::abs(dot(p12, p32)) > .001 || + std::abs(dot(p32, p03)) > .001 || std::abs(dot(p03, p01)) > .001) + throw std::runtime_error("Infinite light portal isn't a planar quadrilateral"); + + X = p03, Y = p01, Z = -cross(X, Y); + + uint pixel_count_x = por.psize, pixel_count_y = por.psize; + uint pixel_count = pixel_count_x * pixel_count_y; + + image = xx::Array2D(pixel_count_x, pixel_count_y); + dist = xx::Array2D(pixel_count_x, pixel_count_y); + + auto luminance = [](float3 c) { + return dot(c, float3{0.2722287, 0.6740818, 0.0536895}); + }; + + for (uint i=0; i(uv), &duv_dw); + + if (rotate != nullptr && *rotate != glm::mat4(1.0f)) { + glm::vec4 tmp = glm::vec4(world_dir[0], world_dir[1], world_dir[2], 0.0f); + tmp = tmp * (*rotate); + + world_dir = {tmp.x, tmp.y, tmp.z}; + } + + auto suv = sphereUV(reinterpret_cast(world_dir), true); + auto pos = (*(float2*)&suv) * make_float2(tex_width, tex_height); + + auto pixel = BiLinear(texture, tex_width, tex_height, pos); + auto average = (pixel.x + pixel.y + pixel.z) / 3.0f; + //average = luminance(pixel); + //average *= std::sin(M_PIf * suv.y); + + image(i, j) = pixel; + dist(i, j) = duv_dw * average; + } // j + } // i + + sat = SummedAreaTable(dist); + + #if !defined( NDEBUG ) + zeno::write_pfm("portal.pfm", image.XSize(), image.YSize(), (float*)image.data.data()); + zeno::write_pfm("dist.pfm", dist.XSize(), dist.YSize(), (float*)dist.data.data(), true); + #endif + } + +#endif + + inline float area() { + auto a = 
length(portal.p1 - portal.p0); + auto b = length(portal.p2 - portal.p1); + return a * b; + } + + float phi() { + + float3 sum {}; + for (uint y=0; y= angleX1 || angleY0 >= angleY1) + { + return false; + } + + Vector2f uv0 = { + ( angleX0 + M_PI_2f ) / M_PIf, + ( angleY0 + M_PI_2f ) / M_PIf + }; + + Vector2f uv1 = { + ( angleX1 + M_PI_2f ) / M_PIf, + ( angleY1 + M_PI_2f ) / M_PIf + }; + + bounds = Bounds2f{ uv0, uv1 }; + return true; + } + + template + static float SampleBisection(CDF P, const float u, float min, float max, uint n) { + assert(0.0<=min && min < max && max<=1.0); + + while (min < max && ( (n * max) - (n * min)) > 1) { + + assert(P(min) <= u); + assert(P(max) >= u); + float mid = (min + max) / 2; + auto PM = P(mid); + //PM = clamp(PM, 0.0f, 1.0f); + + if (PM > u) + max = mid; + else + min = mid; + } + + // Find sample by interpolating between _min_ and _max_ + float t = (u - P(min)) / (P(max) - P(min)); + return clamp(pbrt::Lerp(t, min, max), min, max); + } + + float Eval(float2 p) const { + float2 pi{ fminf(p.x * dist.XSize(), dist.XSize() - 1), + fminf(p.y * dist.YSize(), dist.YSize() - 1) }; + //return dist.lookUp((int)pi.x, (int)pi.y); + return dist.lookUV(p); + } + + Vector2f direction_uv(Vector3f dir, float *duvdw=nullptr) { + + auto x = dot(dir, X); + auto y = dot(dir, Y); + auto z = dot(dir, Z); + + if (z <= 0) {return {};} + + auto w = Vector3f{x, y, z}; + + if (duvdw) + *duvdw = pbrt::Sqr(M_PIf) * (1 - pbrt::Sqr(w[0])) * (1 - pbrt::Sqr(w[1])) / w[2]; + + auto sinL = sqrt(1.0f - z * z); + auto angleX = asin(x / sinL); + auto angleY = -acos(y / sinL); + + Vector2f uv = { + ( angleX + M_PI_2f ) / M_PIf, + ( angleY + M_PIf ) / M_PIf + }; + } + + Vector3f uv_direction(float2 uv, float* duvdw=nullptr) { + + float alpha = -M_PIf / 2 + uv.x * M_PIf; + float beta = -M_PIf / 2 + uv.y * M_PIf; + float x = tanf(alpha), y = tanf(beta); + + DCHECK(!IsInf(x) && !IsInf(y)); + + Vector3f w = normalize(Vector3f(x, y, 1)); + + if (duvdw) + *duvdw = 
pbrt::Sqr(M_PIf) * (1 - pbrt::Sqr(w[0])) * (1 - pbrt::Sqr(w[1])) / w[2]; + + Vector3f dir {}; + dir = dir + X * w[0]; + dir = dir + Y * w[1]; + dir = dir + Z * w[2]; + return dir; + } + + void sample(LightSampleRecord& lsr, const Vector3f& pos, float2 uu, float3& color) { + Bounds2f bds; // uv bounds + auto valid = ImageBounds(pos, bds); + if (!valid) return; + + auto bIntegral = sat.Integral(bds); + if( bIntegral <= 0 ) return; + + auto Px = [&](float x) -> float { + Bounds2f bx = bds; + bx.pMax[0] = x; + return sat.Integral(bx) / bIntegral; + }; + + float2 uv; + uv.x = SampleBisection(Px, uu.x, bds.pMin[0], bds.pMax[0], image.XSize()); + + uint nx = image.XSize(); + Bounds2f bCond { + { floor(uv.x * nx)/nx, bds.pMin[1] }, + { ceil (uv.x * nx)/nx, bds.pMax[1] } }; + + if (bCond.pMin[0] == bCond.pMax[0]) + bCond.pMax[0] += 1.0f / nx; + + float condIntegral = sat.Integral(bCond); + if (condIntegral == 0) + return; + + auto Py = [&](float y) -> float { + Bounds2f by = bCond; + by.pMax[1] = y; + return sat.Integral(by) / condIntegral; + }; + uv.y = SampleBisection(Py, uu.y, bds.pMin[1], bds.pMax[1], image.YSize()); + //uv = clamp(uv, 0.0, 1.0); + + float duvdw; + auto tmp = uv_direction(uv, &duvdw); + + lsr.dir = reinterpret_cast(tmp); + lsr.dist = FLT_MAX; + lsr.uv = uv; + + // Compute PDF and return point sampled from windowed function + lsr.PDF = Eval(uv) / bIntegral; + lsr.PDF /= duvdw; + if(!isfinite(lsr.PDF)) { + lsr.PDF = 0.0; + return; + } + + color = image.lookUV(uv); + } + + float PDF(Vector3f p, Vector3f w) { + float duvdw; + auto uv = direction_uv(w, &duvdw); + + Bounds2f bds; + bool valid = ImageBounds(p, bds); + if (!valid) return 0.0f; + + float integ = sat.Integral(bds); + if (integ == 0) return 0.0f; + + return Eval(reinterpret_cast(uv)) / duvdw; + } +}; + +struct PortalLightList { + +#ifndef __CUDACC_RTC__ + std::vector list; + xinxinoptix::raii buffer; + + std::vector pdf; + std::vector cdf; + xinxinoptix::raii pdf_buffer; + xinxinoptix::raii 
cdf_buffer; + + xinxinoptix::raii dummy_buffer; +#else + PortalLight *list; + size_t count; + + float* pdf; + float* cdf; +#endif + + inline size_t COUNT() { + #ifndef __CUDACC_RTC__ + return list.size(); + #else + return count; + #endif + } + +#ifndef __CUDACC_RTC__ + auto upload() { + + if (list.size() == 0) { + *this = {}; + return 0llu; + } + + auto first = list.front().pack(); + std::vector tmp; + tmp.reserve(list.size()); + tmp.push_back(first); + + pdf.clear(); pdf.resize(list.size()); + cdf.clear(); cdf.resize(list.size()); + + auto power = list.front().phi(); + pdf[0] = power; + cdf[0] = power; + + for (size_t i=1; i list; + std::vector cdf; + + xinxinoptix::raii data_buffer; + xinxinoptix::raii cdf_buffer; + + xinxinoptix::raii dummy_buffer; +#else + zeno::DistantLightData* list; + float* cdf; + uint count; +#endif + inline size_t COUNT() { +#ifndef __CUDACC_RTC__ + return list.size(); +#else + return count; +#endif + } + +#ifndef __CUDACC_RTC__ + + auto upload() { + + size_t byte_size = sizeof(zeno::DistantLightData) * list.size(); + data_buffer.resize( byte_size ); + cudaMemcpy((void*)data_buffer.handle, list.data(), byte_size, cudaMemcpyHostToDevice); + + byte_size = sizeof(float) * cdf.size(); + cdf_buffer.resize(byte_size); + cudaMemcpy((void*)cdf_buffer.handle, cdf.data(), byte_size, cudaMemcpyHostToDevice); + + struct Dummy { + void* data; + void* cdf; + size_t count; + }; + + Dummy dummy { + (void*)data_buffer.handle, + (void*)cdf_buffer.handle, + list.size() + }; + + dummy_buffer.resize(sizeof(dummy)); + cudaMemcpy((void*)dummy_buffer.handle, &dummy, sizeof(dummy), cudaMemcpyHostToDevice); + + return dummy_buffer.handle; + } + +#endif +}; \ No newline at end of file diff --git a/zenovis/xinxinoptix/Sampling.h b/zenovis/xinxinoptix/Sampling.h index dbffe9db65..289adb7078 100644 --- a/zenovis/xinxinoptix/Sampling.h +++ b/zenovis/xinxinoptix/Sampling.h @@ -7,13 +7,19 @@ #ifdef __CUDACC_RTC__ #include "zxxglslvec.h" + using Vector2f = vec2; using 
Vector3f = vec3; #else #include "Host.h" #include + using Vector2f = zeno::vec<2, float>; using Vector3f = zeno::vec<3, float>; #endif +#ifndef FLT_MAX +#define FLT_MAX __FLT_MAX__ +#endif + #ifdef __CUDACC_DEBUG__ #define DCHECK assert #else @@ -24,6 +30,22 @@ #endif +struct LightSampleRecord { + float3 p; + float PDF; + + float3 n; + float NoL; + + float3 dir; + float dist; + + float2 uv; + + float intensity = 1.0f; + bool isDelta = false; +}; + namespace pbrt { template @@ -229,6 +251,7 @@ static __host__ __device__ __inline__ float3 sphereUV(const float3 &dir, bool in return float3 {u, v, 0.0f}; } + static __host__ __device__ __inline__ float3 interp(float2 barys, float3 a, float3 b, float3 c) { float w0 = 1 - barys.x - barys.y; diff --git a/zenovis/xinxinoptix/Shape.h b/zenovis/xinxinoptix/Shape.h index 16d9668009..249f943218 100644 --- a/zenovis/xinxinoptix/Shape.h +++ b/zenovis/xinxinoptix/Shape.h @@ -9,22 +9,6 @@ #include "Host.h" #endif -struct LightSampleRecord { - float3 p; - float PDF; - - float3 n; - float NoL; - - float3 dir; - float dist; - - float2 uv; - - float intensity = 1.0f; - bool isDelta = false; -}; - static constexpr float MinSphericalSampleArea = 3e-4f; static constexpr float MaxSphericalSampleArea = 6.22f; diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index d4251bdb72..505b340f5a 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -81,6 +82,7 @@ #include "LightBounds.h" #include "LightTree.h" +#include "Portal.h" #include "ChiefDesignerEXR.h" using namespace zeno::ChiefDesignerEXR; @@ -269,9 +271,6 @@ struct PathTracerState OptixTraversableHandle meshHandleIAS; raii meshBufferIAS; - OptixTraversableHandle gas_handle = {}; // Traversable handle for triangle AS - raii d_gas_output_buffer; // Triangle AS memory - raii d_vertices; raii d_clr; raii d_nrm; @@ -290,17 
+289,12 @@ struct PathTracerState raii d_uniforms; raii ptx_module; - raii ptx_module2; OptixPipelineCompileOptions pipeline_compile_options; OptixPipeline pipeline; OptixProgramGroup raygen_prog_group; OptixProgramGroup radiance_miss_group; OptixProgramGroup occlusion_miss_group; - OptixProgramGroup radiance_hit_group; - OptixProgramGroup occlusion_hit_group; - OptixProgramGroup radiance_hit_group2; - OptixProgramGroup occlusion_hit_group2; raii stream; raii accum_buffer_p; @@ -311,7 +305,14 @@ struct PathTracerState raii accum_buffer_s; raii accum_buffer_t; raii accum_buffer_b; - raii lightsbuf_p; + + raii finite_lights_ptr; + + PortalLightList plights; + DistantLightList dlights; + + //std::vector portals; + raii sky_cdf_p; raii sky_start; Params params; @@ -1301,55 +1302,6 @@ void updateRootIAS() state.params.handle = state.rootHandleIAS; } -static void buildMeshAccel( PathTracerState& state ) -{ - // - // copy mesh data to device - // - const size_t vertices_size_in_bytes = g_vertices.size() * sizeof( Vertex ); - CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.d_vertices.reset() ), vertices_size_in_bytes ) ); - CUDA_CHECK( cudaMemcpy( - reinterpret_cast( (CUdeviceptr&)state.d_vertices ), - g_vertices.data(), vertices_size_in_bytes, - cudaMemcpyHostToDevice - ) ); - - const size_t mat_indices_size_in_bytes = g_mat_indices.size() * sizeof( uint32_t ); - CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.d_mat_indices.reset() ), mat_indices_size_in_bytes ) ); - CUDA_CHECK( cudaMemcpy( - reinterpret_cast( (CUdeviceptr)state.d_mat_indices ), - g_mat_indices.data(), - mat_indices_size_in_bytes, - cudaMemcpyHostToDevice - ) ); - - // // Build triangle GAS // // One per SBT record for this build input - std::vector triangle_input_flags(//MAT_COUNT - g_mtlidlut.size(), - OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL); - - OptixBuildInput triangle_input = {}; - triangle_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; - triangle_input.triangleArray.vertexFormat = 
OPTIX_VERTEX_FORMAT_FLOAT3; - triangle_input.triangleArray.vertexStrideInBytes = sizeof( Vertex ); - triangle_input.triangleArray.numVertices = static_cast( g_vertices.size() ); - triangle_input.triangleArray.vertexBuffers = g_vertices.empty() ? nullptr : & state.d_vertices; - triangle_input.triangleArray.flags = triangle_input_flags.data(); - triangle_input.triangleArray.numSbtRecords = g_vertices.empty() ? 1 : g_mtlidlut.size(); - triangle_input.triangleArray.sbtIndexOffsetBuffer = state.d_mat_indices; - triangle_input.triangleArray.sbtIndexOffsetSizeInBytes = sizeof( uint32_t ); - triangle_input.triangleArray.sbtIndexOffsetStrideInBytes = sizeof( uint32_t ); - - OptixAccelBuildOptions accel_options = {}; - accel_options.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS | OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS; - accel_options.operation = OPTIX_BUILD_OPERATION_BUILD; - - buildXAS(state.context, accel_options, triangle_input, state.d_gas_output_buffer, state.d_gas_output_buffer); - - state.d_vertices.reset(); - state.d_mat_indices.reset(); -} - static void createSBT( PathTracerState& state ) { state.d_raygen_record.reset(); @@ -1357,7 +1309,6 @@ static void createSBT( PathTracerState& state ) state.d_hitgroup_records.reset(); state.d_callable_records.reset(); - state.d_gas_output_buffer.reset(); state.accum_buffer_p.reset(); state.albedo_buffer_p.reset(); state.normal_buffer_p.reset(); @@ -1742,7 +1693,10 @@ void optixinit( int argc, char* argv[] ) auto cur_path = std::string(_pgmptr); cur_path = cur_path.substr(0, cur_path.find_last_of("\\")); #endif - OptixUtil::sky_tex = cur_path + "/hdr/Panorama.hdr"; + + OptixUtil::default_sky_tex = cur_path + "/hdr/Panorama.hdr"; + OptixUtil::sky_tex = OptixUtil::default_sky_tex; + OptixUtil::addTexture(OptixUtil::sky_tex.value()); xinxinoptix::update_hdr_sky(0, {0, 0, 0}, 0.8); } @@ -2105,6 +2059,14 @@ void unload_light(){ triangleLightCoords.clear(); 
triangleLightNormals.clear(); + state.dlights = {}; + state.plights = {}; + + state.params.dlights_ptr = 0llu; + state.params.plights_ptr = 0llu; + + OptixUtil::portal_delayed.reset(); + std::cout << "Lights unload done. \n"<< std::endl; } @@ -2158,6 +2120,62 @@ void show_background(bool enable) { state.params.show_background = enable; } +void updatePortalLights(const std::vector& portals) { + + auto &tex = OptixUtil::g_tex[OptixUtil::sky_tex.value()]; + + auto& pll = state.plights; + auto& pls = pll.list; + pls.clear(); + pls.reserve(max(portals.size(), 0) ); + + glm::mat4 rotation = glm::mat4(1.0f); + rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_y), glm::vec3(0,1,0)); + rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_x), glm::vec3(1,0,0)); + rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_z), glm::vec3(0,0,1)); + rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot), glm::vec3(0,1,0)); + + glm::mat4* rotation_ptr = nullptr; + if ( glm::mat4(1.0f) != rotation ) { + rotation_ptr = &rotation; + } + + for (auto& portal : portals) { + auto pl = PortalLight(portal, (float3*)tex->rawData.data(), tex->width, tex->height, rotation_ptr); + pls.push_back(std::move(pl)); + } + + state.params.plights_ptr = (void*)pll.upload(); +} + +void updateDistantLights(std::vector& dldl) +{ + if (dldl.empty()) { + state.dlights = {}; + state.params.dlights_ptr = 0u; + return; + } + + float power = 0.0f; + + std::vector cdf; cdf.reserve(dldl.size()); + + for (auto& dld : dldl) { + auto ppp = dld.color * dld.intensity; + power += (ppp[0] + ppp[1] + ppp[2]) / 3.0f; + cdf.push_back(power); + } + + for(auto& c : cdf) { + c /= power; + } + + state.dlights.list = dldl; + state.dlights.cdf = cdf; + + state.params.dlights_ptr = (void*)state.dlights.upload(); +} + void update_procedural_sky( zeno::vec2f sunLightDir, float sunLightSoftness, @@ -2358,7 +2376,7 @@ static void buildLightTrianglesGAS( PathTracerState& state, 
std::vector& void buildLightTree() { camera_changed = true; - state.lightsbuf_p.reset(); + state.finite_lights_ptr.reset(); state.params.lightTreeSampler = 0llu; state.params.triangleLightCoordsBuffer = 0llu; @@ -2544,13 +2562,13 @@ void buildLightTree() { buildLightTrianglesGAS(state, lightsWrapper._triangleLightGeo, lightsWrapper.lightTrianglesGasBuffer, lightsWrapper.lightTrianglesGas); CUDA_CHECK( cudaMalloc( - reinterpret_cast( &state.lightsbuf_p.reset() ), + reinterpret_cast( &state.finite_lights_ptr.reset() ), sizeof( GenericLight ) * std::max(lightsWrapper.g_lights.size(),(size_t)1) ) ); - state.params.lights = (GenericLight*)(CUdeviceptr)state.lightsbuf_p; + state.params.lights = (GenericLight*)(CUdeviceptr)state.finite_lights_ptr; CUDA_CHECK( cudaMemcpy( - reinterpret_cast( (CUdeviceptr)state.lightsbuf_p ), + reinterpret_cast( (CUdeviceptr)state.finite_lights_ptr ), lightsWrapper.g_lights.data(), sizeof( GenericLight ) * lightsWrapper.g_lights.size(), cudaMemcpyHostToDevice ) ); @@ -2729,28 +2747,39 @@ OptixUtil::_compile_group.wait(); theTimer.tock("Done Optix Shader Compile:"); if (OptixUtil::sky_tex.has_value()) { - state.params.sky_texture = OptixUtil::g_tex[OptixUtil::sky_tex.value()]->texture; - state.params.skynx = OptixUtil::sky_nx_map[OptixUtil::sky_tex.value()]; - state.params.skyny = OptixUtil::sky_ny_map[OptixUtil::sky_tex.value()]; - state.params.envavg = OptixUtil::sky_avg_map[OptixUtil::sky_tex.value()]; + + auto &tex = OptixUtil::g_tex[OptixUtil::sky_tex.value()]; + if (tex.get() == 0) { + tex = OptixUtil::g_tex[OptixUtil::default_sky_tex]; + } + + if (tex->texture == state.params.sky_texture) return; + + state.params.sky_texture = tex->texture; + state.params.skynx = tex->width; + state.params.skyny = tex->height; + state.params.envavg = tex->average; + CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.sky_cdf_p.reset() ), - sizeof(float2)*OptixUtil::sky_cdf_map[OptixUtil::sky_tex.value()].size() ) ); + sizeof(float2)*tex->cdf.size() ) ); 
CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.sky_start.reset() ), - sizeof(int)*OptixUtil::sky_start_map[OptixUtil::sky_tex.value()].size() ) ); + sizeof(int)*tex->start.size() ) ); + cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_cdf_p), - OptixUtil::sky_cdf_map[OptixUtil::sky_tex.value()].data(), - sizeof(float)*OptixUtil::sky_cdf_map[OptixUtil::sky_tex.value()].size(), + tex->cdf.data(), + sizeof(float)*tex->cdf.size(), cudaMemcpyHostToDevice); - cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_cdf_p)+sizeof(float)*OptixUtil::sky_cdf_map[OptixUtil::sky_tex.value()].size(), - OptixUtil::sky_pdf_map[OptixUtil::sky_tex.value()].data(), - sizeof(float)*OptixUtil::sky_pdf_map[OptixUtil::sky_tex.value()].size(), + cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_cdf_p) + sizeof(float)*tex->cdf.size(), + tex->pdf.data(), + sizeof(float)*tex->pdf.size(), cudaMemcpyHostToDevice); cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_start), - OptixUtil::sky_start_map[OptixUtil::sky_tex.value()].data(), - sizeof(int)*OptixUtil::sky_start_map[OptixUtil::sky_tex.value()].size(), + tex->start.data(), + sizeof(int)*tex->start.size(), cudaMemcpyHostToDevice); + state.params.skycdf = reinterpret_cast((CUdeviceptr)state.sky_cdf_p); - state.params.sky_start = reinterpret_cast((CUdeviceptr)state.sky_start); + state.params.sky_start = reinterpret_cast((CUdeviceptr)state.sky_start); } else { state.params.skynx = 0; @@ -2764,19 +2793,6 @@ void optixupdateend() { OptixUtil::createPipeline(); printf("Pipeline created \n"); - //static bool hadOnce = false; - //if (hadOnce) { - //OPTIX_CHECK( optixPipelineDestroy( state.pipeline ) ); - //state.raygen_prog_group ) ); - //state.radiance_miss_group ) ); - //state.occlusion_miss_group ) ); - //OPTIX_CHECK( optixProgramGroupDestroy( state.radiance_hit_group ) ); - //OPTIX_CHECK( optixProgramGroupDestroy( state.occlusion_hit_group ) ); - //OPTIX_CHECK( optixProgramGroupDestroy( state.radiance_hit_group2 ) ); - //OPTIX_CHECK( 
optixProgramGroupDestroy( state.occlusion_hit_group2 ) ); - //OPTIX_CHECK( optixModuleDestroy( state.ptx_module ) ); - //OPTIX_CHECK( optixDeviceContextDestroy( state.context ) ); - //} hadOnce = true; state.pipeline_compile_options = OptixUtil::pipeline_compile_options; state.pipeline = OptixUtil::pipeline; @@ -3767,13 +3783,6 @@ void set_perspective_by_focal_length(float const *U, float const *V, float const void set_outside_random_number(int32_t outside_random_number) { state.params.outside_random_number = outside_random_number; } -static void write_pfm(std::string& path, int w, int h, const float *rgb) { - std::string header = zeno::format("PF\n{} {}\n-1.0\n", w, h); - std::vector data(header.size() + w * h * sizeof(zeno::vec3f)); - memcpy(data.data(), header.data(), header.size()); - memcpy(data.data() + header.size(), rgb, w * h * sizeof(zeno::vec3f)); - zeno::file_put_binary(data, path); -} void *optixgetimg_extra(std::string name) { if (name == "diffuse") { @@ -3897,13 +3906,13 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { //SaveEXR(_albedo_buffer, w, h, 4, 0, (path+".albedo.exr").c_str(), nullptr); auto a_path = path + ".albedo.pfm"; std::string native_a_path = zeno::create_directories_when_write_file(a_path); - write_pfm(native_a_path, w, h, _albedo_buffer); + zeno::write_pfm(native_a_path.c_str(), w, h, _albedo_buffer); const float* _normal_buffer = reinterpret_cast(state.normal_buffer_p.handle); //SaveEXR(_normal_buffer, w, h, 4, 0, (path+".normal.exr").c_str(), nullptr); auto n_path = path + ".normal.pfm"; std::string native_n_path = zeno::create_directories_when_write_file(n_path); - write_pfm(native_n_path, w, h, _normal_buffer); + zeno::write_pfm(native_n_path.c_str(), w, h, _normal_buffer); } } } @@ -3930,7 +3939,29 @@ void *optixgetimg(int &w, int &h) { //sutil::saveImage( outfile, buffer, false ); //} -void optixcleanup() { +void optixCleanup() { + + state.dlights = {}; + state.params.dlights_ptr = 0u; + + 
state.plights = {}; + state.params.plights_ptr = 0u; + + lightsWrapper.reset(); + state.finite_lights_ptr.reset(); + + state.params.sky_strength = 1.0f; + state.params.sky_texture; + + auto sky_path = OptixUtil::default_sky_tex; + auto sky_tex = OptixUtil::g_tex[sky_path]; + + OptixUtil::g_tex = { {sky_path, sky_tex} }; + //OptixUtil::g_tex.at(sky_path) = sky_tex; + OptixUtil::sky_tex = OptixUtil::default_sky_tex; +} + +void optixDestroy() { using namespace OptixUtil; try { CUDA_SYNC_CHECK(); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index d26f167511..7df84d652e 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -196,6 +196,9 @@ struct Params uint32_t firstSoloSphereOffset; void* sphereInstAuxLutBuffer; + void* dlights_ptr; + void* plights_ptr; + float skyLightProbablity() { if (sky_strength <= 0.0f) diff --git a/zenovis/xinxinoptix/proceduralSky.h b/zenovis/xinxinoptix/proceduralSky.h index fd8e4d01e7..ad95192d29 100644 --- a/zenovis/xinxinoptix/proceduralSky.h +++ b/zenovis/xinxinoptix/proceduralSky.h @@ -292,30 +292,6 @@ static __inline__ __device__ vec3 proceduralSky( return col; } -static __inline__ __device__ vec3 hdrSky2( - vec3 dir -){ - dir = dir - .rotY(to_radians(params.sky_rot_y)) - .rotX(to_radians(params.sky_rot_x)) - .rotZ(to_radians(params.sky_rot_z)) - .rotY(to_radians(params.sky_rot)); - - vec3 uv = sphereUV(dir, true); - vec3 col = vec3(0); - for(int jj=-2;jj<=2;jj++) - { - for(int ii=-2;ii<=2;ii++) - { - float dx = (float)ii / (float)(params.skynx); - float dy = (float)jj / (float)(params.skyny); - col = col + (vec3)texture2D(params.sky_texture, vec2(uv[0] + dx, uv[1] + dy)) * params.sky_strength; - } - } - - return col/9.0f; -} - static __inline__ __device__ vec3 hdrSky( vec3 dir, float upperBound, float isclamp, float &pdf ){ @@ -363,10 +339,7 @@ static __inline__ __device__ vec3 colorTemperatureToRGB(float temperatureInKelvi return retColor; } 
-static __inline__ __device__ vec3 envSky2(vec3 dir) -{ - return hdrSky2(dir); -} + static __inline__ __device__ vec3 envSky( vec3 dir, vec3 sunLightDir, diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 826145e270..6194369c91 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -8,6 +8,9 @@ #include "optixSphere.h" #include "zeno/utils/vec.h" +#include "zeno/types/LightObject.h" + +#include "Portal.h" enum ShaderMaker { Mesh = 0, @@ -30,7 +33,9 @@ namespace xinxinoptix { std::set uniqueMatsForMesh(); -void optixcleanup(); +void optixCleanup(); + +void optixDestroy(); void optixrender(int fbo = 0, int samples = 1, bool denoise = false, bool simpleRender = false); void *optixgetimg(int &w, int &h); void optixinit(int argc, char* argv[]); @@ -102,6 +107,9 @@ void update_procedural_sky(zeno::vec2f sunLightDir, float sunLightSoftness, zeno void update_hdr_sky(float sky_rot, zeno::vec3f sky_rot3d, float sky_strength); void using_hdr_sky(bool enable); void show_background(bool enable); + +void updatePortalLights(const std::vector& portals); +void updateDistantLights(std::vector& dldl); // void optixUpdateUniforms(std::vector & inConstants); void optixUpdateUniforms(void *inConstants, std::size_t size); } diff --git a/zenovis/xinxinoptix/zxxglslvec.h b/zenovis/xinxinoptix/zxxglslvec.h index 28f59ac447..699081020f 100644 --- a/zenovis/xinxinoptix/zxxglslvec.h +++ b/zenovis/xinxinoptix/zxxglslvec.h @@ -92,6 +92,13 @@ struct vec3{ struct vec2{ float x, y; + + __forceinline__ __device__ float& operator[](unsigned int index) { + auto ptr= &this->x; + ptr += index; + return *ptr; + } + __forceinline__ __device__ vec2(const float2 &_v) { x = _v.x; From 946a08f27e963adb09974097a5d66441712abacb Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 3 Jun 2024 15:41:37 +0800 Subject: [PATCH 040/244] clean up --- ui/zenoedit/viewport/optixviewport.cpp | 4 ++-- zenovis/src/ObjectsManager.cpp | 1 + 
zenovis/xinxinoptix/OptiXStuff.h | 9 +++------ zenovis/xinxinoptix/optixPathTracer.cpp | 11 +++-------- 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/ui/zenoedit/viewport/optixviewport.cpp b/ui/zenoedit/viewport/optixviewport.cpp index cb85615c84..cd57bbb6e1 100644 --- a/ui/zenoedit/viewport/optixviewport.cpp +++ b/ui/zenoedit/viewport/optixviewport.cpp @@ -341,8 +341,8 @@ void OptixWorker::onSetBackground(bool bShowBg) ZASSERT_EXIT(session); auto scene = session->get_scene(); ZASSERT_EXIT(scene); - scene->objectsMan->needUpdateLight = true; - scene->drawOptions->simpleRender = true; + //scene->objectsMan->needUpdateLight = true; + //scene->drawOptions->simpleRender = true; updateFrame(); } diff --git a/zenovis/src/ObjectsManager.cpp b/zenovis/src/ObjectsManager.cpp index cf5e981ea4..a2b67b58e1 100644 --- a/zenovis/src/ObjectsManager.cpp +++ b/zenovis/src/ObjectsManager.cpp @@ -41,6 +41,7 @@ bool ObjectsManager::load_objects(std::map ObjectsManager::get(std::string nid) { diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 8c7d168f2f..49f8a7627b 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -662,8 +662,7 @@ inline void calc_sky_cdf_map(int nx, int ny, int nc, std::function skypdf(nx*ny); - skypdf.assign(nx*ny,0); + for(int jj=0; jj 0.5? illum : 0.0f; illum = abs(illum) * sinf(3.1415926f*((float)jj + 0.5f)/(float)ny); sky_cdf[idx] += illum + (idx>0? 
sky_cdf[idx-1]:0); - skypdf[idx] = illum; } } float total_illum = sky_cdf[sky_cdf.size()-1]; @@ -685,8 +683,7 @@ inline void calc_sky_cdf_map(int nx, int ny, int nc, std::function0) { if(sky_cdf[ii]>sky_cdf[ii-1]) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 505b340f5a..d8799c4ef9 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2170,9 +2170,7 @@ void updateDistantLights(std::vector& dldl) c /= power; } - state.dlights.list = dldl; - state.dlights.cdf = cdf; - + state.dlights = DistantLightList {dldl, cdf}; state.params.dlights_ptr = (void*)state.dlights.upload(); } @@ -2761,7 +2759,7 @@ OptixUtil::_compile_group.wait(); state.params.envavg = tex->average; CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.sky_cdf_p.reset() ), - sizeof(float2)*tex->cdf.size() ) ); + sizeof(float)*tex->cdf.size() ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.sky_start.reset() ), sizeof(int)*tex->start.size() ) ); @@ -2769,10 +2767,7 @@ OptixUtil::_compile_group.wait(); tex->cdf.data(), sizeof(float)*tex->cdf.size(), cudaMemcpyHostToDevice); - cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_cdf_p) + sizeof(float)*tex->cdf.size(), - tex->pdf.data(), - sizeof(float)*tex->pdf.size(), - cudaMemcpyHostToDevice); + cudaMemcpy(reinterpret_cast((CUdeviceptr)state.sky_start), tex->start.data(), sizeof(int)*tex->start.size(), From 435c47f16772820ebb22b9ddb610831eb237bc01 Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 3 Jun 2024 18:27:03 +0800 Subject: [PATCH 041/244] Minor updates --- zenovis/xinxinoptix/Portal.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/zenovis/xinxinoptix/Portal.h b/zenovis/xinxinoptix/Portal.h index fc2125fcec..ef85e6ae09 100644 --- a/zenovis/xinxinoptix/Portal.h +++ b/zenovis/xinxinoptix/Portal.h @@ -243,6 +243,10 @@ struct PortalLight { Vector3f X,Y,Z; //PortalLight() = default; + auto luminance(float3 c) 
{ + return dot(c, float3{0.2722287, 0.6740818, 0.0536895}); + }; + #ifndef __CUDACC_RTC__ auto pack() { @@ -286,10 +290,6 @@ struct PortalLight { image = xx::Array2D(pixel_count_x, pixel_count_y); dist = xx::Array2D(pixel_count_x, pixel_count_y); - auto luminance = [](float3 c) { - return dot(c, float3{0.2722287, 0.6740818, 0.0536895}); - }; - for (uint i=0; iluminance(pixel); //average *= std::sin(M_PIf * suv.y); image(i, j) = pixel; @@ -347,10 +346,9 @@ struct PortalLight { sum += value / duvdw; } } - - auto average = (sum.x + sum.y + sum.z) / 3.0f; - average /= (image.XSize() * image.YSize()); - return area() * average; + + sum /= (image.XSize() * image.YSize()); + return area() * this->luminance(sum); } float3 Le(const Vector3f& ray_origin, const Vector3f& ray_dir) { @@ -428,8 +426,8 @@ struct PortalLight { while (min < max && ( (n * max) - (n * min)) > 1) { - assert(P(min) <= u); - assert(P(max) >= u); + DCHECK(P(min) <= u); + DCHECK(P(max) >= u); float mid = (min + max) / 2; auto PM = P(mid); //PM = clamp(PM, 0.0f, 1.0f); From 06fcbf1912e5d77946c2dbca4823ec132cfabbe3 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 4 Jun 2024 17:44:17 +0800 Subject: [PATCH 042/244] for glm log --- zeno/include/zeno/utils/to_string.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/zeno/include/zeno/utils/to_string.h b/zeno/include/zeno/utils/to_string.h index d44309d9ee..da08fa9bee 100644 --- a/zeno/include/zeno/utils/to_string.h +++ b/zeno/include/zeno/utils/to_string.h @@ -52,6 +52,16 @@ struct _to_stream_impl { return _helper_tuple_to_stream(os, t, fms, std::make_index_sequence - 1>{}); } + template + static auto to_stream(Os &os, T const &t, std::string_view fms) -> decltype((std::enable_if_t::value && !_has_range_begin_end::value, void>)(std::declval()[0], std::declval().length())) { + os << "[glm: "; + for (int i = 0; i < t.length(); ++i) { + to_stream(os, t[i], fms); + os << ", "; + } + os << "]"; + } + template ::value && (std::tuple_size::value == 
0), int> = 0> static void to_stream(Os &os, T const &t, std::string_view fms) { From 45a42b1625b3e40b1c803ebb0b10f78c89edc8df Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 4 Jun 2024 20:12:06 +0800 Subject: [PATCH 043/244] fix-optix-lightmap --- zenovis/src/optx/RenderEngineOptx.cpp | 9 +++++++++ zenovis/xinxinoptix/optixPathTracer.cpp | 5 +++++ zenovis/xinxinoptix/xinxinoptixapi.h | 1 + 3 files changed, 15 insertions(+) diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index b6ba4c66c2..e6287db440 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -1152,6 +1152,15 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } } } + // add light map + for(auto const &[_, ld]: xinxinoptix::get_lightdats()) { + if (ld.profileKey.size()) { + realNeedTexPaths.emplace_back(ld.profileKey); + } + if (ld.textureKey.size()) { + realNeedTexPaths.emplace_back(ld.textureKey); + } + } std::vector needToRemoveTexPaths; for(auto const &[tex, _]: OptixUtil::g_tex) { if (std::find(realNeedTexPaths.begin(), realNeedTexPaths.end(), tex) != realNeedTexPaths.end()) { diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 4f4ba44181..41f83e5399 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2099,6 +2099,11 @@ static std::map lightdats; static std::vector triangleLightCoords; static std::vector triangleLightNormals; + +std::map &get_lightdats() { + return lightdats; +} + void unload_light(){ lightdats.clear(); diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 826145e270..fd40f09ebd 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -104,4 +104,5 @@ void using_hdr_sky(bool enable); void show_background(bool enable); // void optixUpdateUniforms(std::vector & inConstants); void 
optixUpdateUniforms(void *inConstants, std::size_t size); +std::map &get_lightdats(); } From c97314d027e2a6d3672eb99d6517a51b673d5325 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 7 Jun 2024 16:20:36 +0800 Subject: [PATCH 044/244] fix-camera --- ui/zenoedit/viewport/displaywidget.cpp | 4 +--- zenovis/include/zenovis/Camera.h | 1 + zenovis/src/Camera.cpp | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/ui/zenoedit/viewport/displaywidget.cpp b/ui/zenoedit/viewport/displaywidget.cpp index 5a01efa4ef..46fcc826e1 100644 --- a/ui/zenoedit/viewport/displaywidget.cpp +++ b/ui/zenoedit/viewport/displaywidget.cpp @@ -1383,12 +1383,10 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex Zenovis *pZenovis = m_glView->getZenoVis(); ZASSERT_EXIT(pZenovis && pZenovis->getSession()); auto scene = pZenovis->getSession()->get_scene(); - auto _near = scene->camera->m_near; - auto _far = scene->camera->m_far; auto fov = scene->camera->m_fov; auto cz = glm::length(scene->camera->m_lodcenter); if (depth != 0) { - cz = scene->camera->m_near / depth; + cz = scene->camera->inf_z_near / depth; } auto w = scene->camera->m_nx; auto h = scene->camera->m_ny; diff --git a/zenovis/include/zenovis/Camera.h b/zenovis/include/zenovis/Camera.h index 00126d5de2..c6b1983006 100644 --- a/zenovis/include/zenovis/Camera.h +++ b/zenovis/include/zenovis/Camera.h @@ -20,6 +20,7 @@ struct ZOptixCameraSettingInfo { }; struct Camera { + float inf_z_near = 0.001f; int m_nx{512}, m_ny{512}; glm::mat4x4 m_view{1}, m_proj{1}; diff --git a/zenovis/src/Camera.cpp b/zenovis/src/Camera.cpp index 56a0354de5..232d820946 100644 --- a/zenovis/src/Camera.cpp +++ b/zenovis/src/Camera.cpp @@ -82,7 +82,7 @@ void Camera::placeCamera(glm::vec3 pos, glm::vec3 front, glm::vec3 up) { m_proj = glm::orthoZO(-radius * getAspect(), radius * getAspect(), -radius, radius, m_far, m_near); } else { - m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), 
getAspect(), 0.001); + m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near); } } @@ -107,7 +107,7 @@ void Camera::updateMatrix() { void Camera::setResolution(int nx, int ny) { m_nx = nx; m_ny = ny; - m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), m_near); + m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near); } void Camera::setResolutionInfo(bool block, int nx, int ny) { From 12e27ecbb4cc2000ace174de77e635ccc3d028a4 Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 7 Jun 2024 21:39:30 +0800 Subject: [PATCH 045/244] surface projection --- .../CuLagrange/geometry/kernel/geo_math.hpp | 78 +-- projects/CuLagrange/pbd/ConstraintsSolver.cu | 535 +++++++++++++++++- .../constraint_types.hpp | 11 + 3 files changed, 588 insertions(+), 36 deletions(-) diff --git a/projects/CuLagrange/geometry/kernel/geo_math.hpp b/projects/CuLagrange/geometry/kernel/geo_math.hpp index 300d0693e2..c4e185ab50 100644 --- a/projects/CuLagrange/geometry/kernel/geo_math.hpp +++ b/projects/CuLagrange/geometry/kernel/geo_math.hpp @@ -395,28 +395,33 @@ namespace zeno { namespace LSL_GEO { /////////////////////////////////////////////////////////////////////// constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1, - const VECTOR3& v2, const VECTOR3& v,VECTOR3& barycentric,VECTOR3& project_bary) + const VECTOR3& v2, const VECTOR3& v,VECTOR3& barycentric,VECTOR3& project_point) { // get the barycentric coordinates const VECTOR3 e1 = v1 - v0; const VECTOR3 e2 = v2 - v0; + const VECTOR3 e3 = v2 - v1; const VECTOR3 n = e1.cross(e2); const VECTOR3 na = (v2 - v1).cross(v - v1); const VECTOR3 nb = (v0 - v2).cross(v - v2); const VECTOR3 nc = (v1 - v0).cross(v - v0); - barycentric = VECTOR3(n.dot(na) / n.l2NormSqr(), - n.dot(nb) / n.l2NormSqr(), - n.dot(nc) / n.l2NormSqr()); - + // barycentric = VECTOR3(n.dot(na) / n.l2NormSqr(), + // n.dot(nb) / n.l2NormSqr(), + // n.dot(nc) / n.l2NormSqr()); + auto n2 = 
n.l2NormSqr(); + barycentric = VECTOR3(n.dot(na),n.dot(nb),n.dot(nc)); const REAL barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); // if the point projects to inside the triangle, it should sum to 1 - if (zs::abs(barySum - 1.0) < 1e-6) + if (zs::abs(barySum - n2) < static_cast(1e-6) && n2 > static_cast(1e-6)) { const VECTOR3 nHat = n / n.norm(); const REAL normalDistance = (nHat.dot(v - v0)); - project_bary = barycentric; - // project_v = barycentric[0] * v0 + barycentric[1] * v1 + barycentric[2] * v2; + barycentric /= n2; + // project_bary = barycentric; + // project_point = VECTOR3::zeros(); + + project_point = barycentric[0] * v0 + barycentric[1] * v1 + barycentric[2] * v2; return zs::abs(normalDistance); } @@ -425,44 +430,51 @@ constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1 VECTOR3 es[3] = {}; // project onto each edge, find the distance to each edge - const VECTOR3 e3 = v2 - v1; + const VECTOR3 ev = v - v0; const VECTOR3 ev3 = v - v1; - const VECTOR3 e1Hat = e1 / e1.norm(); - const VECTOR3 e2Hat = e2 / e2.norm(); - const VECTOR3 e3Hat = e3 / e3.norm(); - VECTOR3 edgeDistances(1e8, 1e8, 1e8); + + // const VECTOR3 e2Hat = e2 / e2.norm(); + // const VECTOR3 e3Hat = e3 / e3.norm(); + VECTOR3 edgeDistances{1e8, 1e8, 1e8}; // see if it projects onto the interval of the edge // if it doesn't, then the vertex distance will be smaller, // so we can skip computing anything - const REAL e1dot = e1Hat.dot(ev); + // VECTOR3 e1Hat = e1; + REAL e1dot = e1.dot(ev); // VECTOR3 projected_e[3] = {}; - if (e1dot > 0.0 && e1dot < e1.norm()) + // auto e1n = e1.norm(); + auto e1n2 = e1.l2NormSqr(); + if (e1dot > 0.0 && e1dot < e1n2 && e1n2 > static_cast(1e-6)) { - const VECTOR3 projected = v0 + e1Hat * e1dot; + // e1Hat /= e1.norm(); + const VECTOR3 projected = v0 + e1 * e1dot / e1n2; es[0] = projected; edgeDistances[0] = (v - projected).norm(); } - const REAL e2dot = e2Hat.dot(ev); - if (e2dot > 0.0 && e2dot < 
e2.norm()) + + const REAL e2dot = e2.dot(ev); + auto e2n2 = e2.l2NormSqr(); + if (e2dot > 0.0 && e2dot < e2n2 && e2n2 > static_cast(1e-6)) { - const VECTOR3 projected = v0 + e2Hat * e2dot; + const VECTOR3 projected = v0 + e2 * e2dot / e2n2; es[1] = projected; edgeDistances[1] = (v - projected).norm(); } - const REAL e3dot = e3Hat.dot(ev3); - if (e3dot > 0.0 && e3dot < e3.norm()) + const REAL e3dot = e3.dot(ev3); + auto e3n2 = e3.l2NormSqr(); + if (e3dot > 0.0 && e3dot < e3n2 && e3n2 > static_cast(1e-6)) { - const VECTOR3 projected = v1 + e3Hat * e3dot; + const VECTOR3 projected = v1 + e3 * e3dot / e3n2; es[2] = projected; edgeDistances[2] = (v - projected).norm(); } // get the distance to each vertex - const VECTOR3 vertexDistances((v - v0).norm(), + const VECTOR3 vertexDistances{(v - v0).norm(), (v - v1).norm(), - (v - v2).norm()); + (v - v2).norm()}; // get the smallest of both the edge and vertex distances REAL vertexMin = 1e8; @@ -485,22 +497,22 @@ constexpr REAL get_vertex_triangle_distance(const VECTOR3& v0, const VECTOR3& v1 // vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; // edgeMin = edgeMin > edgeDistances[i] ? 
edgeDistances[i] : edgeMin; } - VECTOR3 project_v{}; + // VECTOR3 project_v{}; if(vertexMin < edgeMin) - project_v = vs[min_v_idx]; + project_point = vs[min_v_idx]; else - project_v = es[min_e_idx]; + project_point = es[min_e_idx]; // const VECTOR3 e1 = v1 - v0; // const VECTOR3 e2 = v2 - v0; // const VECTOR3 n = e1.cross(e2); - auto na_p = (v2 - v1).cross(project_v - v1); - auto nb_p = (v0 - v2).cross(project_v - v2); - auto nc_p = (v1 - v0).cross(project_v - v0); - project_bary = VECTOR3(n.dot(na_p) / n.l2NormSqr(), - n.dot(nb_p) / n.l2NormSqr(), - n.dot(nc_p) / n.l2NormSqr()); + // auto na_p = (v2 - v1).cross(project_v - v1); + // auto nb_p = (v0 - v2).cross(project_v - v2); + // auto nc_p = (v1 - v0).cross(project_v - v0); + // project_bary = VECTOR3(n.dot(na_p) / n.l2NormSqr(), + // n.dot(nb_p) / n.l2NormSqr(), + // n.dot(nc_p) / n.l2NormSqr()); // return the smallest of those return (vertexMin < edgeMin) ? vertexMin : edgeMin; diff --git a/projects/CuLagrange/pbd/ConstraintsSolver.cu b/projects/CuLagrange/pbd/ConstraintsSolver.cu index d91af67cb1..7e18ac3065 100644 --- a/projects/CuLagrange/pbd/ConstraintsSolver.cu +++ b/projects/CuLagrange/pbd/ConstraintsSolver.cu @@ -1545,19 +1545,32 @@ struct XPBDSolveSmoothAll : INode { } } + auto update_vertex_position = get_input2("update_vertex_position"); + + auto output_debug_inform = get_input2("output_debug_inform"); + cudaPol(zs::range(verts.size()),[ + update_vertex_position = update_vertex_position, verts = proxy({},verts), eps = eps, + // output_debug_inform = output_debug_inform, dptagOffset = verts.getPropertyOffset(dptag), ptagOffset = verts.getPropertyOffset(ptag), wOffset = verts.getPropertyOffset("w")] ZS_LAMBDA(int vi) mutable { if(verts(wOffset,vi) > eps) - verts.tuple(dim_c<3>,dptagOffset,vi) = verts.pack(dim_c<3>,dptagOffset,vi) / verts(wOffset,vi); + verts.tuple(dim_c<3>,dptagOffset,vi) = 2.f * verts.pack(dim_c<3>,dptagOffset,vi) / verts(wOffset,vi); else verts.tuple(dim_c<3>,dptagOffset,vi) = 
vec3::zeros(); - verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + 2.f * verts.pack(dim_c<3>,dptagOffset,vi); + if(update_vertex_position) + verts.tuple(dim_c<3>,ptagOffset,vi) = verts.pack(dim_c<3>,ptagOffset,vi) + verts.pack(dim_c<3>,dptagOffset,vi); }); + if(output_debug_inform) { + auto ndp = TILEVEC_OPS::dot<3>(cudaPol,verts,dptag,dptag); + std::cout << "ndp : " << ndp << std::endl; + } + + set_output("zsparticles",get_input("zsparticles")); set_output("constraints",get_input("constraints")); }; @@ -1572,12 +1585,528 @@ ZENDEFNODE(XPBDSolveSmoothAll, {{{"zsparticles"}, {"float","dt","1.0"}, {"int","nm_substeps","1"}, {"int","substep_id","0"}, - {"int","iter_id","0"} + {"int","iter_id","0"}, + {"bool","update_vertex_position","1"}, + {"bool","output_debug_inform","0"} }, {{"zsparticles"},{"constraints"}}, {}, {"PBD"}}); + +// recalc target nodal normal and bvh structure before using this node +struct ProjectOntoSurface : INode { + + using bvh_t = ZenoLinearBvh::lbvh_t; + using bv_t = bvh_t::Box; + using dtiles_t = zs::TileVector; + + virtual void apply() override { + using namespace zs; + using namespace PBD_CONSTRAINT; + + using vec2 = zs::vec; + using vec3 = zs::vec; + using vec4 = zs::vec; + using mat3 = zs::vec; + using vec2i = zs::vec; + using vec3i = zs::vec; + using vec4i = zs::vec; + using mat4 = zs::vec; + using Box = AABBBox<3,float>; + + constexpr auto space = execspace_e::cuda; + auto cudaPol = cuda_exec(); + constexpr auto exec_tag = wrapv{}; + constexpr auto eps = 1e-6; + + auto target = get_input2("target"); + + const auto& tverts = target->getParticles(); + const auto& ttris = target->getQuadraturePoints(); + auto tptag = get_input2("tptag"); + + auto update_target_mesh = get_input2("update_target_mesh"); + + if(!target->hasBvh(TRIANGLE_MESH_BVH)) { + target->bvh(TRIANGLE_MESH_BVH) = LBvh<3,int,T>{}; + } + auto& ttri_bvh = target->bvh(TRIANGLE_MESH_BVH); + + if(!target->hasMeta(MESH_REORDER_KEYS)) { + update_target_mesh 
= true; + target->setMeta(MESH_REORDER_KEYS, + zs::Vector{tverts.get_allocator(),tverts.size()}); + } + auto& keys = target->readMeta&>(MESH_REORDER_KEYS); + + if(!target->hasMeta(MESH_REORDER_INDICES)) { + update_target_mesh = true; + target->setMeta(MESH_REORDER_INDICES, + zs::Vector{tverts.get_allocator(),tverts.size()}); + } + auto& indices = target->readMeta&>(MESH_REORDER_INDICES); + + if(!target->hasMeta(MESH_REORDER_VERTICES_BUFFER)) { + update_target_mesh = true; + target->setMeta(MESH_REORDER_VERTICES_BUFFER, + zs::Vector{tverts.get_allocator(),tverts.size()}); + } + auto& reorderedVBuffer = target->readMeta&>(MESH_REORDER_VERTICES_BUFFER); + + if(!target->hasMeta(MESH_MAIN_AXIS)) { + update_target_mesh = true; + target->setMeta(MESH_MAIN_AXIS,0); + } + auto& axis = target->readMeta(MESH_MAIN_AXIS); + + if(!target->hasMeta(MESH_GLOBAL_BOUNDING_BOX)) { + update_target_mesh = true; + target->setMeta(MESH_GLOBAL_BOUNDING_BOX,Box{}); + } + auto& gbv = target->readMeta(MESH_GLOBAL_BOUNDING_BOX); + + // std::cout << "before update_target_mesh" << std::endl; + + if(update_target_mesh) { + auto tbvs = retrieve_bounding_volumes(cudaPol,tverts,ttris,wrapv<3>{},(T)0,tptag); + ttri_bvh.build(cudaPol,tbvs); + + zs::Vector gmins{tverts.get_allocator(),tverts.size()},gmaxs{tverts.get_allocator(),tverts.size()}; + // Box gbv; + zs::Vector ret{tverts.get_allocator(),1}; + + for(int d = 0;d != 3;++d) { + cudaPol(enumerate(gmins,gmaxs),[ + tverts = proxy({},tverts), + tptagOffset = tverts.getPropertyOffset(tptag), + d = d] ZS_LAMBDA(int i,float& gmin,float& gmax) mutable { + auto p = tverts.pack(dim_c<3>,tptagOffset,i); + gmin = p[d]; + gmax = p[d]; + }); + + reduce(cudaPol,std::begin(gmins),std::end(gmins),std::begin(ret),limits::max(),getmin{}); + gbv._min[d] = ret.getVal(); + reduce(cudaPol,std::begin(gmaxs),std::end(gmaxs),std::begin(ret),limits::min(),getmax{}); + gbv._max[d] = ret.getVal(); + } + axis = 0; + auto dis = gbv._max[0] - gbv._min[0]; + for(int d = 1;d != 
3;++d) { + if(auto tmp = gbv._max[d] - gbv._min[d];tmp > dis) { + dis = tmp; + axis = d; + } + } + + /* fill the cached MESH_REORDER_* buffers in place: redeclaring keys/indices here shadowed them, so the sorted keys never reached the lookup below */ + keys.resize(tverts.size()); indices.resize(tverts.size()); reorderedVBuffer.resize(tverts.size()); + cudaPol(enumerate(keys,indices),[tverts = proxy({},tverts), + tptagOffset = tverts.getPropertyOffset(tptag), + axis] ZS_LAMBDA(int id,float& key,int &idx) mutable { + auto p = tverts.pack(dim_c<3>,tptagOffset,id); + key = p[axis]; + idx = id; + }); + + merge_sort_pair(cudaPol,std::begin(keys),std::begin(indices),tverts.size(),std::less{}); + cudaPol(zip(indices,reorderedVBuffer),[ + tverts = proxy({},tverts), + tptagOffset = tverts.getPropertyOffset(tptag)] ZS_LAMBDA(int oid,vec3& p) mutable { + p = tverts.pack(dim_c<3>,tptagOffset,oid); + }); + } + + auto zsparticles = get_input("zsparticles"); + auto& verts = zsparticles->getParticles(); + auto ptag = get_input2("ptag"); + + auto npcheck = TILEVEC_OPS::dot<3>(cudaPol,verts,ptag,ptag); + if(isnan(npcheck)){ + std::cout << "nan np detected" << std::endl; + // throw std::runtime_error("nan np detected"); + } + + zs::Vector locs{verts.get_allocator(),verts.size()}; + cudaPol(zip(zs::range(verts.size()),locs),[axis = axis, + keys = proxy(keys), + minvOffset = verts.getPropertyOffset("minv"), + verts = proxy({},verts), + ptagOffset = verts.getPropertyOffset(ptag)] ZS_LAMBDA(int vi,int& loc) mutable { + auto locate = [&keys](float v) -> int { + int left = 0, right = keys.size(); + while (left < right) { + auto mid = left + (right - left) / 2; + if (keys[mid] > v) + right = mid; + else + left = mid + 1; + } + if (left < keys.size()) { + if (keys[left] > v) + left--; + } else + left = keys.size() - 1; + // left could be -1 + return left; + }; + if(verts(minvOffset,vi) < 0.00001) + return; + + auto xi = verts.pack(dim_c<3>,ptagOffset,vi); + loc = locate(xi[axis]); + }); + + zs::Vector search_radii{verts.get_allocator(),verts.size()}; + auto default_search_radius = get_input2("default_search_radius"); + + // 
std::cout << "before projection" << std::endl; + + cudaPol(zs::range(verts.size()),[ + verts = proxy({},verts), + minvOffset = verts.getPropertyOffset("minv"), + ptagOffset = verts.getPropertyOffset(ptag), + reorderedVBuffer = proxy(reorderedVBuffer), + locs = proxy(locs), + keys = proxy(keys), + search_radii = proxy(search_radii), + axis = axis, + default_search_radius = default_search_radius, + dd2 = default_search_radius * default_search_radius, + indices = proxy(indices)] ZS_LAMBDA(int vi) mutable { + if(verts(minvOffset,vi) < 0.00001) + return; + auto loc = locs[vi]; + auto p = verts.pack(dim_c<3>,ptagOffset,vi); + int l = loc + 1; + // auto d2 = limits::max(); + auto d2 = dd2; + int j = -1; + int cnt = 0; + /* l indexes the sorted target-vertex buffer, so bound the scan by that buffer rather than by the query vertex count */ while(l < reorderedVBuffer.size() && cnt++ < 128) { + if(auto tmp = zs::sqr(reorderedVBuffer[l][axis] - p[axis]);tmp > d2 || tmp > dd2) + break; + if(auto tmp = (reorderedVBuffer[l] - p).l2NormSqr();tmp < d2) { + d2 = tmp; + j = l; + } + ++l; + } + cnt = 0; + l = loc; + while(l >= 0 && cnt++ < 128) { + if(auto tmp = zs::sqr(reorderedVBuffer[l][axis] - p[axis]);tmp > d2 || tmp > dd2) + break; + if(auto tmp = (reorderedVBuffer[l] - p).l2NormSqr();tmp < d2) { + d2 = tmp; + j = l; + } + l--; + } + + if(j != -1) { + search_radii[vi] = zs::sqrt(d2 + 0.000001f) * 1.0001f; + } else { + search_radii[vi] = default_search_radius * 1.0001f; + } + }); + + auto do_moton_ordering = get_input2("do_moton_ordering"); + + + if(!zsparticles->hasMeta(MESH_REORDER_INDICES)) { + do_moton_ordering = true; + zsparticles->setMeta(MESH_REORDER_INDICES,zs::Vector{verts.get_allocator(),verts.size()}); + } + auto& is = zsparticles->readMeta&>(MESH_REORDER_INDICES); + + // std::cout << "do motor ordering" << std::endl; + if(do_moton_ordering) { + if(!zsparticles->hasMeta(MESH_REORDER_KEYS)) { + do_moton_ordering = true; + zsparticles->setMeta(MESH_REORDER_KEYS,zs::Vector{verts.get_allocator(),verts.size()}); + } + auto& ks = zsparticles->readMeta&>(MESH_REORDER_KEYS); + + 
cudaPol(enumerate(ks,is),[ + gbv = gbv, + ptagOffset = verts.getPropertyOffset(ptag), + verts = proxy({},verts)] ZS_LAMBDA(int i,u32& key,int& idx) mutable { + auto p = verts.pack(dim_c<3>,ptagOffset,i); + for (int d = 0; d != 3; ++d) { + if (p[d] < gbv._min[d]) + p[d] = gbv._min[d]; + else if (p[d] > gbv._max[d]) + p[d] = gbv._max[d]; + } + auto coord = gbv.getUniformCoord(p).template cast(); + key = morton_code<3>(coord); + idx = i; + }); + + merge_sort_pair(cudaPol, std::begin(ks), std::begin(is), verts.size(), std::less{}); + } else { + cudaPol(enumerate(is),[] ZS_LAMBDA(int i,int& idx) mutable {idx = i;}); + } + + auto dptag = get_input2("dptag"); + auto update_vertex_position = get_input2("update_vertex_position"); + + if(!update_vertex_position && !verts.hasProperty(dptag)) { + verts.append_channels(cudaPol,{{dptag,3}}); + TILEVEC_OPS::fill(cudaPol,verts,dptag,0.f); + } + + // std::cout << "before doing projection" << std::endl; + + cudaPol(is,[verts = proxy({},verts), + ptagOffset = verts.getPropertyOffset(ptag), + dptagOffset = verts.getPropertyOffset(dptag), + minvOffset = verts.getPropertyOffset("minv"), + tverts = proxy({},tverts), + tptagOffset = tverts.getPropertyOffset(tptag), + ttris = proxy({},ttris), + update_vertex_position = update_vertex_position, + default_search_radius = default_search_radius, + is = proxy(is), + tindsOffset = ttris.getPropertyOffset("inds"), + search_radii = proxy(search_radii), + ttri_bvh = proxy(ttri_bvh)] ZS_LAMBDA(int qid) mutable { + if(verts(minvOffset,qid) < 0.00001) + return; + auto rad = search_radii[qid]; + auto p = verts.pack(dim_c<3>,ptagOffset,qid); + auto bv = Box{get_bounding_box(p - rad,p + rad)}; + + auto closest_dist = limits::max(); + int closest_ti = -1; + // auto closest_bary = vec3{1.f,0.f,0.f}; + auto closest_cp = vec3::zeros(); + + auto find_closest_triangles = [&](int ti) { + auto ttri = ttris.pack(dim_c<3>,tindsOffset,ti,int_c); + vec3 tps[3] = {}; + for(int i = 0;i != 3;++i) + tps[i] = 
tverts.pack(dim_c<3>,tptagOffset,ttri[i]); + vec3 bary{}; + vec3 project_cp{}; + auto dist = LSL_GEO::get_vertex_triangle_distance(tps[0],tps[1],tps[2],p,bary,project_cp); + if(project_cp.norm() < 1e-6) { + { + auto v0 = tps[0]; + auto v1 = tps[1]; + auto v2 = tps[2]; + auto v = p; + vec3 barycentric{}; + vec3 project_point{}; + + const vec3 e1 = v1 - v0; + const vec3 e2 = v2 - v0; + const vec3 e3 = v2 - v1; + const vec3 n = e1.cross(e2); + const vec3 na = (v2 - v1).cross(v - v1); + const vec3 nb = (v0 - v2).cross(v - v2); + const vec3 nc = (v1 - v0).cross(v - v0); + // barycentric = vec3(n.dot(na) / n.l2NormSqr(), + // n.dot(nb) / n.l2NormSqr(), + // n.dot(nc) / n.l2NormSqr()); + auto n2 = n.l2NormSqr(); + barycentric = vec3(n.dot(na),n.dot(nb),n.dot(nc)); + const float barySum = zs::abs(barycentric[0]) + zs::abs(barycentric[1]) + zs::abs(barycentric[2]); + + // if the point projects to inside the triangle, it should sum to 1 + if (zs::abs(barySum - n2) < static_cast(1e-6) && n2 > static_cast(1e-6)) + { + const vec3 nHat = n / n.norm(); + const float normalDistance = (nHat.dot(v - v0)); + barycentric /= n2; + // project_bary = barycentric; + // project_point = vec3::zeros(); + + project_point = barycentric[0] * v0 + barycentric[1] * v1 + barycentric[2] * v2; + + printf("wierd——000 project center[%d]->[%d] : %f %f %f : %f %f %f : A : %f D : %f\n",qid,ti, + (float)project_cp[0], + (float)project_cp[1], + (float)project_cp[2], + (float)tps[0].norm(), + (float)tps[1].norm(), + (float)tps[2].norm(), + (float)LSL_GEO::area(tps[0],tps[1],tps[2]), + (float)dist); + + return; + + // return zs::abs(normalDistance); + } + + vec3 vs[3] = {v0,v1,v2}; + + vec3 es[3] = {}; + + // project onto each edge, find the distance to each edge + + const vec3 ev = v - v0; + const vec3 ev3 = v - v1; + + // const vec3 e2Hat = e2 / e2.norm(); + // const vec3 e3Hat = e3 / e3.norm(); + vec3 edgeDistances{1e8, 1e8, 1e8}; + + // see if it projects onto the interval of the edge + // if it doesn't, 
then the vertex distance will be smaller, + // so we can skip computing anything + // vec3 e1Hat = e1; + float e1dot = e1.dot(ev); + // vec3 projected_e[3] = {}; + // auto e1n = e1.norm(); + auto e1n2 = e1.l2NormSqr(); + if (e1dot > 0.0 && e1dot < e1n2 && e1n2 > static_cast(1e-6)) + { + // e1Hat /= e1.norm(); + const vec3 projected = v0 + e1 * e1dot / e1n2; + es[0] = projected; + edgeDistances[0] = (v - projected).norm(); + } + + const float e2dot = e2.dot(ev); + auto e2n2 = e2.l2NormSqr(); + if (e2dot > 0.0 && e2dot < e2n2 && e2n2 > static_cast(1e-6)) + { + const vec3 projected = v0 + e2 * e2dot / e2n2; + es[1] = projected; + edgeDistances[1] = (v - projected).norm(); + } + const float e3dot = e3.dot(ev3); + auto e3n2 = e3.l2NormSqr(); + if (e3dot > 0.0 && e3dot < e3n2 && e3n2 > static_cast(1e-6)) + { + const vec3 projected = v1 + e3 * e3dot / e3n2; + es[2] = projected; + edgeDistances[2] = (v - projected).norm(); + } + + // get the distance to each vertex + const vec3 vertexDistances{(v - v0).norm(), + (v - v1).norm(), + (v - v2).norm()}; + + // get the smallest of both the edge and vertex distances + float vertexMin = 1e8; + float edgeMin = 1e8; + + int min_e_idx = 0; + int min_v_idx = 0; + // vec3 project_v_min{}; + // vec3 project_e_min{}; + + for(int i = 0;i < 3;++i){ + if(vertexMin > vertexDistances[i]){ + vertexMin = vertexDistances[i]; + min_v_idx = i; + } + if(edgeMin > edgeDistances[i]){ + edgeMin = edgeDistances[i]; + min_e_idx = i; + } + // vertexMin = vertexMin > vertexDistances[i] ? vertexDistances[i] : vertexMin; + // edgeMin = edgeMin > edgeDistances[i] ? 
edgeDistances[i] : edgeMin; + } + // vec3 project_v{}; + if(vertexMin < edgeMin) + project_point = vs[min_v_idx]; + else + project_point = es[min_e_idx]; + + printf("wierd-111 project center[%d]->[%d] : PCP : %f %f %f : V %f %f %f %f : VD : %f %f %f : A : %f Vmin : %f\n",qid,ti, + (float)project_cp[0], + (float)project_cp[1], + (float)project_cp[2], + (float)v.norm(), + (float)v0.norm(), + (float)v1.norm(), + (float)v2.norm(), + (float)vertexDistances[0], + (float)vertexDistances[1], + (float)vertexDistances[2], + (float)LSL_GEO::area(tps[0],tps[1],tps[2]), + (float)vertexMin); + + return; + + } + + + // printf("wierd project center[%d]->[%d] : %f %f %f : %f %f %f : A : %f D : %f\n",qid,ti, + // (float)project_cp[0], + // (float)project_cp[1], + // (float)project_cp[2], + // (float)tps[0].norm(), + // (float)tps[1].norm(), + // (float)tps[2].norm(), + // (float)LSL_GEO::area(tps[0],tps[1],tps[2]), + // (float)dist); + } + // if(isnan(dist) || isnan(project_bary.norm())) + // return; + if(dist < closest_dist) { + closest_dist = dist; + closest_ti = ti; + closest_cp = project_cp; + } + }; + + ttri_bvh.iter_neighbors(bv,find_closest_triangles); + if(closest_ti < 0) { + return; + } else { + auto cp = closest_cp; + auto dp = cp - verts.pack(dim_c<3>,ptagOffset,qid); + + // if(isnan(dp.norm()) || dp.norm() > default_search_radius) { + // printf("too big projection update detected %d(%f) -> %d [%f %f %f]\n",qid,(float)rad,closest_ti, + // (float)closest_cp[0], + // (float)closest_cp[1], + // (float)closest_cp[2]); + // return; + // } + + if(update_vertex_position) + verts.tuple(dim_c<3>,ptagOffset,qid) = cp; + else + verts.tuple(dim_c<3>,dptagOffset,qid) = dp; + } + }); + + auto np = TILEVEC_OPS::dot<3>(cudaPol,verts,ptag,ptag); + if(isnan(np)) { + std::cout << "nan update detected after surface project" << std::endl; + throw std::runtime_error("nan update detected after surface project"); + } + + + set_output("zsparticles",get_input("zsparticles")); + 
set_output("target",get_input("target")); + }; +}; + + +ZENDEFNODE(ProjectOntoSurface, {{{"zsparticles"}, + {"target"}, + {"string","ptag","x"}, + {"string","dptag","dx"}, + {"string","tptag","x"}, + {"bool","update_target_mesh","1"}, + {"bool","do_moton_ordering","1"}, + {"bool","update_vertex_position","1"}, + {"float","default_search_radius","0.0"} + }, + {{"zsparticles"},{"target"}}, + {}, + {"PBD"}}); + + struct VisualizeDCDProximity : zeno::INode { virtual void apply() override { diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp index 5874bd63b0..b25eac7832 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint_types.hpp @@ -33,6 +33,17 @@ constexpr auto SHAPE_MATCHING_SHAPE_OFFSET = "SHAPE_MATCHING_SHAPE_OFFSET"; constexpr auto SHAPE_MATCHING_MATRIX_BUFFER = "SHAPE_MATCHING_MATRIX_BUFFER"; +constexpr auto VERTEX_BV = "VERTEX_BV"; +constexpr auto VERTEX_BVH = "VERTEX_BVH"; +constexpr auto TRIANGLE_MESH_BV = "TRIANGLE_MESH_BV"; +constexpr auto TRIANGLE_MESH_BVH = "TRIANGLE_MESH_BVH"; + +constexpr auto MESH_REORDER_KEYS = "MESH_REORDER_KEYS"; +constexpr auto MESH_REORDER_INDICES = "MESH_REORDER_INDICES"; +constexpr auto MESH_REORDER_VERTICES_BUFFER = "MESH_REORDER_VERTICES_BUFFER"; +constexpr auto MESH_MAIN_AXIS = "MESH_MAIN_AXIS"; +constexpr auto MESH_GLOBAL_BOUNDING_BOX = "MESH_GLOBAL_BOUNDING_BOX"; + // constexpr auto DCD_COLLISIONS_MESH_COLLIDER = "DCD_COLLISION_MESH_COLLIDER"; enum category_c : int { From 173da59c7c6747300dd256062e8735ffd0a3f65a Mon Sep 17 00:00:00 2001 From: Lu Shuliang Date: Fri, 7 Jun 2024 21:40:00 +0800 Subject: [PATCH 046/244] update flesh bones weight --- projects/CuLagrange/fem/FleshDynamicStepping.cu | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/projects/CuLagrange/fem/FleshDynamicStepping.cu 
b/projects/CuLagrange/fem/FleshDynamicStepping.cu index 17ab53e908..50ccfb0dd4 100644 --- a/projects/CuLagrange/fem/FleshDynamicStepping.cu +++ b/projects/CuLagrange/fem/FleshDynamicStepping.cu @@ -833,7 +833,10 @@ struct FleshDynamicStepping : INode { T stiffness = (2.0066 * mu + 1.0122 * lambda) * b_verts("strength",vi); - auto alpha = stiffness * bone_driven_weight * bcws("strength",vi) * bcws("cnorm",vi) * eles("vol",ei) * eles("bdw",ei); + auto area = (T)1.0; + if(b_verts.hasProperty("area")) + area = b_verts("area",vi); + auto alpha = area * stiffness * bone_driven_weight * bcws("strength",vi) * bcws("cnorm",vi) * eles("vol",ei) * eles("bdw",ei); for(size_t i = 0;i != 4;++i){ auto tmp = -pdiff * alpha * w[i]; @@ -1336,6 +1339,7 @@ struct FleshDynamicStepping : INode { auto bverts = typename ZenoParticles::particles_t({ {"x",3}, {"intersect",1}, + {"area",1}, {"strength",1}},0,zs::memsrc_e::device); if(has_input("driven_boudary") && zsparticles->hasAuxData(driven_tag)){ auto zsbones = get_input("driven_boudary"); @@ -1354,6 +1358,12 @@ struct FleshDynamicStepping : INode { else TILEVEC_OPS::fill(cudaPol,bverts,"intersect",(T)0.0); + if(zsbones_verts.hasProperty("area")) { + TILEVEC_OPS::copy(cudaPol,zsbones_verts,"area",bverts,"area"); + std::cout << "use dynamic area driven weight" << std::endl; + } else + TILEVEC_OPS::fill(cudaPol,bverts,"area",(T)1.0); + const auto& inbbw = (*zsparticles)[driven_tag]; bbw.resize(inbbw.size()); TILEVEC_OPS::copy(cudaPol,inbbw,"X",bbw,"X"); From 43efaefa47fdfef6da530271b6f1f126e95397d7 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 11 Jun 2024 19:23:57 +0800 Subject: [PATCH 047/244] visual guide lines for light --- zeno/src/nodes/CameraNodes.cpp | 228 ------------- zeno/src/nodes/LightNodes.cpp | 451 ++++++++++++++++++++++++++ zenovis/src/optx/RenderEngineOptx.cpp | 24 +- 3 files changed, 463 insertions(+), 240 deletions(-) create mode 100644 zeno/src/nodes/LightNodes.cpp diff --git a/zeno/src/nodes/CameraNodes.cpp 
b/zeno/src/nodes/CameraNodes.cpp index e8694880d9..d53973bf4a 100644 --- a/zeno/src/nodes/CameraNodes.cpp +++ b/zeno/src/nodes/CameraNodes.cpp @@ -163,234 +163,6 @@ ZENO_DEFNODE(MakeLight)({ {"shader"}, }); -struct LightNode : INode { - virtual void apply() override { - auto isL = true; //get_input2("islight"); - auto invertdir = get_input2("invertdir"); - auto position = get_input2("position"); - auto scale = get_input2("scale"); - auto rotate = get_input2("rotate"); - auto quaternion = get_input2("quaternion"); - - auto color = get_input2("color"); - - auto exposure = get_input2("exposure"); - auto intensity = get_input2("intensity"); - - auto scaler = powf(2.0f, exposure); - - if (std::isnan(scaler) || std::isinf(scaler) || scaler < 0.0f) { - scaler = 1.0f; - printf("Light exposure = %f is invalid, fallback to 0.0 \n", exposure); - } - - intensity *= scaler; - - std::string type = get_input2(lightTypeKey); - auto typeEnum = magic_enum::enum_cast(type).value_or(LightType::Diffuse); - auto typeOrder = magic_enum::enum_integer(typeEnum); - - std::string shapeString = get_input2(lightShapeKey); - auto shapeEnum = magic_enum::enum_cast(shapeString).value_or(LightShape::Plane); - auto shapeOrder = magic_enum::enum_integer(shapeEnum); - - auto prim = std::make_shared(); - - if (has_input("prim")) { - auto mesh = get_input("prim"); - - if (mesh->size() > 0) { - prim = mesh; - shapeEnum = LightShape::TriangleMesh; - shapeOrder = magic_enum::enum_integer(shapeEnum); - } - } else { - - auto &verts = prim->verts; - auto &tris = prim->tris; - - auto start_point = zeno::vec3f(0.5, 0, 0.5); - float rm = 1.0f; - float cm = 1.0f; - - auto order = get_input2("EulerRotationOrder:"); - auto orderTyped = magic_enum::enum_cast(order).value_or(EulerAngle::RotationOrder::YXZ); - - auto measure = get_input2("EulerAngleMeasure:"); - auto measureTyped = magic_enum::enum_cast(measure).value_or(EulerAngle::Measure::Radians); - - glm::vec3 eularAngleXYZ = glm::vec3(rotate[0], rotate[1], 
rotate[2]); - glm::mat4 rotation = EulerAngle::rotate(orderTyped, measureTyped, eularAngleXYZ); - - // Plane Verts - for(int i=0; i<=1; i++){ - - auto rp = start_point - zeno::vec3f(i*rm, 0, 0); - for(int j=0; j<=1; j++){ - auto p = rp - zeno::vec3f(0, 0, j*cm); - // S R Q T - p = p * scale; // Scale - auto gp = glm::vec3(p[0], p[1], p[2]); - glm::vec4 result = rotation * glm::vec4(gp, 1.0f); // Rotate - gp = glm::vec3(result.x, result.y, result.z); - glm::quat rotation(quaternion[0], quaternion[1], quaternion[2], quaternion[3]); - gp = glm::rotate(rotation, gp); - p = zeno::vec3f(gp.x, gp.y, gp.z); - auto zp = zeno::vec3f(p[0], p[1], p[2]); - zp = zp + position; // Translate - - verts.push_back(zp); - } - } - - // Plane Indices - tris.emplace_back(zeno::vec3i(0, 3, 1)); - tris.emplace_back(zeno::vec3i(3, 0, 2)); - } - - auto &verts = prim->verts; - auto &tris = prim->tris; - - auto &clr = prim->verts.add_attr("clr"); - auto c = color * intensity; - - for (size_t i=0; iuserData().set2("isRealTimeObject", std::move(isL)); - - prim->userData().set2("isL", std::move(isL)); - prim->userData().set2("ivD", std::move(invertdir)); - prim->userData().set2("pos", std::move(position)); - prim->userData().set2("scale", std::move(scale)); - prim->userData().set2("rotate", std::move(rotate)); - prim->userData().set2("quaternion", std::move(quaternion)); - prim->userData().set2("color", std::move(color)); - prim->userData().set2("intensity", std::move(intensity)); - - auto fluxFixed = get_input2("fluxFixed"); - prim->userData().set2("fluxFixed", std::move(fluxFixed)); - auto maxDistance = get_input2("maxDistance"); - prim->userData().set2("maxDistance", std::move(maxDistance)); - auto falloffExponent = get_input2("falloffExponent"); - prim->userData().set2("falloffExponent", std::move(falloffExponent)); - - auto mask = get_input2("mask"); - auto spread = get_input2("spread"); - auto visible = get_input2("visible"); - auto doubleside = get_input2("doubleside"); - - if 
(has_input2("profile")) { - auto profile = get_input2("profile"); - prim->userData().set2("lightProfile", std::move(profile)); - } - if (has_input2("texturePath")) { - auto texture = get_input2("texturePath"); - prim->userData().set2("lightTexture", std::move(texture)); - - auto gamma = get_input2("textureGamma"); - prim->userData().set2("lightGamma", std::move(gamma)); - } - - prim->userData().set2("type", std::move(typeOrder)); - prim->userData().set2("shape", std::move(shapeOrder)); - - prim->userData().set2("mask", std::move(mask)); - prim->userData().set2("spread", std::move(spread)); - prim->userData().set2("visible", std::move(visible)); - prim->userData().set2("doubleside", std::move(doubleside)); - - auto visibleIntensity = get_input2("visibleIntensity"); - prim->userData().set2("visibleIntensity", std::move(visibleIntensity)); - - set_output("prim", std::move(prim)); - } - - const static inline std::string lightShapeKey = "shape"; - - static std::string lightShapeDefaultString() { - auto name = magic_enum::enum_name(LightShape::Plane); - return std::string(name); - } - - static std::string lightShapeListString() { - auto list = magic_enum::enum_names(); - - std::string result; - for (auto& ele : list) { - result += " "; - result += ele; - } - return result; - } - - const static inline std::string lightTypeKey = "type"; - - static std::string lightTypeDefaultString() { - auto name = magic_enum::enum_name(LightType::Diffuse); - return std::string(name); - } - - static std::string lightTypeListString() { - auto list = magic_enum::enum_names(); - - std::string result; - for (auto& ele : list) { - result += " "; - result += ele; - } - return result; - } -}; - -ZENO_DEFNODE(LightNode)({ - { - {"vec3f", "position", "0, 0, 0"}, - {"vec3f", "scale", "1, 1, 1"}, - {"vec3f", "rotate", "0, 0, 0"}, - {"vec4f", "quaternion", "1, 0, 0, 0"}, - - {"vec3f", "color", "1, 1, 1"}, - {"float", "exposure", "0"}, - {"float", "intensity", "1"}, - {"float", "fluxFixed", "-1.0"}, - 
- {"vec2f", "spread", "1.0, 0.0"}, - {"float", "maxDistance", "-1.0" }, - {"float", "falloffExponent", "2.0"}, - {"int", "mask", "255"}, - {"bool", "visible", "0"}, - {"bool", "invertdir", "0"}, - {"bool", "doubleside", "0"}, - - {"readpath", "profile"}, - {"readpath", "texturePath"}, - {"float", "textureGamma", "1.0"}, - {"float", "visibleIntensity", "-1.0"}, - - {"enum " + LightNode::lightShapeListString(), LightNode::lightShapeKey, LightNode::lightShapeDefaultString()}, - {"enum " + LightNode::lightTypeListString(), LightNode::lightTypeKey, LightNode::lightTypeDefaultString()}, - {"PrimitiveObject", "prim"}, - }, - { - "prim" - }, - { - {"enum " + EulerAngle::RotationOrderListString(), "EulerRotationOrder", EulerAngle::RotationOrderDefaultString()}, - {"enum " + EulerAngle::MeasureListString(), "EulerAngleMeasure", EulerAngle::MeasureDefaultString()} - }, - {"shader"}, -}); - - struct ScreenSpaceProjectedGrid : INode { float hitOnFloor(vec3f pos, vec3f dir, float sea_level) const { float t = (sea_level - pos[1]) / dir[1]; diff --git a/zeno/src/nodes/LightNodes.cpp b/zeno/src/nodes/LightNodes.cpp new file mode 100644 index 0000000000..a228eec43a --- /dev/null +++ b/zeno/src/nodes/LightNodes.cpp @@ -0,0 +1,451 @@ +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include "glm/gtc/matrix_transform.hpp" + +namespace zeno { + +struct LightNode : INode { + virtual void apply() override { + auto isL = true; //get_input2("islight"); + auto invertdir = get_input2("invertdir"); + + auto scale = get_input2("scale"); + auto rotate = get_input2("rotate"); + auto position = get_input2("position"); + auto quaternion = get_input2("quaternion"); + + auto color = get_input2("color"); + auto exposure = get_input2("exposure"); + auto intensity = get_input2("intensity"); + + auto scaler = powf(2.0f, exposure); + + if (std::isnan(scaler) || std::isinf(scaler) || scaler < 0.0f) { + scaler = 1.0f; + printf("Light exposure = %f is 
invalid, fallback to 0.0 \n", exposure); + } + + intensity *= scaler; + + auto ccc = color * intensity; + for (size_t i=0; i("mask"); + auto spread = get_input2("spread"); + auto visible = get_input2("visible"); + auto doubleside = get_input2("doubleside"); + + std::string type = get_input2(lightTypeKey); + auto typeEnum = magic_enum::enum_cast(type).value_or(LightType::Diffuse); + auto typeOrder = magic_enum::enum_integer(typeEnum); + + std::string shapeString = get_input2(lightShapeKey); + auto shapeEnum = magic_enum::enum_cast(shapeString).value_or(LightShape::Plane); + auto shapeOrder = magic_enum::enum_integer(shapeEnum); + + auto prim = std::make_shared(); + auto &VERTS = prim->verts; + auto &LINES = prim->lines; + auto &TRIS = prim->tris; + + if (has_input("prim")) { + auto mesh = get_input("prim"); + + if (mesh->tris->size() > 0) { + prim = mesh; + shapeEnum = LightShape::TriangleMesh; + shapeOrder = magic_enum::enum_integer(shapeEnum); + } + } else { + + auto order = get_input2("EulerRotationOrder:"); + auto orderTyped = magic_enum::enum_cast(order).value_or(EulerAngle::RotationOrder::YXZ); + + auto measure = get_input2("EulerAngleMeasure:"); + auto measureTyped = magic_enum::enum_cast(measure).value_or(EulerAngle::Measure::Radians); + + glm::vec3 eularAngleXYZ = glm::vec3(rotate[0], rotate[1], rotate[2]); + glm::mat4 rotation = EulerAngle::rotate(orderTyped, measureTyped, eularAngleXYZ); + + if (shapeEnum == LightShape::Point) { + scale = {0 ,scale[1], 0}; + + if (typeEnum == LightType::Diffuse) { + spread = {1, 1}; + } + } + + const auto transformWithoutScale = [&]() { + glm::quat rawQuat(quaternion[0], quaternion[1], quaternion[2], quaternion[3]); + glm::mat4 matQuat = glm::toMat4(rawQuat); + + glm::mat4 transform = glm::translate(glm::mat4(1.0f), glm::vec3(position[0], position[1], position[2])); + transform = transform * rotation * matQuat; + return transform; + } (); + + VERTS->push_back(zeno::vec3f(+0.5, 0, +0.5)); + 
VERTS->push_back(zeno::vec3f(+0.5, 0, -0.5)); + VERTS->push_back(zeno::vec3f(-0.5, 0, +0.5)); + VERTS->push_back(zeno::vec3f(-0.5, 0, -0.5)); + + auto pscale = std::max(scale[0], scale[2]); + pscale = std::max(pscale, scale[1]); + + if (shapeEnum == LightShape::Sphere) { + + auto tmpPrim = zeno::TempNodeSimpleCaller("CreateSphere") + .set2("position", {0,0,0}) + .set2("scaleSize", {1,1,1}) + .set2("radius", 0.5f) + .set2("rotate", {0,0,0}) + .set2("hasNormal", false) + .set2("hasVertUV", false) + .set2("isFlipFace", false) + .set2("rows", 180) + .set2("columns", 360) + .set2("quads", false) + .set2("SphereRT", false) + .set2("EulerRotationOrder:", "XYZ") + .set2("EulerAngleMeasure:", "Degree") + .call().get("prim"); + + VERTS->reserve(tmpPrim->verts->size()); + TRIS.reserve(tmpPrim->tris->size()); + + VERTS->insert(VERTS.end(), tmpPrim->verts->begin(), tmpPrim->verts->end()); + for (size_t i=0; itris.size(); ++i) { + auto tri = tmpPrim->tris[i]; + TRIS.push_back(tri+4); + } + + scale = zeno::vec3f(min(scale[0], scale[2])); + pscale = 0.0; + } + else if (shapeEnum == LightShape::Ellipse) { + + auto tmpPrim = zeno::TempNodeSimpleCaller("CreateDisk") + .set2("position", {0,0,0}) + .set2("scaleSize", {1,1,1}) + .set2("rotate", {0,0,0}) + .set2("radius", 0.5f) + .set2("divisions", 360) + .set2("hasNormal", false) + .set2("hasVertUV", false) + .set2("isFlipFace", false) + .call().get("prim"); + + VERTS->reserve(tmpPrim->verts->size()); + TRIS.reserve(tmpPrim->tris->size()); + + VERTS->insert(VERTS.end(), tmpPrim->verts->begin(), tmpPrim->verts->end()); + for (size_t i=0; itris.size(); ++i) { + auto tri = tmpPrim->tris[i]; + TRIS.push_back(tri+4); + } + } + + if (shapeEnum != LightShape::Point) { + // Plane Indices + if (TRIS->size() == 0) { + TRIS.emplace_back(zeno::vec3i(0, 3, 1)); + TRIS.emplace_back(zeno::vec3i(3, 0, 2)); + } + + for (auto& v : VERTS) { + v = scale * v; + } + } + + auto line_spread = spread; + if (typeEnum != LightType::Projector) { + line_spread = 
{spread[0], spread[0]}; + } + + int lut[] = {+1, +3, -1, -3}; + + int vertex_offset = VERTS->size(); + for (size_t i=0; i<4; ++i) { + + auto info = lut[i]; + + auto axis = glm::vec3(0, 0, 0); + auto pick = 2-(abs(info)-1); + axis[pick] = std::copysign(1, info); + + if (pick == 0) { // inverse axis + axis[pick] *= -1; + } + + glm::mat4 sub_rotate = glm::rotate(glm::mat4(1.0), line_spread[i%2] * M_PIf/2.0f, axis); + auto end_point = sub_rotate * glm::vec4(0, -0.3, 0, 1.0f); + + glm::vec4 p0 = glm::vec4(0,0,0,1); + glm::vec4 p1 = glm::vec4(pscale, pscale, pscale, 1.0f) * (end_point); + + auto delta = glm::vec4(0.0); + delta[abs(info)-1] = 0.5f * scale[abs(info)-1]; + + if ( std::signbit(info) ) { // negative + delta = -delta; + } + + p0 += delta; + p1 += delta; + + if (line_spread[i%2] < line_spread[(i+1)%2]) { // spread at the same surface + p1 *= cos( line_spread[(i+1)%2] * M_PIf/2.0f ) / cos( line_spread[(i)%2] * M_PIf/2.0f ); + } + + VERTS->push_back(zeno::vec3f(p0[0], p0[1], p0[2])); + VERTS->push_back(zeno::vec3f(p1[0], p1[1], p1[2])); + + LINES->push_back({vertex_offset, vertex_offset+1}); + vertex_offset +=2; + } + + if (shapeEnum == LightShape::Point) { + + int anchor_offset = VERTS->size(); + VERTS->push_back({0,0,0}); + + if (typeEnum != LightType::Projector){ + + glm::mat4 sub_trans = glm::rotate(glm::mat4(1.0), M_PIf/4, glm::vec3(0,1,0)); + + for (size_t i=4; i<=(anchor_offset-1); ++i) { + auto& v = VERTS.at(i); + auto p = sub_trans * glm::vec4(v[0], v[1], v[2], 1); + + VERTS->push_back( {p.x, p.y, p.z} ); + LINES->push_back({anchor_offset, (int)VERTS.size()-1}); + } + } else { + auto vertical_distance = VERTS[anchor_offset-1][1]; + float x_max=-FLT_MAX, x_min=FLT_MAX; + float z_max=-FLT_MAX, z_min=FLT_MAX; + + for (int i=0; i<4; ++i) { + auto idx = anchor_offset - 1 - i * 2; + auto& tmp = VERTS[idx]; + + x_max = max(tmp[0], x_max); + x_min = min(tmp[0], x_min); + z_max = max(tmp[2], z_max); + z_min = min(tmp[2], z_min); + } + + VERTS->push_back({ x_max, 
vertical_distance, z_max} ); + VERTS->push_back({ x_max, vertical_distance, z_min} ); + VERTS->push_back({ x_min, vertical_distance, z_min} ); + VERTS->push_back({ x_min, vertical_distance, z_max} ); + + LINES->push_back({anchor_offset+1, anchor_offset+2}); + LINES->push_back({anchor_offset+2, anchor_offset+3}); + LINES->push_back({anchor_offset+3, anchor_offset+4}); + LINES->push_back({anchor_offset+4, anchor_offset+1}); + } + + if (typeEnum == LightType::Diffuse) { + + int vertex_offset = VERTS->size(); + + for (auto i : {-1, 0, 1}) { + for (auto j : {-1, 0, 1}) { + + auto sub_trans = glm::rotate(glm::mat4(1.0), M_PIf/4, glm::vec3(i,0,j)); + if (i == 0 && j == 0) { sub_trans = glm::mat4(1.0); } + + sub_trans = glm::scale(sub_trans, {0, scale[1], 0}); + + auto p1 = sub_trans * glm::vec4(0, +.3, 0, 1); + auto p2 = sub_trans * glm::vec4(0, -.3, 0, 1); + + VERTS->push_back(zeno::vec3f(p1[0], p1[1], p1[2])); + VERTS->push_back(zeno::vec3f(p2[0], p2[1], p2[2])); + + LINES->push_back({anchor_offset, vertex_offset+0}); + LINES->push_back({anchor_offset, vertex_offset+1}); + + vertex_offset += 2; + } // j + } // i + } + } + + if ( (shapeEnum != LightShape::Sphere) && (invertdir || doubleside) ) { + + auto sub_trans = glm::rotate(glm::mat4(1.0), M_PIf, glm::vec3(1,0,0)); + auto vertices_offset = VERTS.size(); + + if (doubleside) { + + LINES->reserve(LINES->size()*2); + VERTS.reserve(VERTS->size()*2); + typeof(LINES) tmp(LINES->size()); + + std::transform(LINES.begin(), LINES.end(), tmp.begin(), + [&](auto ele){ return ele + vertices_offset; }); + + LINES->insert(LINES.end(), tmp->begin(), tmp->end()); + } + + for (size_t i=0; ipush_back(zeno::vec3f(p[0], p[1], p[2])); + } + } + } + + auto &clr = VERTS.add_attr("clr"); + for (size_t i=0; iuserData(); + + ud.set2("isRealTimeObject", std::move(isL)); + + ud.set2("isL", std::move(isL)); + ud.set2("ivD", std::move(invertdir)); + ud.set2("pos", std::move(position)); + ud.set2("scale", std::move(scale)); + ud.set2("rotate", 
std::move(rotate)); + ud.set2("quaternion", std::move(quaternion)); + ud.set2("color", std::move(color)); + ud.set2("intensity", std::move(intensity)); + + auto fluxFixed = get_input2("fluxFixed"); + ud.set2("fluxFixed", std::move(fluxFixed)); + auto maxDistance = get_input2("maxDistance"); + ud.set2("maxDistance", std::move(maxDistance)); + auto falloffExponent = get_input2("falloffExponent"); + ud.set2("falloffExponent", std::move(falloffExponent)); + + if (has_input2("profile")) { + auto profile = get_input2("profile"); + ud.set2("lightProfile", std::move(profile)); + } + if (has_input2("texturePath")) { + auto texture = get_input2("texturePath"); + ud.set2("lightTexture", std::move(texture)); + + auto gamma = get_input2("textureGamma"); + ud.set2("lightGamma", std::move(gamma)); + } + + ud.set2("type", std::move(typeOrder)); + ud.set2("shape", std::move(shapeOrder)); + + ud.set2("mask", std::move(mask)); + ud.set2("spread", std::move(spread)); + ud.set2("visible", std::move(visible)); + ud.set2("doubleside", std::move(doubleside)); + + auto visibleIntensity = get_input2("visibleIntensity"); + ud.set2("visibleIntensity", std::move(visibleIntensity)); + + set_output("prim", std::move(prim)); + } + + const static inline std::string lightShapeKey = "shape"; + + static std::string lightShapeDefaultString() { + auto name = magic_enum::enum_name(LightShape::Plane); + return std::string(name); + } + + static std::string lightShapeListString() { + auto list = magic_enum::enum_names(); + + std::string result; + for (auto& ele : list) { + result += " "; + result += ele; + } + return result; + } + + const static inline std::string lightTypeKey = "type"; + + static std::string lightTypeDefaultString() { + auto name = magic_enum::enum_name(LightType::Diffuse); + return std::string(name); + } + + static std::string lightTypeListString() { + auto list = magic_enum::enum_names(); + + std::string result; + for (auto& ele : list) { + result += " "; + result += ele; + } + return 
result; + } +}; + +ZENO_DEFNODE(LightNode)({ + { + {"vec3f", "position", "0, 0, 0"}, + {"vec3f", "scale", "1, 1, 1"}, + {"vec3f", "rotate", "0, 0, 0"}, + {"vec4f", "quaternion", "1, 0, 0, 0"}, + + {"colorvec3f", "color", "1, 1, 1"}, + {"float", "exposure", "0"}, + {"float", "intensity", "1"}, + {"float", "fluxFixed", "-1.0"}, + + {"vec2f", "spread", "1.0, 0.0"}, + {"float", "maxDistance", "-1.0" }, + {"float", "falloffExponent", "2.0"}, + {"int", "mask", "255"}, + {"bool", "visible", "0"}, + {"bool", "invertdir", "0"}, + {"bool", "doubleside", "0"}, + + {"readpath", "profile"}, + {"readpath", "texturePath"}, + {"float", "textureGamma", "1.0"}, + {"float", "visibleIntensity", "-1.0"}, + + {"enum " + LightNode::lightShapeListString(), LightNode::lightShapeKey, LightNode::lightShapeDefaultString()}, + {"enum " + LightNode::lightTypeListString(), LightNode::lightTypeKey, LightNode::lightTypeDefaultString()}, + {"PrimitiveObject", "prim"}, + }, + { + "prim" + }, + { + {"enum " + EulerAngle::RotationOrderListString(), "EulerRotationOrder", EulerAngle::RotationOrderDefaultString()}, + {"enum " + EulerAngle::MeasureListString(), "EulerAngleMeasure", EulerAngle::MeasureDefaultString()} + }, + {"shader"}, +}); + +} // namespace \ No newline at end of file diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 10df4f2873..d1b88da063 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -634,19 +634,19 @@ struct GraphicsManager { } else { - auto p2 = prim_in->verts[prim_in->tris[0][0]]; - auto p0 = prim_in->verts[prim_in->tris[0][1]]; - auto p1 = prim_in->verts[prim_in->tris[0][2]]; - auto e1 = p1 - p0; - auto e2 = p2 - p1; + auto v0 = prim_in->verts[0]; + auto v1 = prim_in->verts[1]; + auto v3 = prim_in->verts[3]; + auto e1 = v1 - v3; + auto e2 = v0 - v1; - // p0 ---(+x)--> p1 + // v3 ---(+x)--> v1 // |||||||||||||(-) // |||||||||||||(z) // |||||||||||||(+) - // p* <--(-x)--- p2 + // v2 
<--(-x)--- v0 - p0 = p0 + e2; // p* as p0 + v3 = v3 + e2; // p* as p0 e2 = -e2; // invert e2 // facing down in local space @@ -664,15 +664,15 @@ struct GraphicsManager { clr = color; extraStep(); - std::cout << "light: p"<userData().get2("ProceduralSky", 0) == 1) { From 71c9dd910880cfcf069c5d449ada4f4fa4fca379 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 11 Jun 2024 20:17:47 +0800 Subject: [PATCH 048/244] Improve projector intensity --- zenovis/xinxinoptix/Light.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/zenovis/xinxinoptix/Light.h b/zenovis/xinxinoptix/Light.h index 97bdd8697d..800befd449 100644 --- a/zenovis/xinxinoptix/Light.h +++ b/zenovis/xinxinoptix/Light.h @@ -289,6 +289,9 @@ void DirectLighting(RadiancePRD *prd, ShadowPRD& shadowPRD, const float3& shadin if (n_len <= 0) {return;} + lsr.dist = n_len * lsr.dist; + lsr.intensity = M_PIf/(lsr.dist * lsr.dist); + auto tanU = t_len / n_len; auto tanV = b_len / n_len; From d4a645d82396bee98771945f2787fa4eda21ff59 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 11 Jun 2024 20:23:13 +0800 Subject: [PATCH 049/244] Improve visible intensity --- zenovis/xinxinoptix/Light.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenovis/xinxinoptix/Light.cu b/zenovis/xinxinoptix/Light.cu index db5a114ddf..bc53d54cca 100644 --- a/zenovis/xinxinoptix/Light.cu +++ b/zenovis/xinxinoptix/Light.cu @@ -188,7 +188,7 @@ extern "C" __global__ void __closesthit__radiance() rect.normal, shadingP, light.spreadMajor, uvScale, uvOffset); - if (!valid) { return; } + //if (!valid) { return; } SphericalRect squad; SphericalRectInit(squad, shadingP, rect.v, rect.axisX, rect.lenX, rect.axisY, rect.lenY); @@ -221,7 +221,7 @@ extern "C" __global__ void __closesthit__radiance() default: return; } - if (light.type == zeno::LightType::Diffuse && light.spreadMajor < 1.0f) { + if (light.type == zeno::LightType::Diffuse && light.spreadMajor < 1.0f && prd->depth > 0) { auto void_angle = 0.5f * (1.0f - 
light.spreadMajor) * M_PIf; auto atten = light_spread_attenuation( From d2809f9e7f509ca1a5a51afa06ba8f0e31d453ce Mon Sep 17 00:00:00 2001 From: ShuliangLu Date: Tue, 11 Jun 2024 21:03:36 +0800 Subject: [PATCH 050/244] fem collision hessian spd using abs --- .../fem/collision_energy/vertex_face_sqrt_collision.hpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp b/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp index 9a18bfe6f7..5d809df922 100644 --- a/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp +++ b/projects/CuLagrange/fem/collision_energy/vertex_face_sqrt_collision.hpp @@ -153,15 +153,18 @@ namespace VERTEX_FACE_SQRT_COLLISION { auto tn = t.template cast().normalized().template cast(); auto productn = tDiff.transpose() * tn; - alpha = alpha > 0 ? alpha : 0; - beta = beta > 0 ? beta : 0; + // alpha = alpha > 0 ? alpha : 0; + alpha = alpha > 0 ? alpha : -alpha; + beta = beta > 0 ? beta : -beta; + // beta = beta > 0 ? 
beta : 0; auto result = (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); if(zs::isnan(result.norm())) { printf("nan cH detected %f %f %f %f\n",(float)alpha,(float)productn.norm(),(float)beta,(float)tDiff.norm()); } - return (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); + // return (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); + return result; // auto H = (REAL)2.0 * _mu * ((REAL)alpha * (zs::dyadic_prod(productn,productn)) + (REAL)beta * tDiff.transpose() * tDiff); // make_pd(H); // return H; From 051872ee82f44fee50286e2c45d68fa7e2790739 Mon Sep 17 00:00:00 2001 From: ShuliangLu Date: Tue, 11 Jun 2024 21:04:25 +0800 Subject: [PATCH 051/244] elastic rods constraints wip --- .../constraint_function_kernel/constraint.cuh | 191 +++++++++--------- 1 file changed, 96 insertions(+), 95 deletions(-) diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh index 9ec41df4d7..3572e62c43 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh @@ -5,6 +5,8 @@ #include "zensim/math/DihedralAngle.hpp" namespace zeno { namespace CONSTRAINT { + using namespace zs; + // FOR CLOTH SIMULATION template constexpr bool solve_DistanceConstraint( @@ -259,20 +261,6 @@ namespace zeno { namespace CONSTRAINT { ds[2] = VECTOR3d{grad[1 * 3 + 0],grad[1 * 3 + 1],grad[1 * 3 + 2]}; ds[3] = VECTOR3d{grad[2 * 3 + 0],grad[2 * 3 + 1],grad[2 * 3 + 2]}; ds[1] = VECTOR3d{grad[3 * 3 + 0],grad[3 * 3 + 1],grad[3 * 3 + 2]}; - // for(int i = 0;i != 4;++i) - // ds[i] = VECTOR3d{grad[i * 3 + 0],grad[i * 3 + 1],grad[i * 3 + 2]}; - - // for(int i = 0;i != 4;++i){ - // printf("ds[%d] : %f %f %f\n",i, - // (float)ds[i][0], - // 
(float)ds[i][1], - // (float)ds[i][2]); - // } - - - // SCALER alpha = 0.0; - // if (stiffness != 0.0) - // alpha = static_cast(1.0) / (stiffness * dt * dt); SCALER sum_normGradC = invMass0 * ds[0].l2NormSqr() + @@ -911,96 +899,109 @@ namespace zeno { namespace CONSTRAINT { // FOR ELASTIC RODS SIMULATION // ---------------------------------------------------------------------------------------------- -// template -// constexpr bool solve_StretchShearConstraint( -// const VECTOR3d& p0, SCALER invMass0, -// const VECTOR3d& p1, SCALER invMass1, -// const QUATERNION& q0, SCALER invMassq0, -// const VECTOR3d& stretchingAndShearingKs, -// const SCALER restLength, -// VECTOR3d& corr0, VECTOR3d& corr1, QUATERNION& corrq0) -// { -// VECTOR3d d3; //third director d3 = q0 * e_3 * q0_conjugate -// d3[0] = static_cast(2.0) * (q0.x() * q0.z() + q0.w() * q0.y()); -// d3[1] = static_cast(2.0) * (q0.y() * q0.z() - q0.w() * q0.x()); -// d3[2] = q0.w() * q0.w() - q0.x() * q0.x() - q0.y() * q0.y() + q0.z() * q0.z(); - -// VECTOR3d gamma = (p1 - p0) / restLength - d3; -// gamma /= (invMass1 + invMass0) / restLength + invMassq0 * static_cast(4.0)*restLength + eps; - -// if (std::abs(stretchingAndShearingKs[0] - stretchingAndShearingKs[1]) < eps && std::abs(stretchingAndShearingKs[0] - stretchingAndShearingKs[2]) < eps) //all Ks are approx. equal -// for (int i = 0; i<3; i++) gamma[i] *= stretchingAndShearingKs[i]; -// else //diffenent stretching and shearing Ks. 
Transform diag(Ks[0], Ks[1], Ks[2]) into world space using Ks_w = R(q0) * diag(Ks[0], Ks[1], Ks[2]) * R^T(q0) and multiply it with gamma -// { -// MATRIX3d R = q0.toRotationMatrix(); -// gamma = (R.transpose() * gamma).eval(); -// for (int i = 0; i<3; i++) gamma[i] *= stretchingAndShearingKs[i]; -// gamma = (R * gamma).eval(); -// } +template, + std::is_convertible_v, + VECTOR3d::dim == 1,VECTOR3d::extent == 3, + QUATERNION::dim == 1,QUATERNION::extent == 4> = 0> +constexpr bool solve_StretchShearConstraint( + const VECTOR3d& p0, SCALER invMass0, + const VECTOR3d& p1, SCALER invMass1, + const QUATERNION& q0, SCALER invMassq0, + const VECTOR3d& stretchingAndShearingKs, + const SCALER restLength, + VECTOR3d& corr0, VECTOR3d& corr1, QUATERNION& corrq0) +{ + constexpr auto eps = zs::limits::epsilon(); + VECTOR3d d3{}; //third director d3 = q0 * e_3 * q0_conjugate + d3[0] = static_cast(2.0) * (q0.x() * q0.z() + q0.w() * q0.y()); + d3[1] = static_cast(2.0) * (q0.y() * q0.z() - q0.w() * q0.x()); + d3[2] = q0.w() * q0.w() - q0.x() * q0.x() - q0.y() * q0.y() + q0.z() * q0.z(); + + VECTOR3d gamma = (p1 - p0) / restLength - d3; + gamma /= (invMass1 + invMass0) / restLength + invMassq0 * static_cast(4.0)*restLength + eps; + + if (zs::abs(stretchingAndShearingKs[0] - stretchingAndShearingKs[1]) < eps && zs::abs(stretchingAndShearingKs[0] - stretchingAndShearingKs[2]) < eps) //all Ks are approx. equal + for (int i = 0; i<3; i++) gamma[i] *= stretchingAndShearingKs[i]; + else //diffenent stretching and shearing Ks. 
Transform diag(Ks[0], Ks[1], Ks[2]) into world space using Ks_w = R(q0) * diag(Ks[0], Ks[1], Ks[2]) * R^T(q0) and multiply it with gamma + { + // MATRIX3d R = q0.toRotationMatrix(); + auto R = quaternion2matrix(q0); + gamma = R.transpose() * gamma; + for (int i = 0; i<3; i++) gamma[i] *= stretchingAndShearingKs[i]; + gamma = R * gamma; + } -// corr0 = invMass0 * gamma; -// corr1 = -invMass1 * gamma; + corr0 = invMass0 * gamma; + corr1 = -invMass1 * gamma; -// QUATERNION q_e_3_bar(q0.z(), -q0.y(), q0.x(), -q0.w()); //compute q*e_3.conjugate (cheaper than quaternion product) -// corrq0 = QUATERNION(0.0, gamma.x(), gamma.y(), gamma.z()) * q_e_3_bar; -// corrq0.coeffs() *= static_cast(2.0) * invMassq0 * restLength; + QUATERNION q_e_3_bar{-q0.y(), q0.x(), -q0.w(), q0.z()}; //compute q*e_3.conjugate (cheaper than quaternion product) + corrq0 = quaternionMultiply(QUATERNION{gamma.x(), gamma.y(), gamma.z(), 0.0 },q_e_3_bar); + + corrq0 *= static_cast(2.0) * invMassq0 * restLength; -// return true; -// } + return true; +} // // ---------------------------------------------------------------------------------------------- -// template -// constexpr bool solve_BendTwistConstraint( -// const QUATERNION& q0, SCALER invMassq0, -// const QUATERNION& q1, SCALER invMassq1, -// const VECTOR3d& bendingAndTwistingKs, -// const QUATERNION& restDarbouxVector, -// QUATERNION& corrq0, QUATERNION& corrq1) -// { -// QUATERNION omega = q0.conjugate() * q1; //darboux vector - -// QUATERNION omega_plus; -// omega_plus.coeffs() = omega.coeffs() + restDarbouxVector.coeffs(); //delta Omega with -Omega_0 -// omega.coeffs() = omega.coeffs() - restDarbouxVector.coeffs(); //delta Omega with + omega_0 -// if (omega.l2NormSqr() > omega_plus.l2NormSqr()) omega = omega_plus; - -// for (int i = 0; i < 3; i++) omega.coeffs()[i] *= bendingAndTwistingKs[i] / (invMassq0 + invMassq1 + static_cast(1.0e-6)); -// omega.w() = 0.0; //discrete Darboux vector does not have vanishing scalar part - -// corrq0 = q1 * 
omega; -// corrq1 = q0 * omega; -// corrq0.coeffs() *= invMassq0; -// corrq1.coeffs() *= -invMassq1; -// return true; -// } - -// // ---------------------------------------------------------------------------------------------- -// template -// constexpr bool solve_PerpendiculaBisectorConstraint( -// const VECTOR3d &p0, SCALER invMass0, -// const VECTOR3d &p1, SCALER invMass1, -// const VECTOR3d &p2, SCALER invMass2, -// const SCALER stiffness, -// VECTOR3d &corr0, VECTOR3d &corr1, VECTOR3d &corr2) -// { -// const VECTOR3d pm = 0.5 * (p0 + p1); -// const VECTOR3d p0p2 = p0 - p2; -// const VECTOR3d p2p1 = p2 - p1; -// const VECTOR3d p1p0 = p1 - p0; -// const VECTOR3d p2pm = p2 - pm; +template, + std::is_convertible_v, + VECTOR3d::dim == 1,VECTOR3d::extent == 3, + QUATERNION::dim == 1,QUATERNION::extent == 4> = 0> +constexpr bool solve_BendTwistConstraint( + const QUATERNION& q0, SCALER invMassq0, + const QUATERNION& q1, SCALER invMassq1, + const VECTOR3d& bendingAndTwistingKs, + const QUATERNION& restDarbouxVector, + QUATERNION& corrq0, QUATERNION& corrq1) +{ + QUATERNION omega = quaternionConjugateMultiply(q0,q1); //darboux vector + + QUATERNION omega_plus; + omega_plus = omega + restDarbouxVector; //delta Omega with -Omega_0 + omega = omega - restDarbouxVector; //delta Omega with + omega_0 + if (omega.l2NormSqr() > omega_plus.l2NormSqr()) omega = omega_plus; + + for (int i = 0; i < 3; i++) omega[i] *= bendingAndTwistingKs[i] / (invMassq0 + invMassq1 + static_cast(1.0e-6)); + omega.w() = 0.0; //discrete Darboux vector does not have vanishing scalar part + + // corrq0 = q1 * omega; + corrq0 = quaternionMultiply(q1,omega); + // corrq1 = q0 * omega; + corrq1 = quaternionMultiply(q0,omega); + corrq0 *= invMassq0; + corrq1 *= -invMassq1; + return true; +} -// SCALER wSum = invMass0 * p0p2.l2NormSqr() + invMass1 * p2p1.l2NormSqr() + invMass2 * p1p0.l2NormSqr(); -// if (wSum < eps) -// return false; +// 
---------------------------------------------------------------------------------------------- +template +constexpr bool solve_PerpendiculaBisectorConstraint( + const VECTOR3d &p0, SCALER invMass0, + const VECTOR3d &p1, SCALER invMass1, + const VECTOR3d &p2, SCALER invMass2, + const SCALER stiffness, + VECTOR3d &corr0, VECTOR3d &corr1, VECTOR3d &corr2) +{ + const VECTOR3d pm = 0.5 * (p0 + p1); + const VECTOR3d p0p2 = p0 - p2; + const VECTOR3d p2p1 = p2 - p1; + const VECTOR3d p1p0 = p1 - p0; + const VECTOR3d p2pm = p2 - pm; + + SCALER wSum = invMass0 * p0p2.l2NormSqr() + invMass1 * p2p1.l2NormSqr() + invMass2 * p1p0.l2NormSqr(); + if (wSum < eps) + return false; -// const SCALER lambda = stiffness * p2pm.dot(p1p0) / wSum; + const SCALER lambda = stiffness * p2pm.dot(p1p0) / wSum; -// corr0 = -invMass0 * lambda * p0p2; -// corr1 = -invMass1 * lambda * p2p1; -// corr2 = -invMass2 * lambda * p1p0; + corr0 = -invMass0 * lambda * p0p2; + corr1 = -invMass1 * lambda * p2p1; + corr2 = -invMass2 * lambda * p1p0; -// return true; -// } + return true; +} // // ---------------------------------------------------------------------------------------------- // template From b76d4f25ecfa8391f0784704b0368c71ae4b1057 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 12 Jun 2024 18:11:53 +0800 Subject: [PATCH 052/244] fix-vol-tex --- zenovis/src/optx/RenderEngineOptx.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index e6287db440..356b11b2fa 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -1147,7 +1147,10 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { std::vector realNeedTexPaths; for(auto const &[matkey, mtldet] : matMap) { for(auto tex: mtldet->tex2Ds) { - if (cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 || cachedSphereMaterials.count(mtldet->mtlidkey) > 0) { + if 
(cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 + || cachedSphereMaterials.count(mtldet->mtlidkey) > 0 + || mtldet->parameters.find("vol") != std::string::npos + ) { realNeedTexPaths.emplace_back(tex->path); } } @@ -1174,6 +1177,9 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { for (const auto& need_remove_tex: needToRemoveTexPaths) { OptixUtil::removeTexture(need_remove_tex); } + for (const auto& realNeedTexPath: realNeedTexPaths) { + OptixUtil::addTexture(realNeedTexPath); + } } for(auto const &[matkey, mtldet] : matMap) { @@ -1231,14 +1237,9 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { //std::cout< shaderTex; - int texid=0; for(auto tex:mtldet->tex2Ds) { - if (cachedMeshesMaterials.count(mtldet->mtlidkey) > 0) { - OptixUtil::addTexture(tex->path.c_str()); - shaderTex.emplace_back(tex->path); - texid++; - } + shaderTex.emplace_back(tex->path); } ShaderPrepared shaderP; From 01a7d55af169ac03e302b4e9c722ebebc5c73a9f Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 13 Jun 2024 20:21:41 +0800 Subject: [PATCH 053/244] Fix windows build --- zeno/src/nodes/LightNodes.cpp | 13 +++++++++++-- zeno/src/nodes/ProcedrualSkyNode.cpp | 4 +++- zenovis/xinxinoptix/Portal.h | 8 ++++---- zenovis/xinxinoptix/Sampling.h | 4 ++++ zenovis/xinxinoptix/TraceStuff.h | 4 ---- zenovis/xinxinoptix/optixPathTracer.cpp | 2 +- zenovis/xinxinoptix/volume/optixVolume.h | 2 +- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/zeno/src/nodes/LightNodes.cpp b/zeno/src/nodes/LightNodes.cpp index a228eec43a..35423795bc 100644 --- a/zeno/src/nodes/LightNodes.cpp +++ b/zeno/src/nodes/LightNodes.cpp @@ -6,6 +6,14 @@ #include #include +#define _USE_MATH_DEFINES +#include +#include + +#ifndef M_PIf +#define M_PIf (float)M_PI +#endif + #include #include #include @@ -201,7 +209,7 @@ struct LightNode : INode { auto delta = glm::vec4(0.0); delta[abs(info)-1] = 0.5f * scale[abs(info)-1]; - if ( std::signbit(info) ) { // negative + if ( info < 0 ) { // negative 
delta = -delta; } @@ -297,7 +305,8 @@ struct LightNode : INode { LINES->reserve(LINES->size()*2); VERTS.reserve(VERTS->size()*2); - typeof(LINES) tmp(LINES->size()); + decltype(*&LINES) tmp = LINES; + //tmp->resize(LINES->size()); std::transform(LINES.begin(), LINES.end(), tmp.begin(), [&](auto ele){ return ele + vertices_offset; }); diff --git a/zeno/src/nodes/ProcedrualSkyNode.cpp b/zeno/src/nodes/ProcedrualSkyNode.cpp index c96a0927a9..8c7a20161a 100644 --- a/zeno/src/nodes/ProcedrualSkyNode.cpp +++ b/zeno/src/nodes/ProcedrualSkyNode.cpp @@ -94,7 +94,9 @@ struct DistantLight : INode { auto dir2 = get_input2("Lat-Lon"); // dir2[0] = fmod(dir2[0], 180.f); // dir2[1] = fmod(dir2[1], 180.f); - dir2 *= M_PIf / 180.f; + + dir2[0] = glm::radians(dir2[0]); + dir2[1] = glm::radians(dir2[1]); zeno::vec3f dir3; dir3[1] = std::sin(dir2[0]); diff --git a/zenovis/xinxinoptix/Portal.h b/zenovis/xinxinoptix/Portal.h index ef85e6ae09..339991e24a 100644 --- a/zenovis/xinxinoptix/Portal.h +++ b/zenovis/xinxinoptix/Portal.h @@ -257,9 +257,9 @@ struct PortalLight { struct Dummy { - typeof(image_dummy) image; - typeof(dist_dummy) dist; - typeof(sat_dummy) sat; + decltype (image_dummy) _image; + decltype (dist_dummy) _dist; + decltype (sat_dummy) _sat; Portal portal; Vector3f X,Y,Z; }; @@ -600,7 +600,7 @@ struct PortalLightList { } auto first = list.front().pack(); - std::vector tmp; + std::vector tmp; tmp.reserve(list.size()); tmp.push_back(first); diff --git a/zenovis/xinxinoptix/Sampling.h b/zenovis/xinxinoptix/Sampling.h index 289adb7078..2f4d46ff9d 100644 --- a/zenovis/xinxinoptix/Sampling.h +++ b/zenovis/xinxinoptix/Sampling.h @@ -20,6 +20,10 @@ #define FLT_MAX __FLT_MAX__ #endif +#ifndef uint +using uint = unsigned int; +#endif + #ifdef __CUDACC_DEBUG__ #define DCHECK assert #else diff --git a/zenovis/xinxinoptix/TraceStuff.h b/zenovis/xinxinoptix/TraceStuff.h index c9adac0a91..18808a7dec 100644 --- a/zenovis/xinxinoptix/TraceStuff.h +++ b/zenovis/xinxinoptix/TraceStuff.h @@ 
-16,10 +16,6 @@ #include "Host.h" #endif -#ifndef uint -using uint = unsigned int; -#endif - #define MISS_HIT 0 #define DIFFUSE_HIT 1 #define SPECULAR_HIT 2 diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 9db51caf5d..c1fefe614e 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2132,7 +2132,7 @@ void updatePortalLights(const std::vector& portals) { auto& pll = state.plights; auto& pls = pll.list; pls.clear(); - pls.reserve(max(portals.size(), 0) ); + pls.reserve(std::max(portals.size(), 0llu) ); glm::mat4 rotation = glm::mat4(1.0f); rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_y), glm::vec3(0,1,0)); diff --git a/zenovis/xinxinoptix/volume/optixVolume.h b/zenovis/xinxinoptix/volume/optixVolume.h index 1f672a427e..4a224bf9a2 100644 --- a/zenovis/xinxinoptix/volume/optixVolume.h +++ b/zenovis/xinxinoptix/volume/optixVolume.h @@ -34,7 +34,7 @@ #include #ifndef uint -typedef unsigned int uint; +using uint = unsigned int; #endif struct GridWrapper { From a0a020650680ad7a22a021e289c95af274129dc4 Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 13 Jun 2024 20:23:49 +0800 Subject: [PATCH 054/244] Improve texture loading --- zenovis/src/optx/RenderEngineOptx.cpp | 30 +++++----- zenovis/xinxinoptix/OptiXStuff.h | 75 ++++++++++++++++--------- zenovis/xinxinoptix/optixPathTracer.cpp | 2 +- 3 files changed, 63 insertions(+), 44 deletions(-) diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 288705d4b1..4e5d36f4c1 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -701,7 +701,7 @@ struct GraphicsManager { } OptixUtil::sky_tex = path; - OptixUtil::addTexture(path); + OptixUtil::addSkyTexture(path); } else { OptixUtil::sky_tex = OptixUtil::default_sky_tex; } @@ -1216,33 +1216,33 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { cachedSphereMaterials = 
xinxinoptix::uniqueMatsForSphere(); } // preserve material names for materials-only updating case - //for (auto const &[key, obj]: graphicsMan->graphics) // Auto unload unused texure { - std::vector realNeedTexPaths; + std::set realNeedTexPaths; for(auto const &[matkey, mtldet] : matMap) { - for(auto tex: mtldet->tex2Ds) { - if (cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 - || cachedSphereMaterials.count(mtldet->mtlidkey) > 0 - || mtldet->parameters.find("vol") != std::string::npos - ) { - realNeedTexPaths.emplace_back(tex->path); + if (mtldet->parameters.find("vol") != std::string::npos + || cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 + || cachedSphereMaterials.count(mtldet->mtlidkey) > 0) + { + for(auto& tex: mtldet->tex2Ds) { + realNeedTexPaths.insert(tex->path); } } + } // add light map for(auto const &[_, ld]: xinxinoptix::get_lightdats()) { - if (ld.profileKey.size()) { - realNeedTexPaths.emplace_back(ld.profileKey); - } + // if (ld.profileKey.size()) { + // realNeedTexPaths.emplace_back(ld.profileKey); + // } if (ld.textureKey.size()) { - realNeedTexPaths.emplace_back(ld.textureKey); + realNeedTexPaths.insert(ld.textureKey); } } std::vector needToRemoveTexPaths; for(auto const &[tex, _]: OptixUtil::g_tex) { - if (std::find(realNeedTexPaths.begin(), realNeedTexPaths.end(), tex) != realNeedTexPaths.end()) { - continue; + if (realNeedTexPaths.count(tex) > 0) { + continue; } if (OptixUtil::sky_tex.has_value() && tex == OptixUtil::sky_tex.value()) { continue; diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 49f8a7627b..6e0dd2a653 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -640,12 +640,14 @@ struct WrapperIES { inline std::map g_ies; -inline void calc_sky_cdf_map(int nx, int ny, int nc, std::function& look) { +// Create cumulative distribution function for importance sampling of spherical environment lights. 
+// This is a textbook implementation for the CDF generation of a spherical HDR environment. +// See "Physically Based Rendering" v2, chapter 14.6.5 on Infinite Area Lights. - auto& tex = g_tex[sky_tex.value()]; +inline void calc_sky_cdf_map(cuTexture* tex, int nx, int ny, int nc, std::function& look) { - auto &sky_nx = tex->width; - auto &sky_ny = tex->height; + tex->width = nx; + tex->height = ny; auto &sky_avg = tex->average; @@ -653,8 +655,6 @@ inline void calc_sky_cdf_map(int nx, int ny, int nc, std::functionpdf; auto &sky_start = tex->start; - sky_nx = nx; - sky_ny = ny; //we need to recompute cdf sky_cdf.resize(nx*ny); sky_cdf.assign(nx*ny, 0); @@ -708,7 +708,18 @@ static std::string calculateMD5(const std::vector& input) { encoder.MessageEnd(); return output; } -inline void addTexture(std::string path) + +namespace detail { + template struct is_void { + static constexpr bool value = false; + }; + template <> struct is_void { + static constexpr bool value = true; + }; +} + +template +inline void addTexture(std::string path, TaskType* task=nullptr) { zeno::log_debug("loading texture :{}", path); std::string native_path = std::filesystem::u8path(path).string(); @@ -766,10 +777,10 @@ inline void addTexture(std::string path) g_tex[path] = makeCudaTexture(rgba, nx, ny, nc); - lookupTexture = [&](uint32_t idx) { + lookupTexture = [rgba](uint32_t idx) { return rgba[idx]; }; - cleanupTexture = [&]() { + cleanupTexture = [rgba]() { free(rgba); }; } @@ -828,7 +839,7 @@ inline void addTexture(std::string path) g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4); } - lookupTexture = [img](uint32_t idx) { + lookupTexture = [&img](uint32_t idx) { auto ptr = (float*)img->verts->data(); return ptr[idx]; }; @@ -846,10 +857,10 @@ inline void addTexture(std::string path) g_tex[path] = makeCudaTexture(img, nx, ny, nc); - lookupTexture = [&](uint32_t idx) { + lookupTexture = [img](uint32_t idx) { return img[idx]; }; - cleanupTexture = [&]() { + cleanupTexture 
= [img]() { stbi_image_free(img); }; } @@ -862,35 +873,25 @@ inline void addTexture(std::string path) } nx = std::max(nx, 1); ny = std::max(ny, 1); - assert(img); g_tex[path] = makeCudaTexture(img, nx, ny, nc); - lookupTexture = [&](uint32_t idx) { + lookupTexture = [img](uint32_t idx) { return (float)img[idx] / 255; }; - cleanupTexture = [&]() { + cleanupTexture = [img]() { stbi_image_free(img); }; } g_tex[path]->md5 = md5Hash; - if(sky_tex.value() == path) - { - calc_sky_cdf_map(nx, ny, nc, lookupTexture); - auto& tex = g_tex[sky_tex.value()]; - auto float_count = nx * ny * nc; - tex->rawData.resize(float_count); - - for (size_t i=0; irawData.at(i) = lookupTexture(i); + if constexpr (!detail::is_void::value) { + if (task != nullptr) { + (*task)(g_tex[path].get(), nx, ny, nc, lookupTexture); } } - cleanupTexture(); - for (auto i = g_tex.begin(); i != g_tex.end(); i++) { - zeno::log_info("-{}", i->first); - } + cleanupTexture(); } inline void removeTexture(std::string path) { if (path.size()) { @@ -907,6 +908,24 @@ inline void removeTexture(std::string path) { } } +inline void addSkyTexture(std::string path) { + + auto task = [](cuTexture* tex, uint32_t nx, uint32_t ny, uint32_t nc, std::function &lookupTexture) { + + const auto float_count = nx * ny * nc; + + auto& rawData = tex->rawData; + rawData.resize(float_count); + for (uint32_t i=0; i module {}; OptixModule* moduleIS = nullptr; diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index c1fefe614e..aa7467b97f 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -1697,7 +1697,7 @@ void optixinit( int argc, char* argv[] ) OptixUtil::default_sky_tex = cur_path + "/hdr/Panorama.hdr"; OptixUtil::sky_tex = OptixUtil::default_sky_tex; - OptixUtil::addTexture(OptixUtil::sky_tex.value()); + OptixUtil::addSkyTexture(OptixUtil::sky_tex.value()); xinxinoptix::update_hdr_sky(0, {0, 0, 0}, 0.8); } From 
748797ad337031d056731edfc71790aee37c96db Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 14 Jun 2024 16:00:59 +0800 Subject: [PATCH 055/244] fix visual guide lines on windows --- zeno/src/nodes/LightNodes.cpp | 10 +++++----- zenovis/src/optx/RenderEngineOptx.cpp | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/zeno/src/nodes/LightNodes.cpp b/zeno/src/nodes/LightNodes.cpp index 35423795bc..13c532fbd1 100644 --- a/zeno/src/nodes/LightNodes.cpp +++ b/zeno/src/nodes/LightNodes.cpp @@ -302,13 +302,13 @@ struct LightNode : INode { auto vertices_offset = VERTS.size(); if (doubleside) { - - LINES->reserve(LINES->size()*2); + VERTS.reserve(VERTS->size()*2); - decltype(*&LINES) tmp = LINES; - //tmp->resize(LINES->size()); + LINES.reserve(LINES->size()*2); + + std::remove_reference::type tmp(LINES->size()); - std::transform(LINES.begin(), LINES.end(), tmp.begin(), + std::transform(LINES.begin(), LINES.end(), tmp->begin(), [&](auto ele){ return ele + vertices_offset; }); LINES->insert(LINES.end(), tmp->begin(), tmp->end()); diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 4e5d36f4c1..cbd63ed2c3 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -653,7 +653,7 @@ struct GraphicsManager { auto ne2 = zeno::normalize(e2); auto ne1 = zeno::normalize(e1); nor = zeno::normalize(zeno::cross(ne2, ne1)); - if (ivD) { nor *= -1; } + //if (ivD) { nor *= -1; } if (prim_in->verts.has_attr("clr")) { clr = prim_in->verts.attr("clr")[0]; From 93bd8d40ca336b31e46ef97f0e79bce962f6c7f9 Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 14 Jun 2024 16:20:29 +0800 Subject: [PATCH 056/244] minor fix --- zeno/src/nodes/LightNodes.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zeno/src/nodes/LightNodes.cpp b/zeno/src/nodes/LightNodes.cpp index 13c532fbd1..f879ec72d3 100644 --- a/zeno/src/nodes/LightNodes.cpp +++ b/zeno/src/nodes/LightNodes.cpp @@ -57,6 +57,8 @@ struct LightNode : 
INode { auto spread = get_input2("spread"); auto visible = get_input2("visible"); auto doubleside = get_input2("doubleside"); + + if (doubleside) { invertdir = false; } std::string type = get_input2(lightTypeKey); auto typeEnum = magic_enum::enum_cast(type).value_or(LightType::Diffuse); From c36a802a6978d86baa8d246e429c1c4e8d0c59bc Mon Sep 17 00:00:00 2001 From: iaomw Date: Fri, 14 Jun 2024 20:51:14 +0800 Subject: [PATCH 057/244] update mis.zsg --- misc/graphs/mis.zsg | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/misc/graphs/mis.zsg b/misc/graphs/mis.zsg index 129a58be1f..d70f1e0ce7 100644 --- a/misc/graphs/mis.zsg +++ b/misc/graphs/mis.zsg @@ -4497,7 +4497,7 @@ "enable": { "link": null, "type": "bool", - "default-value": false, + "default-value": true, "control": { "name": "Boolean" } @@ -4533,7 +4533,7 @@ "strength": { "link": null, "type": "float", - "default-value": 1.0, + "default-value": 0.0, "control": { "name": "Float" } @@ -4560,7 +4560,9 @@ 6656.0884600564509, 487.12836685064578 ], - "options": [] + "options": [ + "VIEW" + ] } }, "view_rect": {} From 6038053a37cf4643acf5197df6d5cfa93cbc8224 Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 17 Jun 2024 20:29:50 +0800 Subject: [PATCH 058/244] Patch by luzh --- ui/zenoedit/zenomainwindow.cpp | 19 +++++++++++-------- zeno/include/zeno/extra/GlobalComm.h | 1 + zeno/src/extra/GlobalComm.cpp | 10 ++++++++++ zenovis/src/Scene.cpp | 24 +++++++++++++----------- 4 files changed, 35 insertions(+), 19 deletions(-) diff --git a/ui/zenoedit/zenomainwindow.cpp b/ui/zenoedit/zenomainwindow.cpp index bd2211e2d5..1f087c95e3 100644 --- a/ui/zenoedit/zenomainwindow.cpp +++ b/ui/zenoedit/zenomainwindow.cpp @@ -1682,14 +1682,6 @@ bool ZenoMainWindow::openFile(QString filePath) if (!pModel) return false; - //cleanup - zeno::getSession().globalComm->clearFrameState(); - auto views = viewports(); - for (auto view : views) - { - view->cleanUpScene(); - } - resetTimeline(pGraphs->timeInfo()); 
recordRecentFile(filePath); initUserdata(pGraphs->userdataInfo()); @@ -2014,6 +2006,17 @@ bool ZenoMainWindow::saveQuit() { return false; } } + + //cleanup + if (pModel) { + zeno::getSession().globalComm->clearFrameState(); + auto views = viewports(); + for (auto view : views) + { + view->cleanUpScene(); + } + } + pGraphsMgm->clear(); //clear timeline info. resetTimeline(TIMELINE_INFO()); diff --git a/zeno/include/zeno/extra/GlobalComm.h b/zeno/include/zeno/extra/GlobalComm.h index a488aeaadf..60f0d8e981 100644 --- a/zeno/include/zeno/extra/GlobalComm.h +++ b/zeno/include/zeno/extra/GlobalComm.h @@ -53,6 +53,7 @@ struct GlobalComm { ZENO_API bool load_objects(const int frameid, const std::function> const& objs)>& cb, bool& isFrameValid); + ZENO_API void clear_objects(const std::function& cb); ZENO_API bool isFrameCompleted(int frameid) const; ZENO_API FRAME_STATE getFrameState(int frameid) const; ZENO_API bool isFrameBroken(int frameid) const; diff --git a/zeno/src/extra/GlobalComm.cpp b/zeno/src/extra/GlobalComm.cpp index b09c34f9be..6786dceea4 100644 --- a/zeno/src/extra/GlobalComm.cpp +++ b/zeno/src/extra/GlobalComm.cpp @@ -337,6 +337,16 @@ ZENO_API GlobalComm::ViewObjects const &GlobalComm::getViewObjects() { return m_frames.back().view_objects; } +ZENO_API void GlobalComm::clear_objects(const std::function& callback) +{ + std::lock_guard lck(m_mtx); + if (!callback) + return; + + callback(); +} + + ZENO_API bool GlobalComm::load_objects( const int frameid, const std::function> const& objs)>& callback, diff --git a/zenovis/src/Scene.cpp b/zenovis/src/Scene.cpp index 1b49ed8588..a14ecb918a 100644 --- a/zenovis/src/Scene.cpp +++ b/zenovis/src/Scene.cpp @@ -62,17 +62,19 @@ void Scene::cleanupView() void Scene::cleanUpScene() { - if (objectsMan) - objectsMan->clear_objects(); - - if (!renderMan) - return; - - RenderEngine* pEngine = renderMan->getEngine(); - if (pEngine) { - pEngine->update(); - pEngine->cleanupAssets(); - } + 
zeno::getSession().globalComm->clear_objects([this](){ + if (objectsMan) + objectsMan->clear_objects(); + + if (!renderMan) + return; + + RenderEngine* pEngine = renderMan->getEngine(); + if (pEngine) { + pEngine->update(); + pEngine->cleanupAssets(); + } + }); } void Scene::switchRenderEngine(std::string const &name) { From 994454eda5f3d2cf52a655b85d6f8314ebfec8e3 Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 17 Jun 2024 20:32:33 +0800 Subject: [PATCH 059/244] fix windows build --- build.ps1 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/build.ps1 b/build.ps1 index 081e560773..f8ef4525cd 100644 --- a/build.ps1 +++ b/build.ps1 @@ -71,6 +71,7 @@ Write-Output "Making minimum build with Optix..." cmake -G Ninja -B build -DCMAKE_BUILD_TYPE="${b}" ` -DCMAKE_TOOLCHAIN_FILE="${env:VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ` + -DDEACTIVATE_ZLIB=ON ` -DZENO_WITH_zenvdb:BOOL=ON ` -DZENO_SYSTEM_OPENVDB:BOOL=OFF ` -DZENO_WITH_ZenoFX:BOOL=ON ` @@ -86,6 +87,7 @@ Write-Output "Making full build..." 
cmake -G Ninja -B build -DCMAKE_BUILD_TYPE="${b}" ` -DCMAKE_TOOLCHAIN_FILE="${env:VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" ` + -DDEACTIVATE_ZLIB=ON ` -DZENO_WITH_CUDA:BOOL=ON ` -DZENO_WITH_ZenoFX:BOOL=ON ` -DZENO_ENABLE_OPTIX:BOOL=ON ` From 1facdc0ab6441898a7ca20aafe8b3fbacafab8ca Mon Sep 17 00:00:00 2001 From: iaomw Date: Mon, 17 Jun 2024 20:33:09 +0800 Subject: [PATCH 060/244] cleanup textures --- zenovis/xinxinoptix/optixPathTracer.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index aa7467b97f..04354801e4 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -3977,11 +3977,12 @@ void optixCleanup() { state.params.sky_strength = 1.0f; state.params.sky_texture; - auto sky_path = OptixUtil::default_sky_tex; - auto sky_tex = OptixUtil::g_tex[sky_path]; - - OptixUtil::g_tex = { {sky_path, sky_tex} }; - //OptixUtil::g_tex.at(sky_path) = sky_tex; + for (auto& [k, v] : OptixUtil::g_tex) { + if (k != OptixUtil::default_sky_tex) { + OptixUtil::removeTexture(k); + } + } + OptixUtil::sky_tex = OptixUtil::default_sky_tex; } From 347586b929a7b1c6ff814c2aba92ccf3ce6ba9df Mon Sep 17 00:00:00 2001 From: luzh Date: Tue, 18 Jun 2024 16:03:37 +0800 Subject: [PATCH 061/244] update ver. 
--- ui/zenoedit/zenoedit.rc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/zenoedit/zenoedit.rc b/ui/zenoedit/zenoedit.rc index aa4b6b811a..d0f80a5c53 100644 --- a/ui/zenoedit/zenoedit.rc +++ b/ui/zenoedit/zenoedit.rc @@ -48,8 +48,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,3,1,515 - PRODUCTVERSION 1,3,1,515 + FILEVERSION 1,3,1,618 + PRODUCTVERSION 1,3,1,618 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -66,12 +66,12 @@ BEGIN BEGIN VALUE "CompanyName", "ZENUSTECH" VALUE "FileDescription", "Zeno Editor" - VALUE "FileVersion", "1.3.1.515" + VALUE "FileVersion", "1.3.1.618" VALUE "InternalName", "zenoedit.rc" VALUE "LegalCopyright", "Copyright (C) 2023" VALUE "OriginalFilename", "zenoedit.rc" VALUE "ProductName", "Zeno" - VALUE "ProductVersion", "1.3.1.515" + VALUE "ProductVersion", "1.3.1.618" END END BLOCK "VarFileInfo" From 27c61dcd5f6a7c03df075a27c48909f3df2139aa Mon Sep 17 00:00:00 2001 From: teachmain Date: Tue, 18 Jun 2024 16:10:04 +0800 Subject: [PATCH 062/244] move a node --- {zeno/src/nodes/prim => projects/zenvdb}/VolumeBox.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {zeno/src/nodes/prim => projects/zenvdb}/VolumeBox.cpp (100%) diff --git a/zeno/src/nodes/prim/VolumeBox.cpp b/projects/zenvdb/VolumeBox.cpp similarity index 100% rename from zeno/src/nodes/prim/VolumeBox.cpp rename to projects/zenvdb/VolumeBox.cpp From 9339dd70c35e4ea4849ccae0caae3bd3c0fc417b Mon Sep 17 00:00:00 2001 From: teachmain Date: Tue, 18 Jun 2024 16:32:47 +0800 Subject: [PATCH 063/244] fix linux compile problem --- zenovis/xinxinoptix/optixPathTracer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 04354801e4..b33f92b89f 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2132,7 +2132,7 @@ void updatePortalLights(const std::vector& portals) { auto& pll = 
state.plights; auto& pls = pll.list; pls.clear(); - pls.reserve(std::max(portals.size(), 0llu) ); + pls.reserve(std::max(portals.size(), size_t(0)) ); glm::mat4 rotation = glm::mat4(1.0f); rotation = glm::rotate(rotation, glm::radians(state.params.sky_rot_y), glm::vec3(0,1,0)); From c98b5862b7c37fcb9449e87a519c8eda465c5ac7 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 19 Jun 2024 20:23:12 +0800 Subject: [PATCH 064/244] float4 -> float3 --- zenovis/xinxinoptix/PTKernel.cu | 20 ++++++++++---------- zenovis/xinxinoptix/optixPathTracer.cpp | 24 ++++++++++++------------ zenovis/xinxinoptix/optixPathTracer.h | 10 +++++----- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index e5ee736af3..edd4ee0247 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -357,11 +357,11 @@ extern "C" __global__ void __raygen__rg() if( subframe_index > 0 ) { const float a = 1.0f / static_cast( subframe_index+1 ); - const float3 accum_color_prev = make_float3( params.accum_buffer[ image_index ]); - const float3 accum_color_prev_d = make_float3( params.accum_buffer_D[ image_index ]); - const float3 accum_color_prev_s = make_float3( params.accum_buffer_S[ image_index ]); - const float3 accum_color_prev_t = make_float3( params.accum_buffer_T[ image_index ]); - const float3 accum_color_prev_b = make_float3( params.accum_buffer_B[ image_index ]); + const float3 accum_color_prev = params.accum_buffer[ image_index ]; + const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ]; + const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ]; + const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ]; + const float3 accum_color_prev_b = params.accum_buffer_B[ image_index ]; const float3 accum_mask_prev = params.frame_buffer_M[ image_index ]; accum_color = mix( vec3(accum_color_prev), accum_color, a ); accum_color_d = mix( 
vec3(accum_color_prev_d), accum_color_d, a ); @@ -380,11 +380,11 @@ extern "C" __global__ void __raygen__rg() } } - params.accum_buffer[ image_index ] = make_float4( accum_color.x, accum_color.y, accum_color.z, 1.0f); - params.accum_buffer_D[ image_index ] = make_float4( accum_color_d.x,accum_color_d.y,accum_color_d.z, 1.0f); - params.accum_buffer_S[ image_index ] = make_float4( accum_color_s.x,accum_color_s.y, accum_color_s.z, 1.0f); - params.accum_buffer_T[ image_index ] = make_float4( accum_color_t.x,accum_color_t.y,accum_color_t.z, 1.0f); - params.accum_buffer_B[ image_index ] = make_float4( accum_color_b, 1.0f); + params.accum_buffer[ image_index ] = make_float3( accum_color.x, accum_color.y, accum_color.z); + params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z); + params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z); + params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z); + params.accum_buffer_B[ image_index ] = accum_color_b; vec3 rgb_mapped = PhysicalCamera(vec3(accum_color), aperture, shutter_speed, iso, midGray, false, false); diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index b33f92b89f..e462a536f3 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -568,9 +568,9 @@ static void initLaunchParams( PathTracerState& state ) CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_p.reset() ), - state.params.width * state.params.height * sizeof( float4 ) + state.params.width * state.params.height * sizeof( float3 ) ) ); - state.params.accum_buffer = (float4*)(CUdeviceptr)state.accum_buffer_p; + state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p; auto& params = state.params; @@ -628,25 +628,25 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params // Realloc accumulation 
buffer CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_p .reset()), - params.width * params.height * sizeof( float4 ) + params.width * params.height * sizeof( float3 ) ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_d .reset()), - params.width * params.height * sizeof( float4 ) + params.width * params.height * sizeof( float3 ) ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_s .reset()), - params.width * params.height * sizeof( float4 ) + params.width * params.height * sizeof( float3 ) ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_t .reset()), - params.width * params.height * sizeof( float4 ) + params.width * params.height * sizeof( float3 ) ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_b .reset()), - params.width * params.height * sizeof( float4 ) + params.width * params.height * sizeof( float3 ) ) ); - state.params.accum_buffer = (float4*)(CUdeviceptr)state.accum_buffer_p; + state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p; CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.albedo_buffer_p.reset()), @@ -660,10 +660,10 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params ) ); state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p; - state.params.accum_buffer_D = (float4*)(CUdeviceptr)state.accum_buffer_d; - state.params.accum_buffer_S = (float4*)(CUdeviceptr)state.accum_buffer_s; - state.params.accum_buffer_T = (float4*)(CUdeviceptr)state.accum_buffer_t; - state.params.accum_buffer_B = (float4*)(CUdeviceptr)state.accum_buffer_b; + state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; + state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; + state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t; + state.params.accum_buffer_B = (float3*)(CUdeviceptr)state.accum_buffer_b; state.params.subframe_index = 0; } diff --git 
a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 7df84d652e..9e034d9f86 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -154,11 +154,11 @@ struct CameraInfo struct Params { unsigned int subframe_index; - float4* accum_buffer; - float4* accum_buffer_D; - float4* accum_buffer_S; - float4* accum_buffer_T; - float4* accum_buffer_B; + float3* accum_buffer; + float3* accum_buffer_D; + float3* accum_buffer_S; + float3* accum_buffer_T; + float3* accum_buffer_B; uchar4* frame_buffer; float3* frame_buffer_C; float3* frame_buffer_D; From e1f83f21b168728204e320fc7d8e729db6a8253a Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 15:55:33 +0800 Subject: [PATCH 065/244] fix show background --- zenovis/xinxinoptix/optixPathTracer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index e462a536f3..2d1fb17761 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2123,6 +2123,7 @@ void using_hdr_sky(bool enable) { void show_background(bool enable) { state.params.show_background = enable; + state.params.subframe_index = 0; } void updatePortalLights(const std::vector& portals) { From bc3c08dbc87892d8b89f3db21b44441ce30e6842 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 16:17:43 +0800 Subject: [PATCH 066/244] direct use accum buffer --- zenovis/xinxinoptix/optixPathTracer.cpp | 46 ++++++++++++++----------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 2d1fb17761..e630028244 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -3785,26 +3785,30 @@ void set_outside_random_number(int32_t outside_random_number) { state.params.outside_random_number = 
outside_random_number; } -void *optixgetimg_extra(std::string name) { +std::vector optixgetimg_extra2(std::string name, int w, int h) { + std::vector tex_data(w * h * 3); if (name == "diffuse") { - return output_buffer_diffuse->getHostPointer(); + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_d.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "specular") { - return output_buffer_specular->getHostPointer(); + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_s.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "transmit") { - return output_buffer_transmit->getHostPointer(); + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "background") { - return output_buffer_background->getHostPointer(); + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_b.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "mask") { - return output_buffer_mask->getHostPointer(); + std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data()); } else if (name == "color") { - return output_buffer_color->getHostPointer(); + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } - throw std::runtime_error("invalid optixgetimg_extra name: " + name); + else { + throw std::runtime_error("invalid optixgetimg_extra name: " + name); + } + return tex_data; } static void save_exr(float3* ptr, int w, int h, std::string path) { std::vector data(w * h); @@ -3880,7 +3884,7 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { auto exr_path = path.substr(0, path.size() - 4) + ".exr"; if (enable_output_mask) { path = path.substr(0, path.size() - 4); - save_png_data(path + "_mask.png", w, h, (float*)optixgetimg_extra("mask")); + save_png_data(path + "_mask.png", w, h, 
optixgetimg_extra2("mask", w, h).data()); } // AOV if (enable_output_aov) { @@ -3888,12 +3892,12 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { zeno::create_directories_when_write_file(exr_path); SaveMultiLayerEXR( { - (float*)optixgetimg_extra("color"), - (float*)optixgetimg_extra("diffuse"), - (float*)optixgetimg_extra("specular"), - (float*)optixgetimg_extra("transmit"), - (float*)optixgetimg_extra("background"), - (float*)optixgetimg_extra("mask"), + optixgetimg_extra2("color", w, h).data(), + optixgetimg_extra2("diffuse", w, h).data(), + optixgetimg_extra2("specular", w, h).data(), + optixgetimg_extra2("transmit", w, h).data(), + optixgetimg_extra2("background", w, h).data(), + optixgetimg_extra2("mask", w, h).data(), }, w, h, @@ -3911,17 +3915,17 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { } else { path = path.substr(0, path.size() - 4); - save_png_color(path + ".aov.diffuse.png", w, h, (float*)optixgetimg_extra("diffuse")); - save_png_color(path + ".aov.specular.png", w, h, (float*)optixgetimg_extra("specular")); - save_png_color(path + ".aov.transmit.png", w, h, (float*)optixgetimg_extra("transmit")); - save_png_data(path + ".aov.background.png", w, h, (float*)optixgetimg_extra("background")); - save_png_data(path + ".aov.mask.png", w, h, (float*)optixgetimg_extra("mask")); + save_png_color(path + ".aov.diffuse.png", w, h, optixgetimg_extra2("diffuse", w, h).data()); + save_png_color(path + ".aov.specular.png", w, h, optixgetimg_extra2("specular", w, h).data()); + save_png_color(path + ".aov.transmit.png", w, h, optixgetimg_extra2("transmit", w, h).data()); + save_png_data(path + ".aov.background.png", w, h, optixgetimg_extra2("background", w, h).data()); + save_png_data(path + ".aov.mask.png", w, h, optixgetimg_extra2("mask", w, h).data()); } } else { if (enable_output_exr) { zeno::create_directories_when_write_file(exr_path); - save_exr((float3 *)optixgetimg_extra("color"), w, h, exr_path); + 
save_exr((float3 *)optixgetimg_extra2("color", w, h).data(), w, h, exr_path); } else { std::string jpg_native_path = zeno::create_directories_when_write_file(path); From 5dce05da579105c2ed7b67562aa0927bf3823401 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 16:30:49 +0800 Subject: [PATCH 067/244] remove temp buffer --- zenovis/xinxinoptix/PTKernel.cu | 19 +------- zenovis/xinxinoptix/optixPathTracer.cpp | 64 ------------------------- zenovis/xinxinoptix/optixPathTracer.h | 5 -- 3 files changed, 1 insertion(+), 87 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index edd4ee0247..bec7c39fd5 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -386,24 +386,7 @@ extern "C" __global__ void __raygen__rg() params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z); params.accum_buffer_B[ image_index ] = accum_color_b; - - vec3 rgb_mapped = PhysicalCamera(vec3(accum_color), aperture, shutter_speed, iso, midGray, false, false); - vec3 d_mapped = PhysicalCamera(vec3(accum_color_d), aperture, shutter_speed, iso, midGray, false, false); - vec3 s_mapped = PhysicalCamera(vec3(accum_color_s), aperture, shutter_speed, iso, midGray, false, false); - vec3 t_mapped = PhysicalCamera(vec3(accum_color_t), aperture, shutter_speed, iso, midGray, false, false); - - - float3 out_color = rgb_mapped; - float3 out_color_d = d_mapped; - float3 out_color_s = s_mapped; - float3 out_color_t = t_mapped; - float3 out_color_b = accum_color_b; - params.frame_buffer[ image_index ] = make_color ( out_color ); - params.frame_buffer_C[ image_index ] = out_color; - params.frame_buffer_D[ image_index ] = out_color_d; - params.frame_buffer_S[ image_index ] = out_color_s; - params.frame_buffer_T[ image_index ] = out_color_t; - params.frame_buffer_B[ image_index ] = accum_color_b; + params.frame_buffer[ image_index ] = make_color ( accum_color ); 
params.frame_buffer_M[ image_index ] = accum_mask; if (params.denoise) { diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index e630028244..d01df32bdd 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -253,11 +253,6 @@ ushort2 halfNormal(float4 in) #endif std::optional> output_buffer_o; -std::optional> output_buffer_color; -std::optional> output_buffer_diffuse; -std::optional> output_buffer_specular; -std::optional> output_buffer_transmit; -std::optional> output_buffer_background; std::optional> output_buffer_mask; using Vertex = float4; @@ -618,11 +613,6 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params resize_dirty = false; output_buffer.resize( params.width, params.height ); - (*output_buffer_color).resize( params.width, params.height ); - (*output_buffer_diffuse).resize( params.width, params.height ); - (*output_buffer_specular).resize( params.width, params.height ); - (*output_buffer_transmit).resize( params.width, params.height ); - (*output_buffer_background).resize( params.width, params.height ); (*output_buffer_mask).resize( params.width, params.height ); // Realloc accumulation buffer @@ -687,11 +677,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path // Launch uchar4* result_buffer_data = output_buffer.map(); state.params.frame_buffer = result_buffer_data; - state.params.frame_buffer_C = (*output_buffer_color ).map(); - state.params.frame_buffer_D = (*output_buffer_diffuse ).map(); - state.params.frame_buffer_S = (*output_buffer_specular ).map(); - state.params.frame_buffer_T = (*output_buffer_transmit ).map(); - state.params.frame_buffer_B = (*output_buffer_background).map(); state.params.frame_buffer_M = (*output_buffer_mask ).map(); state.params.num_lights = lightsWrapper.g_lights.size(); state.params.denoise = denoise; @@ -725,11 +710,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& 
output_buffer, Path } } output_buffer.unmap(); - (*output_buffer_color ).unmap(); - (*output_buffer_diffuse ).unmap(); - (*output_buffer_specular ).unmap(); - (*output_buffer_transmit ).unmap(); - (*output_buffer_background).unmap(); (*output_buffer_mask ).unmap(); try { @@ -1627,46 +1607,6 @@ void optixinit( int argc, char* argv[] ) ); output_buffer_o->setStream( 0 ); } - if (!output_buffer_color) { - output_buffer_color.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_color->setStream( 0 ); - } - if (!output_buffer_diffuse) { - output_buffer_diffuse.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_diffuse->setStream( 0 ); - } - if (!output_buffer_specular) { - output_buffer_specular.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_specular->setStream( 0 ); - } - if (!output_buffer_transmit) { - output_buffer_transmit.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_transmit->setStream( 0 ); - } - if (!output_buffer_background) { - output_buffer_background.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_background->setStream( 0 ); - } if (!output_buffer_mask) { output_buffer_mask.emplace( output_buffer_type, @@ -4025,10 +3965,6 @@ void optixDestroy() { OptixUtil::shaderCoreLUT.clear(); output_buffer_o .reset(); - output_buffer_diffuse .reset(); - output_buffer_specular .reset(); - output_buffer_transmit .reset(); - output_buffer_background .reset(); output_buffer_mask .reset(); g_StaticMeshPieces .clear(); g_meshPieces .clear(); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 9e034d9f86..d1a59cdfb5 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -160,11 +160,6 @@ struct Params float3* accum_buffer_T; float3* accum_buffer_B; uchar4* 
frame_buffer; - float3* frame_buffer_C; - float3* frame_buffer_D; - float3* frame_buffer_S; - float3* frame_buffer_T; - float3* frame_buffer_B; float3* frame_buffer_M; float3* debug_buffer; From 384e45a3c617c7e216e6d43a27cb0c8cb88e1447 Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 20 Jun 2024 16:50:45 +0800 Subject: [PATCH 068/244] Improve Optix cleanup --- zenovis/xinxinoptix/optixPathTracer.cpp | 56 +++++++++++-------------- zenovis/xinxinoptix/xinxinoptixapi.h | 2 +- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index b33f92b89f..172da61aa9 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -1507,20 +1507,6 @@ static void cleanupState( PathTracerState& state ) OPTIX_CHECK(optixModuleDestroy(OptixUtil::ray_module)); OPTIX_CHECK(optixModuleDestroy(OptixUtil::sphere_module)); - cleanupSpheresGPU(); - lightsWrapper.reset(); - - for (auto& ele : list_volume) { - cleanupVolume(*ele); - } - list_volume.clear(); - - for (auto const& [key, val] : OptixUtil::g_vdb_cached_map) { - cleanupVolume(*val); - } - OptixUtil::g_vdb_cached_map.clear(); - OptixUtil::g_ies.clear(); - std::cout << "optix cleanup" << std::endl; } @@ -3984,31 +3970,42 @@ void optixCleanup() { } OptixUtil::sky_tex = OptixUtil::default_sky_tex; + + cleanupSpheresGPU(); + lightsWrapper.reset(); + + for (auto& ele : list_volume) { + cleanupVolume(*ele); + } + list_volume.clear(); + + for (auto const& [key, val] : OptixUtil::g_vdb_cached_map) { + cleanupVolume(*val); + } + OptixUtil::g_vdb_cached_map.clear(); + OptixUtil::g_ies.clear(); + + g_StaticMeshPieces.clear(); + g_meshPieces.clear(); } void optixDestroy() { using namespace OptixUtil; try { CUDA_SYNC_CHECK(); + optixCleanup(); cleanupState( state ); rtMaterialShaders.clear(); + OptixUtil::shaderCoreLUT.clear(); + OPTIX_CHECK(optixPipelineDestroy(state.pipeline)); 
OPTIX_CHECK(optixDeviceContextDestroy(state.context)); } catch (sutil::Exception const& e) { std::cout << "OptixCleanupError: " << e.what() << std::endl; } -//// state.d_vertices.reset(); -//// state.d_clr.reset(); -//// state.d_mat_indices.reset(); -//// state.d_nrm.reset(); -//// state.d_tan.reset(); -//// state.d_uv.reset(); -// std::memset((void *)&state, 0, sizeof(state)); -// //std::memset((void *)&rtMaterialShaders[0], 0, sizeof(rtMaterialShaders[0]) * rtMaterialShaders.size()); -// -// + context .handle=0; pipeline .handle=0; ray_module .handle=0; @@ -4017,22 +4014,17 @@ void optixDestroy() { radiance_miss_group .handle=0; occlusion_miss_group .handle=0; - OptixUtil::shaderCoreLUT.clear(); - output_buffer_o .reset(); output_buffer_diffuse .reset(); output_buffer_specular .reset(); output_buffer_transmit .reset(); output_buffer_background .reset(); output_buffer_mask .reset(); - g_StaticMeshPieces .clear(); - g_meshPieces .clear(); - state = {}; - isPipelineCreated = false; - - + state = {}; + isPipelineCreated = false; } + #if 0 if( outfile.empty() ) { diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 135cd460db..a75af3a313 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -34,8 +34,8 @@ namespace xinxinoptix { std::set uniqueMatsForMesh(); void optixCleanup(); - void optixDestroy(); + void optixrender(int fbo = 0, int samples = 1, bool denoise = false, bool simpleRender = false); void *optixgetimg(int &w, int &h); void optixinit(int argc, char* argv[]); From cc75adf3f8997418c605a2d006ac6d661a90ac99 Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 20 Jun 2024 16:52:17 +0800 Subject: [PATCH 069/244] dirty fix --- zenovis/xinxinoptix/optixPathTracer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 172da61aa9..ad38fddad9 100644 --- 
a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2108,7 +2108,10 @@ void using_hdr_sky(bool enable) { } void show_background(bool enable) { - state.params.show_background = enable; + if (enable != state.params.show_background) { + state.params.show_background = enable; + state.params.subframe_index = 0; + } } void updatePortalLights(const std::vector& portals) { From b3473e45277f4dcf54a61d0844b8c9189973019c Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 17:12:07 +0800 Subject: [PATCH 070/244] Revert "fix show background" This reverts commit e1f83f21b168728204e320fc7d8e729db6a8253a. --- zenovis/xinxinoptix/optixPathTracer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index d01df32bdd..2e01f3d04e 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2063,7 +2063,6 @@ void using_hdr_sky(bool enable) { void show_background(bool enable) { state.params.show_background = enable; - state.params.subframe_index = 0; } void updatePortalLights(const std::vector& portals) { From 06c0b5e0ebc45386d882a65084c952420827139c Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 18:23:27 +0800 Subject: [PATCH 071/244] accum_buffer_B use half3 --- zenovis/xinxinoptix/PTKernel.cu | 31 +++++++++++++++++++++++-- zenovis/xinxinoptix/TypeCaster.cpp | 11 +++++++++ zenovis/xinxinoptix/TypeCaster.h | 3 ++- zenovis/xinxinoptix/optixPathTracer.cpp | 13 ++++++++--- zenovis/xinxinoptix/optixPathTracer.h | 2 +- 5 files changed, 53 insertions(+), 7 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index bec7c39fd5..8f6f340d4f 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -92,6 +92,33 @@ vec3 PhysicalCamera(vec3 in, mapped = in * exposure; return enableExposure? (enableACES? 
ACESFilm(mapped):mapped ) : (enableACES? ACESFilm(in) : in); } + +static __inline__ __device__ +ushort3 float3_to_half3(float3 in) +{ + half x = __float2half(in.x); + half y = __float2half(in.y); + half z = __float2half(in.z); + ushort3 v; + v.x = reinterpret_cast(x); + v.y = reinterpret_cast(y); + v.z = reinterpret_cast(z); + return v; +} + +static __inline__ __device__ +float3 half3_to_float3(ushort3 in) +{ + half x = reinterpret_cast(in.x); + half y = reinterpret_cast(in.y); + half z = reinterpret_cast(in.z); + float3 v; + v.x = __half2float(x); + v.y = __half2float(y); + v.z = __half2float(z); + return v; +} + extern "C" __global__ void __raygen__rg() { @@ -361,7 +388,7 @@ extern "C" __global__ void __raygen__rg() const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ]; const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ]; const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ]; - const float3 accum_color_prev_b = params.accum_buffer_B[ image_index ]; + const float3 accum_color_prev_b = half3_to_float3(params.accum_buffer_B[ image_index ]); const float3 accum_mask_prev = params.frame_buffer_M[ image_index ]; accum_color = mix( vec3(accum_color_prev), accum_color, a ); accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a ); @@ -384,7 +411,7 @@ extern "C" __global__ void __raygen__rg() params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z); params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z); params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z); - params.accum_buffer_B[ image_index ] = accum_color_b; + params.accum_buffer_B[ image_index ] = float3_to_half3(accum_color_b); params.frame_buffer[ image_index ] = make_color ( accum_color ); params.frame_buffer_M[ image_index ] = accum_mask; diff --git a/zenovis/xinxinoptix/TypeCaster.cpp 
b/zenovis/xinxinoptix/TypeCaster.cpp index eba49af49d..1d19df7fc3 100644 --- a/zenovis/xinxinoptix/TypeCaster.cpp +++ b/zenovis/xinxinoptix/TypeCaster.cpp @@ -18,4 +18,15 @@ ushort3 toHalf(float4 in) ushort3 toHalf(float3 in) { return toHalf({in.x, in.y, in.z, 0.0f}); +} + +float3 toFloat(ushort3 in) { + half x = reinterpret_cast(in.x); + half y = reinterpret_cast(in.y); + half z = reinterpret_cast(in.z); + return { + __half2float(x), + __half2float(y), + __half2float(z), + }; } \ No newline at end of file diff --git a/zenovis/xinxinoptix/TypeCaster.h b/zenovis/xinxinoptix/TypeCaster.h index 519fb09e8e..3c5db83790 100644 --- a/zenovis/xinxinoptix/TypeCaster.h +++ b/zenovis/xinxinoptix/TypeCaster.h @@ -3,4 +3,5 @@ #include ushort3 toHalf(float4 in); -ushort3 toHalf(float3 in); \ No newline at end of file +ushort3 toHalf(float3 in); +float3 toFloat(ushort3 in); \ No newline at end of file diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 2e01f3d04e..81e4649197 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -634,7 +634,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_b .reset()), - params.width * params.height * sizeof( float3 ) + params.width * params.height * sizeof( ushort3 ) ) ); state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p; @@ -653,7 +653,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t; - state.params.accum_buffer_B = (float3*)(CUdeviceptr)state.accum_buffer_b; + state.params.accum_buffer_B = (ushort3*)(CUdeviceptr)state.accum_buffer_b; state.params.subframe_index = 0; } @@ -3736,7 +3736,14 @@ 
std::vector optixgetimg_extra2(std::string name, int w, int h) { cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "background") { - cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_b.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); + std::vector temp_buffer(w * h); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + float3 v = toFloat(temp_buffer[i]); + tex_data[i * 3 + 0] = v.x; + tex_data[i * 3 + 1] = v.y; + tex_data[i * 3 + 2] = v.z; + } } else if (name == "mask") { std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data()); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index d1a59cdfb5..011ed43623 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -158,7 +158,7 @@ struct Params float3* accum_buffer_D; float3* accum_buffer_S; float3* accum_buffer_T; - float3* accum_buffer_B; + ushort3* accum_buffer_B; uchar4* frame_buffer; float3* frame_buffer_M; From f39e944f08618193cc76b935bb991da16778da34 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 19:16:00 +0800 Subject: [PATCH 072/244] albedo normal buffer half3 --- zenovis/xinxinoptix/PTKernel.cu | 8 +++---- zenovis/xinxinoptix/optixPathTracer.cpp | 31 +++++++++++++++---------- zenovis/xinxinoptix/optixPathTracer.h | 4 ++-- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 8f6f340d4f..7e0286cece 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -399,10 +399,10 @@ extern "C" __global__ void __raygen__rg() if (params.denoise) { - const float3 accum_albedo_prev = params.albedo_buffer[ image_index ]; + 
const float3 accum_albedo_prev = half3_to_float3(params.albedo_buffer[ image_index ]); tmp_albedo = lerp(accum_albedo_prev, tmp_albedo, a); - const float3 accum_normal_prev = params.normal_buffer[ image_index ]; + const float3 accum_normal_prev = half3_to_float3(params.normal_buffer[ image_index ]); tmp_normal = lerp(accum_normal_prev, tmp_normal, a); } } @@ -417,8 +417,8 @@ extern "C" __global__ void __raygen__rg() params.frame_buffer_M[ image_index ] = accum_mask; if (params.denoise) { - params.albedo_buffer[ image_index ] = tmp_albedo; - params.normal_buffer[ image_index ] = tmp_normal; + params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo); + params.normal_buffer[ image_index ] = float3_to_half3(tmp_normal); } } diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 81e4649197..9dd092136c 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -571,15 +571,15 @@ static void initLaunchParams( PathTracerState& state ) CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.albedo_buffer_p.reset()), - params.width * params.height * sizeof( float3 ) + params.width * params.height * sizeof( ushort3 ) ) ); - state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p; + state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p; CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.normal_buffer_p.reset()), - params.width * params.height * sizeof( float3 ) + params.width * params.height * sizeof( ushort3 ) ) ); - state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p; + state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p; state.params.frame_buffer = nullptr; // Will be set when output buffer is mapped @@ -640,15 +640,15 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.albedo_buffer_p.reset()), - params.width * 
params.height * sizeof( float3 ) + params.width * params.height * sizeof( ushort3 ) ) ); - state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p; + state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p; CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.normal_buffer_p.reset()), - params.width * params.height * sizeof( float3 ) + params.width * params.height * sizeof( ushort3 ) ) ); - state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p; + state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p; state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; @@ -3877,17 +3877,24 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { std::string jpg_native_path = zeno::create_directories_when_write_file(path); stbi_write_jpg(jpg_native_path.c_str(), w, h, 4, p, 100); if (denoise) { - const float* _albedo_buffer = reinterpret_cast(state.albedo_buffer_p.handle); + std::vector temp_buffer(w * h); + const ushort3* _albedo_buffer = reinterpret_cast(state.albedo_buffer_p.handle); + for (auto i = 0; i < w * h; i++) { + temp_buffer[i] = toFloat(_albedo_buffer[i]); + } //SaveEXR(_albedo_buffer, w, h, 4, 0, (path+".albedo.exr").c_str(), nullptr); auto a_path = path + ".albedo.pfm"; std::string native_a_path = zeno::create_directories_when_write_file(a_path); - zeno::write_pfm(native_a_path.c_str(), w, h, _albedo_buffer); + zeno::write_pfm(native_a_path.c_str(), w, h, (float*)temp_buffer.data()); - const float* _normal_buffer = reinterpret_cast(state.normal_buffer_p.handle); + const ushort3* _normal_buffer = reinterpret_cast(state.normal_buffer_p.handle); + for (auto i = 0; i < w * h; i++) { + temp_buffer[i] = toFloat(_normal_buffer[i]); + } //SaveEXR(_normal_buffer, w, h, 4, 0, (path+".normal.exr").c_str(), nullptr); auto n_path = path + ".normal.pfm"; std::string native_n_path = 
zeno::create_directories_when_write_file(n_path); - zeno::write_pfm(native_n_path.c_str(), w, h, _normal_buffer); + zeno::write_pfm(native_n_path.c_str(), w, h, (float*)temp_buffer.data()); } } } diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 011ed43623..4e2e186756 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -163,8 +163,8 @@ struct Params float3* frame_buffer_M; float3* debug_buffer; - float3* albedo_buffer; - float3* normal_buffer; + ushort3* albedo_buffer; + ushort3* normal_buffer; unsigned int width; unsigned int height; From 7f09426df789bf20bd4009e38f084253ed30c2b5 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 19:37:39 +0800 Subject: [PATCH 073/244] accum_buffer_B use half --- zenovis/xinxinoptix/PTKernel.cu | 22 ++++++++++++++++++++-- zenovis/xinxinoptix/TypeCaster.cpp | 4 ++++ zenovis/xinxinoptix/TypeCaster.h | 3 ++- zenovis/xinxinoptix/optixPathTracer.cpp | 16 ++++++++-------- zenovis/xinxinoptix/optixPathTracer.h | 2 +- 5 files changed, 35 insertions(+), 12 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 7e0286cece..a90bb6e5ad 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -119,6 +119,20 @@ float3 half3_to_float3(ushort3 in) return v; } +static __inline__ __device__ +ushort1 float_to_half(float in) +{ + half x = __float2half(in); + return reinterpret_cast(x); +} + +static __inline__ __device__ +float half_to_float(ushort1 in) +{ + half x = reinterpret_cast(in); + return __half2float(x); +} + extern "C" __global__ void __raygen__rg() { @@ -388,7 +402,11 @@ extern "C" __global__ void __raygen__rg() const float3 accum_color_prev_d = params.accum_buffer_D[ image_index ]; const float3 accum_color_prev_s = params.accum_buffer_S[ image_index ]; const float3 accum_color_prev_t = params.accum_buffer_T[ image_index ]; - const float3 
accum_color_prev_b = half3_to_float3(params.accum_buffer_B[ image_index ]); + const float3 accum_color_prev_b = { + half_to_float(params.accum_buffer_B[ image_index ]), + half_to_float(params.accum_buffer_B[ image_index ]), + half_to_float(params.accum_buffer_B[ image_index ]), + }; const float3 accum_mask_prev = params.frame_buffer_M[ image_index ]; accum_color = mix( vec3(accum_color_prev), accum_color, a ); accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a ); @@ -411,7 +429,7 @@ extern "C" __global__ void __raygen__rg() params.accum_buffer_D[ image_index ] = make_float3( accum_color_d.x,accum_color_d.y,accum_color_d.z); params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z); params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z); - params.accum_buffer_B[ image_index ] = float3_to_half3(accum_color_b); + params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x); params.frame_buffer[ image_index ] = make_color ( accum_color ); params.frame_buffer_M[ image_index ] = accum_mask; diff --git a/zenovis/xinxinoptix/TypeCaster.cpp b/zenovis/xinxinoptix/TypeCaster.cpp index 1d19df7fc3..6f60aea0bc 100644 --- a/zenovis/xinxinoptix/TypeCaster.cpp +++ b/zenovis/xinxinoptix/TypeCaster.cpp @@ -29,4 +29,8 @@ float3 toFloat(ushort3 in) { __half2float(y), __half2float(z), }; +} +float toFloat(ushort1 in) { + half x = reinterpret_cast(in); + return __half2float(x); } \ No newline at end of file diff --git a/zenovis/xinxinoptix/TypeCaster.h b/zenovis/xinxinoptix/TypeCaster.h index 3c5db83790..c208adc72a 100644 --- a/zenovis/xinxinoptix/TypeCaster.h +++ b/zenovis/xinxinoptix/TypeCaster.h @@ -4,4 +4,5 @@ ushort3 toHalf(float4 in); ushort3 toHalf(float3 in); -float3 toFloat(ushort3 in); \ No newline at end of file +float3 toFloat(ushort3 in); +float toFloat(ushort1 in); \ No newline at end of file diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp 
b/zenovis/xinxinoptix/optixPathTracer.cpp index 9dd092136c..f752a5bd80 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -634,7 +634,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_b .reset()), - params.width * params.height * sizeof( ushort3 ) + params.width * params.height * sizeof( ushort1 ) ) ); state.params.accum_buffer = (float3*)(CUdeviceptr)state.accum_buffer_p; @@ -653,7 +653,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t; - state.params.accum_buffer_B = (ushort3*)(CUdeviceptr)state.accum_buffer_b; + state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b; state.params.subframe_index = 0; } @@ -3736,13 +3736,13 @@ std::vector optixgetimg_extra2(std::string name, int w, int h) { cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_t.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } else if (name == "background") { - std::vector temp_buffer(w * h); - cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost); + std::vector temp_buffer(w * h); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(ushort1) * temp_buffer.size(), cudaMemcpyDeviceToHost); for (auto i = 0; i < temp_buffer.size(); i++) { - float3 v = toFloat(temp_buffer[i]); - tex_data[i * 3 + 0] = v.x; - tex_data[i * 3 + 1] = v.y; - tex_data[i * 3 + 2] = v.z; + float v = toFloat(temp_buffer[i]); + tex_data[i * 3 + 0] = v; + tex_data[i * 3 + 1] = v; + tex_data[i * 3 + 2] = v; } } else if (name == "mask") { diff --git a/zenovis/xinxinoptix/optixPathTracer.h 
b/zenovis/xinxinoptix/optixPathTracer.h index 4e2e186756..bcf6e90a63 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -158,7 +158,7 @@ struct Params float3* accum_buffer_D; float3* accum_buffer_S; float3* accum_buffer_T; - ushort3* accum_buffer_B; + ushort1* accum_buffer_B; uchar4* frame_buffer; float3* frame_buffer_M; From 0551d2021a4bab2d549565be03d87405c95f5b08 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 20 Jun 2024 20:04:18 +0800 Subject: [PATCH 074/244] frame_buffer_M use half3 --- zenovis/xinxinoptix/PTKernel.cu | 4 ++-- zenovis/xinxinoptix/optixPathTracer.cpp | 28 ++++++++++++------------- zenovis/xinxinoptix/optixPathTracer.h | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index a90bb6e5ad..b7791706ee 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -407,7 +407,7 @@ extern "C" __global__ void __raygen__rg() half_to_float(params.accum_buffer_B[ image_index ]), half_to_float(params.accum_buffer_B[ image_index ]), }; - const float3 accum_mask_prev = params.frame_buffer_M[ image_index ]; + const float3 accum_mask_prev = half3_to_float3(params.frame_buffer_M[ image_index ]); accum_color = mix( vec3(accum_color_prev), accum_color, a ); accum_color_d = mix( vec3(accum_color_prev_d), accum_color_d, a ); accum_color_s = mix( vec3(accum_color_prev_s), accum_color_s, a ); @@ -432,7 +432,7 @@ extern "C" __global__ void __raygen__rg() params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x); params.frame_buffer[ image_index ] = make_color ( accum_color ); - params.frame_buffer_M[ image_index ] = accum_mask; + params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask); if (params.denoise) { params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo); diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp 
index f752a5bd80..b0e5abce31 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -253,7 +253,6 @@ ushort2 halfNormal(float4 in) #endif std::optional> output_buffer_o; -std::optional> output_buffer_mask; using Vertex = float4; struct PathTracerState @@ -300,6 +299,7 @@ struct PathTracerState raii accum_buffer_s; raii accum_buffer_t; raii accum_buffer_b; + raii accum_buffer_m; raii finite_lights_ptr; @@ -613,7 +613,6 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params resize_dirty = false; output_buffer.resize( params.width, params.height ); - (*output_buffer_mask).resize( params.width, params.height ); // Realloc accumulation buffer CUDA_CHECK( cudaMalloc( @@ -632,6 +631,10 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params reinterpret_cast( &state.accum_buffer_t .reset()), params.width * params.height * sizeof( float3 ) ) ); + CUDA_CHECK( cudaMalloc( + reinterpret_cast( &state.accum_buffer_m .reset()), + params.width * params.height * sizeof( ushort3 ) + ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_b .reset()), params.width * params.height * sizeof( ushort1 ) @@ -653,6 +656,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t; + state.params.frame_buffer_M = (ushort3*)(CUdeviceptr)state.accum_buffer_m; state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b; state.params.subframe_index = 0; } @@ -677,7 +681,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path // Launch uchar4* result_buffer_data = output_buffer.map(); state.params.frame_buffer = result_buffer_data; - state.params.frame_buffer_M = (*output_buffer_mask ).map(); state.params.num_lights = 
lightsWrapper.g_lights.size(); state.params.denoise = denoise; for(int j=0;j<1;j++){ @@ -710,7 +713,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path } } output_buffer.unmap(); - (*output_buffer_mask ).unmap(); try { CUDA_SYNC_CHECK(); @@ -1607,14 +1609,6 @@ void optixinit( int argc, char* argv[] ) ); output_buffer_o->setStream( 0 ); } - if (!output_buffer_mask) { - output_buffer_mask.emplace( - output_buffer_type, - state.params.width, - state.params.height - ); - output_buffer_mask->setStream( 0 ); - } #ifdef OPTIX_BASE_GL if (!gl_display_o) { gl_display_o.emplace(sutil::BufferImageFormat::UNSIGNED_BYTE4); @@ -3746,7 +3740,14 @@ std::vector optixgetimg_extra2(std::string name, int w, int h) { } } else if (name == "mask") { - std::copy_n((float*) output_buffer_mask->getHostPointer(), tex_data.size(), tex_data.data()); + std::vector temp_buffer(w * h); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_m.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + float3 v = toFloat(temp_buffer[i]); + tex_data[i * 3 + 0] = v.x; + tex_data[i * 3 + 1] = v.y; + tex_data[i * 3 + 2] = v.z; + } } else if (name == "color") { cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); @@ -3978,7 +3979,6 @@ void optixDestroy() { OptixUtil::shaderCoreLUT.clear(); output_buffer_o .reset(); - output_buffer_mask .reset(); g_StaticMeshPieces .clear(); g_meshPieces .clear(); state = {}; diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index bcf6e90a63..78e26f8b88 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -160,7 +160,7 @@ struct Params float3* accum_buffer_T; ushort1* accum_buffer_B; uchar4* frame_buffer; - float3* frame_buffer_M; + ushort3* frame_buffer_M; float3* debug_buffer; ushort3* albedo_buffer; From 
28d5df2691561033c1b4b9aa79df8a99b57030a0 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 24 Jun 2024 13:58:45 +0800 Subject: [PATCH 075/244] Revert "albedo normal buffer half3" This reverts commit f39e944f08618193cc76b935bb991da16778da34. --- zenovis/xinxinoptix/PTKernel.cu | 8 +++---- zenovis/xinxinoptix/optixPathTracer.cpp | 31 ++++++++++--------------- zenovis/xinxinoptix/optixPathTracer.h | 4 ++-- 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index b7791706ee..843ce5bf47 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -417,10 +417,10 @@ extern "C" __global__ void __raygen__rg() if (params.denoise) { - const float3 accum_albedo_prev = half3_to_float3(params.albedo_buffer[ image_index ]); + const float3 accum_albedo_prev = params.albedo_buffer[ image_index ]; tmp_albedo = lerp(accum_albedo_prev, tmp_albedo, a); - const float3 accum_normal_prev = half3_to_float3(params.normal_buffer[ image_index ]); + const float3 accum_normal_prev = params.normal_buffer[ image_index ]; tmp_normal = lerp(accum_normal_prev, tmp_normal, a); } } @@ -435,8 +435,8 @@ extern "C" __global__ void __raygen__rg() params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask); if (params.denoise) { - params.albedo_buffer[ image_index ] = float3_to_half3(tmp_albedo); - params.normal_buffer[ image_index ] = float3_to_half3(tmp_normal); + params.albedo_buffer[ image_index ] = tmp_albedo; + params.normal_buffer[ image_index ] = tmp_normal; } } diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 231e7e3be7..ee314e3efb 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -571,15 +571,15 @@ static void initLaunchParams( PathTracerState& state ) CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.albedo_buffer_p.reset()), - params.width * params.height 
* sizeof( ushort3 ) + params.width * params.height * sizeof( float3 ) ) ); - state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p; + state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p; CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.normal_buffer_p.reset()), - params.width * params.height * sizeof( ushort3 ) + params.width * params.height * sizeof( float3 ) ) ); - state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p; + state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p; state.params.frame_buffer = nullptr; // Will be set when output buffer is mapped @@ -643,15 +643,15 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.albedo_buffer_p.reset()), - params.width * params.height * sizeof( ushort3 ) + params.width * params.height * sizeof( float3 ) ) ); - state.params.albedo_buffer = (ushort3*)(CUdeviceptr)state.albedo_buffer_p; + state.params.albedo_buffer = (float3*)(CUdeviceptr)state.albedo_buffer_p; CUDA_CHECK( cudaMallocManaged( reinterpret_cast( &state.normal_buffer_p.reset()), - params.width * params.height * sizeof( ushort3 ) + params.width * params.height * sizeof( float3 ) ) ); - state.params.normal_buffer = (ushort3*)(CUdeviceptr)state.normal_buffer_p; + state.params.normal_buffer = (float3*)(CUdeviceptr)state.normal_buffer_p; state.params.accum_buffer_D = (float3*)(CUdeviceptr)state.accum_buffer_d; state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; @@ -3867,24 +3867,17 @@ void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { std::string jpg_native_path = zeno::create_directories_when_write_file(path); stbi_write_jpg(jpg_native_path.c_str(), w, h, 4, p, 100); if (denoise) { - std::vector temp_buffer(w * h); - const ushort3* _albedo_buffer = reinterpret_cast(state.albedo_buffer_p.handle); - for (auto i = 0; i < w * h; i++) { - temp_buffer[i] = 
toFloat(_albedo_buffer[i]); - } + const float* _albedo_buffer = reinterpret_cast(state.albedo_buffer_p.handle); //SaveEXR(_albedo_buffer, w, h, 4, 0, (path+".albedo.exr").c_str(), nullptr); auto a_path = path + ".albedo.pfm"; std::string native_a_path = zeno::create_directories_when_write_file(a_path); - zeno::write_pfm(native_a_path.c_str(), w, h, (float*)temp_buffer.data()); + zeno::write_pfm(native_a_path.c_str(), w, h, _albedo_buffer); - const ushort3* _normal_buffer = reinterpret_cast(state.normal_buffer_p.handle); - for (auto i = 0; i < w * h; i++) { - temp_buffer[i] = toFloat(_normal_buffer[i]); - } + const float* _normal_buffer = reinterpret_cast(state.normal_buffer_p.handle); //SaveEXR(_normal_buffer, w, h, 4, 0, (path+".normal.exr").c_str(), nullptr); auto n_path = path + ".normal.pfm"; std::string native_n_path = zeno::create_directories_when_write_file(n_path); - zeno::write_pfm(native_n_path.c_str(), w, h, (float*)temp_buffer.data()); + zeno::write_pfm(native_n_path.c_str(), w, h, _normal_buffer); } } } diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 78e26f8b88..636366da22 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -163,8 +163,8 @@ struct Params ushort3* frame_buffer_M; float3* debug_buffer; - ushort3* albedo_buffer; - ushort3* normal_buffer; + float3* albedo_buffer; + float3* normal_buffer; unsigned int width; unsigned int height; From 0b105366f00ec6d6d25f8d463bee6ccdf8e93f97 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 24 Jun 2024 19:28:18 +0800 Subject: [PATCH 076/244] improve --- zenovis/xinxinoptix/ChiefDesignerEXR.h | 45 ++++++++++++++++++ zenovis/xinxinoptix/optixPathTracer.cpp | 63 ++++++++++++++++++++++--- 2 files changed, 101 insertions(+), 7 deletions(-) diff --git a/zenovis/xinxinoptix/ChiefDesignerEXR.h b/zenovis/xinxinoptix/ChiefDesignerEXR.h index d7c1835176..e72966418f 100644 --- 
a/zenovis/xinxinoptix/ChiefDesignerEXR.h +++ b/zenovis/xinxinoptix/ChiefDesignerEXR.h @@ -161,4 +161,49 @@ inline void SaveMultiLayerEXR( file.writePixels (height); } +inline void SaveMultiLayerEXR_half( + std::vector pixels + , int width + , int height + , std::vector channels + , const char* exrFilePath +) { + using namespace Imath; + using namespace Imf; + + Header header(width, height); + ChannelList channelList; + + const char *std_suffix = "RGB"; + for (auto channel: channels) { + for (int i = 0; i < 3; i++) { + std::string name = zeno::format("{}{}", channel, std_suffix[i]); + channelList.insert(name, Channel(HALF)); + } + } + + header.channels() = channelList; + + OutputFile file (exrFilePath, header); + FrameBuffer frameBuffer; + + std::vector> data; + for (half *rgb: pixels) { + std::vector half_rgb(width * height * 3); + for (auto i = 0; i < half_rgb.size(); i++) { + half_rgb[i] = rgb[i]; + } + data.push_back(std::move(half_rgb)); + } + + for (auto i = 0; i < channels.size(); i++) { + frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &data[i][0], sizeof (half) * 3, sizeof (half) * width * 3)); + frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &data[i][1], sizeof (half) * 3, sizeof (half) * width * 3)); + frameBuffer.insert (zeno::format("{}B", channels[i]), Slice ( HALF, (char*) &data[i][2], sizeof (half) * 3, sizeof (half) * width * 3)); + } + + file.setFrameBuffer (frameBuffer); + file.writePixels (height); +} + } diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index ee314e3efb..ad0d2ffcc8 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -3746,6 +3746,55 @@ std::vector optixgetimg_extra2(std::string name, int w, int h) { } return tex_data; } + +std::vector optixgetimg_extra3(std::string name, int w, int h) { + std::vector tex_data(w * h * 3); + if (name == "diffuse") { + std::vector temp_buffer(w * h 
* 3); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_d.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + tex_data[i] = temp_buffer[i]; + } + } + else if (name == "specular") { + std::vector temp_buffer(w * h * 3); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_s.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + tex_data[i] = temp_buffer[i]; + } + } + else if (name == "transmit") { + std::vector temp_buffer(w * h * 3); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_t.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + tex_data[i] = temp_buffer[i]; + } + } + else if (name == "background") { + std::vector temp_buffer(w * h); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_b.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + tex_data[i * 3 + 0] = temp_buffer[i]; + tex_data[i * 3 + 1] = temp_buffer[i]; + tex_data[i * 3 + 2] = temp_buffer[i]; + } + } + else if (name == "mask") { + cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_m.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost); + } + else if (name == "color") { + std::vector temp_buffer(w * h * 3); + cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_p.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto i = 0; i < temp_buffer.size(); i++) { + tex_data[i] = temp_buffer[i]; + } + } + else { + throw std::runtime_error("invalid optixgetimg_extra name: " + name); + } + zeno::image_flip_vertical((ushort3*)tex_data.data(), w, h); + return tex_data; +} static void save_exr(float3* ptr, int w, int h, std::string path) { std::vector data(w * h); std::copy_n(ptr, w * h, data.data()); @@ -3826,14 +3875,14 @@ 
void optixrender(int fbo, int samples, bool denoise, bool simpleRender) { if (enable_output_aov) { if (enable_output_exr) { zeno::create_directories_when_write_file(exr_path); - SaveMultiLayerEXR( + SaveMultiLayerEXR_half( { - optixgetimg_extra2("color", w, h).data(), - optixgetimg_extra2("diffuse", w, h).data(), - optixgetimg_extra2("specular", w, h).data(), - optixgetimg_extra2("transmit", w, h).data(), - optixgetimg_extra2("background", w, h).data(), - optixgetimg_extra2("mask", w, h).data(), + optixgetimg_extra3("color", w, h).data(), + optixgetimg_extra3("diffuse", w, h).data(), + optixgetimg_extra3("specular", w, h).data(), + optixgetimg_extra3("transmit", w, h).data(), + optixgetimg_extra3("background", w, h).data(), + optixgetimg_extra3("mask", w, h).data(), }, w, h, From 4c446cb45c93e940d350a487cf34e41996b598fc Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 24 Jun 2024 19:39:07 +0800 Subject: [PATCH 077/244] improve --- zenovis/xinxinoptix/ChiefDesignerEXR.h | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/zenovis/xinxinoptix/ChiefDesignerEXR.h b/zenovis/xinxinoptix/ChiefDesignerEXR.h index e72966418f..31e270a318 100644 --- a/zenovis/xinxinoptix/ChiefDesignerEXR.h +++ b/zenovis/xinxinoptix/ChiefDesignerEXR.h @@ -187,19 +187,10 @@ inline void SaveMultiLayerEXR_half( OutputFile file (exrFilePath, header); FrameBuffer frameBuffer; - std::vector> data; - for (half *rgb: pixels) { - std::vector half_rgb(width * height * 3); - for (auto i = 0; i < half_rgb.size(); i++) { - half_rgb[i] = rgb[i]; - } - data.push_back(std::move(half_rgb)); - } - for (auto i = 0; i < channels.size(); i++) { - frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &data[i][0], sizeof (half) * 3, sizeof (half) * width * 3)); - frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &data[i][1], sizeof (half) * 3, sizeof (half) * width * 3)); - frameBuffer.insert (zeno::format("{}B", 
channels[i]), Slice ( HALF, (char*) &data[i][2], sizeof (half) * 3, sizeof (half) * width * 3)); + frameBuffer.insert (zeno::format("{}R", channels[i]), Slice ( HALF, (char*) &pixels[i][0], sizeof (half) * 3, sizeof (half) * width * 3)); + frameBuffer.insert (zeno::format("{}G", channels[i]), Slice ( HALF, (char*) &pixels[i][1], sizeof (half) * 3, sizeof (half) * width * 3)); + frameBuffer.insert (zeno::format("{}B", channels[i]), Slice ( HALF, (char*) &pixels[i][2], sizeof (half) * 3, sizeof (half) * width * 3)); } file.setFrameBuffer (frameBuffer); From 0ba6b63fd9c54cf4bdcc938488e5aebc08c928d9 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 25 Jun 2024 16:37:31 +0800 Subject: [PATCH 078/244] fix warning --- zenovis/xinxinoptix/LightTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenovis/xinxinoptix/LightTree.cpp b/zenovis/xinxinoptix/LightTree.cpp index f4d99ef417..1787d6e0ce 100644 --- a/zenovis/xinxinoptix/LightTree.cpp +++ b/zenovis/xinxinoptix/LightTree.cpp @@ -20,7 +20,7 @@ LightTreeSampler::LightTreeSampler(std::vector &lights) { LightBounds lightBounds = light.bounds(); if (lightBounds.phi > 0) { - bvhLights.push_back(std::make_pair(i, lightBounds)); + bvhLights.push_back(std::make_pair((int)i, lightBounds)); rootBounds = Union(rootBounds, lightBounds.bounds); } } From 15fa735d0d8fb3f78c3f8b58605228f3c8a0174b Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 25 Jun 2024 16:55:49 +0800 Subject: [PATCH 079/244] improve ptx compiling --- zenovis/xinxinoptix/OptiXStuff.h | 38 ++++++------ zenovis/xinxinoptix/SDK/sutil/sutil.cpp | 81 ++++++------------------- zenovis/xinxinoptix/SDK/sutil/sutil.h | 2 +- zenovis/xinxinoptix/optixPathTracer.cpp | 18 ++---- 4 files changed, 44 insertions(+), 95 deletions(-) diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 6e0dd2a653..4df7b9ea61 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -76,6 +76,19 @@ inline raii 
radiance_miss_group ; inline raii occlusion_miss_group ; inline bool isPipelineCreated = false; ////end material independent stuffs + +inline static auto DefaultCompileOptions() { + OptixModuleCompileOptions module_compile_options = {}; +#if defined( NDEBUG ) + module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; + module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; +#else + module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; + module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; +#endif + return module_compile_options; +} + inline void createContext() { // Initialize CUDA @@ -85,7 +98,11 @@ inline void createContext() OPTIX_CHECK( optixInit() ); OptixDeviceContextOptions options = {}; options.logCallbackFunction = &context_log_cb; +#if defined( NDEBUG ) + options.logCallbackLevel = 0; +#else options.logCallbackLevel = 4; +#endif options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; OPTIX_CHECK( optixDeviceContextCreate( cu_ctx, &options, &context ) ); pipeline_compile_options = {}; @@ -98,14 +115,7 @@ inline void createContext() pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH | OPTIX_EXCEPTION_FLAG_DEBUG; pipeline_compile_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE | OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE | OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; - OptixModuleCompileOptions module_compile_options = {}; - #if defined( NDEBUG ) - module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; - #else - module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; - #endif + OptixModuleCompileOptions module_compile_options = DefaultCompileOptions(); OptixBuiltinISOptions builtin_is_options {}; @@ -169,16 +179,8 @@ static std::vector readData(std::string 
const& filename) inline bool createModule(OptixModule &module, OptixDeviceContext &context, const char *source, const char *name, const char *macro=nullptr, tbb::task_group* _c_group = nullptr) { - OptixModuleCompileOptions module_compile_options = {}; + OptixModuleCompileOptions module_compile_options = DefaultCompileOptions(); module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT; -#if defined( NDEBUG ) - module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_DEFAULT; - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL; -#else - module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; - -#endif char log[2048]; size_t sizeof_log = sizeof( log ); @@ -203,7 +205,7 @@ inline bool createModule(OptixModule &module, OptixDeviceContext &context, const compilerOptions.push_back(macro); } - const char* input = sutil::getInputData( source, macro, name, inputSize, is_success, nullptr, compilerOptions); + const char* input = sutil::getCodePTX( source, macro, name, inputSize, is_success, nullptr, compilerOptions); if(is_success==false) { diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp index c5f72571a6..d8d687724b 100644 --- a/zenovis/xinxinoptix/SDK/sutil/sutil.cpp +++ b/zenovis/xinxinoptix/SDK/sutil/sutil.cpp @@ -881,53 +881,12 @@ inline bool getPtxFromCuString( std::string& ptx, const char* cu_source, const char* name, const char** log_string, - const std::vector& compiler_options) + const std::vector& options) { // Create program nvrtcProgram prog; NVRTC_CHECK_ERROR( nvrtcCreateProgram( &prog, cu_source, name, getIncFileTab().size(), getIncFileTab().data(), getIncPathTab().data() ) ); - // Gather NVRTC options - std::vector options; - - //const char *abs_dirs[] = {SAMPLES_ABSOLUTE_INCLUDE_DIRS}; - //const std::string base_dir = getSampleDir(); - - //// Set sample dir as the primary include 
path - //std::string sample_dir; - //if( sample_directory ) - //{ - //sample_dir = std::string( "-I" ) + base_dir + '/' + sample_directory; - //options.push_back( sample_dir.c_str() ); - //} - - //// Collect include dirs - //std::vector include_dirs; - //const char* abs_dirs[] = {SAMPLES_ABSOLUTE_INCLUDE_DIRS}; - //const char* rel_dirs[] = {SAMPLES_RELATIVE_INCLUDE_DIRS}; - - //for( const char* dir : abs_dirs ) - //{ - //include_dirs.push_back( std::string( "-I" ) + dir ); - //} - //for( const char* dir : rel_dirs ) - //{ - //include_dirs.push_back( "-I" + base_dir + '/' + dir ); - //} - //for( const std::string& dir : include_dirs) - //{ - //options.push_back( dir.c_str() ); - //} - //std::vector fuckcpp; - //for( const char* dir : abs_dirs ) - //{ - //fuckcpp.push_back(std::string( "-I" ) + dir); - //options.push_back( fuckcpp.back().c_str() ); - //} - - // Collect NVRTC options - std::copy( std::begin( compiler_options ), std::end( compiler_options ), std::back_inserter( options ) ); - // JIT compile CU to PTX const nvrtcResult compileRes = nvrtcCompileProgram( prog, (int)options.size(), options.data() ); @@ -1049,13 +1008,13 @@ static void getInputDataFromFile( std::string& ptx, const char* sample_name, con struct PtxSourceCache { - std::map map; + std::map< std::string, std::shared_ptr > map; ~PtxSourceCache() { - for( std::map::const_iterator it = map.begin(); it != map.end(); ++it ) - delete it->second; + map = {}; } }; + static PtxSourceCache g_ptxSourceCache; static std::string ridincs(std::string s) { @@ -1092,37 +1051,31 @@ static const char* getOptixHeader() { } #endif -const char* getInputData( const char* source, - const char* macro, - const char* name, - size_t& dataSize, - bool & is_success, - const char** log, - const std::vector& compilerOptions) +const char* getCodePTX( const char* source, + const char* macro, + const char* name, + size_t& dataSize, + bool & is_success, + const char** log, + const std::vector& compilerOptions) { if( log ) *log = 
NULL; - std::string * ptx, cu; - std::string key = std::string( source ) + (macro!=nullptr? std::string(macro):""); - std::map::iterator elem = g_ptxSourceCache.map.find( key ); + std::shared_ptr ptx {}; + std::string key = std::string( source ) + (macro!=nullptr? std::string(macro):""); - if( elem == g_ptxSourceCache.map.end() ) + if( g_ptxSourceCache.map.count(key) == 0 ) { - ptx = new std::string(); -#if CUDA_NVRTC_ENABLED - //getCuStringFromFile( cu, location, sampleDir, filename ); - //cu.replace(cu.find("#include \n"), strlen("#include \n"), getOptixHeader()); + ptx = std::make_shared(); is_success = getPtxFromCuString( *ptx, source, name, log, compilerOptions ); -#else - getInputDataFromFile( *ptx, sample, filename ); -#endif + if(is_success==true) g_ptxSourceCache.map[key] = ptx; } else { - ptx = elem->second; + ptx = g_ptxSourceCache.map[key]; is_success = true; } dataSize = ptx->size(); diff --git a/zenovis/xinxinoptix/SDK/sutil/sutil.h b/zenovis/xinxinoptix/SDK/sutil/sutil.h index 107287e83b..6807250940 100644 --- a/zenovis/xinxinoptix/SDK/sutil/sutil.h +++ b/zenovis/xinxinoptix/SDK/sutil/sutil.h @@ -144,7 +144,7 @@ SUTILAPI void calculateCameraVariables( double SUTILAPI currentTime(); // Get input data, either pre-compiled with NVCC or JIT compiled by NVRTC. 
-SUTILAPI const char* getInputData( const char* source, +SUTILAPI const char* getCodePTX( const char* source, const char* macro, const char* name, size_t& dataSize, diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index ad38fddad9..8cd1bf4861 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -709,9 +709,7 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path ) ); //CUDA_SYNC_CHECK(); - - /* printf("mama%d\n", std::this_thread::get_id()); */ - /* fflush(stdout); */ + OPTIX_CHECK( optixLaunch( state.pipeline, 0, @@ -2626,7 +2624,7 @@ void optixupdatematerial(std::vector> &shaders) auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); shaderCore->moduleIS = &OptixUtil::sphere_module; - shaderCore->loadProgram(0, "--define-macro=_SPHERE_"); + shaderCore->loadProgram(1, "--define-macro=_SPHERE_"); shaderCoreLUT.emplace(std::tuple{"DeflMatShader.cu", ShaderMaker::Sphere}, shaderCore); }); @@ -2634,16 +2632,12 @@ void optixupdatematerial(std::vector> &shaders) auto shader_string = sutil::lookupIncFile("Light.cu"); auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); - shaderCore->loadProgram(0); + shaderCore->loadProgram(2); shaderCoreLUT.emplace(std::tuple{"Light.cu", ShaderMaker::Mesh}, shaderCore); - }); - - OptixUtil::_compile_group.run([&] () { - auto shader_string = sutil::lookupIncFile("Light.cu"); - auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); + shaderCore = std::make_shared(shader_string, "__closesthit__radiance", "__anyhit__shadow_cutout"); shaderCore->moduleIS = &OptixUtil::sphere_module; - shaderCore->loadProgram(0); + shaderCore->loadProgram(3); shaderCoreLUT.emplace(std::tuple{"Light.cu", ShaderMaker::Sphere}, shaderCore); }); @@ -2652,7 +2646,7 @@ void 
optixupdatematerial(std::vector> &shaders) auto shaderCore = std::make_shared(shader_string, "__closesthit__radiance_volume", "__anyhit__occlusion_volume", "__intersection__volume"); - shaderCore->loadProgram(0); + shaderCore->loadProgram(4); shaderCoreLUT.emplace(std::tuple{"volume.cu", ShaderMaker::Volume}, shaderCore); }); From 6942696b7ab0815c62997b0409ccbd98e21fbb08 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 25 Jun 2024 17:38:30 +0800 Subject: [PATCH 080/244] reduce optix debug level --- zenovis/xinxinoptix/OptiXStuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 4df7b9ea61..310d015263 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -84,7 +84,7 @@ inline static auto DefaultCompileOptions() { module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_NONE; #else module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0; - module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL; + module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE; #endif return module_compile_options; } From 9ba5ef443c345075f9eb1ab12f2cc67168f927b5 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 25 Jun 2024 18:22:47 +0800 Subject: [PATCH 081/244] fix crash for debug build --- zenovis/xinxinoptix/optixPathTracer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 08795f21d0..6575430c19 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -3961,11 +3961,17 @@ void optixCleanup() { state.params.sky_strength = 1.0f; state.params.sky_texture; - for (auto& [k, v] : OptixUtil::g_tex) { + std::vector keys; + + for (auto& [k, _] : OptixUtil::g_tex) { if (k != OptixUtil::default_sky_tex) { - OptixUtil::removeTexture(k); + keys.push_back(k); } } + + for (auto& 
k : keys) { + OptixUtil::removeTexture(k); + } OptixUtil::sky_tex = OptixUtil::default_sky_tex; From ea8f963b2aba93622a8e5ddca2e223e363769e87 Mon Sep 17 00:00:00 2001 From: Zhou Hang <765229842@qq.com> Date: Mon, 1 Jul 2024 14:51:49 +0800 Subject: [PATCH 082/244] Improve camera (#1952) * camera quat * improve * roll * roll ok * work * use quat * rot * translate * focus * refactor * remove ZxxHappyLookParam * refactor * CameraLookToDir * refactor * getDepth * some bug * fix * depth-aware * mouse wheel * glDepthMask * snap surface * blank rot * blank dolly * depth-aware button * fix camera move * First Person Navigation: movement speed * statusbarShowMessage * refactor * wasd move step * FPN remove roll * auto flip * refactor * click pos * pos channel * optix click pos * exchange ctrl alt * Revert "optix click pos" This reverts commit 7bdc1533e8fd35c3c420b2c66531eb8cebc26c33. * Revert "pos channel" This reverts commit 835cfec569640d33b340db590e2b6cab0caec61b. * pos channel 2 * optix click pos 2 * refactor * camera reset * remove view proj matrix --- projects/FBX/MayaCamera.cpp | 54 --- ui/zenoedit/dock/docktabcontent.cpp | 29 ++ ui/zenoedit/dock/docktabcontent.h | 3 + ui/zenoedit/nodesys/cameranode.cpp | 22 +- ui/zenoedit/viewport/cameracontrol.cpp | 431 +++++++++--------- ui/zenoedit/viewport/cameracontrol.h | 37 +- ui/zenoedit/viewport/displaywidget.cpp | 23 +- ui/zenoedit/viewport/displaywidget.h | 2 +- ui/zenoedit/viewport/optixviewport.cpp | 16 +- ui/zenoedit/viewport/optixviewport.h | 2 +- ui/zenoedit/viewport/viewportwidget.cpp | 16 +- ui/zenoedit/viewport/viewportwidget.h | 3 +- ui/zenoedit/viewport/zenovis.cpp | 1 - ui/zenoedit/viewport/zoptixviewport.cpp | 16 +- ui/zenoedit/viewport/zoptixviewport.h | 3 +- ui/zenoedit/viewportinteraction/transform.cpp | 22 +- ui/zenoedit/viewportinteraction/transform.h | 10 +- ui/zenoedit/zenomainwindow.cpp | 4 + ui/zenoedit/zenomainwindow.h | 1 + zeno/include/zeno/types/CameraObject.h | 22 +- zeno/src/nodes/CameraNodes.cpp 
| 48 ++ zenovis/include/zenovis/Camera.h | 91 ++-- zenovis/include/zenovis/RenderEngine.h | 1 + .../include/zenovis/bate/FrameBufferRender.h | 43 +- zenovis/src/Camera.cpp | 106 +---- zenovis/src/bate/GraphicPrimitive.cpp | 2 +- zenovis/src/bate/GraphicRotateHandler.cpp | 2 +- zenovis/src/bate/GraphicScaleHandler.cpp | 6 +- zenovis/src/bate/GraphicTransHandler.cpp | 2 +- zenovis/src/bate/HudGraphicGrid.cpp | 6 +- zenovis/src/bate/RenderEngineBate.cpp | 27 +- zenovis/src/optx/RenderEngineOptx.cpp | 19 +- zenovis/src/zhxx/RenderEngineZhxx.cpp | 5 +- zenovis/xinxinoptix/DeflMatShader.cu | 1 + zenovis/xinxinoptix/PTKernel.cu | 5 +- zenovis/xinxinoptix/TraceStuff.h | 1 + zenovis/xinxinoptix/optixPathTracer.cpp | 30 +- zenovis/xinxinoptix/optixPathTracer.h | 1 + zenovis/xinxinoptix/xinxinoptixapi.h | 2 + 39 files changed, 610 insertions(+), 505 deletions(-) diff --git a/projects/FBX/MayaCamera.cpp b/projects/FBX/MayaCamera.cpp index f5fc3cb4ed..e100cdec00 100644 --- a/projects/FBX/MayaCamera.cpp +++ b/projects/FBX/MayaCamera.cpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -32,7 +31,6 @@ #include #include -#include #define SET_CAMERA_DATA \ out_pos = (n->pos); \ @@ -88,58 +86,6 @@ ZENO_DEFNODE(CihouMayaCameraFov)({ {"FBX"}, }); -struct CameraNode: zeno::INode{ - virtual void apply() override { - auto camera = std::make_unique(); - - camera->pos = get_input2("pos"); - camera->up = get_input2("up"); - camera->view = get_input2("view"); - camera->fov = get_input2("fov"); - camera->aperture = get_input2("aperture"); - camera->focalPlaneDistance = get_input2("focalPlaneDistance"); - camera->userData().set2("frame", get_input2("frame")); - - auto other_props = get_input2("other"); - std::regex reg(","); - std::sregex_token_iterator p(other_props.begin(), other_props.end(), reg, -1); - std::sregex_token_iterator end; - std::vector prop_vals; - while (p != end) { - prop_vals.push_back(std::stof(*p)); - p++; - } - if (prop_vals.size() == 6) { - 
camera->isSet = true; - camera->center = {prop_vals[0], prop_vals[1], prop_vals[2]}; - camera->theta = prop_vals[3]; - camera->phi = prop_vals[4]; - camera->radius = prop_vals[5]; - } - - set_output("camera", std::move(camera)); - } -}; - -ZENO_DEFNODE(CameraNode)({ - { - {"vec3f", "pos", "0,0,5"}, - {"vec3f", "up", "0,1,0"}, - {"vec3f", "view", "0,0,-1"}, - {"float", "fov", "45"}, - {"float", "aperture", "11"}, - {"float", "focalPlaneDistance", "2.0"}, - {"string", "other", ""}, - {"int", "frame", "0"}, - }, - { - {"CameraObject", "camera"}, - }, - { - }, - {"FBX"}, -}); - struct CameraEval: zeno::INode { glm::quat to_quat(zeno::vec3f up, zeno::vec3f view){ diff --git a/ui/zenoedit/dock/docktabcontent.cpp b/ui/zenoedit/dock/docktabcontent.cpp index f034cf41d9..8e679b65d0 100644 --- a/ui/zenoedit/dock/docktabcontent.cpp +++ b/ui/zenoedit/dock/docktabcontent.cpp @@ -904,6 +904,18 @@ void DockContent_View::initToolbar(QHBoxLayout* pToolLayout) pToolLayout->addWidget(m_camera_setting); } + { + pToolLayout->addWidget(new ZLineWidget(false, QColor("#121416"))); + m_depth = new QCheckBox(tr("Depth")); + m_depth->setStyleSheet("color: white;"); + m_depth->setCheckState(Qt::Checked); + pToolLayout->addWidget(m_depth); + m_FPN = new QCheckBox(tr("FPN")); + m_FPN->setStyleSheet("color: white;"); + pToolLayout->addWidget(m_FPN); + m_Reset = new QPushButton(tr("Reset")); + pToolLayout->addWidget(m_Reset); + } pToolLayout->addWidget(new ZLineWidget(false, QColor("#121416"))); pToolLayout->addWidget(m_screenshoot); pToolLayout->addWidget(m_recordVideo); @@ -941,6 +953,16 @@ void DockContent_View::initConnections() }); } + connect(m_depth, &QCheckBox::stateChanged, this, [=](int state) { + bool bChecked = (state == Qt::Checked); + zeno::getSession().userData().set2("viewport-depth-aware-navigation", bChecked); + }); + + connect(m_FPN, &QCheckBox::stateChanged, this, [=](int state) { + bool bChecked = (state == Qt::Checked); + 
zeno::getSession().userData().set2("viewport-FPN-navigation", bChecked); + }); + if (m_camera_setting) { connect(m_camera_setting, &QPushButton::clicked, this, [=](bool bToggled) { zenovis::ZOptixCameraSettingInfo info = m_pDisplay->getCamera(); @@ -953,6 +975,13 @@ void DockContent_View::initConnections() } }); } + if (m_Reset) { + connect(m_Reset, &QPushButton::clicked, this, [=](bool bToggled) { + auto *scene = m_pDisplay->getZenoVis()->getSession()->get_scene(); + scene->camera->reset(); + m_pDisplay->updateFrame(); + }); + } connect(m_smooth_shading, &ZToolBarButton::toggled, this, [=](bool bToggled) { m_pDisplay->onCommandDispatched(ZenoMainWindow::ACTION_SMOOTH_SHADING, bToggled); diff --git a/ui/zenoedit/dock/docktabcontent.h b/ui/zenoedit/dock/docktabcontent.h index 5a59ae87c1..c173ca61fa 100644 --- a/ui/zenoedit/dock/docktabcontent.h +++ b/ui/zenoedit/dock/docktabcontent.h @@ -162,6 +162,9 @@ class DockContent_View : public DockToolbarWidget QPushButton *m_camera_setting = nullptr; QCheckBox *m_background; QCheckBox *m_uv_mode = nullptr; + QCheckBox *m_depth = nullptr; + QCheckBox *m_FPN = nullptr; + QPushButton *m_Reset = nullptr; QComboBox* m_cbRes; QAction* m_pFocus; diff --git a/ui/zenoedit/nodesys/cameranode.cpp b/ui/zenoedit/nodesys/cameranode.cpp index 7194cd518c..c4eb08d78a 100644 --- a/ui/zenoedit/nodesys/cameranode.cpp +++ b/ui/zenoedit/nodesys/cameranode.cpp @@ -86,21 +86,23 @@ void CameraNode::onEditClicked() auto camera = *(scene->camera.get()); INPUT_SOCKET pos = inputs["pos"]; - vec = {camera.m_lodcenter[0], camera.m_lodcenter[1], camera.m_lodcenter[2]}; + vec = {camera.m_pos[0], camera.m_pos[1], camera.m_pos[2]}; info.name = "pos"; info.oldValue = pos.info.defaultValue; info.newValue = QVariant::fromValue(vec); pModel->updateSocketDefl(nodeid, info, this->subgIndex(), true); + auto m_lodup = camera.get_lodup(); + auto m_lodfront = camera.get_lodfront(); INPUT_SOCKET up = inputs["up"]; - vec = {camera.m_lodup[0], camera.m_lodup[1], 
camera.m_lodup[2]}; + vec = {m_lodup[0], m_lodup[1], m_lodup[2]}; info.name = "up"; info.oldValue = up.info.defaultValue; info.newValue = QVariant::fromValue(vec); pModel->updateSocketDefl(nodeid, info, this->subgIndex(), true); INPUT_SOCKET view = inputs["view"]; - vec = {camera.m_lodfront[0], camera.m_lodfront[1], camera.m_lodfront[2]}; + vec = {m_lodfront[0], m_lodfront[1], m_lodfront[2]}; info.name = "view"; info.oldValue = view.info.defaultValue; info.newValue = QVariant::fromValue(vec); @@ -137,11 +139,11 @@ void CameraNode::onEditClicked() INPUT_SOCKET other = inputs["other"]; std::string other_prop; - auto center = camera.m_center; + auto center = camera.m_pivot; other_prop += zeno::format("{},{},{},", center[0], center[1], center[2]); - other_prop += zeno::format("{},", camera.m_theta); - other_prop += zeno::format("{},", camera.m_phi); - other_prop += zeno::format("{},", camera.m_radius); + other_prop += zeno::format("{},", 0); + other_prop += zeno::format("{},", 0); + other_prop += zeno::format("{},", camera.get_radius()); info.name = "other"; info.oldValue = other.info.defaultValue; info.newValue = QVariant::fromValue(QString(other_prop.c_str())); @@ -191,10 +193,10 @@ void LightNode::onEditClicked(){ PARAM_UPDATE_INFO info; auto camera = *(scene->camera.get()); - auto original_pos = glm::vec3(camera.m_lodcenter); + auto original_pos = glm::vec3(camera.m_pos); // auto pos = glm::normalize(glm::vec3(camProp[0], camProp[1], camProp[2])); - auto view = -1.0f * glm::normalize(camera.m_lodfront); - auto up = glm::normalize(camera.m_lodup); + auto view = -1.0f * glm::normalize(camera.get_lodfront()); + auto up = glm::normalize(camera.get_lodup()); auto right = glm::normalize(glm::cross(up, view)); glm::mat3 rotation(right, up, view); diff --git a/ui/zenoedit/viewport/cameracontrol.cpp b/ui/zenoedit/viewport/cameracontrol.cpp index 3ee954ef05..6919ba8126 100644 --- a/ui/zenoedit/viewport/cameracontrol.cpp +++ b/ui/zenoedit/viewport/cameracontrol.cpp @@ -7,6 
+7,8 @@ #include "nodesview/zenographseditor.h" #include #include "settings/zenosettingsmanager.h" +#include "glm/gtx/quaternion.hpp" +#include "zeno/core/Session.h" #include @@ -32,38 +34,29 @@ void CameraControl::setRes(QVector2D res) { m_res = res; } -float CameraControl::getRoll() const { +glm::vec3 CameraControl::getPos() const { auto *scene = m_zenovis->getSession()->get_scene(); - return scene->camera->m_roll; + return scene->camera->getPos(); } -void CameraControl::setRoll(float roll) { +void CameraControl::setPos(glm::vec3 value) { auto *scene = m_zenovis->getSession()->get_scene(); - scene->camera->m_roll = roll; + scene->camera->setPos(value); } - -float CameraControl::getTheta() const { - auto *scene = m_zenovis->getSession()->get_scene(); - return scene->camera->m_theta; -} -void CameraControl::setTheta(float theta) { - auto *scene = m_zenovis->getSession()->get_scene(); - scene->camera->m_theta = theta; -} -float CameraControl::getPhi() const { +glm::vec3 CameraControl::getPivot() const { auto *scene = m_zenovis->getSession()->get_scene(); - return scene->camera->m_phi; + return scene->camera->getPivot(); } -void CameraControl::setPhi(float phi) { +void CameraControl::setPivot(glm::vec3 value) { auto *scene = m_zenovis->getSession()->get_scene(); - scene->camera->m_phi = phi; + scene->camera->setPivot(value); } -zeno::vec3f CameraControl::getCenter() const { +glm::quat CameraControl::getRotation() { auto *scene = m_zenovis->getSession()->get_scene(); - return scene->camera->m_center; + return scene->camera->m_rotation; } -void CameraControl::setCenter(zeno::vec3f center) { +void CameraControl::setRotation(glm::quat value) { auto *scene = m_zenovis->getSession()->get_scene(); - scene->camera->m_center = center; + scene->camera->m_rotation = value; } bool CameraControl::getOrthoMode() const { auto *scene = m_zenovis->getSession()->get_scene(); @@ -75,11 +68,7 @@ void CameraControl::setOrthoMode(bool orthoMode) { } float CameraControl::getRadius() const 
{ auto *scene = m_zenovis->getSession()->get_scene(); - return scene->camera->m_radius; -} -void CameraControl::setRadius(float radius) { - auto *scene = m_zenovis->getSession()->get_scene(); - scene->camera->m_radius = radius; + return scene->camera->get_radius(); } float CameraControl::getFOV() const { @@ -115,40 +104,26 @@ void CameraControl::fakeMousePressEvent(QMouseEvent *event) auto scene = m_zenovis->getSession()->get_scene(); if (event->button() == Qt::MiddleButton) { middle_button_pressed = true; + if (zeno::getSession().userData().get2("viewport-depth-aware-navigation", true)) { + m_hit_posWS = scene->renderMan->getEngine()->getClickedPos(event->x(), event->y()); + if (m_hit_posWS.has_value()) { + scene->camera->setPivot(m_hit_posWS.value()); + } + } } auto m_picker = this->m_picker.lock(); auto m_transformer = this->m_transformer.lock(); - if (scene->camera->m_need_sync) { - scene->camera->m_need_sync = false; - if (bool(m_picker) && scene->camera->m_auto_radius) { - m_picker->set_picked_depth_callback([&] (float depth, int x, int y) { - if (depth < 0.001f) { - return; - } - glm::vec4 ndc = {0, 0, depth, 1}; - glm::vec4 posCS = glm::inverse(scene->camera->m_proj) * ndc; - glm::vec4 posVS = posCS / posCS.w; - glm::vec4 pWS = glm::inverse(scene->camera->m_view) * posVS; - glm::vec3 p3WS = glm::vec3(pWS.x, pWS.y, pWS.z); - setRadius(glm::length(scene->camera->m_lodcenter - p3WS)); - setCenter({p3WS.x, p3WS.y, p3WS.z}); - }); - int mid_x = int(this->res().x() * 0.5); - int mid_y = int(this->res().y() * 0.5); - m_picker->pick_depth(mid_x, mid_y); - } - } int button = Qt::NoButton; ZenoSettingsManager& settings = ZenoSettingsManager::GetInstance(); settings.getViewShortCut(ShortCut_MovingView, button); settings.getViewShortCut(ShortCut_RotatingView, button); bool bTransform = false; - auto front = scene->camera->m_lodfront; - auto dir = screenToWorldRay(event->x() / res().x(), event->y() / res().y()); + auto front = scene->camera->get_lodfront(); + auto dir = 
screenPosToRayWS(event->x() / res().x(), event->y() / res().y()); if (m_transformer) { if (event->buttons() & Qt::LeftButton && !scene->selected.empty() && m_transformer->isTransformMode() && - m_transformer->clickedAnyHandler(realPos(), dir, front)) + m_transformer->clickedAnyHandler(getPos(), dir, front)) { bTransform = true; } @@ -165,62 +140,24 @@ void CameraControl::fakeMousePressEvent(QMouseEvent *event) } } -void CameraControl::lookTo(int dir) { - if (dir < 0 || dir > 6) - return; - auto x_axis = QVector3D(1, 0, 0); - auto y_axis = QVector3D(0, 1, 0); - auto z_axis = QVector3D(0, 0, 1); - +void CameraControl::lookTo(zenovis::CameraLookToDir dir) { ZASSERT_EXIT(m_zenovis); - auto c = getCenter(); - QVector3D center = {c[0], c[1], c[2]}; - auto radius = getRadius(); switch (dir) { - case 0: - // front view - setTheta(0); - setPhi(0); - m_zenovis->updateCameraFront(center + z_axis * radius, -z_axis, y_axis); + case zenovis::CameraLookToDir::front_view: break; - case 1: - // right view - setTheta(0); - setPhi(-glm::pi() / 2); - m_zenovis->updateCameraFront(center + x_axis * radius, -x_axis, y_axis); + case zenovis::CameraLookToDir::right_view: break; - case 2: - // top view - setTheta(-glm::pi() / 2); - setPhi(0); - m_zenovis->updateCameraFront(center + y_axis * radius, -z_axis, y_axis); + case zenovis::CameraLookToDir::top_view: break; - case 3: - // back view - setTheta(0); - setPhi(glm::pi()); - m_zenovis->updateCameraFront(center - z_axis * radius, z_axis, y_axis); + case zenovis::CameraLookToDir::back_view: break; - case 4: - // left view - setTheta(0); - setPhi(glm::pi() / 2); - m_zenovis->updateCameraFront(center - x_axis * radius, x_axis, y_axis); + case zenovis::CameraLookToDir::left_view: break; - case 5: - // bottom view - setTheta(glm::pi() / 2); - setPhi(0); - m_zenovis->updateCameraFront(center - y_axis * radius, y_axis, z_axis); + case zenovis::CameraLookToDir::bottom_view: + break; + case zenovis::CameraLookToDir::back_to_origin: break; - case 6: 
- // back to origin - setCenter({0, 0, 0}); - setRadius(5); - setTheta(0); - setPhi(0); - m_zenovis->updateCameraFront(center, -z_axis, y_axis); default: break; } setOrthoMode(true); @@ -311,37 +248,37 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event) if (m_transformer) { bTransform = m_transformer->isTransforming(); // check if hover a handler - auto front = scene->camera->m_lodfront; - auto dir = screenToWorldRay(event->x() / res().x(), event->y() / res().y()); + auto front = scene->camera->get_lodfront(); + auto dir = screenPosToRayWS(event->x() / res().x(), event->y() / res().y()); if (!scene->selected.empty() && !(event->buttons() & Qt::LeftButton)) { - m_transformer->hoveredAnyHandler(realPos(), dir, front); + m_transformer->hoveredAnyHandler(getPos(), dir, front); } } - if (!bTransform && ctrl_pressed && (event->buttons() & Qt::MiddleButton)) { - float ratio = QApplication::desktop()->devicePixelRatio(); - float dx = xpos - m_lastMidButtonPos.x(), dy = ypos - m_lastMidButtonPos.y(); - dx *= ratio / m_res[0]; - dy *= ratio / m_res[1]; - float cos_t = cos(getTheta()); - float sin_t = sin(getTheta()); - float cos_p = cos(getPhi()); - float sin_p = sin(getPhi()); - QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p); - QVector3D delta = -back * dy; - auto c = getCenter(); - QVector3D center = {c[0], c[1], c[2]}; - center += delta * getRadius(); - setCenter({float(center.x()), float(center.y()), float(center.z())}); + if (!bTransform && alt_pressed && (event->buttons() & Qt::MiddleButton)) { + // zoom + if (zeno::getSession().userData().get2("viewport-FPN-navigation", false) == false) { + float dy = ypos - m_lastMidButtonPos.y(); + auto step = 0.99f; + float scale = glm::pow(step, -dy); + auto pos = getPos(); + auto pivot = getPivot(); + auto new_pos = (pos - pivot) * scale + pivot; + setPos(new_pos); + } m_lastMidButtonPos = QPointF(xpos, ypos); } - else if (!bTransform && alt_pressed && (event->buttons() & Qt::MiddleButton)) { + else if 
(!bTransform && ctrl_pressed && (event->buttons() & Qt::MiddleButton)) { + // rot roll + float step = 1.0f; float ratio = QApplication::desktop()->devicePixelRatio(); float dy = ypos - m_lastMidButtonPos.y(); - dy *= ratio / m_res[1]; - float roll = getRoll(); - roll += dy; - setRoll(roll); + dy *= ratio / m_res[1] * step; + { + auto rot = getRotation(); + rot = rot * glm::angleAxis(dy, glm::vec3(0, 0, 1)); + setRotation(rot); + } m_lastMidButtonPos = QPointF(xpos, ypos); } else if (!bTransform && (event->buttons() & (rotateButton | moveButton))) { @@ -352,41 +289,61 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event) //bool shift_pressed = event->modifiers() & Qt::ShiftModifier; Qt::KeyboardModifiers modifiers = event->modifiers(); if ((moveKey == modifiers) && (event->buttons() & moveButton)) { - float cos_t = cos(getTheta()); - float sin_t = sin(getTheta()); - float cos_p = cos(getPhi()); - float sin_p = sin(getPhi()); - QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p); - QVector3D up(-sin_t * sin_p, cos_t, sin_t * cos_p); - QVector3D right = QVector3D::crossProduct(up, back); - up = QVector3D::crossProduct(back, right); - right.normalize(); - up.normalize(); - QVector3D delta = right * dx + up * dy; - auto c = getCenter(); - QVector3D center = {c[0], c[1], c[2]}; - if (getOrthoMode()) { - delta = (right * dx * m_res[0] / m_res[1] + up * dy) * 2; - } - center += delta * getRadius(); - setCenter({float(center.x()), float(center.y()), float(center.z())}); - } else if ((rotateKey == modifiers) && (event->buttons() & rotateButton)) { - setOrthoMode(false); - setTheta(getTheta() - dy * M_PI); - if (int(abs(getTheta()) / M_PI) % 2 == 0) { - if (glm::fract(abs(getTheta()) / M_PI) < 0.5) { - setPhi(getPhi() + dx * M_PI); - } - else { - setPhi(getPhi() - dx * M_PI); + // translate + if (m_hit_posWS.has_value()) { + auto ray = screenPosToRayWS(event->x() / res().x(), event->y() / res().y()); + auto new_pos = intersectRayPlane(m_hit_posWS.value(), ray * 
(-1.0f), getPos(), getViewDir()); + if (new_pos.has_value()) { + auto diff = new_pos.value() - getPos(); + setPivot(getPivot() + diff); + setPos(new_pos.value()); } } else { - if (glm::fract(abs(getTheta()) / M_PI) < 0.5) { - setPhi(getPhi() - dx * M_PI); + auto left = getRightDir() * -1.0f; + auto up = getUpDir(); + auto delta = left * dx + up * dy; + if (getOrthoMode()) { + delta = (left * dx * float(m_res[0]) / float(m_res[1]) + up * dy) * 2.0f; + } + auto diff = delta * getRadius(); + setPivot(getPivot() + diff); + auto new_pos = getPos() + diff; + setPos(new_pos); + } + } else if ((rotateKey == modifiers) && (event->buttons() & rotateButton)) { + float step = 4.0f; + dx *= step; + if (getUpDir().y < 0) { + dx *= -1; + } + dy *= step; + // rot yaw pitch + setOrthoMode(false); + { + auto rot = getRotation(); + auto beforeMat = glm::toMat3(rot); + rot = glm::angleAxis(-dx, glm::vec3(0, 1, 0)) * rot; + rot = rot * glm::angleAxis(-dy, glm::vec3(1, 0, 0)); + setRotation(rot); + auto afterMat = glm::toMat3(rot); + if (zeno::getSession().userData().get2("viewport-FPN-navigation", false)) { + if (glm::abs(glm::dot(getRightDir(), {0, 1, 0})) > 0.01) { + auto right_dir = glm::cross(getViewDir(), {0, 1, 0}); + auto up_dir = glm::cross(right_dir, getViewDir()); + glm::mat3 rotation; + rotation[0] = right_dir; + rotation[1] = up_dir; + rotation[2] = -getViewDir(); + setRotation(glm::quat_cast(rotation)); + }; + setPivot(getPos()); } else { - setPhi(getPhi() + dx * M_PI); + auto pos = getPos(); + auto pivot = getPivot(); + auto new_pos = afterMat * glm::inverse(beforeMat) * (pos - pivot) + pivot; + setPos(new_pos); } } } @@ -395,8 +352,7 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event) if (m_transformer) { if (m_transformer->isTransforming()) { - auto dir = screenToWorldRay(event->pos().x() / res().x(), event->pos().y() / res().y()); - auto camera_pos = realPos(); + auto dir = screenPosToRayWS(event->pos().x() / res().x(), event->pos().y() / res().y()); // mouse 
pos auto mouse_pos = glm::vec2(xpos, ypos); @@ -407,8 +363,8 @@ void CameraControl::fakeMouseMoveEvent(QMouseEvent *event) mouse_start[0] = (2 * mouse_start[0] / res().x()) - 1; mouse_start[1] = 1 - (2 * mouse_start[1] / res().y()); - auto vp = scene->camera->m_proj * scene->camera->m_view; - m_transformer->transform(camera_pos, dir, mouse_start, mouse_pos, scene->camera->m_lodfront, vp); + auto vp = scene->camera->get_proj_matrix() * scene->camera->get_view_matrix(); + m_transformer->transform(getPos(), dir, mouse_start, mouse_pos, scene->camera->get_lodfront(), vp); zenoApp->getMainWindow()->updateViewport(); } else { float min_x = std::min((float)m_boundRectStartPos.x(), (float)event->x()) / m_res.x(); @@ -462,7 +418,53 @@ void CameraControl::fakeWheelEvent(QWheelEvent *event) { float temp = getDisPlane() + delta * 0.05; setDisPlane(temp >= 0.05 ? temp : 0.05); } else if (scaleKey == 0 || event->modifiers() & scaleKey){ - setRadius(getRadius() * scale); + if (zeno::getSession().userData().get2("viewport-FPN-navigation", false)) { + auto FPN_move_speed = zeno::getSession().userData().get2("viewport-FPN-move-speed", 0); + FPN_move_speed += dy > 0? 
1: -1; + zeno::getSession().userData().set2("viewport-FPN-move-speed", FPN_move_speed); + auto pMainWindow = zenoApp->getMainWindow(); + if (pMainWindow) { + pMainWindow->statusbarShowMessage(zeno::format("First Person Navigation: movement speed level: {}", FPN_move_speed), 10000); + } + } + else { + auto pos = getPos(); + if (zeno::getSession().userData().get2("viewport-depth-aware-navigation", true)) { + auto session = m_zenovis->getSession(); + auto scene = session->get_scene(); + auto hit_posWS = scene->renderMan->getEngine()->getClickedPos(event->x(), event->y()); + if (hit_posWS.has_value()) { + auto pivot = hit_posWS.value(); + auto new_pos = (pos - pivot) * scale + pivot; + setPos(new_pos); + } + else { + auto posOnFloorWS = screenHitOnFloorWS(event->x() / res().x(), event->y() / res().y()); + auto pivot = posOnFloorWS; + if (dot((pivot - pos), getViewDir()) > 0) { + auto translate = (pivot - pos) * (1 - scale); + if (glm::length(translate) < 0.01) { + translate = glm::normalize(translate) * 0.01f; + } + auto new_pos = translate + pos; + setPos(new_pos); + } + else { + auto translate = screenPosToRayWS(event->x() / res().x(), event->y() / res().y()) * getPos().y * (1 - scale); + if (getPos().y < 0) { + translate *= -1; + } + auto new_pos = translate + pos; + setPos(new_pos); + } + } + } + else { + auto pivot = getPivot(); + auto new_pos = (pos - pivot) * scale + pivot; + setPos(new_pos); + } + } } updatePerspective(); @@ -521,67 +523,69 @@ void CameraControl::fakeMouseDoubleClickEvent(QMouseEvent *event) } } } -//void CameraControl::fakeMouseDoubleClickEvent(QMouseEvent* event) { -void CameraControl::setKeyFrame() { - //todo -} void CameraControl::focus(QVector3D center, float radius) { - setCenter({float(center.x()), float(center.y()), float(center.z())}); + setPivot({float(center.x()), float(center.y()), float(center.z())}); if (getFOV() >= 1e-6) radius /= (getFOV() / 45.0f); - setRadius(radius); + auto dir = getRotation() * glm::vec3(0, 0, 1) * radius; + 
setPos(getPivot() + dir); updatePerspective(); } QVector3D CameraControl::realPos() const { - float cos_t = cos(getTheta()); - float sin_t = sin(getTheta()); - float cos_p = cos(getPhi()); - float sin_p = sin(getPhi()); - QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p); - auto c = getCenter(); - QVector3D center = {c[0], c[1], c[2]}; - return center - back * getRadius(); + auto p = getPos(); + return {p[0], p[1], p[2]}; +} + +// Compute the intersection point of a ray and a plane +std::optional CameraControl::intersectRayPlane( + glm::vec3 ray_origin + , glm::vec3 ray_direction + , glm::vec3 plane_point + , glm::vec3 plane_normal +) { + // Dot product of the ray direction and the plane normal + float denominator = glm::dot(plane_normal, ray_direction); + + // If the dot product is close to 0, the ray is parallel to the plane or lies in it + if (glm::abs(denominator) < 1e-6f) { + return std::nullopt; // return empty: there is no intersection + } + + // Vector from the ray origin to a point on the plane + glm::vec3 diff = plane_point - ray_origin; + + // Compute the ray parameter t + float t = glm::dot(diff, plane_normal) / denominator; + + // If t < 0 the intersection lies behind the ray origin, so return empty + + if (t < 0) { + return std::nullopt; + } + + // Compute the intersection point + glm::vec3 intersection = ray_origin + t * ray_direction; + + return intersection; } // x, y from [0, 1] -QVector3D CameraControl::screenToWorldRay(float x, float y) const { - float cos_t = cos(getTheta()); - float sin_t = sin(getTheta()); - float cos_p = cos(getPhi()); - float sin_p = sin(getPhi()); - QVector3D back(cos_t * sin_p, sin_t, -cos_t * cos_p); - QVector3D up(-sin_t * sin_p, cos_t, sin_t * cos_p); - QVector3D right = QVector3D::crossProduct(up, back); - up = QVector3D::crossProduct(back, right); - right.normalize(); - up.normalize(); - QMatrix4x4 view; - view.setToIdentity(); - auto c = getCenter(); - QVector3D center = {c[0], c[1], c[2]}; - view.lookAt(realPos(), center, up); +glm::vec3 CameraControl::screenPosToRayWS(float x, float y) { x = (x - 0.5) * 2; y = (y - 0.5) * (-2); float v = std::tan(glm::radians(getFOV()) * 0.5f); float aspect = res().x() / res().y(); 
- auto dir = QVector3D(v * x * aspect, v * y, -1); - dir = dir.normalized(); - dir = view.inverted().mapVector(dir); - return dir; -} - -QVariant CameraControl::hitOnFloor(float x, float y) const { - auto dir = screenToWorldRay(x, y); - auto pos = realPos(); - float t = (0 - pos.y()) / dir.y(); - if (t > 0) { - auto p = pos + dir * t; - return p; - } else { - return {}; - } + auto dir = glm::normalize(glm::vec3(v * x * aspect, v * y, -1)); + return getRotation() * dir; +} + +glm::vec3 CameraControl::screenHitOnFloorWS(float x, float y) { + auto dir = screenPosToRayWS(x, y); + auto pos = getPos(); + float t = (0 - pos.y) / dir.y; + return pos + dir * t; } void CameraControl::fakeMouseReleaseEvent(QMouseEvent *event) { @@ -770,48 +774,37 @@ bool CameraControl::fakeKeyPressEvent(int uKey) { if (!middle_button_pressed) { return false; } - float cos_t = cos(getTheta()); - float sin_t = sin(getTheta()); - float cos_p = cos(getPhi()); - float sin_p = sin(getPhi()); - zeno::vec3f back(cos_t * sin_p, sin_t, -cos_t * cos_p); - zeno::vec3f up(-sin_t * sin_p, cos_t, sin_t * cos_p); - zeno::vec3f left = zeno::cross(up, back); - auto center = getCenter(); - float step = 1.0f; + float step = glm::pow(1.2f, float(zeno::getSession().userData().get2("viewport-FPN-move-speed", 0))); bool processed = false; if (uKey == Qt::Key_Q) { - setCenter(center + zeno::vec3f(0, -1, 0) * step); + setPos(getPos() - getUpDir() * step); processed = true; } else if (uKey == Qt::Key_E) { - setCenter(center + zeno::vec3f(0, 1, 0) * step); + setPos(getPos() + getUpDir() * step); processed = true; } else if (uKey == Qt::Key_W) { - setCenter(center + back * step); + setPos(getPos() + getViewDir() * step); processed = true; } else if (uKey == Qt::Key_S) { - setCenter(center - back * step); + setPos(getPos() - getViewDir() * step); processed = true; } else if (uKey == Qt::Key_A) { - setCenter(center + left * step); + setPos(getPos() - getRightDir() * step); processed = true; } else if (uKey == Qt::Key_D) { 
- setCenter(center - left * step); + setPos(getPos() + getRightDir() * step); processed = true; } if (processed) { updatePerspective(); - return true; - } - else { - return false; } + return processed; } bool CameraControl::fakeKeyReleaseEvent(int uKey) { diff --git a/ui/zenoedit/viewport/cameracontrol.h b/ui/zenoedit/viewport/cameracontrol.h index 61cfc6c75b..cf01ee4b6f 100644 --- a/ui/zenoedit/viewport/cameracontrol.h +++ b/ui/zenoedit/viewport/cameracontrol.h @@ -5,6 +5,7 @@ #include #include #include +#include class Zenovis; @@ -19,18 +20,15 @@ class CameraControl : public QObject void setRes(QVector2D res); QVector2D res() const { return m_res; } - float getRoll() const; - void setRoll(float roll); - float getTheta() const; - void setTheta(float theta); - float getPhi() const; - void setPhi(float phi); - zeno::vec3f getCenter() const; - void setCenter(zeno::vec3f center); + glm::vec3 getPos() const; + void setPos(glm::vec3 value); + glm::vec3 getPivot() const; + void setPivot(glm::vec3 value); + glm::quat getRotation(); + void setRotation(glm::quat value); bool getOrthoMode() const; void setOrthoMode(bool OrthoMode); float getRadius() const; - void setRadius(float radius); float getFOV() const; void setFOV(float fov); float getAperture() const; @@ -38,7 +36,6 @@ class CameraControl : public QObject float getDisPlane() const; void setDisPlane(float disPlane); void updatePerspective(); - void setKeyFrame(); bool fakeKeyPressEvent(int uKey); bool fakeKeyReleaseEvent(int uKey); @@ -48,21 +45,35 @@ class CameraControl : public QObject void fakeWheelEvent(QWheelEvent* event); void fakeMouseDoubleClickEvent(QMouseEvent* event); void focus(QVector3D center, float radius); + [[deprecated]] QVector3D realPos() const; - QVector3D screenToWorldRay(float x, float y) const; - QVariant hitOnFloor(float x, float y) const; - void lookTo(int dir); + glm::vec3 screenPosToRayWS(float x, float y); + glm::vec3 screenHitOnFloorWS(float x, float y); + glm::vec3 getViewDir() { + 
return getRotation() * glm::vec3(0, 0, -1); + }; + glm::vec3 getUpDir() { + return getRotation() * glm::vec3(0, 1, 0); + }; + glm::vec3 getRightDir() { + return getRotation() * glm::vec3(1, 0, 0); + }; + void lookTo(zenovis::CameraLookToDir dir); void clearTransformer(); void changeTransformOperation(const QString& node); void changeTransformOperation(int mode); void changeTransformCoordSys(); void resizeTransformHandler(int dir); + std::optional intersectRayPlane( + glm::vec3 ray_origin, glm::vec3 ray_direction, + glm::vec3 plane_point, glm::vec3 plane_normal); private: QPointF m_lastMidButtonPos; QPoint m_boundRectStartPos; QVector2D m_res; QSet m_pressedKeys; + std::optional m_hit_posWS; std::weak_ptr m_picker; std::weak_ptr m_transformer; diff --git a/ui/zenoedit/viewport/displaywidget.cpp b/ui/zenoedit/viewport/displaywidget.cpp index 46fcc826e1..bfd4a1cc7a 100644 --- a/ui/zenoedit/viewport/displaywidget.cpp +++ b/ui/zenoedit/viewport/displaywidget.cpp @@ -327,7 +327,7 @@ std::tuple DisplayWidget::getOriginWindowSizeInfo() return originWindowSizeInfo; } -void DisplayWidget::cameraLookTo(int dir) +void DisplayWidget::cameraLookTo(zenovis::CameraLookToDir dir) { if (m_bGLView) m_glView->cameraLookTo(dir); @@ -794,30 +794,30 @@ void DisplayWidget::onDockViewAction(bool triggered) switch (viewType) { case ACTION_ORIGIN_VIEW: - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::back_to_origin); break; case ACTION_FRONT_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::front_view); break; } case ACTION_BACK_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::back_view); break; } case ACTION_RIGHT_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::right_view); break; } case ACTION_LEFT_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::left_view); break; } case ACTION_TOP_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::top_view); 
break; } case ACTION_BOTTOM_VIEW: { - cameraLookTo(viewType); + cameraLookTo(zenovis::CameraLookToDir::bottom_view); break; } } @@ -1384,10 +1384,11 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex ZASSERT_EXIT(pZenovis && pZenovis->getSession()); auto scene = pZenovis->getSession()->get_scene(); auto fov = scene->camera->m_fov; - auto cz = glm::length(scene->camera->m_lodcenter); + auto cz = glm::length(scene->camera->m_pos); if (depth != 0) { cz = scene->camera->inf_z_near / depth; } + zeno::log_info("click depth {}", depth); auto w = scene->camera->m_nx; auto h = scene->camera->m_ny; // zeno::log_info("fov: {}", fov); @@ -1399,11 +1400,11 @@ void DisplayWidget::onNodeSelected(const QModelIndex &subgIdx, const QModelIndex auto cx = u * tan(glm::radians(fov) / 2) * w / h * cz; // zeno::log_info("cx: {}, cy: {}, cz: {}", cx, cy, -cz); glm::vec4 cc = {cx, cy, -cz, 1}; - auto wc = glm::inverse(scene->camera->m_view) * cc; + auto wc = glm::inverse(scene->camera->get_view_matrix()) * cc; wc /= wc.w; // zeno::log_info("wx: {}, wy: {}, wz: {}", word_coord.x, word_coord.y, word_coord.z); auto points = zeno::NodeSyncMgr::GetInstance().getInputValString(nodes[0], "points"); - zeno::log_info("fetch {}", points.c_str()); + zeno::log_info("fetch {}", wc); points += std::to_string(wc.x) + " " + std::to_string(wc.y) + " " + std::to_string(wc.z) + " "; zeno::NodeSyncMgr::GetInstance().updateNodeInputString(node_location, "points", points); }; diff --git a/ui/zenoedit/viewport/displaywidget.h b/ui/zenoedit/viewport/displaywidget.h index 076a483a2f..01a5330a49 100644 --- a/ui/zenoedit/viewport/displaywidget.h +++ b/ui/zenoedit/viewport/displaywidget.h @@ -60,7 +60,7 @@ class DisplayWidget : public QWidget bool isCurrent(); void setLoopPlaying(bool enable); std::tuple getOriginWindowSizeInfo(); - void cameraLookTo(int dir); + void cameraLookTo(zenovis::CameraLookToDir dir); protected: void mouseReleaseEvent(QMouseEvent* event) override; public 
slots: diff --git a/ui/zenoedit/viewport/optixviewport.cpp b/ui/zenoedit/viewport/optixviewport.cpp index d26c5416c7..4a443a7379 100644 --- a/ui/zenoedit/viewport/optixviewport.cpp +++ b/ui/zenoedit/viewport/optixviewport.cpp @@ -474,7 +474,7 @@ void ZOptixViewport::setRenderSeparately(bool updateLightCameraOnly, bool update emit sig_setRenderSeparately(updateLightCameraOnly, updateMatlOnly); } -void ZOptixViewport::cameraLookTo(int dir) +void ZOptixViewport::cameraLookTo(zenovis::CameraLookToDir dir) { m_camera->lookTo(dir); } @@ -692,26 +692,26 @@ void ZOptixViewport::keyPressEvent(QKeyEvent* event) key = settings.getShortCut(ShortCut_FrontView); if (uKey == key) - this->cameraLookTo(0); + this->cameraLookTo(zenovis::CameraLookToDir::front_view); key = settings.getShortCut(ShortCut_RightView); if (uKey == key) - this->cameraLookTo(1); + this->cameraLookTo(zenovis::CameraLookToDir::right_view); key = settings.getShortCut(ShortCut_VerticalView); if (uKey == key) - this->cameraLookTo(2); + this->cameraLookTo(zenovis::CameraLookToDir::top_view); key = settings.getShortCut(ShortCut_InitViewPos); if (uKey == key) - this->cameraLookTo(6); + this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin); key = settings.getShortCut(ShortCut_BackView); if (uKey == key) - this->cameraLookTo(3); + this->cameraLookTo(zenovis::CameraLookToDir::back_view); key = settings.getShortCut(ShortCut_LeftView); if (uKey == key) - this->cameraLookTo(4); + this->cameraLookTo(zenovis::CameraLookToDir::left_view); key = settings.getShortCut(ShortCut_UpwardView); if (uKey == key) - this->cameraLookTo(5); + this->cameraLookTo(zenovis::CameraLookToDir::bottom_view); key = settings.getShortCut(ShortCut_InitHandler); if (uKey == key) diff --git a/ui/zenoedit/viewport/optixviewport.h b/ui/zenoedit/viewport/optixviewport.h index e9864b38bb..b512a41fce 100644 --- a/ui/zenoedit/viewport/optixviewport.h +++ b/ui/zenoedit/viewport/optixviewport.h @@ -66,7 +66,7 @@ class ZOptixViewport : public QWidget 
~ZOptixViewport(); void setSimpleRenderOption(); void setRenderSeparately(bool updateLightCameraOnly, bool updateMatlOnly); - void cameraLookTo(int dir); + void cameraLookTo(zenovis::CameraLookToDir dir); void updateCameraProp(float aperture, float disPlane, UI_VECTYPE skipParam = UI_VECTYPE()); void updatePerspective(); void setCameraRes(const QVector2D& res); diff --git a/ui/zenoedit/viewport/viewportwidget.cpp b/ui/zenoedit/viewport/viewportwidget.cpp index 07b5e47f2e..c74098f937 100644 --- a/ui/zenoedit/viewport/viewportwidget.cpp +++ b/ui/zenoedit/viewport/viewportwidget.cpp @@ -300,7 +300,7 @@ void ViewportWidget::mouseDoubleClickEvent(QMouseEvent* event) { update(); } //void ViewportWidget::mouseDoubleClickEvent(QMouseEvent* event) { -void ViewportWidget::cameraLookTo(int dir) { +void ViewportWidget::cameraLookTo(zenovis::CameraLookToDir dir) { m_camera->lookTo(dir); } @@ -378,26 +378,26 @@ void ViewportWidget::keyPressEvent(QKeyEvent *event) key = settings.getShortCut(ShortCut_FrontView); if (uKey == key) - this->cameraLookTo(0); + this->cameraLookTo(zenovis::CameraLookToDir::front_view); key = settings.getShortCut(ShortCut_RightView); if (uKey == key) - this->cameraLookTo(1); + this->cameraLookTo(zenovis::CameraLookToDir::right_view); key = settings.getShortCut(ShortCut_VerticalView); if (uKey == key) - this->cameraLookTo(2); + this->cameraLookTo(zenovis::CameraLookToDir::top_view); key = settings.getShortCut(ShortCut_InitViewPos); if (uKey == key) - this->cameraLookTo(6); + this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin); key = settings.getShortCut(ShortCut_BackView); if (uKey == key) - this->cameraLookTo(3); + this->cameraLookTo(zenovis::CameraLookToDir::back_view); key = settings.getShortCut(ShortCut_LeftView); if (uKey == key) - this->cameraLookTo(4); + this->cameraLookTo(zenovis::CameraLookToDir::left_view); key = settings.getShortCut(ShortCut_UpwardView); if (uKey == key) - this->cameraLookTo(5); + 
this->cameraLookTo(zenovis::CameraLookToDir::bottom_view); key = settings.getShortCut(ShortCut_InitHandler); if (uKey == key) diff --git a/ui/zenoedit/viewport/viewportwidget.h b/ui/zenoedit/viewport/viewportwidget.h index af8e7a55af..4734470ebc 100644 --- a/ui/zenoedit/viewport/viewportwidget.h +++ b/ui/zenoedit/viewport/viewportwidget.h @@ -9,6 +9,7 @@ #include #include +#include "zenovis/Camera.h" class ZTimeline; class ZenoMainWindow; @@ -40,7 +41,7 @@ class ViewportWidget : public QGLWidget void setSafeFrames(bool bLock, int nx, int ny); void updatePerspective(); void updateCameraProp(float aperture, float disPlane); - void cameraLookTo(int dir); + void cameraLookTo(zenovis::CameraLookToDir dir); void clearTransformer(); void changeTransformOperation(const QString& node); void changeTransformOperation(int mode); diff --git a/ui/zenoedit/viewport/zenovis.cpp b/ui/zenoedit/viewport/zenovis.cpp index 423e87da16..e0b7b7eea5 100644 --- a/ui/zenoedit/viewport/zenovis.cpp +++ b/ui/zenoedit/viewport/zenovis.cpp @@ -174,7 +174,6 @@ int Zenovis::setCurrentFrameId(int frameid) if (m_camera_keyframe && m_camera_control) { PerspectiveInfo r; if (m_camera_keyframe->queryFrame(frameid, r)) { - m_camera_control->setKeyFrame(); m_camera_control->updatePerspective(); } } diff --git a/ui/zenoedit/viewport/zoptixviewport.cpp b/ui/zenoedit/viewport/zoptixviewport.cpp index 5429d43c96..be5188d963 100644 --- a/ui/zenoedit/viewport/zoptixviewport.cpp +++ b/ui/zenoedit/viewport/zoptixviewport.cpp @@ -78,7 +78,7 @@ void ZOptixProcViewport::setRenderSeparately(bool updateLightCameraOnly, bool up scene->drawOptions->updateMatlOnly = updateMatlOnly; } -void ZOptixProcViewport::cameraLookTo(int dir) +void ZOptixProcViewport::cameraLookTo(zenovis::CameraLookToDir dir) { m_camera->lookTo(dir); } @@ -283,26 +283,26 @@ void ZOptixProcViewport::keyPressEvent(QKeyEvent* event) key = settings.getShortCut(ShortCut_FrontView); if (uKey == key) - this->cameraLookTo(0); + 
this->cameraLookTo(zenovis::CameraLookToDir::front_view); key = settings.getShortCut(ShortCut_RightView); if (uKey == key) - this->cameraLookTo(1); + this->cameraLookTo(zenovis::CameraLookToDir::right_view); key = settings.getShortCut(ShortCut_VerticalView); if (uKey == key) - this->cameraLookTo(2); + this->cameraLookTo(zenovis::CameraLookToDir::top_view); key = settings.getShortCut(ShortCut_InitViewPos); if (uKey == key) - this->cameraLookTo(6); + this->cameraLookTo(zenovis::CameraLookToDir::back_to_origin); key = settings.getShortCut(ShortCut_BackView); if (uKey == key) - this->cameraLookTo(3); + this->cameraLookTo(zenovis::CameraLookToDir::back_view); key = settings.getShortCut(ShortCut_LeftView); if (uKey == key) - this->cameraLookTo(4); + this->cameraLookTo(zenovis::CameraLookToDir::left_view); key = settings.getShortCut(ShortCut_UpwardView); if (uKey == key) - this->cameraLookTo(5); + this->cameraLookTo(zenovis::CameraLookToDir::bottom_view); key = settings.getShortCut(ShortCut_InitHandler); if (uKey == key) diff --git a/ui/zenoedit/viewport/zoptixviewport.h b/ui/zenoedit/viewport/zoptixviewport.h index cbe8b7ca7d..2785bf6f6f 100644 --- a/ui/zenoedit/viewport/zoptixviewport.h +++ b/ui/zenoedit/viewport/zoptixviewport.h @@ -3,6 +3,7 @@ #include #include "optixviewport.h" +#include "zenovis/Camera.h" class Zenovis; class CameraControl; @@ -16,7 +17,7 @@ class ZOptixProcViewport : public QWidget ~ZOptixProcViewport(); void setSimpleRenderOption(); void setRenderSeparately(bool updateLightCameraOnly, bool updateMatlOnly); - void cameraLookTo(int dir); + void cameraLookTo(zenovis::CameraLookToDir dir); void updateViewport(); void updateCameraProp(float aperture, float disPlane); void updatePerspective(); diff --git a/ui/zenoedit/viewportinteraction/transform.cpp b/ui/zenoedit/viewportinteraction/transform.cpp index cac59fb4b4..6e2a1d1d11 100644 --- a/ui/zenoedit/viewportinteraction/transform.cpp +++ b/ui/zenoedit/viewportinteraction/transform.cpp @@ -150,26 
+150,22 @@ bool FakeTransformer::calcTransformStart(glm::vec3 ori, glm::vec3 dir, glm::vec3 return true; } -bool FakeTransformer::clickedAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front) { +bool FakeTransformer::clickedAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front) { if (!m_handler) return false; - auto ray_ori = QVec3ToGLMVec3(ori); - auto ray_dir = QVec3ToGLMVec3(dir); - m_operation_mode = m_handler->handleClick(ray_ori, ray_dir); - if (!calcTransformStart(ray_ori, ray_dir, front)) return false; + m_operation_mode = m_handler->handleClick(ori, dir); + if (!calcTransformStart(ori, dir, front)) return false; return m_operation_mode != zenovis::INTERACT_NONE; } -bool FakeTransformer::hoveredAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front) +bool FakeTransformer::hoveredAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front) { if (!m_handler) return false; - auto ray_ori = QVec3ToGLMVec3(ori); - auto ray_dir = QVec3ToGLMVec3(dir); - int mode = m_handler->handleHover(ray_ori, ray_dir); - if (!calcTransformStart(ray_ori, ray_dir, front)) return false; + int mode = m_handler->handleHover(ori, dir); + if (!calcTransformStart(ori, dir, front)) return false; return mode != zenovis::INTERACT_NONE; } -void FakeTransformer::transform(QVector3D camera_pos, QVector3D ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp) { +void FakeTransformer::transform(glm::vec3 camera_pos, glm::vec3 ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp) { if (m_operation == NONE) return; auto pZenovis = m_viewport->getZenoVis(); @@ -179,8 +175,8 @@ void FakeTransformer::transform(QVector3D camera_pos, QVector3D ray_dir, glm::ve auto scene = sess->get_scene(); ZASSERT_EXIT(scene); - auto ori = QVec3ToGLMVec3(camera_pos); - auto dir = QVec3ToGLMVec3(ray_dir); + auto ori = camera_pos; + auto dir = ray_dir; auto x_axis = glm::vec3(1, 0, 0); auto y_axis = glm::vec3(0, 1, 0); diff --git 
a/ui/zenoedit/viewportinteraction/transform.h b/ui/zenoedit/viewportinteraction/transform.h index 04d37b715f..7532e2eb7b 100644 --- a/ui/zenoedit/viewportinteraction/transform.h +++ b/ui/zenoedit/viewportinteraction/transform.h @@ -31,9 +31,9 @@ class FakeTransformer { void removeObject(const std::string& name); void removeObject(const std::unordered_set& names); bool calcTransformStart(glm::vec3 ori, glm::vec3 dir, glm::vec3 front); - bool clickedAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front); - bool hoveredAnyHandler(QVector3D ori, QVector3D dir, glm::vec3 front); - void transform(QVector3D camera_pos, QVector3D ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp); + bool clickedAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front); + bool hoveredAnyHandler(glm::vec3 ori, glm::vec3 dir, glm::vec3 front); + void transform(glm::vec3 camera_pos, glm::vec3 ray_dir, glm::vec2 mouse_start, glm::vec2 mouse_pos, glm::vec3 front, glm::mat4 vp); void startTransform(); void endTransform(bool moved); bool isTransforming() const; @@ -67,10 +67,6 @@ class FakeTransformer { // 把FakeTransform上的SRT应用到primitive上 void doTransform(); - - static glm::vec3 QVec3ToGLMVec3(QVector3D QVec3) { - return {QVec3.x(), QVec3.y(), QVec3.z()}; - } void markObjectInteractive(const std::string& obj_name); void unmarkObjectInteractive(const std::string& obj_name); void markObjectsInteractive(); diff --git a/ui/zenoedit/zenomainwindow.cpp b/ui/zenoedit/zenomainwindow.cpp index 1f087c95e3..f2005aa311 100644 --- a/ui/zenoedit/zenomainwindow.cpp +++ b/ui/zenoedit/zenomainwindow.cpp @@ -2285,6 +2285,10 @@ void ZenoMainWindow::doFrameUpdate(int frame) { } } +void ZenoMainWindow::statusbarShowMessage(const std::string& text, int timeout) const { + m_ui->statusbar->showMessage(text.c_str(), timeout); +} + static bool openFileAndExportAsZsl(const char *inPath, const char *outPath) { auto pGraphs = zenoApp->graphsManagment(); IGraphsModel* pModel = 
pGraphs->openZsgFile(inPath); diff --git a/ui/zenoedit/zenomainwindow.h b/ui/zenoedit/zenomainwindow.h index 2ccae1f137..2a0bc06689 100644 --- a/ui/zenoedit/zenomainwindow.h +++ b/ui/zenoedit/zenomainwindow.h @@ -56,6 +56,7 @@ class ZenoMainWindow : public QMainWindow bool isOnlyOptixWindow() const; bool isRecordByCommandLine() const; void openFileAndUpdateParam(const QString& path, const QString& paramJson); + void statusbarShowMessage(const std::string& text, int timeout = 0) const; QLineEdit* selected = nullptr; ZenoLights* lightPanel = nullptr; diff --git a/zeno/include/zeno/types/CameraObject.h b/zeno/include/zeno/types/CameraObject.h index f30b5eb9bc..0e9e9eee4e 100644 --- a/zeno/include/zeno/types/CameraObject.h +++ b/zeno/include/zeno/types/CameraObject.h @@ -2,8 +2,24 @@ #include #include +#include +#include +#include +#include namespace zeno { +static glm::quat from_theta_phi(float theta, float phi) { + float cos_t = glm::cos(theta), sin_t = glm::sin(theta); + float cos_p = glm::cos(phi), sin_p = glm::sin(phi); + glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p); + glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p); + glm::vec3 right = glm::cross(front, up); + glm::mat3 rotation; + rotation[0] = right; + rotation[1] = up; + rotation[2] = -front; + return glm::quat_cast(rotation); +} struct CameraData { vec3f pos{0, 0, 1}; @@ -16,11 +32,7 @@ struct CameraData { float aperture{0.0f}; float focalPlaneDistance{2.0f}; - bool isSet = false; - vec3f center{0, 0, 0}; - float radius{1}; - float theta{}; - float phi{}; + std::optional pivot = std::nullopt; }; struct CameraObject : IObjectClone, CameraData { diff --git a/zeno/src/nodes/CameraNodes.cpp b/zeno/src/nodes/CameraNodes.cpp index d53973bf4a..97563d4871 100644 --- a/zeno/src/nodes/CameraNodes.cpp +++ b/zeno/src/nodes/CameraNodes.cpp @@ -10,8 +10,56 @@ #include #include #include "zeno/extra/TempNode.h" +#include namespace zeno { +struct CameraNode: zeno::INode{ + virtual void apply() override { + auto 
camera = std::make_unique(); + + camera->pos = get_input2("pos"); + camera->up = get_input2("up"); + camera->view = get_input2("view"); + camera->fov = get_input2("fov"); + camera->aperture = get_input2("aperture"); + camera->focalPlaneDistance = get_input2("focalPlaneDistance"); + camera->userData().set2("frame", get_input2("frame")); + + auto other_props = get_input2("other"); + std::regex reg(","); + std::sregex_token_iterator p(other_props.begin(), other_props.end(), reg, -1); + std::sregex_token_iterator end; + std::vector prop_vals; + while (p != end) { + prop_vals.push_back(std::stof(*p)); + p++; + } + if (prop_vals.size() == 6) { + camera->pivot = {prop_vals[0], prop_vals[1], prop_vals[2]}; + } + + set_output("camera", std::move(camera)); + } +}; + +ZENO_DEFNODE(CameraNode)({ + { + {"vec3f", "pos", "0,0,5"}, + {"vec3f", "up", "0,1,0"}, + {"vec3f", "view", "0,0,-1"}, + {"float", "fov", "45"}, + {"float", "aperture", "11"}, + {"float", "focalPlaneDistance", "2.0"}, + {"string", "other", ""}, + {"int", "frame", "0"}, + }, + { + {"CameraObject", "camera"}, + }, + { + }, + {"FBX"}, + }); struct MakeCamera : INode { virtual void apply() override { diff --git a/zenovis/include/zenovis/Camera.h b/zenovis/include/zenovis/Camera.h index c6b1983006..be015a2f66 100644 --- a/zenovis/include/zenovis/Camera.h +++ b/zenovis/include/zenovis/Camera.h @@ -7,6 +7,15 @@ #include namespace zenovis { +enum class CameraLookToDir { + front_view, + right_view, + top_view, + back_view, + left_view, + bottom_view, + back_to_origin, +}; namespace opengl { class Program; @@ -22,7 +31,6 @@ struct ZOptixCameraSettingInfo { struct Camera { float inf_z_near = 0.001f; int m_nx{512}, m_ny{512}; - glm::mat4x4 m_view{1}, m_proj{1}; float m_near = 0.01f; float m_far = 20000.0f; @@ -33,39 +41,44 @@ struct Camera { float m_dof = -1.f; float m_safe_frames = 0; - glm::vec3 m_lodcenter{0, 0, -1}; - glm::vec3 m_lodfront{0, 0, 1}; - glm::vec3 m_lodup{0, 1, 0}; + glm::vec3 m_pos{0, 0, 5}; + glm::vec3 
m_pivot = {}; + glm::quat m_rotation = {1, 0, 0, 0}; - bool m_need_sync = false; bool m_block_window = false; - bool m_auto_radius = false; - - float m_theta = 0; - float m_phi = 0; - float m_roll = 0; - zeno::vec3f m_center = {}; +public: + void reset() { + m_pos = {0, 0, 5}; + m_pivot = {}; + m_rotation = {1, 0, 0, 0}; + updateMatrix(); + } + glm::vec3 get_lodfront() { + return m_rotation * glm::vec3(0, 0, -1); + } + glm::vec3 get_lodup() { + return m_rotation * glm::vec3(0, 1, 0); + } bool m_ortho_mode = false; - float m_radius = 5; + float get_radius() { + return glm::distance(m_pos, m_pivot); + } + glm::vec3 getPos() { + return m_pos; + } + void setPos(glm::vec3 value) { + m_pos = value; + } + glm::vec3 getPivot() { + return m_pivot; + } + void setPivot(glm::vec3 value) { + m_pivot = value; + } zeno::vec2i viewport_offset = {}; ZOptixCameraSettingInfo zOptixCameraSettingInfo = {}; - // only used in real-shader - struct ZxxHappyLookParam { - float cx = 0; - float cy = 0; - float cz = 0; - float theta = 0; - float phi = 0; - float radius = 0; - float fov = 0; - bool ortho_mode = false; - float aperture = 0; - float focalPlaneDistance = 0; - }; - struct ZxxHappyLookParam m_zxx; - float getAspect() const { return (float)m_nx / (float)m_ny; } @@ -77,11 +90,31 @@ struct Camera { bool is_locked_window() const; void setCamera(zeno::CameraData const &cam); void setPhysicalCamera(float aperture, float shutter_speed, float iso, bool aces, bool exposure); - void placeCamera(glm::vec3 pos, glm::vec3 front, glm::vec3 up); - void lookCamera(float cx, float cy, float cz, float theta, float phi, float radius, bool ortho_mode, float fov, float aperture, float focalPlaneDistance); + void placeCamera(glm::vec3 pos, glm::vec3 view, glm::vec3 up); + void placeCamera(glm::vec3 pos, glm::quat rotation); void focusCamera(float cx, float cy, float cz, float radius); void set_program_uniforms(opengl::Program *pro); void updateMatrix(); + glm::mat4x4 get_view_matrix() { + return 
glm::lookAt(m_pos, m_pos + get_lodfront(), get_lodup()); + } + static glm::mat4 MakeInfReversedZProjRH(float fovY_radians, float aspectWbyH, float zNear) { + float f = 1.0f / tan(fovY_radians / 2.0f); + return glm::mat4( + f / aspectWbyH, 0.0f, 0.0f, 0.0f, + 0.0f, f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, -1.0f, + 0.0f, 0.0f, zNear, 0.0f); + } + glm::mat4x4 get_proj_matrix() { + if (m_ortho_mode) { + auto radius = get_radius(); + return glm::orthoZO(-radius * getAspect(), radius * getAspect(), -radius, + radius, m_far, m_near); + } else { + return MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near); + } + } }; } // namespace zenovis diff --git a/zenovis/include/zenovis/RenderEngine.h b/zenovis/include/zenovis/RenderEngine.h index 84c45a5c12..32e65c6e0f 100644 --- a/zenovis/include/zenovis/RenderEngine.h +++ b/zenovis/include/zenovis/RenderEngine.h @@ -19,6 +19,7 @@ struct RenderEngine { virtual void cleanupWhenExit() = 0; virtual ~RenderEngine() = default; + virtual std::optional getClickedPos(int x, int y) { return {}; } }; class RenderManager { diff --git a/zenovis/include/zenovis/bate/FrameBufferRender.h b/zenovis/include/zenovis/bate/FrameBufferRender.h index 36b99ba256..3a8ce31e8c 100644 --- a/zenovis/include/zenovis/bate/FrameBufferRender.h +++ b/zenovis/include/zenovis/bate/FrameBufferRender.h @@ -71,9 +71,10 @@ struct FrameBufferRender { unique_ptr fbo; unique_ptr picking_texture; - unique_ptr depth_rbo; + unique_ptr depth_texture; unique_ptr intermediate_fbo; + unique_ptr screen_depth_tex; unique_ptr screen_tex; unique_ptr quad_vao; @@ -124,14 +125,15 @@ struct FrameBufferRender { CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D_MULTISAMPLE, picking_texture->tex, 0)); // generate depth texture - depth_rbo = make_unique(); - CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, depth_rbo->rbo)); - CHECK_GL(glRenderbufferStorageMultisample(GL_RENDERBUFFER, samples, GL_DEPTH_COMPONENT32F, w, h)); - 
CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, 0)); - CHECK_GL(glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, depth_rbo->rbo)); + depth_texture = make_unique(); + depth_texture->target = GL_TEXTURE_2D_MULTISAMPLE; + CHECK_GL(glBindTexture(depth_texture->target, depth_texture->tex)); + CHECK_GL(glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, GL_DEPTH_COMPONENT, w, h, GL_TRUE)); + CHECK_GL(glBindTexture(depth_texture->target, 0)); + CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, depth_texture->tex, 0)); // check fbo - if(!fbo->complete()) printf("fbo error\n"); + if(!fbo->complete()) zeno::log_error("fbo error"); // unbind fbo & texture CHECK_GL(glBindTexture(GL_TEXTURE_2D, 0)); @@ -139,13 +141,20 @@ struct FrameBufferRender { intermediate_fbo = make_unique(); screen_tex = make_unique(); + screen_depth_tex = make_unique(); intermediate_fbo->bind(); CHECK_GL(glBindTexture(GL_TEXTURE_2D, screen_tex->tex)); CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, w, h, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL)); CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); - CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, screen_tex->tex, 0)); // we only need a color buffer - if(!intermediate_fbo->complete()) printf("fbo error\n"); + CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, screen_tex->tex, 0)); + + CHECK_GL(glBindTexture(GL_TEXTURE_2D, screen_depth_tex->tex)); + CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, w, h, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL)); + CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); + CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); + CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, 
screen_depth_tex->tex, 0)); + if(!intermediate_fbo->complete()) zeno::log_error("fbo error"); CHECK_GL(glBindTexture(GL_TEXTURE_2D, 0)); intermediate_fbo->unbind(); } @@ -153,9 +162,10 @@ struct FrameBufferRender { void destroy_buffers() { fbo.reset(); picking_texture.reset(); - depth_rbo.reset(); + depth_texture.reset(); intermediate_fbo.reset(); screen_tex.reset(); + screen_depth_tex.reset(); } void bind() { // enable framebuffer writing @@ -170,7 +180,7 @@ struct FrameBufferRender { // 2. now blit multisampled buffer(s) to normal colorbuffer of intermediate FBO. Image is stored in screenTexture CHECK_GL(glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo->fbo)); CHECK_GL(glBindFramebuffer(GL_DRAW_FRAMEBUFFER, intermediate_fbo->fbo)); - CHECK_GL(glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT, GL_NEAREST)); + CHECK_GL(glBlitFramebuffer(0, 0, w, h, 0, 0, w, h, GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT, GL_NEAREST)); // 3. now render quad with scene's visuals as its texture image CHECK_GL(glBindFramebuffer(GL_FRAMEBUFFER, 0)); @@ -187,5 +197,16 @@ struct FrameBufferRender { CHECK_GL(glDrawArrays(GL_TRIANGLES, 0, 6)); glEnable(GL_MULTISAMPLE); } + float getDepth(int x, int y) { + if (!intermediate_fbo->complete()) return 0; + intermediate_fbo->bind(); + CHECK_GL(glBindFramebuffer(GL_READ_FRAMEBUFFER, intermediate_fbo->fbo)); + + float depth; + CHECK_GL(glReadPixels(x, h - y - 1, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &depth)); + + intermediate_fbo->unbind(); + return depth; + } }; } \ No newline at end of file diff --git a/zenovis/src/Camera.cpp b/zenovis/src/Camera.cpp index 232d820946..7706ac558a 100644 --- a/zenovis/src/Camera.cpp +++ b/zenovis/src/Camera.cpp @@ -14,42 +14,15 @@ void Camera::setCamera(zeno::CameraData const &cam) { glm::vec3(cam.pos[0], cam.pos[1], cam.pos[2]), glm::vec3(cam.view[0], cam.view[1], cam.view[2]), glm::vec3(cam.up[0], cam.up[1], cam.up[2])); - //this->m_dof = cam.dof; this->m_aperture = cam.aperture; this->focalPlaneDistance = 
cam.focalPlaneDistance; -// zeno::log_info("radius {}", m_zxx.radius); - - if (cam.isSet) { - m_center = cam.center; - m_theta = cam.theta; - m_phi = cam.phi; - m_radius = cam.radius; + if (cam.pivot.has_value()) { + this->m_pivot = zeno::vec_to_other(cam.pivot.value()); } else { - auto view = zeno::normalize(cam.view); - zeno::vec3f center = cam.pos + m_radius * zeno::normalize(cam.view); - float theta = M_PI_2 - glm::acos(zeno::dot(view, zeno::vec3f(0, 1, 0))); - float phi = M_PI_2 + std::atan2(view[2], view[0]); -// zeno::log_info("theta: {}", theta); -// zeno::log_info("phi: {}", phi); - - m_center = center; - m_theta = theta; - m_phi = phi; - - float cos_t = glm::cos(m_theta), sin_t = glm::sin(m_theta); - float cos_p = glm::cos(m_phi), sin_p = glm::sin(m_phi); - glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p); - glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p); - glm::vec3 left = glm::cross(up, front); - float map_to_up = glm::dot(up, zeno::vec_to_other(cam.up)); - float map_to_left = glm::dot(left, zeno::vec_to_other(cam.up)); - m_roll = glm::atan(map_to_left, map_to_up); + this->m_pivot = zeno::vec_to_other(cam.pos); } - - this->m_auto_radius = !cam.isSet; - this->m_need_sync = true; } void Camera::setPhysicalCamera(float aperture, float shutter_speed, float iso, bool aces, bool exposure) { @@ -60,54 +33,36 @@ void Camera::setPhysicalCamera(float aperture, float shutter_speed, float iso, b this->zOptixCameraSettingInfo.exposure = exposure; } -static glm::mat4 MakeInfReversedZProjRH(float fovY_radians, float aspectWbyH, float zNear) { - float f = 1.0f / tan(fovY_radians / 2.0f); - return glm::mat4( - f / aspectWbyH, 0.0f, 0.0f, 0.0f, - 0.0f, f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, -1.0f, - 0.0f, 0.0f, zNear, 0.0f); +void Camera::placeCamera(glm::vec3 pos, glm::vec3 view, glm::vec3 up) { + auto right = glm::cross(glm::normalize(view), glm::normalize(up)); + glm::mat3 rotation; + rotation[0] = right; + rotation[1] = up; + rotation[2] = -view; + + 
Camera::placeCamera(pos, glm::quat_cast(rotation)); } -void Camera::placeCamera(glm::vec3 pos, glm::vec3 front, glm::vec3 up) { - front = glm::normalize(front); - up = glm::normalize(up); - - m_lodcenter = pos; - m_lodfront = front; - m_lodup = up; - - m_view = glm::lookAt(m_lodcenter, m_lodcenter + m_lodfront, m_lodup); - if (m_ortho_mode) { - auto radius = m_radius; - m_proj = glm::orthoZO(-radius * getAspect(), radius * getAspect(), -radius, - radius, m_far, m_near); - } else { - m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near); - } + +void Camera::placeCamera(glm::vec3 pos, glm::quat rotation) { + m_pos = pos; + m_rotation = rotation; } void Camera::updateMatrix() { - auto center = zeno::vec_to_other(m_center) ; - float cos_t = glm::cos(m_theta), sin_t = glm::sin(m_theta); - float cos_p = glm::cos(m_phi), sin_p = glm::sin(m_phi); - glm::vec3 front(cos_t * sin_p, sin_t, -cos_t * cos_p); - glm::vec3 up(-sin_t * sin_p, cos_t, sin_t * cos_p); - glm::vec3 left = glm::cross(up, front); - up = glm::cos(m_roll) * up + glm::sin(m_roll) * left; + auto center = zeno::vec_to_other(m_pivot) ; if (!m_ortho_mode) { m_near = 0.05f; - m_far = 20000.0f * std::max(1.0f, (float)m_radius / 10000.f); - placeCamera(center - front * m_radius, front, up); + m_far = 20000.0f * std::max(1.0f, get_radius() / 10000.f); + placeCamera(getPos(), m_rotation); } else { - placeCamera(center - front * m_radius * 0.4f, front, up); + placeCamera(getPos(), m_rotation); } } void Camera::setResolution(int nx, int ny) { m_nx = nx; m_ny = ny; - m_proj = MakeInfReversedZProjRH(glm::radians(m_fov), getAspect(), inf_z_near); } void Camera::setResolutionInfo(bool block, int nx, int ny) { @@ -129,30 +84,13 @@ bool Camera::is_locked_window() const { void Camera::focusCamera(float cx, float cy, float cz, float radius) { auto center = glm::vec3(cx, cy, cz); - placeCamera(center - m_lodfront * radius, m_lodfront, m_lodup); -} -void Camera::lookCamera(float cx, float cy, float cz, 
float theta, float phi, float radius, bool ortho_mode, float fov, float aperture, float focalPlaneDistance) { - m_zxx.cx = cx; - m_zxx.cy = cy; - m_zxx.cz = cz; - m_zxx.theta = theta; - m_zxx.phi = phi; - m_zxx.radius = radius; - m_zxx.fov = fov; - m_zxx.ortho_mode = ortho_mode; - m_zxx.aperture = aperture; - m_zxx.focalPlaneDistance = focalPlaneDistance; - - m_ortho_mode = ortho_mode; - m_aperture = aperture; - this->focalPlaneDistance = focalPlaneDistance; - - updateMatrix(); + placeCamera(center - get_lodfront() * radius, m_rotation); } void Camera::set_program_uniforms(opengl::Program *pro) { pro->use(); - + auto m_view = get_view_matrix(); + auto m_proj = get_proj_matrix(); auto vp = m_proj * m_view; pro->set_uniform("mVP", vp); pro->set_uniform("mInvVP", glm::inverse(vp)); diff --git a/zenovis/src/bate/GraphicPrimitive.cpp b/zenovis/src/bate/GraphicPrimitive.cpp index 2e6c376df5..8c35b8cb69 100644 --- a/zenovis/src/bate/GraphicPrimitive.cpp +++ b/zenovis/src/bate/GraphicPrimitive.cpp @@ -711,7 +711,7 @@ struct ZhxxGraphicPrimitive final : IGraphicDraw { triObj.prog->set_uniformi("mRenderWireframe", false); triObj.prog->set_uniformi("mCustomColor", custom_color); { - auto camera_center = scene->camera->m_lodcenter; + auto camera_center = scene->camera->m_pos; triObj.prog->set_uniform("mCameraCenter", camera_center); } diff --git a/zenovis/src/bate/GraphicRotateHandler.cpp b/zenovis/src/bate/GraphicRotateHandler.cpp index 6216739bed..33bd4ab7a6 100644 --- a/zenovis/src/bate/GraphicRotateHandler.cpp +++ b/zenovis/src/bate/GraphicRotateHandler.cpp @@ -83,7 +83,7 @@ struct RotateHandler final : IGraphicHandler { } void draw() override { - auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2])); + auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2])); bound = dist / 5.0f * scale; diff --git a/zenovis/src/bate/GraphicScaleHandler.cpp b/zenovis/src/bate/GraphicScaleHandler.cpp index 
c7f49a6e07..256cdd023c 100644 --- a/zenovis/src/bate/GraphicScaleHandler.cpp +++ b/zenovis/src/bate/GraphicScaleHandler.cpp @@ -86,7 +86,7 @@ struct ScaleHandler final : IGraphicHandler { } void draw() override { - auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2])); + auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2])); bound = dist / 5.0f * scale; @@ -142,7 +142,7 @@ struct ScaleHandler final : IGraphicHandler { } // xyz if (mode == INTERACT_NONE || mode == INTERACT_XYZ) { - const auto& view = scene->camera->m_view; + const auto& view = scene->camera->get_view_matrix(); // http://www.opengl-tutorial.org/cn/intermediate-tutorials/billboards-particles/billboards/ // always face camera // This is equivalent to mlutiplying (1,0,0) and (0,1,0) by inverse(ViewMatrix). @@ -162,7 +162,7 @@ struct ScaleHandler final : IGraphicHandler { auto z_axis = glm::vec3(0, 0, 1); auto model_matrix = glm::translate(zeno::vec_to_other(center)); - const auto& view = scene->camera->m_view; + const auto& view = scene->camera->get_view_matrix(); float t; diff --git a/zenovis/src/bate/GraphicTransHandler.cpp b/zenovis/src/bate/GraphicTransHandler.cpp index 9315234c45..4e5a3712b8 100644 --- a/zenovis/src/bate/GraphicTransHandler.cpp +++ b/zenovis/src/bate/GraphicTransHandler.cpp @@ -79,7 +79,7 @@ struct TransHandler final : IGraphicHandler { } void draw() override { - auto dist = glm::distance(scene->camera->m_lodcenter, glm::vec3(center[0], center[1], center[2])); + auto dist = glm::distance(scene->camera->m_pos, glm::vec3(center[0], center[1], center[2])); bound = dist / 5.0f * scale; diff --git a/zenovis/src/bate/HudGraphicGrid.cpp b/zenovis/src/bate/HudGraphicGrid.cpp index 78215214c9..23d1e4e0f8 100644 --- a/zenovis/src/bate/HudGraphicGrid.cpp +++ b/zenovis/src/bate/HudGraphicGrid.cpp @@ -120,9 +120,9 @@ struct GraphicGrid final : IGraphicDraw { scene->camera->set_program_uniforms(prog); { - auto 
camera_radius = glm::length(scene->camera->m_lodcenter); - auto camera_center = scene->camera->m_lodcenter - + scene->camera->m_lodfront * camera_radius; + auto camera_radius = glm::length(scene->camera->m_pos); + auto camera_center = scene->camera->m_pos + + scene->camera->get_lodfront() * camera_radius; camera_radius *= scene->camera->m_fov / 45.f; float level = std::max(std::log(camera_radius) / std::log(5.0f) - 1.0f, -1.0f); auto grid_scale = std::pow(5.f, std::floor(level)); diff --git a/zenovis/src/bate/RenderEngineBate.cpp b/zenovis/src/bate/RenderEngineBate.cpp index 0bda2c7c4d..8e01892518 100644 --- a/zenovis/src/bate/RenderEngineBate.cpp +++ b/zenovis/src/bate/RenderEngineBate.cpp @@ -67,6 +67,7 @@ struct RenderEngineBate : RenderEngine { // } primHighlight->draw(); if (scene->drawOptions->show_grid) { + glDepthMask(GL_FALSE); for (auto const &hudgra : hudGraphics) { hudgra->draw(); } @@ -84,6 +85,7 @@ struct RenderEngineBate : RenderEngine { *scene->camera = backup; } } + glDepthMask(GL_TRUE); } if (!scene->selected.empty() && scene->drawOptions->handler) { CHECK_GL(glClear(GL_DEPTH_BUFFER_BIT)); @@ -92,7 +94,6 @@ struct RenderEngineBate : RenderEngine { if (!record) { fbr->unbind(); fbr->draw_to_screen(); - fbr->destroy_buffers(); } } @@ -110,6 +111,30 @@ struct RenderEngineBate : RenderEngine { primHighlight = nullptr; fbr = nullptr; } + std::optional getClickedPos(int x, int y) override { + auto depth = fbr->getDepth(x, y); + if (depth == 0) { + return {}; + } +// zeno::log_info("depth: {}", depth); + + auto fov = scene->camera->m_fov; + float cz = scene->camera->inf_z_near / depth; + auto w = scene->camera->m_nx; + auto h = scene->camera->m_ny; +// zeno::log_info("{} {} {} {}", x, y, w, h); +// zeno::log_info("fov: {}", fov); +// zeno::log_info("w: {}, h: {}", w, h); + auto u = (2.0 * x / w) - 1; + auto v = 1 - (2.0 * y / h); +// zeno::log_info("u: {}, v: {}", u, v); + auto cy = v * tan(glm::radians(fov) / 2) * cz; + auto cx = u * 
tan(glm::radians(fov) / 2) * w / h * cz; + glm::vec4 cc = {cx, cy, -cz, 1}; + auto wc = glm::inverse(scene->camera->get_view_matrix()) * cc; + wc /= wc.w; + return glm::vec3(wc); + } }; static auto definer = RenderManager::registerRenderEngine("bate"); diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index cbd63ed2c3..bfae7ae3af 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -931,6 +931,15 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { bool meshNeedUpdate = true; bool matNeedUpdate = true; bool staticNeedUpdate = true; + std::optional getClickedPos(int x, int y) override { + glm::vec3 posWS = xinxinoptix::get_click_pos(x, y); + if (posWS == glm::vec3()) { + return {}; + } + auto const &cam = *scene->camera; + posWS += cam.m_pos; + return posWS; + } auto setupState() { return std::tuple{ @@ -987,7 +996,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { graphicsMan->load_shader_uniforms(scene->objectsMan->pairs()); } -#define MY_CAM_ID(cam) cam.m_nx, cam.m_ny, cam.m_lodup, cam.m_lodfront, cam.m_lodcenter, cam.m_fov, cam.focalPlaneDistance, cam.m_aperture +#define MY_CAM_ID(cam) cam.m_nx, cam.m_ny, cam.m_rotation, cam.m_pos, cam.m_fov, cam.focalPlaneDistance, cam.m_aperture #define MY_SIZE_ID(cam) cam.m_nx, cam.m_ny std::optional())})> oldcamid; std::optional())})> oldsizeid; @@ -1120,9 +1129,9 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { if (sizeNeedUpdate || camNeedUpdate) { zeno::log_debug("[zeno-optix] updating camera"); - - auto lodright = glm::normalize(glm::cross(cam.m_lodfront, cam.m_lodup)); - auto lodup = glm::normalize(glm::cross(lodright, cam.m_lodfront)); + auto lodright = cam.m_rotation * glm::vec3(1, 0, 0); + auto lodup = cam.m_rotation * glm::vec3(0, 1, 0); + auto lodfront = cam.m_rotation * glm::vec3(0, 0, -1); std::random_device rd; std::mt19937 gen(rd()); @@ -1131,7 +1140,7 @@ struct RenderEngineOptx : 
RenderEngine, zeno::disable_copy { xinxinoptix::set_outside_random_number(dis(gen)); xinxinoptix::set_perspective(glm::value_ptr(lodright), glm::value_ptr(lodup), - glm::value_ptr(cam.m_lodfront), glm::value_ptr(cam.m_lodcenter), + glm::value_ptr(lodfront), glm::value_ptr(cam.m_pos), cam.getAspect(), cam.m_fov, cam.focalPlaneDistance, cam.m_aperture); xinxinoptix::set_physical_camera_param( cam.zOptixCameraSettingInfo.aperture, diff --git a/zenovis/src/zhxx/RenderEngineZhxx.cpp b/zenovis/src/zhxx/RenderEngineZhxx.cpp index f95e8f0649..df49189ed5 100644 --- a/zenovis/src/zhxx/RenderEngineZhxx.cpp +++ b/zenovis/src/zhxx/RenderEngineZhxx.cpp @@ -85,7 +85,6 @@ struct RenderEngineZhxx : RenderEngine, zeno::disable_copy { auto guard = setupState(); auto const &cam = *scene->camera; auto const &opt = *scene->drawOptions; - auto const &zxx = cam.m_zxx; if (!giWasEnable && opt.enable_gi) { giNeedUpdate = true; @@ -106,8 +105,8 @@ struct RenderEngineZhxx : RenderEngine, zeno::disable_copy { zenvis::setDOF(cam.m_dof); zenvis::setAperature(cam.m_aperture); zenvis::set_window_size(cam.m_nx, cam.m_ny); - zenvis::look_perspective(zxx.cx, zxx.cy, zxx.cz, zxx.theta, - zxx.phi, zxx.radius, zxx.fov, zxx.ortho_mode); +// zenvis::look_perspective(zxx.cx, zxx.cy, zxx.cz, zxx.theta, +// zxx.phi, zxx.radius, zxx.fov, zxx.ortho_mode); int targetFBO = 0; CHECK_GL(glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &targetFBO)); CHECK_GL(glClearColor(scene->drawOptions->bgcolor.r, scene->drawOptions->bgcolor.g, diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index b79153f4f0..4750b192ca 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -441,6 +441,7 @@ extern "C" __global__ void __closesthit__radiance() //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); MatOutput mats = optixDirectCall( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs ); prd->mask_value = mats.mask_value; + 
prd->click_pos = P; if (prd->test_distance) { diff --git a/zenovis/xinxinoptix/PTKernel.cu b/zenovis/xinxinoptix/PTKernel.cu index 843ce5bf47..fdcbd92ca6 100644 --- a/zenovis/xinxinoptix/PTKernel.cu +++ b/zenovis/xinxinoptix/PTKernel.cu @@ -178,6 +178,7 @@ extern "C" __global__ void __raygen__rg() float3 tmp_normal{}; unsigned int sobolseed = subframe_index; float3 mask_value = make_float3( 0.0f ); + float3 click_pos = make_float3( 0.0f ); do{ // The center of each pixel is at fraction (0.5,0.5) @@ -262,6 +263,7 @@ extern "C" __global__ void __raygen__rg() prd.direction = ray_direction; prd.samplePdf = 1.0f; prd.mask_value = make_float3( 0.0f ); + prd.click_pos = make_float3( 0.0f ); prd.depth = 0; prd.diffDepth = 0; @@ -288,6 +290,7 @@ extern "C" __global__ void __raygen__rg() prd.alphaHit = false; traceRadiance(params.handle, ray_origin, ray_direction, _tmin_, prd.maxDistance, &prd, _mask_); + click_pos = prd.click_pos; float3 m = prd.mask_value; mask_value = mask_value + m; @@ -430,9 +433,9 @@ extern "C" __global__ void __raygen__rg() params.accum_buffer_S[ image_index ] = make_float3( accum_color_s.x,accum_color_s.y, accum_color_s.z); params.accum_buffer_T[ image_index ] = make_float3( accum_color_t.x,accum_color_t.y,accum_color_t.z); params.accum_buffer_B[ image_index ] = float_to_half(accum_color_b.x); - params.frame_buffer[ image_index ] = make_color ( accum_color ); params.frame_buffer_M[ image_index ] = float3_to_half3(accum_mask); + params.frame_buffer_P[ image_index ] = float3_to_half3(click_pos); if (params.denoise) { params.albedo_buffer[ image_index ] = tmp_albedo; diff --git a/zenovis/xinxinoptix/TraceStuff.h b/zenovis/xinxinoptix/TraceStuff.h index 18808a7dec..794a91b3ea 100644 --- a/zenovis/xinxinoptix/TraceStuff.h +++ b/zenovis/xinxinoptix/TraceStuff.h @@ -105,6 +105,7 @@ struct RadiancePRD unsigned char adepth; bool alphaHit; vec3 mask_value; + vec3 click_pos; unsigned char max_depth; uint16_t lightmask = EverythingMask; diff --git 
a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 6575430c19..f4ccbd84c2 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -299,6 +299,7 @@ struct PathTracerState raii accum_buffer_s; raii accum_buffer_t; raii accum_buffer_b; + raii frame_buffer_p; raii accum_buffer_m; raii finite_lights_ptr; @@ -635,6 +636,10 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params reinterpret_cast( &state.accum_buffer_m .reset()), params.width * params.height * sizeof( ushort3 ) ) ); + CUDA_CHECK( cudaMalloc( + reinterpret_cast( &state.frame_buffer_p .reset()), + params.width * params.height * sizeof( ushort3 ) + ) ); CUDA_CHECK( cudaMalloc( reinterpret_cast( &state.accum_buffer_b .reset()), params.width * params.height * sizeof( ushort1 ) @@ -657,6 +662,7 @@ static void handleResize( sutil::CUDAOutputBuffer& output_buffer, Params state.params.accum_buffer_S = (float3*)(CUdeviceptr)state.accum_buffer_s; state.params.accum_buffer_T = (float3*)(CUdeviceptr)state.accum_buffer_t; state.params.frame_buffer_M = (ushort3*)(CUdeviceptr)state.accum_buffer_m; + state.params.frame_buffer_P = (ushort3*)(CUdeviceptr)state.frame_buffer_p; state.params.accum_buffer_B = (ushort1*)(CUdeviceptr)state.accum_buffer_b; state.params.subframe_index = 0; } @@ -3629,7 +3635,6 @@ void set_window_size(int nx, int ny) { camera_changed = true; resize_dirty = true; } - void set_physical_camera_param(float aperture, float shutter_speed, float iso, bool aces, bool exposure) { state.params.physical_camera_aperture = aperture; state.params.physical_camera_shutter_speed = shutter_speed; @@ -3732,6 +3737,16 @@ std::vector optixgetimg_extra2(std::string name, int w, int h) { tex_data[i * 3 + 2] = v.z; } } + else if (name == "pos") { + std::vector temp_buffer(w * h); + cudaMemcpy(temp_buffer.data(), (void*)state.frame_buffer_p.handle, sizeof(ushort3) * temp_buffer.size(), cudaMemcpyDeviceToHost); + for (auto 
i = 0; i < temp_buffer.size(); i++) { + float3 v = toFloat(temp_buffer[i]); + tex_data[i * 3 + 0] = v.x; + tex_data[i * 3 + 1] = v.y; + tex_data[i * 3 + 2] = v.z; + } + } else if (name == "color") { cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_p.handle, sizeof(float) * tex_data.size(), cudaMemcpyDeviceToHost); } @@ -3776,6 +3791,9 @@ std::vector optixgetimg_extra3(std::string name, int w, int h) { else if (name == "mask") { cudaMemcpy(tex_data.data(), (void*)state.accum_buffer_m.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost); } + else if (name == "pos") { + cudaMemcpy(tex_data.data(), (void*)state.frame_buffer_p.handle, sizeof(half) * tex_data.size(), cudaMemcpyDeviceToHost); + } else if (name == "color") { std::vector temp_buffer(w * h * 3); cudaMemcpy(temp_buffer.data(), (void*)state.accum_buffer_p.handle, sizeof(temp_buffer[0]) * temp_buffer.size(), cudaMemcpyDeviceToHost); @@ -3789,6 +3807,16 @@ std::vector optixgetimg_extra3(std::string name, int w, int h) { zeno::image_flip_vertical((ushort3*)tex_data.data(), w, h); return tex_data; } + +glm::vec3 get_click_pos(int x, int y) { + int w = state.params.width; + int h = state.params.height; + auto frame_buffer_pos = optixgetimg_extra2("pos", w, h); + auto index = x + (h - 1 - y) * w; + auto posWS = ((glm::vec3*)frame_buffer_pos.data())[index]; + return posWS; +} + static void save_exr(float3* ptr, int w, int h, std::string path) { std::vector data(w * h); std::copy_n(ptr, w * h, data.data()); diff --git a/zenovis/xinxinoptix/optixPathTracer.h b/zenovis/xinxinoptix/optixPathTracer.h index 636366da22..a5c4e83d75 100644 --- a/zenovis/xinxinoptix/optixPathTracer.h +++ b/zenovis/xinxinoptix/optixPathTracer.h @@ -161,6 +161,7 @@ struct Params ushort1* accum_buffer_B; uchar4* frame_buffer; ushort3* frame_buffer_M; + ushort3* frame_buffer_P; float3* debug_buffer; float3* albedo_buffer; diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 
a75af3a313..51c54320ca 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -6,6 +6,7 @@ #include #include +#include #include "optixSphere.h" #include "zeno/utils/vec.h" #include "zeno/types/LightObject.h" @@ -66,6 +67,7 @@ void load_object(std::string const &key, std::string const &mtlid, const std::st void unload_object(std::string const &key); void load_inst(const std::string &key, const std::string &instID, const std::string &onbType, std::size_t numInsts, const float *pos, const float *nrm, const float *uv, const float *clr, const float *tang); void unload_inst(const std::string &key); +glm::vec3 get_click_pos(int x, int y); struct LightDat { std::vector v0; From 3874253c6992bc9e06f026e460d1acbe1668d230 Mon Sep 17 00:00:00 2001 From: iaomw Date: Tue, 2 Jul 2024 15:57:55 +0800 Subject: [PATCH 083/244] Texture BlockCompression (#1951) * update stbi * bc3 + bc4 * BCX --- zeno/include/zeno/types/TextureObject.h | 9 + zeno/src/nodes/mtl/MakeTexture.cpp | 2 + zeno/src/nodes/mtl/ShaderTexture.cpp | 4 +- zenovis/src/optx/RenderEngineOptx.cpp | 8 +- zenovis/stbi/include/stb_dxt.h | 719 ++ zenovis/stbi/include/stb_image.h | 3163 +++-- zenovis/stbi/include/tinyexr.h | 13315 ---------------------- zenovis/stbi/src/stbi.c | 3 + zenovis/stbi/src/tinyexr.cpp | 2 - zenovis/xinxinoptix/BCX.h | 80 + zenovis/xinxinoptix/CMakeLists.txt | 2 + zenovis/xinxinoptix/OptiXStuff.h | 142 +- 12 files changed, 3121 insertions(+), 14328 deletions(-) create mode 100644 zenovis/stbi/include/stb_dxt.h delete mode 100644 zenovis/stbi/include/tinyexr.h delete mode 100644 zenovis/stbi/src/tinyexr.cpp create mode 100644 zenovis/xinxinoptix/BCX.h diff --git a/zeno/include/zeno/types/TextureObject.h b/zeno/include/zeno/types/TextureObject.h index 6131e4db18..229d587707 100644 --- a/zeno/include/zeno/types/TextureObject.h +++ b/zeno/include/zeno/types/TextureObject.h @@ -35,6 +35,8 @@ namespace zeno TexFilterEnum minFilter; TexFilterEnum magFilter; + bool 
blockCompression; + size_t serializeSize() { size_t size{0}; @@ -49,6 +51,7 @@ namespace zeno size += sizeof(minFilter); size += sizeof(magFilter); + size += sizeof(blockCompression); return size; } @@ -75,6 +78,9 @@ namespace zeno memcpy(str.data() + i, &magFilter, sizeof(magFilter)); i += sizeof(magFilter); + memcpy(str.data() + i, &blockCompression, sizeof(blockCompression)); + i += sizeof(blockCompression); + return str; } @@ -103,6 +109,9 @@ namespace zeno memcpy(&(tex.magFilter), str.data() + i, sizeof(magFilter)); i += sizeof(magFilter); + memcpy(&(tex.blockCompression), str.data() + i, sizeof(blockCompression)); + i += sizeof(blockCompression); + return tex; } diff --git a/zeno/src/nodes/mtl/MakeTexture.cpp b/zeno/src/nodes/mtl/MakeTexture.cpp index d5dd75e2c5..322f575bdd 100644 --- a/zeno/src/nodes/mtl/MakeTexture.cpp +++ b/zeno/src/nodes/mtl/MakeTexture.cpp @@ -88,6 +88,7 @@ namespace zeno #undef SET_TEX_FILTER + tex->blockCompression = get_input2("blockCompression"); set_output("tex", std::move(tex)); } }; @@ -102,6 +103,7 @@ namespace zeno {(std::string) "enum " + texWrapping, "wrapT", "REPEAT"}, {(std::string) "enum " + texFiltering, "minFilter", "LINEAR"}, {(std::string) "enum " + texFiltering, "magFilter", "LINEAR"}, + {"bool", "blockCompression", "false"} }, { {"texture", "tex"}, diff --git a/zeno/src/nodes/mtl/ShaderTexture.cpp b/zeno/src/nodes/mtl/ShaderTexture.cpp index 59e3692122..4ec10b750a 100644 --- a/zeno/src/nodes/mtl/ShaderTexture.cpp +++ b/zeno/src/nodes/mtl/ShaderTexture.cpp @@ -262,6 +262,7 @@ struct SmartTexture2D : ShaderNodeClone stbi_flip_vertically_on_write(false); stbi_write_png(tex->path.c_str(), width, height, 3, col.data(), 0); } + tex->blockCompression = get_input2("blockCompression"); #define SET_TEX_WRAP(TEX, WRAP) \ if (WRAP == "REPEAT") \ @@ -351,7 +352,8 @@ ZENDEFNODE(SmartTexture2D, { {"vec2f", "uvtiling", "1,1"}, {"vec4f", "value", "0,0,0,0"}, {"enum float vec2 vec3 vec4 R G B A", "type", "vec3"}, - {"enum raw srgb 
normal_map", "post_process", "raw"} + {"enum raw srgb normal_map", "post_process", "raw"}, + {"bool", "blockCompression", "false"} }, { {"shader", "out"}, diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index bfae7ae3af..3b808a3401 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -1227,14 +1227,14 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { // Auto unload unused texure { - std::set realNeedTexPaths; + std::map realNeedTexPaths; for(auto const &[matkey, mtldet] : matMap) { if (mtldet->parameters.find("vol") != std::string::npos || cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 || cachedSphereMaterials.count(mtldet->mtlidkey) > 0) { for(auto& tex: mtldet->tex2Ds) { - realNeedTexPaths.insert(tex->path); + realNeedTexPaths.insert( {tex->path, tex->blockCompression} ); } } @@ -1245,7 +1245,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { // realNeedTexPaths.emplace_back(ld.profileKey); // } if (ld.textureKey.size()) { - realNeedTexPaths.insert(ld.textureKey); + realNeedTexPaths.insert( {ld.textureKey, false}); } } std::vector needToRemoveTexPaths; @@ -1265,7 +1265,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { OptixUtil::removeTexture(need_remove_tex); } for (const auto& realNeedTexPath: realNeedTexPaths) { - OptixUtil::addTexture(realNeedTexPath); + OptixUtil::addTexture(realNeedTexPath.first, realNeedTexPath.second); } } for(auto const &[matkey, mtldet] : matMap) diff --git a/zenovis/stbi/include/stb_dxt.h b/zenovis/stbi/include/stb_dxt.h new file mode 100644 index 0000000000..6150a87f08 --- /dev/null +++ b/zenovis/stbi/include/stb_dxt.h @@ -0,0 +1,719 @@ +// stb_dxt.h - v1.12 - DXT1/DXT5 compressor - public domain +// original by fabian "ryg" giesen - ported to C by stb +// use '#define STB_DXT_IMPLEMENTATION' before including to create the implementation +// +// USAGE: +// call stb_compress_dxt_block() for every block 
(you must pad) +// source should be a 4x4 block of RGBA data in row-major order; +// Alpha channel is not stored if you specify alpha=0 (but you +// must supply some constant alpha in the alpha channel). +// You can turn on dithering and "high quality" using mode. +// +// version history: +// v1.12 - (ryg) fix bug in single-color table generator +// v1.11 - (ryg) avoid racy global init, better single-color tables, remove dither +// v1.10 - (i.c) various small quality improvements +// v1.09 - (stb) update documentation re: surprising alpha channel requirement +// v1.08 - (stb) fix bug in dxt-with-alpha block +// v1.07 - (stb) bc4; allow not using libc; add STB_DXT_STATIC +// v1.06 - (stb) fix to known-broken 1.05 +// v1.05 - (stb) support bc5/3dc (Arvids Kokins), use extern "C" in C++ (Pavel Krajcevski) +// v1.04 - (ryg) default to no rounding bias for lerped colors (as per S3TC/DX10 spec); +// single color match fix (allow for inexact color interpolation); +// optimal DXT5 index finder; "high quality" mode that runs multiple refinement steps. +// v1.03 - (stb) endianness support +// v1.02 - (stb) fix alpha encoding bug +// v1.01 - (stb) fix bug converting to RGB that messed up quality, thanks ryg & cbloom +// v1.00 - (stb) first release +// +// contributors: +// Rich Geldreich (more accurate index selection) +// Kevin Schmidt (#defines for "freestanding" compilation) +// github:ppiastucki (BC4 support) +// Ignacio Castano - improve DXT endpoint quantization +// Alan Hickman - static table initialization +// +// LICENSE +// +// See end of file for license information. + +#ifndef STB_INCLUDE_STB_DXT_H +#define STB_INCLUDE_STB_DXT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef STB_DXT_STATIC +#define STBDDEF static +#else +#define STBDDEF extern +#endif + +// compression mode (bitflags) +#define STB_DXT_NORMAL 0 +#define STB_DXT_DITHER 1 // use dithering. was always dubious, now deprecated. does nothing! 
+#define STB_DXT_HIGHQUAL 2 // high quality mode, does two refinement steps instead of 1. ~30-40% slower. + +STBDDEF void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src_rgba_four_bytes_per_pixel, int alpha, int mode); +STBDDEF void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src_r_one_byte_per_pixel); +STBDDEF void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src_rg_two_byte_per_pixel); + +#define STB_COMPRESS_DXT_BLOCK + +#ifdef __cplusplus +} +#endif +#endif // STB_INCLUDE_STB_DXT_H + +#ifdef STB_DXT_IMPLEMENTATION + +// configuration options for DXT encoder. set them in the project/makefile or just define +// them at the top. + +// STB_DXT_USE_ROUNDING_BIAS +// use a rounding bias during color interpolation. this is closer to what "ideal" +// interpolation would do but doesn't match the S3TC/DX10 spec. old versions (pre-1.03) +// implicitly had this turned on. +// +// in case you're targeting a specific type of hardware (e.g. console programmers): +// NVidia and Intel GPUs (as of 2010) as well as DX9 ref use DXT decoders that are closer +// to STB_DXT_USE_ROUNDING_BIAS. AMD/ATI, S3 and DX10 ref are closer to rounding with no bias. +// you also see "(a*5 + b*3) / 8" on some old GPU designs. 
+// #define STB_DXT_USE_ROUNDING_BIAS + +#include + +#if !defined(STBD_FABS) +#include +#endif + +#ifndef STBD_FABS +#define STBD_FABS(x) fabs(x) +#endif + +static const unsigned char stb__OMatch5[256][2] = { + { 0, 0 }, { 0, 0 }, { 0, 1 }, { 0, 1 }, { 1, 0 }, { 1, 0 }, { 1, 0 }, { 1, 1 }, + { 1, 1 }, { 1, 1 }, { 1, 2 }, { 0, 4 }, { 2, 1 }, { 2, 1 }, { 2, 1 }, { 2, 2 }, + { 2, 2 }, { 2, 2 }, { 2, 3 }, { 1, 5 }, { 3, 2 }, { 3, 2 }, { 4, 0 }, { 3, 3 }, + { 3, 3 }, { 3, 3 }, { 3, 4 }, { 3, 4 }, { 3, 4 }, { 3, 5 }, { 4, 3 }, { 4, 3 }, + { 5, 2 }, { 4, 4 }, { 4, 4 }, { 4, 5 }, { 4, 5 }, { 5, 4 }, { 5, 4 }, { 5, 4 }, + { 6, 3 }, { 5, 5 }, { 5, 5 }, { 5, 6 }, { 4, 8 }, { 6, 5 }, { 6, 5 }, { 6, 5 }, + { 6, 6 }, { 6, 6 }, { 6, 6 }, { 6, 7 }, { 5, 9 }, { 7, 6 }, { 7, 6 }, { 8, 4 }, + { 7, 7 }, { 7, 7 }, { 7, 7 }, { 7, 8 }, { 7, 8 }, { 7, 8 }, { 7, 9 }, { 8, 7 }, + { 8, 7 }, { 9, 6 }, { 8, 8 }, { 8, 8 }, { 8, 9 }, { 8, 9 }, { 9, 8 }, { 9, 8 }, + { 9, 8 }, { 10, 7 }, { 9, 9 }, { 9, 9 }, { 9, 10 }, { 8, 12 }, { 10, 9 }, { 10, 9 }, + { 10, 9 }, { 10, 10 }, { 10, 10 }, { 10, 10 }, { 10, 11 }, { 9, 13 }, { 11, 10 }, { 11, 10 }, + { 12, 8 }, { 11, 11 }, { 11, 11 }, { 11, 11 }, { 11, 12 }, { 11, 12 }, { 11, 12 }, { 11, 13 }, + { 12, 11 }, { 12, 11 }, { 13, 10 }, { 12, 12 }, { 12, 12 }, { 12, 13 }, { 12, 13 }, { 13, 12 }, + { 13, 12 }, { 13, 12 }, { 14, 11 }, { 13, 13 }, { 13, 13 }, { 13, 14 }, { 12, 16 }, { 14, 13 }, + { 14, 13 }, { 14, 13 }, { 14, 14 }, { 14, 14 }, { 14, 14 }, { 14, 15 }, { 13, 17 }, { 15, 14 }, + { 15, 14 }, { 16, 12 }, { 15, 15 }, { 15, 15 }, { 15, 15 }, { 15, 16 }, { 15, 16 }, { 15, 16 }, + { 15, 17 }, { 16, 15 }, { 16, 15 }, { 17, 14 }, { 16, 16 }, { 16, 16 }, { 16, 17 }, { 16, 17 }, + { 17, 16 }, { 17, 16 }, { 17, 16 }, { 18, 15 }, { 17, 17 }, { 17, 17 }, { 17, 18 }, { 16, 20 }, + { 18, 17 }, { 18, 17 }, { 18, 17 }, { 18, 18 }, { 18, 18 }, { 18, 18 }, { 18, 19 }, { 17, 21 }, + { 19, 18 }, { 19, 18 }, { 20, 16 }, { 19, 19 }, { 19, 19 }, { 19, 19 }, { 19, 20 }, 
{ 19, 20 }, + { 19, 20 }, { 19, 21 }, { 20, 19 }, { 20, 19 }, { 21, 18 }, { 20, 20 }, { 20, 20 }, { 20, 21 }, + { 20, 21 }, { 21, 20 }, { 21, 20 }, { 21, 20 }, { 22, 19 }, { 21, 21 }, { 21, 21 }, { 21, 22 }, + { 20, 24 }, { 22, 21 }, { 22, 21 }, { 22, 21 }, { 22, 22 }, { 22, 22 }, { 22, 22 }, { 22, 23 }, + { 21, 25 }, { 23, 22 }, { 23, 22 }, { 24, 20 }, { 23, 23 }, { 23, 23 }, { 23, 23 }, { 23, 24 }, + { 23, 24 }, { 23, 24 }, { 23, 25 }, { 24, 23 }, { 24, 23 }, { 25, 22 }, { 24, 24 }, { 24, 24 }, + { 24, 25 }, { 24, 25 }, { 25, 24 }, { 25, 24 }, { 25, 24 }, { 26, 23 }, { 25, 25 }, { 25, 25 }, + { 25, 26 }, { 24, 28 }, { 26, 25 }, { 26, 25 }, { 26, 25 }, { 26, 26 }, { 26, 26 }, { 26, 26 }, + { 26, 27 }, { 25, 29 }, { 27, 26 }, { 27, 26 }, { 28, 24 }, { 27, 27 }, { 27, 27 }, { 27, 27 }, + { 27, 28 }, { 27, 28 }, { 27, 28 }, { 27, 29 }, { 28, 27 }, { 28, 27 }, { 29, 26 }, { 28, 28 }, + { 28, 28 }, { 28, 29 }, { 28, 29 }, { 29, 28 }, { 29, 28 }, { 29, 28 }, { 30, 27 }, { 29, 29 }, + { 29, 29 }, { 29, 30 }, { 29, 30 }, { 30, 29 }, { 30, 29 }, { 30, 29 }, { 30, 30 }, { 30, 30 }, + { 30, 30 }, { 30, 31 }, { 30, 31 }, { 31, 30 }, { 31, 30 }, { 31, 30 }, { 31, 31 }, { 31, 31 }, +}; +static const unsigned char stb__OMatch6[256][2] = { + { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 }, { 1, 1 }, { 1, 2 }, { 2, 1 }, { 2, 2 }, + { 2, 2 }, { 2, 3 }, { 3, 2 }, { 3, 3 }, { 3, 3 }, { 3, 4 }, { 4, 3 }, { 4, 4 }, + { 4, 4 }, { 4, 5 }, { 5, 4 }, { 5, 5 }, { 5, 5 }, { 5, 6 }, { 6, 5 }, { 6, 6 }, + { 6, 6 }, { 6, 7 }, { 7, 6 }, { 7, 7 }, { 7, 7 }, { 7, 8 }, { 8, 7 }, { 8, 8 }, + { 8, 8 }, { 8, 9 }, { 9, 8 }, { 9, 9 }, { 9, 9 }, { 9, 10 }, { 10, 9 }, { 10, 10 }, + { 10, 10 }, { 10, 11 }, { 11, 10 }, { 8, 16 }, { 11, 11 }, { 11, 12 }, { 12, 11 }, { 9, 17 }, + { 12, 12 }, { 12, 13 }, { 13, 12 }, { 11, 16 }, { 13, 13 }, { 13, 14 }, { 14, 13 }, { 12, 17 }, + { 14, 14 }, { 14, 15 }, { 15, 14 }, { 14, 16 }, { 15, 15 }, { 15, 16 }, { 16, 14 }, { 16, 15 }, + { 17, 14 }, { 16, 16 }, { 16, 17 }, { 17, 16 
}, { 18, 15 }, { 17, 17 }, { 17, 18 }, { 18, 17 }, + { 20, 14 }, { 18, 18 }, { 18, 19 }, { 19, 18 }, { 21, 15 }, { 19, 19 }, { 19, 20 }, { 20, 19 }, + { 20, 20 }, { 20, 20 }, { 20, 21 }, { 21, 20 }, { 21, 21 }, { 21, 21 }, { 21, 22 }, { 22, 21 }, + { 22, 22 }, { 22, 22 }, { 22, 23 }, { 23, 22 }, { 23, 23 }, { 23, 23 }, { 23, 24 }, { 24, 23 }, + { 24, 24 }, { 24, 24 }, { 24, 25 }, { 25, 24 }, { 25, 25 }, { 25, 25 }, { 25, 26 }, { 26, 25 }, + { 26, 26 }, { 26, 26 }, { 26, 27 }, { 27, 26 }, { 24, 32 }, { 27, 27 }, { 27, 28 }, { 28, 27 }, + { 25, 33 }, { 28, 28 }, { 28, 29 }, { 29, 28 }, { 27, 32 }, { 29, 29 }, { 29, 30 }, { 30, 29 }, + { 28, 33 }, { 30, 30 }, { 30, 31 }, { 31, 30 }, { 30, 32 }, { 31, 31 }, { 31, 32 }, { 32, 30 }, + { 32, 31 }, { 33, 30 }, { 32, 32 }, { 32, 33 }, { 33, 32 }, { 34, 31 }, { 33, 33 }, { 33, 34 }, + { 34, 33 }, { 36, 30 }, { 34, 34 }, { 34, 35 }, { 35, 34 }, { 37, 31 }, { 35, 35 }, { 35, 36 }, + { 36, 35 }, { 36, 36 }, { 36, 36 }, { 36, 37 }, { 37, 36 }, { 37, 37 }, { 37, 37 }, { 37, 38 }, + { 38, 37 }, { 38, 38 }, { 38, 38 }, { 38, 39 }, { 39, 38 }, { 39, 39 }, { 39, 39 }, { 39, 40 }, + { 40, 39 }, { 40, 40 }, { 40, 40 }, { 40, 41 }, { 41, 40 }, { 41, 41 }, { 41, 41 }, { 41, 42 }, + { 42, 41 }, { 42, 42 }, { 42, 42 }, { 42, 43 }, { 43, 42 }, { 40, 48 }, { 43, 43 }, { 43, 44 }, + { 44, 43 }, { 41, 49 }, { 44, 44 }, { 44, 45 }, { 45, 44 }, { 43, 48 }, { 45, 45 }, { 45, 46 }, + { 46, 45 }, { 44, 49 }, { 46, 46 }, { 46, 47 }, { 47, 46 }, { 46, 48 }, { 47, 47 }, { 47, 48 }, + { 48, 46 }, { 48, 47 }, { 49, 46 }, { 48, 48 }, { 48, 49 }, { 49, 48 }, { 50, 47 }, { 49, 49 }, + { 49, 50 }, { 50, 49 }, { 52, 46 }, { 50, 50 }, { 50, 51 }, { 51, 50 }, { 53, 47 }, { 51, 51 }, + { 51, 52 }, { 52, 51 }, { 52, 52 }, { 52, 52 }, { 52, 53 }, { 53, 52 }, { 53, 53 }, { 53, 53 }, + { 53, 54 }, { 54, 53 }, { 54, 54 }, { 54, 54 }, { 54, 55 }, { 55, 54 }, { 55, 55 }, { 55, 55 }, + { 55, 56 }, { 56, 55 }, { 56, 56 }, { 56, 56 }, { 56, 57 }, { 57, 56 }, { 57, 57 }, 
{ 57, 57 }, + { 57, 58 }, { 58, 57 }, { 58, 58 }, { 58, 58 }, { 58, 59 }, { 59, 58 }, { 59, 59 }, { 59, 59 }, + { 59, 60 }, { 60, 59 }, { 60, 60 }, { 60, 60 }, { 60, 61 }, { 61, 60 }, { 61, 61 }, { 61, 61 }, + { 61, 62 }, { 62, 61 }, { 62, 62 }, { 62, 62 }, { 62, 63 }, { 63, 62 }, { 63, 63 }, { 63, 63 }, +}; + +static int stb__Mul8Bit(int a, int b) +{ + int t = a*b + 128; + return (t + (t >> 8)) >> 8; +} + +static void stb__From16Bit(unsigned char *out, unsigned short v) +{ + int rv = (v & 0xf800) >> 11; + int gv = (v & 0x07e0) >> 5; + int bv = (v & 0x001f) >> 0; + + // expand to 8 bits via bit replication + out[0] = (rv * 33) >> 2; + out[1] = (gv * 65) >> 4; + out[2] = (bv * 33) >> 2; + out[3] = 0; +} + +static unsigned short stb__As16Bit(int r, int g, int b) +{ + return (unsigned short)((stb__Mul8Bit(r,31) << 11) + (stb__Mul8Bit(g,63) << 5) + stb__Mul8Bit(b,31)); +} + +// linear interpolation at 1/3 point between a and b, using desired rounding type +static int stb__Lerp13(int a, int b) +{ +#ifdef STB_DXT_USE_ROUNDING_BIAS + // with rounding bias + return a + stb__Mul8Bit(b-a, 0x55); +#else + // without rounding bias + // replace "/ 3" by "* 0xaaab) >> 17" if your compiler sucks or you really need every ounce of speed. 
+ return (2*a + b) / 3; +#endif +} + +// lerp RGB color +static void stb__Lerp13RGB(unsigned char *out, unsigned char *p1, unsigned char *p2) +{ + out[0] = (unsigned char)stb__Lerp13(p1[0], p2[0]); + out[1] = (unsigned char)stb__Lerp13(p1[1], p2[1]); + out[2] = (unsigned char)stb__Lerp13(p1[2], p2[2]); +} + +/****************************************************************************/ + +static void stb__EvalColors(unsigned char *color,unsigned short c0,unsigned short c1) +{ + stb__From16Bit(color+ 0, c0); + stb__From16Bit(color+ 4, c1); + stb__Lerp13RGB(color+ 8, color+0, color+4); + stb__Lerp13RGB(color+12, color+4, color+0); +} + +// The color matching function +static unsigned int stb__MatchColorsBlock(unsigned char *block, unsigned char *color) +{ + unsigned int mask = 0; + int dirr = color[0*4+0] - color[1*4+0]; + int dirg = color[0*4+1] - color[1*4+1]; + int dirb = color[0*4+2] - color[1*4+2]; + int dots[16]; + int stops[4]; + int i; + int c0Point, halfPoint, c3Point; + + for(i=0;i<16;i++) + dots[i] = block[i*4+0]*dirr + block[i*4+1]*dirg + block[i*4+2]*dirb; + + for(i=0;i<4;i++) + stops[i] = color[i*4+0]*dirr + color[i*4+1]*dirg + color[i*4+2]*dirb; + + // think of the colors as arranged on a line; project point onto that line, then choose + // next color out of available ones. we compute the crossover points for "best color in top + // half"/"best in bottom half" and then the same inside that subinterval. + // + // relying on this 1d approximation isn't always optimal in terms of euclidean distance, + // but it's very close and a lot faster. + // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html + + c0Point = (stops[1] + stops[3]); + halfPoint = (stops[3] + stops[2]); + c3Point = (stops[2] + stops[0]); + + for (i=15;i>=0;i--) { + int dot = dots[i]*2; + mask <<= 2; + + if(dot < halfPoint) + mask |= (dot < c0Point) ? 1 : 3; + else + mask |= (dot < c3Point) ? 2 : 0; + } + + return mask; +} + +// The color optimization function. 
(Clever code, part 1) +static void stb__OptimizeColorsBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16) +{ + int mind,maxd; + unsigned char *minp, *maxp; + double magn; + int v_r,v_g,v_b; + static const int nIterPower = 4; + float covf[6],vfr,vfg,vfb; + + // determine color distribution + int cov[6]; + int mu[3],min[3],max[3]; + int ch,i,iter; + + for(ch=0;ch<3;ch++) + { + const unsigned char *bp = ((const unsigned char *) block) + ch; + int muv,minv,maxv; + + muv = minv = maxv = bp[0]; + for(i=4;i<64;i+=4) + { + muv += bp[i]; + if (bp[i] < minv) minv = bp[i]; + else if (bp[i] > maxv) maxv = bp[i]; + } + + mu[ch] = (muv + 8) >> 4; + min[ch] = minv; + max[ch] = maxv; + } + + // determine covariance matrix + for (i=0;i<6;i++) + cov[i] = 0; + + for (i=0;i<16;i++) + { + int r = block[i*4+0] - mu[0]; + int g = block[i*4+1] - mu[1]; + int b = block[i*4+2] - mu[2]; + + cov[0] += r*r; + cov[1] += r*g; + cov[2] += r*b; + cov[3] += g*g; + cov[4] += g*b; + cov[5] += b*b; + } + + // convert covariance matrix to float, find principal axis via power iter + for(i=0;i<6;i++) + covf[i] = cov[i] / 255.0f; + + vfr = (float) (max[0] - min[0]); + vfg = (float) (max[1] - min[1]); + vfb = (float) (max[2] - min[2]); + + for(iter=0;iter magn) magn = STBD_FABS(vfg); + if (STBD_FABS(vfb) > magn) magn = STBD_FABS(vfb); + + if(magn < 4.0f) { // too small, default to luminance + v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000. 
+ v_g = 587; + v_b = 114; + } else { + magn = 512.0 / magn; + v_r = (int) (vfr * magn); + v_g = (int) (vfg * magn); + v_b = (int) (vfb * magn); + } + + minp = maxp = block; + mind = maxd = block[0]*v_r + block[1]*v_g + block[2]*v_b; + // Pick colors at extreme points + for(i=1;i<16;i++) + { + int dot = block[i*4+0]*v_r + block[i*4+1]*v_g + block[i*4+2]*v_b; + + if (dot < mind) { + mind = dot; + minp = block+i*4; + } + + if (dot > maxd) { + maxd = dot; + maxp = block+i*4; + } + } + + *pmax16 = stb__As16Bit(maxp[0],maxp[1],maxp[2]); + *pmin16 = stb__As16Bit(minp[0],minp[1],minp[2]); +} + +static const float stb__midpoints5[32] = { + 0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f, + 0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f +}; + +static const float stb__midpoints6[64] = { + 0.007843f, 0.023529f, 0.039216f, 0.054902f, 0.070588f, 0.086275f, 0.101961f, 0.117647f, 0.133333f, 0.149020f, 0.164706f, 0.180392f, 0.196078f, 0.211765f, 0.227451f, 0.245098f, + 0.262745f, 0.278431f, 0.294118f, 0.309804f, 0.325490f, 0.341176f, 0.356863f, 0.372549f, 0.388235f, 0.403922f, 0.419608f, 0.435294f, 0.450980f, 0.466667f, 0.482353f, 0.500000f, + 0.517647f, 0.533333f, 0.549020f, 0.564706f, 0.580392f, 0.596078f, 0.611765f, 0.627451f, 0.643137f, 0.658824f, 0.674510f, 0.690196f, 0.705882f, 0.721569f, 0.737255f, 0.754902f, + 0.772549f, 0.788235f, 0.803922f, 0.819608f, 0.835294f, 0.850980f, 0.866667f, 0.882353f, 0.898039f, 0.913725f, 0.929412f, 0.945098f, 0.960784f, 0.976471f, 0.992157f, 1.0f +}; + +static unsigned short stb__Quantize5(float x) +{ + unsigned short q; + x = x < 0 ? 0 : x > 1 ? 
1 : x; // saturate + q = (unsigned short)(x * 31); + q += (x > stb__midpoints5[q]); + return q; +} + +static unsigned short stb__Quantize6(float x) +{ + unsigned short q; + x = x < 0 ? 0 : x > 1 ? 1 : x; // saturate + q = (unsigned short)(x * 63); + q += (x > stb__midpoints6[q]); + return q; +} + +// The refinement function. (Clever code, part 2) +// Tries to optimize colors to suit block contents better. +// (By solving a least squares system via normal equations+Cramer's rule) +static int stb__RefineBlock(unsigned char *block, unsigned short *pmax16, unsigned short *pmin16, unsigned int mask) +{ + static const int w1Tab[4] = { 3,0,2,1 }; + static const int prods[4] = { 0x090000,0x000900,0x040102,0x010402 }; + // ^some magic to save a lot of multiplies in the accumulating loop... + // (precomputed products of weights for least squares system, accumulated inside one 32-bit register) + + float f; + unsigned short oldMin, oldMax, min16, max16; + int i, akku = 0, xx,xy,yy; + int At1_r,At1_g,At1_b; + int At2_r,At2_g,At2_b; + unsigned int cm = mask; + + oldMin = *pmin16; + oldMax = *pmax16; + + if((mask ^ (mask<<2)) < 4) // all pixels have the same index? 
+ { + // yes, linear system would be singular; solve using optimal + // single-color match on average color + int r = 8, g = 8, b = 8; + for (i=0;i<16;++i) { + r += block[i*4+0]; + g += block[i*4+1]; + b += block[i*4+2]; + } + + r >>= 4; g >>= 4; b >>= 4; + + max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1]; + } else { + At1_r = At1_g = At1_b = 0; + At2_r = At2_g = At2_b = 0; + for (i=0;i<16;++i,cm>>=2) { + int step = cm&3; + int w1 = w1Tab[step]; + int r = block[i*4+0]; + int g = block[i*4+1]; + int b = block[i*4+2]; + + akku += prods[step]; + At1_r += w1*r; + At1_g += w1*g; + At1_b += w1*b; + At2_r += r; + At2_g += g; + At2_b += b; + } + + At2_r = 3*At2_r - At1_r; + At2_g = 3*At2_g - At1_g; + At2_b = 3*At2_b - At1_b; + + // extract solutions and decide solvability + xx = akku >> 16; + yy = (akku >> 8) & 0xff; + xy = (akku >> 0) & 0xff; + + f = 3.0f / 255.0f / (xx*yy - xy*xy); + + max16 = stb__Quantize5((At1_r*yy - At2_r * xy) * f) << 11; + max16 |= stb__Quantize6((At1_g*yy - At2_g * xy) * f) << 5; + max16 |= stb__Quantize5((At1_b*yy - At2_b * xy) * f) << 0; + + min16 = stb__Quantize5((At2_r*xx - At1_r * xy) * f) << 11; + min16 |= stb__Quantize6((At2_g*xx - At1_g * xy) * f) << 5; + min16 |= stb__Quantize5((At2_b*xx - At1_b * xy) * f) << 0; + } + + *pmin16 = min16; + *pmax16 = max16; + return oldMin != min16 || oldMax != max16; +} + +// Color block compression +static void stb__CompressColorBlock(unsigned char *dest, unsigned char *block, int mode) +{ + unsigned int mask; + int i; + int refinecount; + unsigned short max16, min16; + unsigned char color[4*4]; + + refinecount = (mode & STB_DXT_HIGHQUAL) ? 
2 : 1; + + // check if block is constant + for (i=1;i<16;i++) + if (((unsigned int *) block)[i] != ((unsigned int *) block)[0]) + break; + + if(i == 16) { // constant color + int r = block[0], g = block[1], b = block[2]; + mask = 0xaaaaaaaa; + max16 = (stb__OMatch5[r][0]<<11) | (stb__OMatch6[g][0]<<5) | stb__OMatch5[b][0]; + min16 = (stb__OMatch5[r][1]<<11) | (stb__OMatch6[g][1]<<5) | stb__OMatch5[b][1]; + } else { + // first step: PCA+map along principal axis + stb__OptimizeColorsBlock(block,&max16,&min16); + if (max16 != min16) { + stb__EvalColors(color,max16,min16); + mask = stb__MatchColorsBlock(block,color); + } else + mask = 0; + + // third step: refine (multiple times if requested) + for (i=0;i> 8); + dest[2] = (unsigned char) (min16); + dest[3] = (unsigned char) (min16 >> 8); + dest[4] = (unsigned char) (mask); + dest[5] = (unsigned char) (mask >> 8); + dest[6] = (unsigned char) (mask >> 16); + dest[7] = (unsigned char) (mask >> 24); +} + +// Alpha block compression (this is easy for a change) +static void stb__CompressAlphaBlock(unsigned char *dest,unsigned char *src, int stride) +{ + int i,dist,bias,dist4,dist2,bits,mask; + + // find min/max color + int mn,mx; + mn = mx = src[0]; + + for (i=1;i<16;i++) + { + if (src[i*stride] < mn) mn = src[i*stride]; + else if (src[i*stride] > mx) mx = src[i*stride]; + } + + // encode them + dest[0] = (unsigned char)mx; + dest[1] = (unsigned char)mn; + dest += 2; + + // determine bias and emit color indices + // given the choice of mx/mn, these indices are optimal: + // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/ + dist = mx-mn; + dist4 = dist*4; + dist2 = dist*2; + bias = (dist < 8) ? (dist - 1) : (dist/2 + 2); + bias -= mn * 7; + bits = 0,mask=0; + + for (i=0;i<16;i++) { + int a = src[i*stride]*7 + bias; + int ind,t; + + // select index. this is a "linear scale" lerp factor between 0 (val=min) and 7 (val=max). + t = (a >= dist4) ? 
-1 : 0; ind = t & 4; a -= dist4 & t; + t = (a >= dist2) ? -1 : 0; ind += t & 2; a -= dist2 & t; + ind += (a >= dist); + + // turn linear scale into DXT index (0/1 are extremal pts) + ind = -ind & 7; + ind ^= (2 > ind); + + // write index + mask |= ind << bits; + if((bits += 3) >= 8) { + *dest++ = (unsigned char)mask; + mask >>= 8; + bits -= 8; + } + } +} + +void stb_compress_dxt_block(unsigned char *dest, const unsigned char *src, int alpha, int mode) +{ + unsigned char data[16][4]; + if (alpha) { + int i; + stb__CompressAlphaBlock(dest,(unsigned char*) src+3, 4); + dest += 8; + // make a new copy of the data in which alpha is opaque, + // because code uses a fast test for color constancy + memcpy(data, src, 4*16); + for (i=0; i < 16; ++i) + data[i][3] = 255; + src = &data[0][0]; + } + + stb__CompressColorBlock(dest,(unsigned char*) src,mode); +} + +void stb_compress_bc4_block(unsigned char *dest, const unsigned char *src) +{ + stb__CompressAlphaBlock(dest,(unsigned char*) src, 1); +} + +void stb_compress_bc5_block(unsigned char *dest, const unsigned char *src) +{ + stb__CompressAlphaBlock(dest,(unsigned char*) src,2); + stb__CompressAlphaBlock(dest + 8,(unsigned char*) src+1,2); +} +#endif // STB_DXT_IMPLEMENTATION + +// Compile with STB_DXT_IMPLEMENTATION and STB_DXT_GENERATE_TABLES +// defined to generate the tables above. +#ifdef STB_DXT_GENERATE_TABLES +#include + +int main() +{ + int i, j; + const char *omatch_names[] = { "stb__OMatch5", "stb__OMatch6" }; + int dequant_mults[2] = { 33*4, 65 }; // .4 fixed-point dequant multipliers + + // optimal endpoint tables + for (i = 0; i < 2; ++i) { + int dequant = dequant_mults[i]; + int size = i ? 
64 : 32; + printf("static const unsigned char %s[256][2] = {\n", omatch_names[i]); + for (int j = 0; j < 256; ++j) { + int mn, mx; + int best_mn = 0, best_mx = 0; + int best_err = 256 * 100; + for (mn=0;mn> 4; + int maxe = (mx * dequant) >> 4; + int err = abs(stb__Lerp13(maxe, mine) - j) * 100; + + // DX10 spec says that interpolation must be within 3% of "correct" result, + // add this as error term. Normally we'd expect a random distribution of + // +-1.5% error, but nowhere in the spec does it say that the error has to be + // unbiased - better safe than sorry. + err += abs(maxe - mine) * 3; + + if(err < best_err) { + best_mn = mn; + best_mx = mx; + best_err = err; + } + } + } + if ((j % 8) == 0) printf(" "); // 2 spaces, third is done below + printf(" { %2d, %2d },", best_mx, best_mn); + if ((j % 8) == 7) printf("\n"); + } + printf("};\n"); + } + + return 0; +} +#endif + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. +------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+------------------------------------------------------------------------------ +*/ diff --git a/zenovis/stbi/include/stb_image.h b/zenovis/stbi/include/stb_image.h index c891d775bb..9eedabedc4 100644 --- a/zenovis/stbi/include/stb_image.h +++ b/zenovis/stbi/include/stb_image.h @@ -1,5 +1,5 @@ -/* stb_image - v2.12 - public domain image loader - http://nothings.org/stb_image.h - no warranty implied; use at your own risk +/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb + no warranty implied; use at your own risk Do this: #define STB_IMAGE_IMPLEMENTATION @@ -21,7 +21,7 @@ avoid problematic images and only need the trivial interface JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) - PNG 1/2/4/8-bit-per-channel (16 bpc not supported) + PNG 1/2/4/8/16-bit-per-channel TGA (not sure what subset, if a subset) BMP non-1bpp, non-RLE @@ -42,136 +42,37 @@ Full documentation under "DOCUMENTATION" below. - Revision 2.00 release notes: - - - Progressive JPEG is now supported. - - - PPM and PGM binary formats are now supported, thanks to Ken Miller. - - - x86 platforms now make use of SSE2 SIMD instructions for - JPEG decoding, and ARM platforms can use NEON SIMD if requested. - This work was done by Fabian "ryg" Giesen. SSE2 is used by - default, but NEON must be enabled explicitly; see docs. - - With other JPEG optimizations included in this version, we see - 2x speedup on a JPEG on an x86 machine, and a 1.5x speedup - on a JPEG on an ARM machine, relative to previous versions of this - library. The same results will not obtain for all JPGs and for all - x86/ARM machines. (Note that progressive JPEGs are significantly - slower to decode than regular JPEGs.) This doesn't mean that this - is the fastest JPEG decoder in the land; rather, it brings it - closer to parity with standard libraries. If you want the fastest - decode, look elsewhere. (See "Philosophy" section of docs below.) 
- - See final bullet items below for more info on SIMD. - - - Added STBI_MALLOC, STBI_REALLOC, and STBI_FREE macros for replacing - the memory allocator. Unlike other STBI libraries, these macros don't - support a context parameter, so if you need to pass a context in to - the allocator, you'll have to store it in a global or a thread-local - variable. - - - Split existing STBI_NO_HDR flag into two flags, STBI_NO_HDR and - STBI_NO_LINEAR. - STBI_NO_HDR: suppress implementation of .hdr reader format - STBI_NO_LINEAR: suppress high-dynamic-range light-linear float API - - - You can suppress implementation of any of the decoders to reduce - your code footprint by #defining one or more of the following - symbols before creating the implementation. - - STBI_NO_JPEG - STBI_NO_PNG - STBI_NO_BMP - STBI_NO_PSD - STBI_NO_TGA - STBI_NO_GIF - STBI_NO_HDR - STBI_NO_PIC - STBI_NO_PNM (.ppm and .pgm) - - - You can request *only* certain decoders and suppress all other ones - (this will be more forward-compatible, as addition of new decoders - doesn't require you to disable them explicitly): - - STBI_ONLY_JPEG - STBI_ONLY_PNG - STBI_ONLY_BMP - STBI_ONLY_PSD - STBI_ONLY_TGA - STBI_ONLY_GIF - STBI_ONLY_HDR - STBI_ONLY_PIC - STBI_ONLY_PNM (.ppm and .pgm) - - Note that you can define multiples of these, and you will get all - of them ("only x" and "only y" is interpreted to mean "only x&y"). - - - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still - want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB - - - Compilation of all SIMD code can be suppressed with - #define STBI_NO_SIMD - It should not be necessary to disable SIMD unless you have issues - compiling (e.g. using an x86 compiler which doesn't support SSE - intrinsics or that doesn't support the method used to detect - SSE2 support at run-time), and even those can be reported as - bugs so I can refine the built-in compile-time checking to be - smarter. 
- - - The old STBI_SIMD system which allowed installing a user-defined - IDCT etc. has been removed. If you need this, don't upgrade. My - assumption is that almost nobody was doing this, and those who - were will find the built-in SIMD more satisfactory anyway. - - - RGB values computed for JPEG images are slightly different from - previous versions of stb_image. (This is due to using less - integer precision in SIMD.) The C code has been adjusted so - that the same RGB values will be computed regardless of whether - SIMD support is available, so your app should always produce - consistent results. But these results are slightly different from - previous versions. (Specifically, about 3% of available YCbCr values - will compute different RGB results from pre-1.49 versions by +-1; - most of the deviating values are one smaller in the G channel.) - - - If you must produce consistent results with previous versions of - stb_image, #define STBI_JPEG_OLD and you will get the same results - you used to; however, you will not get the SIMD speedups for - the YCbCr-to-RGB conversion step (although you should still see - significant JPEG speedup from the other changes). - - Please note that STBI_JPEG_OLD is a temporary feature; it will be - removed in future versions of the library. It is only intended for - near-term back-compatibility use. - - - Latest revision history: +LICENSE + + See end of file for license information. 
+ +RECENT REVISION HISTORY: + + 2.30 (2024-05-31) avoid erroneous gcc warning + 2.29 (2023-05-xx) optimizations + 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff + 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes + 2.26 (2020-07-13) many minor fixes + 2.25 (2020-02-02) fix warnings + 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically + 2.23 (2019-08-11) fix clang static analysis warning + 2.22 (2019-03-04) gif fixes, fix warnings + 2.21 (2019-02-25) fix typo in comment + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings + 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes + 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 RGB-format JPEG; remove white matting in PSD; - allocate large structures on the stack; + allocate large structures on the stack; correct channel count for PNG & BMP 2.10 (2016-01-22) avoid warning introduced in 2.09 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED - 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA - 2.07 (2015-09-13) partial animated GIF support - limited 16-bit PSD support - minor bugs, code cleanup, and compiler warnings - 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value - 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning - 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit - 2.03 (2015-04-12) additional corruption checking - stbi_set_flip_vertically_on_load - fix NEON support; fix 
mingw support - 2.02 (2015-01-19) fix incorrect assert, fix warning - 2.01 (2015-01-17) fix various warnings - 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG - 2.00 (2014-12-25) optimize JPEG, including x86 SSE2 & ARM NEON SIMD - progressive JPEG - PGM/PPM support - STBI_MALLOC,STBI_REALLOC,STBI_FREE - STBI_NO_*, STBI_ONLY_* - GIF bugfix See end of file for full revision history. @@ -186,34 +87,43 @@ Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) - urraka@github (animated gif) Junggon Kim (PNM comments) - Daniel Gibson (16-bit TGA) - - Optimizations & bugfixes - Fabian "ryg" Giesen - Arseny Kapoulkine + github:urraka (animated gif) Junggon Kim (PNM comments) + Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) + socks-the-fox (16-bit PNG) + Jeremy Sawicki (handle all ImageNet JPGs) + Optimizations & bugfixes Mikhail Morozov (1-bit BMP) + Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) + Arseny Kapoulkine Simon Breuss (16-bit PNM) + John-Mark Allen + Carmelo J Fdez-Aguera Bug & warning fixes - Marc LeBlanc David Woo Guillaume George Martins Mozeiko - Christpher Lloyd Martin Golini Jerry Jansson Joseph Thomson - Dave Moore Roy Eltham Hayaki Saito Phil Jordan - Won Chun Luke Graham Johan Duparc Nathan Reed - the Horde3D community Thomas Ruf Ronny Chevalier Nick Verigakis - Janez Zemva John Bartholomew Michal Cichon svdijk@github - Jonathan Blow Ken Hamada Tero Hanninen Baldur Karlsson - Laurent Gomila Cort Stratton Sergio Gonzalez romigrou@github - Aruelien Pocheville Thibault Reuille Cass Everitt Matthew Gregan - Ryamond Barbiero Paul Du Bois Engin Manap snagar@github - Michaelangel007@github Oriol Ferrer Mesia socks-the-fox - Blazej Dariusz Roszkowski - - -LICENSE - -This software is dual-licensed to the public domain and under the following -license: you are granted a perpetual, irrevocable license to copy, modify, -publish, and distribute 
this file as you see fit. - + Marc LeBlanc David Woo Guillaume George Martins Mozeiko + Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski + Phil Jordan Dave Moore Roy Eltham + Hayaki Saito Nathan Reed Won Chun + Luke Graham Johan Duparc Nick Verigakis the Horde3D community + Thomas Ruf Ronny Chevalier github:rlyeh + Janez Zemva John Bartholomew Michal Cichon github:romigrou + Jonathan Blow Ken Hamada Tero Hanninen github:svdijk + Eugene Golushkov Laurent Gomila Cort Stratton github:snagar + Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex + Cass Everitt Ryamond Barbiero github:grim210 + Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw + Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus + Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo + Julian Raschke Gregory Mullen Christian Floisand github:darealshinji + Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 + Brad Weinberger Matvey Cherevko github:mosra + Luca Sas Alexander Veselov Zack Middleton [reserved] + Ryan C. Gordon [reserved] [reserved] + DO NOT ADD YOUR NAME HERE + + Jacko Dirks + + To add your name to the credits, pick a random blank space in the middle and fill it. + 80% of merge conflicts on stb PRs are due to people adding their name at the end + of the credits. */ #ifndef STBI_INCLUDE_STB_IMAGE_H @@ -222,10 +132,8 @@ publish, and distribute this file as you see fit. // DOCUMENTATION // // Limitations: -// - no 16-bit-per-channel PNG // - no 12-bit-per-channel JPEG // - no JPEGs with arithmetic coding -// - no 1-bit BMP // - GIF always returns *comp=4 // // Basic usage (see HDR discussion below for HDR usage): @@ -235,13 +143,13 @@ publish, and distribute this file as you see fit. // // ... x = width, y = height, n = # 8-bit components per pixel ... // // ... replace '0' with '1'..'4' to force that many components per pixel // // ... 
but 'n' will always be the number that it would have been if you said 0 -// stbi_image_free(data) +// stbi_image_free(data); // // Standard parameters: -// int *x -- outputs image width in pixels -// int *y -- outputs image height in pixels -// int *comp -- outputs # of image components in image file -// int req_comp -- if non-zero, # of image components requested in result +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *channels_in_file -- outputs # of image components in image file +// int desired_channels -- if non-zero, # of image components requested in result // // The return value from an image loader is an 'unsigned char *' which points // to the pixel data, or NULL on an allocation failure or if the image is @@ -249,11 +157,12 @@ publish, and distribute this file as you see fit. // with each pixel consisting of N interleaved 8-bit components; the first // pixel pointed to is top-left-most in the image. There is no padding between // image scanlines or between pixels, regardless of format. The number of -// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. -// If req_comp is non-zero, *comp has the number of components that _would_ -// have been output otherwise. E.g. if you set req_comp to 4, you will always -// get RGBA output, but you can check *comp to see if it's trivially opaque -// because e.g. there were only 3 channels in the source image. +// components N is 'desired_channels' if desired_channels is non-zero, or +// *channels_in_file otherwise. If desired_channels is non-zero, +// *channels_in_file has the number of components that _would_ have been +// output otherwise. E.g. if you set desired_channels to 4, you will always +// get RGBA output, but you can check *channels_in_file to see if it's trivially +// opaque because e.g. there were only 3 channels in the source image. 
// // An output image with N components has the following components interleaved // in this order in each pixel: @@ -265,14 +174,50 @@ publish, and distribute this file as you see fit. // 4 red, green, blue, alpha // // If image loading fails for any reason, the return value will be NULL, -// and *x, *y, *comp will be unchanged. The function stbi_failure_reason() -// can be queried for an extremely brief, end-user unfriendly explanation -// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid -// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// and *x, *y, *channels_in_file will be unchanged. The function +// stbi_failure_reason() can be queried for an extremely brief, end-user +// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS +// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly // more user-friendly ones. // // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. // +// To query the width, height and component count of an image without having to +// decode the full file, you can use the stbi_info family of functions: +// +// int x,y,n,ok; +// ok = stbi_info(filename, &x, &y, &n); +// // returns ok=1 and sets x, y, n if image is a supported format, +// // 0 otherwise. +// +// Note that stb_image pervasively uses ints in its public API for sizes, +// including sizes of memory buffers. This is now part of the API and thus +// hard to change without causing breakage. As a result, the various image +// loaders all have certain limits on image size; these differ somewhat +// by format but generally boil down to either just under 2GB or just under +// 1GB. When the decoded image would be larger than this, stb_image decoding +// will fail. +// +// Additionally, stb_image will reject image files that have any of their +// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, +// which defaults to 2**24 = 16777216 pixels. 
Due to the above memory limit, +// the only way to have an image with such dimensions load correctly +// is for it to have a rather extreme aspect ratio. Either way, the +// assumption here is that such larger images are likely to be malformed +// or malicious. If you do need to load an image with individual dimensions +// larger than that, and it still fits in the overall size limit, you can +// #define STBI_MAX_DIMENSIONS on your own to be something larger. +// +// =========================================================================== +// +// UNICODE: +// +// If compiling for Windows and you wish to use Unicode filenames, compile +// with +// #define STBI_WINDOWS_UTF8 +// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert +// Windows wchar_t filenames to utf8. +// // =========================================================================== // // Philosophy @@ -285,15 +230,15 @@ publish, and distribute this file as you see fit. // // Sometimes I let "good performance" creep up in priority over "easy to maintain", // and for best performance I may provide less-easy-to-use APIs that give higher -// performance, in addition to the easy to use ones. Nevertheless, it's important +// performance, in addition to the easy-to-use ones. Nevertheless, it's important // to keep in mind that from the standpoint of you, a client of this library, -// all you care about is #1 and #3, and stb libraries do not emphasize #3 above all. +// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. // // Some secondary priorities arise directly from the first two, some of which -// make more explicit reasons why performance can't be emphasized. +// provide more explicit reasons why performance can't be emphasized. 
// // - Portable ("ease of use") -// - Small footprint ("easy to maintain") +// - Small source code footprint ("easy to maintain") // - No dependencies ("ease of use") // // =========================================================================== @@ -325,13 +270,6 @@ publish, and distribute this file as you see fit. // (at least this is true for iOS and Android). Therefore, the NEON support is // toggled by a build flag: define STBI_NEON to get NEON loops. // -// The output of the JPEG decoder is slightly different from versions where -// SIMD support was introduced (that is, for versions before 1.49). The -// difference is only +-1 in the 8-bit RGB channels, and only on a small -// fraction of pixels. You can force the pre-1.49 behavior by defining -// STBI_JPEG_OLD, but this will disable some of the SIMD decoding path -// and hence cost some performance. -// // If for some reason you do not want to use any of SIMD code, or if // you have issues compiling it, you can disable it entirely by // defining STBI_NO_SIMD. @@ -340,11 +278,10 @@ publish, and distribute this file as you see fit. // // HDR image support (disable by defining STBI_NO_HDR) // -// stb_image now supports loading HDR images in general, and currently -// the Radiance .HDR file format, although the support is provided -// generically. You can still load any file through the existing interface; -// if you attempt to load an HDR file, it will be automatically remapped to -// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// stb_image supports loading HDR images in general, and currently the Radiance +// .HDR file format specifically. 
You can still load any file through the existing +// interface; if you attempt to load an HDR file, it will be automatically remapped +// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; // both of these constants can be reconfigured through this interface: // // stbi_hdr_to_ldr_gamma(2.2f); @@ -376,18 +313,59 @@ publish, and distribute this file as you see fit. // // iPhone PNG support: // -// By default we convert iphone-formatted PNGs back to RGB, even though -// they are internally encoded differently. You can disable this conversion -// by by calling stbi_convert_iphone_png_to_rgb(0), in which case -// you will always just get the native iphone "format" through (which -// is BGR stored in RGB). +// We optionally support converting iPhone-formatted PNGs (which store +// premultiplied BGRA) back to RGB, even though they're internally encoded +// differently. To enable this conversion, call +// stbi_convert_iphone_png_to_rgb(1). // // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per // pixel to remove any premultiplied alpha *only* if the image file explicitly // says there's premultiplied data (currently only happens in iPhone images, // and only if iPhone convert-to-rgb processing is on). // - +// =========================================================================== +// +// ADDITIONAL CONFIGURATION +// +// - You can suppress implementation of any of the decoders to reduce +// your code footprint by #defining one or more of the following +// symbols before creating the implementation. 
+// +// STBI_NO_JPEG +// STBI_NO_PNG +// STBI_NO_BMP +// STBI_NO_PSD +// STBI_NO_TGA +// STBI_NO_GIF +// STBI_NO_HDR +// STBI_NO_PIC +// STBI_NO_PNM (.ppm and .pgm) +// +// - You can request *only* certain decoders and suppress all other ones +// (this will be more forward-compatible, as addition of new decoders +// doesn't require you to disable them explicitly): +// +// STBI_ONLY_JPEG +// STBI_ONLY_PNG +// STBI_ONLY_BMP +// STBI_ONLY_PSD +// STBI_ONLY_TGA +// STBI_ONLY_GIF +// STBI_ONLY_HDR +// STBI_ONLY_PIC +// STBI_ONLY_PNM (.ppm and .pgm) +// +// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still +// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB +// +// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater +// than that size (in either width or height) without further processing. +// This is to let programs in the wild set an upper bound to prevent +// denial-of-service attacks on untrusted data, as one could generate a +// valid image of gigantic dimensions and force stb_image to allocate a +// huge block of memory and spend disproportionate time decoding it. By +// default this is set to (1 << 24), which is 16777216, but that's still +// very big. #ifndef STBI_NO_STDIO #include @@ -397,7 +375,7 @@ publish, and distribute this file as you see fit. 
enum { - STBI_default = 0, // only used for req_comp + STBI_default = 0, // only used for desired_channels STBI_grey = 1, STBI_grey_alpha = 2, @@ -405,17 +383,21 @@ enum STBI_rgb_alpha = 4 }; +#include typedef unsigned char stbi_uc; +typedef unsigned short stbi_us; #ifdef __cplusplus extern "C" { #endif +#ifndef STBIDEF #ifdef STB_IMAGE_STATIC #define STBIDEF static #else #define STBIDEF extern #endif +#endif ////////////////////////////////////////////////////////////////////////////// // @@ -433,22 +415,52 @@ typedef struct int (*eof) (void *user); // returns nonzero if we are at end of file/data } stbi_io_callbacks; -STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); -STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *comp, int req_comp); -STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *comp, int req_comp); +//////////////////////////////////// +// +// 8-bits-per-channel interface +// + +STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO -STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); // for stbi_load_from_file, file pointer is left pointing immediately after image #endif +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); +#endif + +#ifdef STBI_WINDOWS_UTF8 +STBIDEF int stbi_convert_wchar_to_utf8(char 
*buffer, size_t bufferlen, const wchar_t* input); +#endif + +//////////////////////////////////// +// +// 16-bits-per-channel interface +// + +STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); + +#ifndef STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); +STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); +#endif + +//////////////////////////////////// +// +// float-per-channel interface +// #ifndef STBI_NO_LINEAR - STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); - STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); - STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp); + STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO - STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); + STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif #endif @@ -472,7 +484,7 @@ STBIDEF int stbi_is_hdr_from_file(FILE *f); // get a VERY brief reason for failure -// NOT THREADSAFE +// on most compilers (and ALL modern mainstream compilers) this is threadsafe STBIDEF const char 
*stbi_failure_reason (void); // free the loaded image -- this is just free() @@ -481,11 +493,14 @@ STBIDEF void stbi_image_free (void *retval_from_stbi_load); // get image dimensions & components without fully decoding STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); #ifndef STBI_NO_STDIO -STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); -STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); - +STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); +STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); +STBIDEF int stbi_is_16_bit (char const *filename); +STBIDEF int stbi_is_16_bit_from_file(FILE *f); #endif @@ -502,6 +517,13 @@ STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); // flip the image vertically, so the first pixel in the output array is the bottom left STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); +// as above, but only applies to images loaded on the thread that calls the function +// this function is only available if your compiler supports thread-local variables; +// calling it will fail to link if your compiler doesn't +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); + // ZLIB client - used by PNG, available for other purposes STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); @@ -566,9 +588,10 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char 
*obuffer, int olen, const ch #include // ptrdiff_t on osx #include #include +#include #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) -#include // ldexp +#include // ldexp, pow #endif #ifndef STBI_NO_STDIO @@ -580,6 +603,12 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #define STBI_ASSERT(x) assert(x) #endif +#ifdef __cplusplus +#define STBI_EXTERN extern "C" +#else +#define STBI_EXTERN extern +#endif + #ifndef _MSC_VER #ifdef __cplusplus @@ -591,8 +620,25 @@ STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const ch #define stbi_inline __forceinline #endif +#ifndef STBI_NO_THREAD_LOCALS + #if defined(__cplusplus) && __cplusplus >= 201103L + #define STBI_THREAD_LOCAL thread_local + #elif defined(__GNUC__) && __GNUC__ < 5 + #define STBI_THREAD_LOCAL __thread + #elif defined(_MSC_VER) + #define STBI_THREAD_LOCAL __declspec(thread) + #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) + #define STBI_THREAD_LOCAL _Thread_local + #endif -#ifdef _MSC_VER + #ifndef STBI_THREAD_LOCAL + #if defined(__GNUC__) + #define STBI_THREAD_LOCAL __thread + #endif + #endif +#endif + +#if defined(_MSC_VER) || defined(__SYMBIAN32__) typedef unsigned short stbi__uint16; typedef signed short stbi__int16; typedef unsigned int stbi__uint32; @@ -621,7 +667,7 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef STBI_HAS_LROTL #define stbi_lrot(x,y) _lrotl(x,y) #else - #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) @@ -649,12 +695,14 @@ typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 
1 : -1]; #define STBI__X86_TARGET #endif -#if defined(__GNUC__) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) -// NOTE: not clear do we actually need this for the 64-bit path? +#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) // gcc doesn't support sse2 intrinsics unless you compile with -msse2, -// (but compiling with -msse2 allows the compiler to use SSE2 everywhere; -// this is just broken and gcc are jerks for not fixing it properly -// http://www.virtualdub.org/blog/pivot/entry.php?id=363 ) +// which in turn means it gets to use SSE2 everywhere. This is unfortunate, +// but previous attempts to provide the SSE2 functions with runtime +// detection caused numerous issues. The way architecture extensions are +// exposed in GCC/Clang is, sadly, not really suited for one-file libs. +// New behavior: if compiled with -msse2, we use SSE2 without any +// detection; if not, we don't use it at all. #define STBI_NO_SIMD #endif @@ -702,25 +750,27 @@ static int stbi__cpuid3(void) #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name -static int stbi__sse2_available() +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) { int info3 = stbi__cpuid3(); return ((info3 >> 26) & 1) != 0; } +#endif + #else // assume GCC-style if not VC++ #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) -static int stbi__sse2_available() +#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) +static int stbi__sse2_available(void) { -#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 // GCC 4.8 or later - // GCC 4.8+ has a nice way to do this - return __builtin_cpu_supports("sse2"); -#else - // portable way to do this, preferably without using GCC inline ASM? - // just bail for now. 
- return 0; -#endif + // If we're even attempting to compile this on GCC/Clang, that means + // -msse2 is on, which means the compiler is allowed to use SSE2 + // instructions at will, and so are we. + return 1; } +#endif + #endif #endif @@ -731,14 +781,21 @@ static int stbi__sse2_available() #ifdef STBI_NEON #include -// assume GCC or Clang on ARM targets +#ifdef _MSC_VER +#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name +#else #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #endif +#endif #ifndef STBI_SIMD_ALIGN #define STBI_SIMD_ALIGN(type, name) type name #endif +#ifndef STBI_MAX_DIMENSIONS +#define STBI_MAX_DIMENSIONS (1 << 24) +#endif + /////////////////////////////////////////////// // // stbi__context struct and start_xxx functions @@ -756,6 +813,7 @@ typedef struct int read_from_callbacks; int buflen; stbi_uc buffer_start[128]; + int callback_already_read; stbi_uc *img_buffer, *img_buffer_end; stbi_uc *img_buffer_original, *img_buffer_original_end; @@ -769,6 +827,7 @@ static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) { s->io.read = NULL; s->read_from_callbacks = 0; + s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; } @@ -780,7 +839,8 @@ static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void * s->io_user_data = user; s->buflen = sizeof(s->buffer_start); s->read_from_callbacks = 1; - s->img_buffer_original = s->buffer_start; + s->callback_already_read = 0; + s->img_buffer = s->img_buffer_original = s->buffer_start; stbi__refill_buffer(s); s->img_buffer_original_end = s->img_buffer_end; } @@ -794,12 +854,17 @@ static int stbi__stdio_read(void *user, char *data, int size) static void stbi__stdio_skip(void *user, int n) { + int ch; fseek((FILE*) user, n, SEEK_CUR); + ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */ + if (ch != 
EOF) { + ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */ + } } static int stbi__stdio_eof(void *user) { - return feof((FILE*) user); + return feof((FILE*) user) || ferror((FILE *) user); } static stbi_io_callbacks stbi__stdio_callbacks = @@ -827,79 +892,197 @@ static void stbi__rewind(stbi__context *s) s->img_buffer_end = s->img_buffer_original_end; } +enum +{ + STBI_ORDER_RGB, + STBI_ORDER_BGR +}; + +typedef struct +{ + int bits_per_channel; + int num_channels; + int channel_order; +} stbi__result_info; + #ifndef STBI_NO_JPEG static int stbi__jpeg_test(stbi__context *s); -static stbi_uc *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNG static int stbi__png_test(stbi__context *s); -static stbi_uc *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__png_is16(stbi__context *s); #endif #ifndef STBI_NO_BMP static int stbi__bmp_test(stbi__context *s); -static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_TGA static int stbi__tga_test(stbi__context *s); -static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PSD 
static int stbi__psd_test(stbi__context *s); -static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__psd_is16(stbi__context *s); #endif #ifndef STBI_NO_HDR static int stbi__hdr_test(stbi__context *s); -static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PIC static int stbi__pic_test(stbi__context *s); -static stbi_uc *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_GIF static int stbi__gif_test(stbi__context *s); -static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNM static int stbi__pnm_test(stbi__context *s); -static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp); +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); +static int stbi__pnm_is16(stbi__context *s); #endif -// this is not threadsafe -static const char 
*stbi__g_failure_reason; +static +#ifdef STBI_THREAD_LOCAL +STBI_THREAD_LOCAL +#endif +const char *stbi__g_failure_reason; STBIDEF const char *stbi_failure_reason(void) { return stbi__g_failure_reason; } +#ifndef STBI_NO_FAILURE_STRINGS static int stbi__err(const char *str) { stbi__g_failure_reason = str; return 0; } +#endif static void *stbi__malloc(size_t size) { return STBI_MALLOC(size); } +// stb_image uses ints pervasively, including for offset calculations. +// therefore the largest decoded image size we can support with the +// current code, even on 64-bit targets, is INT_MAX. this is not a +// significant limitation for the intended use case. +// +// we do, however, need to make sure our size calculations don't +// overflow. hence a few helper functions for size calculations that +// multiply integers together, making sure that they're non-negative +// and no overflow occurs. + +// return 1 if the sum is valid, 0 on overflow. +// negative terms are considered invalid. +static int stbi__addsizes_valid(int a, int b) +{ + if (b < 0) return 0; + // now 0 <= b <= INT_MAX, hence also + // 0 <= INT_MAX - b <= INTMAX. + // And "a + b <= INT_MAX" (which might overflow) is the + // same as a <= INT_MAX - b (no overflow) + return a <= INT_MAX - b; +} + +// returns 1 if the product is valid, 0 on overflow. +// negative factors are considered invalid. 
+static int stbi__mul2sizes_valid(int a, int b) +{ + if (a < 0 || b < 0) return 0; + if (b == 0) return 1; // mul-by-0 is always safe + // portable way to check for no overflows in a*b + return a <= INT_MAX/b; +} + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow +static int stbi__mad2sizes_valid(int a, int b, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); +} +#endif + +// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow +static int stbi__mad3sizes_valid(int a, int b, int c, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__addsizes_valid(a*b*c, add); +} + +// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) +{ + return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && + stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); +} +#endif + +#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR) +// mallocs with size overflow checking +static void *stbi__malloc_mad2(int a, int b, int add) +{ + if (!stbi__mad2sizes_valid(a, b, add)) return NULL; + return stbi__malloc(a*b + add); +} +#endif + +static void *stbi__malloc_mad3(int a, int b, int c, int add) +{ + if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; + return stbi__malloc(a*b*c + add); +} + +#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM) +static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) +{ + if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; + return stbi__malloc(a*b*c*d + add); +} +#endif + +// returns 1 if the sum of two signed ints is valid (between -2^31 
and 2^31-1 inclusive), 0 on overflow. +static int stbi__addints_valid(int a, int b) +{ + if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow + if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0. + return a <= INT_MAX - b; +} + +// returns 1 if the product of two ints fits in a signed short, 0 on overflow. +static int stbi__mul2shorts_valid(int a, int b) +{ + if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow + if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid + if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN + return a >= SHRT_MIN / b; +} + // stbi__err - error // stbi__errpf - error returning pointer to float // stbi__errpuc - error returning pointer to unsigned char @@ -928,40 +1111,69 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); #endif -static int stbi__vertically_flip_on_load = 0; +static int stbi__vertically_flip_on_load_global = 0; STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { - stbi__vertically_flip_on_load = flag_true_if_should_flip; + stbi__vertically_flip_on_load_global = flag_true_if_should_flip; } -static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) +#ifndef STBI_THREAD_LOCAL +#define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global +#else +static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; + +STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) { - #ifndef STBI_NO_JPEG - if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp); - #endif + stbi__vertically_flip_on_load_local = flag_true_if_should_flip; + stbi__vertically_flip_on_load_set = 1; +} + 
+#define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ + ? stbi__vertically_flip_on_load_local \ + : stbi__vertically_flip_on_load_global) +#endif // STBI_THREAD_LOCAL + +static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) +{ + memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields + ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed + ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order + ri->num_channels = 0; + + // test the formats with a very explicit header first (at least a FOURCC + // or distinctive magic number first) #ifndef STBI_NO_PNG - if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp); + if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_BMP - if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp); + if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_GIF - if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp); + if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PSD - if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp); + if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); + #else + STBI_NOTUSED(bpc); #endif #ifndef STBI_NO_PIC - if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp); + if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); + #endif + + // then the formats that can end up attempting to load with just 1 or 2 + // bytes matching expectations; these are prone to false positives, so + // try them later + #ifndef STBI_NO_JPEG + if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PNM - if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp); + if 
(stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { - float *hdr = stbi__hdr_load(s, x,y,comp,req_comp); + float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); } #endif @@ -969,66 +1181,179 @@ static unsigned char *stbi__load_main(stbi__context *s, int *x, int *y, int *com #ifndef STBI_NO_TGA // test tga last because it's a crappy test! if (stbi__tga_test(s)) - return stbi__tga_load(s,x,y,comp,req_comp); + return stbi__tga_load(s,x,y,comp,req_comp, ri); #endif return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); } -static unsigned char *stbi__load_flip(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) { - unsigned char *result = stbi__load_main(s, x, y, comp, req_comp); + int i; + int img_len = w * h * channels; + stbi_uc *reduced; - if (stbi__vertically_flip_on_load && result != NULL) { - int w = *x, h = *y; - int depth = req_comp ? 
req_comp : *comp; - int row,col,z; - stbi_uc temp; - - // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once - for (row = 0; row < (h>>1); row++) { - for (col = 0; col < w; col++) { - for (z = 0; z < depth; z++) { - temp = result[(row * w + col) * depth + z]; - result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z]; - result[((h - row - 1) * w + col) * depth + z] = temp; - } - } + reduced = (stbi_uc *) stbi__malloc(img_len); + if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling + + STBI_FREE(orig); + return reduced; +} + +static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) +{ + int i; + int img_len = w * h * channels; + stbi__uint16 *enlarged; + + enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); + if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + + for (i = 0; i < img_len; ++i) + enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff + + STBI_FREE(orig); + return enlarged; +} + +static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) +{ + int row; + size_t bytes_per_row = (size_t)w * bytes_per_pixel; + stbi_uc temp[2048]; + stbi_uc *bytes = (stbi_uc *)image; + + for (row = 0; row < (h>>1); row++) { + stbi_uc *row0 = bytes + row*bytes_per_row; + stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; + // swap row0 with row1 + size_t bytes_left = bytes_per_row; + while (bytes_left) { + size_t bytes_copy = (bytes_left < sizeof(temp)) ? 
bytes_left : sizeof(temp); + memcpy(temp, row0, bytes_copy); + memcpy(row0, row1, bytes_copy); + memcpy(row1, temp, bytes_copy); + row0 += bytes_copy; + row1 += bytes_copy; + bytes_left -= bytes_copy; } } +} + +#ifndef STBI_NO_GIF +static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) +{ + int slice; + int slice_size = w * h * bytes_per_pixel; + + stbi_uc *bytes = (stbi_uc *)image; + for (slice = 0; slice < z; ++slice) { + stbi__vertical_flip(bytes, w, h, bytes_per_pixel); + bytes += slice_size; + } +} +#endif - return result; +static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 8) { + result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); + ri.bits_per_channel = 8; + } + + // @TODO: move stbi__convert_format to here + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); + } + + return (unsigned char *) result; } -#ifndef STBI_NO_HDR +static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) +{ + stbi__result_info ri; + void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); + + if (result == NULL) + return NULL; + + // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. + STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); + + if (ri.bits_per_channel != 16) { + result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? 
*comp : req_comp); + ri.bits_per_channel = 16; + } + + // @TODO: move stbi__convert_format16 to here + // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision + + if (stbi__vertically_flip_on_load) { + int channels = req_comp ? req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); + } + + return (stbi__uint16 *) result; +} + +#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) { if (stbi__vertically_flip_on_load && result != NULL) { - int w = *x, h = *y; - int depth = req_comp ? req_comp : *comp; - int row,col,z; - float temp; - - // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once - for (row = 0; row < (h>>1); row++) { - for (col = 0; col < w; col++) { - for (z = 0; z < depth; z++) { - temp = result[(row * w + col) * depth + z]; - result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z]; - result[((h - row - 1) * w + col) * depth + z] = temp; - } - } - } + int channels = req_comp ? 
req_comp : *comp; + stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); } } #endif #ifndef STBI_NO_STDIO +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); +STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); +#endif + +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) +STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) +{ + return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); +} +#endif + static FILE *stbi__fopen(char const *filename, char const *mode) { FILE *f; +#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) + wchar_t wMode[64]; + wchar_t wFilename[1024]; + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) + return 0; + + if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) + return 0; + #if defined(_MSC_VER) && _MSC_VER >= 1400 + if (0 != _wfopen_s(&f, wFilename, wMode)) + f = 0; +#else + f = _wfopen(wFilename, wMode); +#endif + +#elif defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, filename, mode)) f=0; #else @@ -1053,42 +1378,98 @@ STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req unsigned char *result; stbi__context s; stbi__start_file(&s,f); - result = stbi__load_flip(&s,x,y,comp,req_comp); + result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); if (result) { // need to 'unget' all the characters in the IO buffer fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } + +STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int 
req_comp) +{ + stbi__uint16 *result; + stbi__context s; + stbi__start_file(&s,f); + result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); + if (result) { + // need to 'unget' all the characters in the IO buffer + fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); + } + return result; +} + +STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = stbi__fopen(filename, "rb"); + stbi__uint16 *result; + if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file_16(f,x,y,comp,req_comp); + fclose(f); + return result; +} + + #endif //!STBI_NO_STDIO +STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + +STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); + return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); +} + STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); - return stbi__load_flip(&s,x,y,comp,req_comp); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); - return stbi__load_flip(&s,x,y,comp,req_comp); + return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } +#ifndef STBI_NO_GIF +STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int 
*y, int *z, int *comp, int req_comp) +{ + unsigned char *result; + stbi__context s; + stbi__start_mem(&s,buffer,len); + + result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); + if (stbi__vertically_flip_on_load) { + stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); + } + + return result; +} +#endif + #ifndef STBI_NO_LINEAR static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) { unsigned char *data; #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { - float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp); + stbi__result_info ri; + float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); if (hdr_data) stbi__float_postprocess(hdr_data,x,y,comp,req_comp); return hdr_data; } #endif - data = stbi__load_flip(s, x, y, comp, req_comp); + data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); if (data) return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); @@ -1158,12 +1539,16 @@ STBIDEF int stbi_is_hdr (char const *filename) return result; } -STBIDEF int stbi_is_hdr_from_file(FILE *f) +STBIDEF int stbi_is_hdr_from_file(FILE *f) { #ifndef STBI_NO_HDR + long pos = ftell(f); + int res; stbi__context s; stbi__start_file(&s,f); - return stbi__hdr_test(&s); + res = stbi__hdr_test(&s); + fseek(f, pos, SEEK_SET); + return res; #else STBI_NOTUSED(f); return 0; @@ -1212,6 +1597,7 @@ enum static void stbi__refill_buffer(stbi__context *s) { int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); + s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); if (n == 0) { // at end of file, treat same as if from memory, but need to handle case // where s->img_buffer isn't pointing to safe memory, e.g. 
0-byte file @@ -1236,6 +1622,9 @@ stbi_inline static stbi_uc stbi__get8(stbi__context *s) return 0; } +#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else stbi_inline static int stbi__at_eof(stbi__context *s) { if (s->io.read) { @@ -1247,9 +1636,14 @@ stbi_inline static int stbi__at_eof(stbi__context *s) return s->img_buffer >= s->img_buffer_end; } +#endif +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) +// nothing +#else static void stbi__skip(stbi__context *s, int n) { + if (n == 0) return; // already there! if (n < 0) { s->img_buffer = s->img_buffer_end; return; @@ -1264,7 +1658,11 @@ static void stbi__skip(stbi__context *s, int n) } s->img_buffer += n; } +#endif +#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) +// nothing +#else static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) { if (s->io.read) { @@ -1288,18 +1686,27 @@ static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) } else return 0; } +#endif +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else static int stbi__get16be(stbi__context *s) { int z = stbi__get8(s); return (z << 8) + stbi__get8(s); } +#endif +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) +// nothing +#else static stbi__uint32 stbi__get32be(stbi__context *s) { stbi__uint32 z = stbi__get16be(s); return (z << 16) + stbi__get16be(s); } +#endif #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) // nothing @@ -1315,13 +1722,16 @@ static int stbi__get16le(stbi__context *s) static stbi__uint32 stbi__get32le(stbi__context *s) { stbi__uint32 z = stbi__get16le(s); - return z + (stbi__get16le(s) << 16); + z += (stbi__uint32)stbi__get16le(s) << 16; + return z; } #endif #define 
STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings - +#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else ////////////////////////////////////////////////////////////////////////////// // // generic converter from built-in img_n to req_comp @@ -1337,7 +1747,11 @@ static stbi_uc stbi__compute_y(int r, int g, int b) { return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); } +#endif +#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) +// nothing +#else static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) { int i,j; @@ -1346,7 +1760,7 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r if (req_comp == img_n) return data; STBI_ASSERT(req_comp >= 1 && req_comp <= 4); - good = (unsigned char *) stbi__malloc(req_comp * x * y); + good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); if (good == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); @@ -1356,37 +1770,97 @@ static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int r unsigned char *src = data + j * x * img_n ; unsigned char *dest = good + j * x * req_comp; - #define COMBO(a,b) ((a)*8+(b)) - #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros - switch (COMBO(img_n, req_comp)) { - CASE(1,2) dest[0]=src[0], dest[1]=255; break; - CASE(1,3) 
dest[0]=dest[1]=dest[2]=src[0]; break; - CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; - CASE(2,1) dest[0]=src[0]; break; - CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; - CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; - CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; - CASE(3,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break; - CASE(3,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; break; - CASE(4,1) dest[0]=stbi__compute_y(src[0],src[1],src[2]); break; - CASE(4,2) dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; - CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; - default: STBI_ASSERT(0); + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); } - #undef CASE + #undef STBI__CASE } STBI_FREE(data); return good; } +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 stbi__compute_y_16(int r, int g, int b) +{ + return 
(stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); +} +#endif + +#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) +// nothing +#else +static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) +{ + int i,j; + stbi__uint16 *good; + + if (req_comp == img_n) return data; + STBI_ASSERT(req_comp >= 1 && req_comp <= 4); + + good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); + if (good == NULL) { + STBI_FREE(data); + return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + stbi__uint16 *src = data + j * x * img_n ; + stbi__uint16 *dest = good + j * x * req_comp; + + #define STBI__COMBO(a,b) ((a)*8+(b)) + #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (STBI__COMBO(img_n, req_comp)) { + STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; + STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; + STBI__CASE(2,1) { dest[0]=src[0]; } break; + STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; + STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; + STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; + STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; + STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; + STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; + STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; + default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) 
stbi__errpuc("unsupported", "Unsupported format conversion"); + } + #undef STBI__CASE + } + + STBI_FREE(data); + return good; +} +#endif #ifndef STBI_NO_LINEAR static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) { int i,k,n; - float *output = (float *) stbi__malloc(x * y * comp * sizeof(float)); + float *output; + if (!data) return NULL; + output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; @@ -1394,7 +1868,11 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) for (k=0; k < n; ++k) { output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); } - if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + if (n < comp) { + for (i=0; i < x*y; ++i) { + output[i*comp + n] = data[i*comp + n]/255.0f; + } } STBI_FREE(data); return output; @@ -1406,7 +1884,9 @@ static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) { int i,k,n; - stbi_uc *output = (stbi_uc *) stbi__malloc(x * y * comp); + stbi_uc *output; + if (!data) return NULL; + output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; @@ -1471,7 +1951,7 @@ typedef struct stbi__context *s; stbi__huffman huff_dc[4]; stbi__huffman huff_ac[4]; - stbi_uc dequant[4][64]; + stbi__uint16 dequant[4][64]; stbi__int16 fast_ac[4][1 << FAST_BITS]; // sizes for components, interleaved MCUs @@ -1507,6 +1987,8 @@ typedef struct int succ_high; int succ_low; int eob_run; + int jfif; + int app14_color_transform; // Adobe APP14 tag int rgb; int scan_n, order[4]; @@ -1520,11 +2002,15 @@ typedef struct static int 
stbi__build_huffman(stbi__huffman *h, int *count) { - int i,j,k=0,code; + int i,j,k=0; + unsigned int code; // build size list for each symbol (from JPEG spec) - for (i=0; i < 16; ++i) - for (j=0; j < count[i]; ++j) + for (i=0; i < 16; ++i) { + for (j=0; j < count[i]; ++j) { h->size[k++] = (stbi_uc) (i+1); + if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); + } + } h->size[k] = 0; // compute actual symbols (from jpeg spec) @@ -1536,7 +2022,7 @@ static int stbi__build_huffman(stbi__huffman *h, int *count) if (h->size[k] == j) { while (h->size[k] == j) h->code[k++] = (stbi__uint16) (code++); - if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG"); + if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); } // compute largest code + 1 for this size, preshifted as needed later h->maxcode[j] = code << (16-j); @@ -1577,10 +2063,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) // magnitude code followed by receive_extend code int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); int m = 1 << (magbits - 1); - if (k < m) k += (-1 << magbits) + 1; + if (k < m) k += (~0U << magbits) + 1; // if the result is small enough, we can fit it in fast_ac table if (k >= -128 && k <= 127) - fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits)); + fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); } } } @@ -1589,9 +2075,10 @@ static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) static void stbi__grow_buffer_unsafe(stbi__jpeg *j) { do { - int b = j->nomore ? 0 : stbi__get8(j->s); + unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); if (b == 0xff) { int c = stbi__get8(j->s); + while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes if (c != 0) { j->marker = (unsigned char) c; j->nomore = 1; @@ -1604,7 +2091,7 @@ static void stbi__grow_buffer_unsafe(stbi__jpeg *j) } // (1 << n) - 1 -static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; +static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; // decode a jpeg huffman value from the bitstream stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) @@ -1648,6 +2135,8 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) // convert the huffman code to the symbol id c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; + if(c < 0 || c >= 256) // symbol id out of bounds! + return -1; STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); // convert the id to a symbol @@ -1657,7 +2146,7 @@ stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) } // bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); + if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing - sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB + sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) k = stbi_lrot(j->code_buffer, n); - STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; - return k + (stbi__jbias[n] & ~sgn); + return k + (stbi__jbias[n] & (sgn - 1)); } // get some unsigned bits @@ -1681,6 +2170,7 @@ stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) { unsigned int k; if (j->code_bits < n) stbi__grow_buffer_unsafe(j); + if (j->code_bits < n) return 0; // ran out of bits from stream, 
return 0s intead of continuing k = stbi_lrot(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; @@ -1692,6 +2182,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) { unsigned int k; if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); + if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing k = j->code_buffer; j->code_buffer <<= 1; --j->code_bits; @@ -1700,7 +2191,7 @@ stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) // given a value that's at position X in the zigzag stream, // where does it appear in the 8x8 matrix coded as row-major? -static stbi_uc stbi__jpeg_dezigzag[64+15] = +static const stbi_uc stbi__jpeg_dezigzag[64+15] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, @@ -1716,21 +2207,23 @@ static stbi_uc stbi__jpeg_dezigzag[64+15] = }; // decode one 64-entry block-- -static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi_uc *dequant) +static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) { int diff,dc,k; int t; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); t = stbi__jpeg_huff_decode(j, hdc); - if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); + if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); // 0 all the ac values now so we can do it 32-bits at a time memset(data,0,64*sizeof(data[0])); diff = t ? 
stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; + if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); data[0] = (short) (dc * dequant[0]); // decode AC components, see JPEG spec @@ -1744,6 +2237,7 @@ static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; // decode into unzigzag'd location @@ -1780,11 +2274,14 @@ static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__ // first scan for DC coefficient, must be first memset(data,0,64*sizeof(data[0])); // 0 all the ac values now t = stbi__jpeg_huff_decode(j, hdc); + if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); diff = t ? 
stbi__extend_receive(j, t) : 0; + if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; - data[0] = (short) (dc << j->succ_low); + if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); + data[0] = (short) (dc * (1 << j->succ_low)); } else { // refinement scan for DC coefficient if (stbi__jpeg_get_bit(j)) @@ -1818,10 +2315,11 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__ if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length + if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) ((r >> 8) << shift); + data[zig] = (short) ((r >> 8) * (1 << shift)); } else { int rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); @@ -1839,7 +2337,7 @@ static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__ } else { k += r; zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) (stbi__extend_receive(j,s) << shift); + data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); } } } while (k <= j->spec_end); @@ -1926,7 +2424,7 @@ stbi_inline static stbi_uc stbi__clamp(int x) } #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) -#define stbi__fsh(x) ((x) << 12) +#define stbi__fsh(x) ((x) * 4096) // derived from jidctint -- DCT_ISLOW #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ @@ -1981,7 +2479,7 @@ static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) // (1|2|3|4|5|6|7)==0 0 seconds // all separate -0.047 seconds // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds - int dcterm = d[0] << 2; + int dcterm = d[0]*4; v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; } else { STBI__IDCT_1D(d[ 0],d[ 
8],d[16],d[24],d[32],d[40],d[48],d[56]) @@ -2425,7 +2923,7 @@ static stbi_uc stbi__get_marker(stbi__jpeg *j) x = stbi__get8(j->s); if (x != 0xff) return STBI__MARKER_none; while (x == 0xff) - x = stbi__get8(j->s); + x = stbi__get8(j->s); // consume repeated 0xff fill bytes return x; } @@ -2440,7 +2938,7 @@ static void stbi__jpeg_reset(stbi__jpeg *j) j->code_bits = 0; j->code_buffer = 0; j->nomore = 0; - j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; j->marker = STBI__MARKER_none; j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; j->eob_run = 0; @@ -2572,7 +3070,7 @@ static int stbi__parse_entropy_coded_data(stbi__jpeg *z) } } -static void stbi__jpeg_dequantize(short *data, stbi_uc *dequant) +static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) { int i; for (i=0; i < 64; ++i) @@ -2614,13 +3112,14 @@ static int stbi__process_marker(stbi__jpeg *z, int m) L = stbi__get16be(z->s)-2; while (L > 0) { int q = stbi__get8(z->s); - int p = q >> 4; + int p = q >> 4, sixteen = (p != 0); int t = q & 15,i; - if (p != 0) return stbi__err("bad DQT type","Corrupt JPEG"); + if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) - z->dequant[t][stbi__jpeg_dezigzag[i]] = stbi__get8(z->s); - L -= 65; + z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); + L -= (sixteen ? 129 : 65); } return L==0; @@ -2637,6 +3136,7 @@ static int stbi__process_marker(stbi__jpeg *z, int m) sizes[i] = stbi__get8(z->s); n += sizes[i]; } + if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! 
L -= 17; if (tc == 0) { if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; @@ -2653,12 +3153,50 @@ static int stbi__process_marker(stbi__jpeg *z, int m) } return L==0; } + // check for comment block or APP blocks if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { - stbi__skip(z->s, stbi__get16be(z->s)-2); + L = stbi__get16be(z->s); + if (L < 2) { + if (m == 0xFE) + return stbi__err("bad COM len","Corrupt JPEG"); + else + return stbi__err("bad APP len","Corrupt JPEG"); + } + L -= 2; + + if (m == 0xE0 && L >= 5) { // JFIF APP0 segment + static const unsigned char tag[5] = {'J','F','I','F','\0'}; + int ok = 1; + int i; + for (i=0; i < 5; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 5; + if (ok) + z->jfif = 1; + } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment + static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; + int ok = 1; + int i; + for (i=0; i < 6; ++i) + if (stbi__get8(z->s) != tag[i]) + ok = 0; + L -= 6; + if (ok) { + stbi__get8(z->s); // version + stbi__get16be(z->s); // flags0 + stbi__get16be(z->s); // flags1 + z->app14_color_transform = stbi__get8(z->s); // color transform + L -= 6; + } + } + + stbi__skip(z->s, L); return 1; } - return 0; + + return stbi__err("unknown marker","Corrupt JPEG"); } // after we see SOS @@ -2701,6 +3239,28 @@ static int stbi__process_scan_header(stbi__jpeg *z) return 1; } +static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) +{ + int i; + for (i=0; i < ncomp; ++i) { + if (z->img_comp[i].raw_data) { + STBI_FREE(z->img_comp[i].raw_data); + z->img_comp[i].raw_data = NULL; + z->img_comp[i].data = NULL; + } + if (z->img_comp[i].raw_coeff) { + STBI_FREE(z->img_comp[i].raw_coeff); + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].coeff = 0; + } + if (z->img_comp[i].linebuf) { + STBI_FREE(z->img_comp[i].linebuf); + z->img_comp[i].linebuf = NULL; + } + } + return why; +} + static int stbi__process_frame_header(stbi__jpeg *z, int scan) { stbi__context *s = z->s; @@ -2709,8 +3269,10 @@ 
static int stbi__process_frame_header(stbi__jpeg *z, int scan) p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); c = stbi__get8(s); - if (c != 3 && c != 1) return stbi__err("bad component count","Corrupt JPEG"); // JFIF requires + if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); s->img_n = c; for (i=0; i < c; ++i) { z->img_comp[i].data = NULL; @@ -2721,15 +3283,10 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) z->rgb = 0; for (i=0; i < s->img_n; ++i) { - static unsigned char rgb[3] = { 'R', 'G', 'B' }; + static const unsigned char rgb[3] = { 'R', 'G', 'B' }; z->img_comp[i].id = stbi__get8(s); - if (z->img_comp[i].id != i+1) // JFIF requires - if (z->img_comp[i].id != i) { // some version of jpegtran outputs non-JFIF-compliant files! 
- // somethings output this (see http://fileformats.archiveteam.org/wiki/JPEG#Color_format) - if (z->img_comp[i].id != rgb[i]) - return stbi__err("bad component ID","Corrupt JPEG"); - ++z->rgb; - } + if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) + ++z->rgb; q = stbi__get8(s); z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); @@ -2738,18 +3295,26 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) if (scan != STBI__SCAN_load) return 1; - if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); + if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); for (i=0; i < s->img_n; ++i) { if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } + // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios + // and I've never seen a non-corrupted JPEG file actually use them + for (i=0; i < s->img_n; ++i) { + if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); + if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); + } + // compute interleaved mcu info z->img_h_max = h_max; z->img_v_max = v_max; z->img_mcu_w = h_max * 8; z->img_mcu_h = v_max * 8; + // these sizes can't be more than 17 bits z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; @@ -2761,28 +3326,27 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) // the bogus oversized data from using interleaved MCUs and their // big blocks (e.g. 
a 16x16 iMCU on an image of width 33); we won't // discard the extra data until colorspace conversion + // + // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) + // so these muls can't overflow with 32-bit ints (which we require) z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; - z->img_comp[i].raw_data = stbi__malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); - - if (z->img_comp[i].raw_data == NULL) { - for(--i; i >= 0; --i) { - STBI_FREE(z->img_comp[i].raw_data); - z->img_comp[i].raw_data = NULL; - } - return stbi__err("outofmem", "Out of memory"); - } + z->img_comp[i].coeff = 0; + z->img_comp[i].raw_coeff = 0; + z->img_comp[i].linebuf = NULL; + z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); + if (z->img_comp[i].raw_data == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); // align blocks for idct using mmx/sse z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); - z->img_comp[i].linebuf = NULL; if (z->progressive) { - z->img_comp[i].coeff_w = (z->img_comp[i].w2 + 7) >> 3; - z->img_comp[i].coeff_h = (z->img_comp[i].h2 + 7) >> 3; - z->img_comp[i].raw_coeff = STBI_MALLOC(z->img_comp[i].coeff_w * z->img_comp[i].coeff_h * 64 * sizeof(short) + 15); + // w2, h2 are multiples of 8 (see above) + z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; + z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; + z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); + if (z->img_comp[i].raw_coeff == NULL) + return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); - } else { - z->img_comp[i].coeff = 0; - z->img_comp[i].raw_coeff = 0; } } @@ -2801,6 +3365,8 @@ static int stbi__process_frame_header(stbi__jpeg *z, int scan) static int 
stbi__decode_jpeg_header(stbi__jpeg *z, int scan) { int m; + z->jfif = 0; + z->app14_color_transform = -1; // valid values are 0,1,2 z->marker = STBI__MARKER_none; // initialize cached marker to empty m = stbi__get_marker(z); if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); @@ -2820,6 +3386,28 @@ static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) return 1; } +static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) +{ + // some JPEGs have junk at end, skip over it but if we find what looks + // like a valid marker, resume there + while (!stbi__at_eof(j->s)) { + stbi_uc x = stbi__get8(j->s); + while (x == 0xff) { // might be a marker + if (stbi__at_eof(j->s)) return STBI__MARKER_none; + x = stbi__get8(j->s); + if (x != 0x00 && x != 0xff) { + // not a stuffed zero or lead-in to another marker, looks + // like an actual marker, return it + return x; + } + // stuffed zero has x=0 now which ends the loop, meaning we go + // back to regular scan loop. + // repeated 0xff keeps trying to read the next byte of the marker. 
+ } + } + return STBI__MARKER_none; +} + // decode image to YCbCr format static int stbi__decode_jpeg_image(stbi__jpeg *j) { @@ -2836,22 +3424,22 @@ static int stbi__decode_jpeg_image(stbi__jpeg *j) if (!stbi__process_scan_header(j)) return 0; if (!stbi__parse_entropy_coded_data(j)) return 0; if (j->marker == STBI__MARKER_none ) { - // handle 0s at the end of image data from IP Kamera 9060 - while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - if (x == 255) { - j->marker = stbi__get8(j->s); - break; - } else if (x != 0) { - return stbi__err("junk before marker", "Corrupt JPEG"); - } - } + j->marker = stbi__skip_jpeg_junk_at_end(j); // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 } + m = stbi__get_marker(j); + if (STBI__RESTART(m)) + m = stbi__get_marker(j); + } else if (stbi__DNL(m)) { + int Ld = stbi__get16be(j->s); + stbi__uint32 NL = stbi__get16be(j->s); + if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); + if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); + m = stbi__get_marker(j); } else { - if (!stbi__process_marker(j, m)) return 0; + if (!stbi__process_marker(j, m)) return 1; + m = stbi__get_marker(j); } - m = stbi__get_marker(j); } if (j->progressive) stbi__jpeg_finish(j); @@ -3066,38 +3654,9 @@ static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_ return out; } -#ifdef STBI_JPEG_OLD -// this is the same YCbCr-to-RGB calculation that stb_image has used -// historically before the algorithm changes in 1.49 -#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) -static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) -{ - int i; - for (i=0; i < count; ++i) { - int y_fixed = (y[i] << 16) + 32768; // rounding - int r,g,b; - int cr = pcr[i] - 128; - int cb = pcb[i] - 128; - r = y_fixed + cr*float2fixed(1.40200f); - g = y_fixed - cr*float2fixed(0.71414f) - 
cb*float2fixed(0.34414f); - b = y_fixed + cb*float2fixed(1.77200f); - r >>= 16; - g >>= 16; - b >>= 16; - if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } - if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } - if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; - out[3] = 255; - out += step; - } -} -#else // this is a reduced-precision calculation of YCbCr-to-RGB introduced // to make sure the code produces the same results in both SIMD and scalar -#define float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) +#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) { int i; @@ -3106,9 +3665,9 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc int r,g,b; int cr = pcr[i] - 128; int cb = pcb[i] - 128; - r = y_fixed + cr* float2fixed(1.40200f); - g = y_fixed + (cr*-float2fixed(0.71414f)) + ((cb*-float2fixed(0.34414f)) & 0xffff0000); - b = y_fixed + cb* float2fixed(1.77200f); + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); r >>= 20; g >>= 20; b >>= 20; @@ -3122,7 +3681,6 @@ static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc out += step; } } -#endif #if defined(STBI_SSE2) || defined(STBI_NEON) static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) @@ -3241,9 +3799,9 @@ static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc cons int r,g,b; int cr = pcr[i] - 128; int cb = pcb[i] - 128; - r = y_fixed + cr* float2fixed(1.40200f); - g = y_fixed + cr*-float2fixed(0.71414f) + ((cb*-float2fixed(0.34414f)) & 0xffff0000); - b = y_fixed + cb* 
float2fixed(1.77200f); + r = y_fixed + cr* stbi__float2fixed(1.40200f); + g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); + b = y_fixed + cb* stbi__float2fixed(1.77200f); r >>= 20; g >>= 20; b >>= 20; @@ -3269,18 +3827,14 @@ static void stbi__setup_jpeg(stbi__jpeg *j) #ifdef STBI_SSE2 if (stbi__sse2_available()) { j->idct_block_kernel = stbi__idct_simd; - #ifndef STBI_JPEG_OLD j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; - #endif j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; } #endif #ifdef STBI_NEON j->idct_block_kernel = stbi__idct_simd; - #ifndef STBI_JPEG_OLD j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; - #endif j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; #endif } @@ -3288,23 +3842,7 @@ static void stbi__setup_jpeg(stbi__jpeg *j) // clean up the temporary component buffers static void stbi__cleanup_jpeg(stbi__jpeg *j) { - int i; - for (i=0; i < j->s->img_n; ++i) { - if (j->img_comp[i].raw_data) { - STBI_FREE(j->img_comp[i].raw_data); - j->img_comp[i].raw_data = NULL; - j->img_comp[i].data = NULL; - } - if (j->img_comp[i].raw_coeff) { - STBI_FREE(j->img_comp[i].raw_coeff); - j->img_comp[i].raw_coeff = 0; - j->img_comp[i].coeff = 0; - } - if (j->img_comp[i].linebuf) { - STBI_FREE(j->img_comp[i].linebuf); - j->img_comp[i].linebuf = NULL; - } - } + stbi__free_jpeg_components(j, j->s->img_n, 0); } typedef struct @@ -3317,9 +3855,16 @@ typedef struct int ypos; // which pre-expansion row we're on } stbi__resample; +// fast 0..255 * 0..255 => 0..255 rounded multiplication +static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) +{ + unsigned int t = x*y + 128; + return (stbi_uc) ((t + (t >>8)) >> 8); +} + static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) { - int n, decode_n; + int n, decode_n, is_rgb; z->s->img_n = 0; // make stbi__cleanup_jpeg safe // validate req_comp @@ -3329,19 +3874,25 @@ static stbi_uc *load_jpeg_image(stbi__jpeg 
*z, int *out_x, int *out_y, int *comp if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } // determine actual number of components to generate - n = req_comp ? req_comp : z->s->img_n; + n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; + + is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); - if (z->s->img_n == 3 && n < 3) + if (z->s->img_n == 3 && n < 3 && !is_rgb) decode_n = 1; else decode_n = z->s->img_n; + // nothing to do if no components requested; check this now to avoid + // accessing uninitialized coutput[0] later + if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; } + // resample and color-convert { int k; unsigned int i,j; stbi_uc *output; - stbi_uc *coutput[4]; + stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL }; stbi__resample res_comp[4]; @@ -3368,7 +3919,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp } // can't error after this so, this is safe - output = (stbi_uc *) stbi__malloc(n * z->s->img_x * z->s->img_y + 1); + output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } // now go ahead and resample @@ -3391,7 +3942,7 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp if (n >= 3) { stbi_uc *y = coutput[0]; if (z->s->img_n == 3) { - if (z->rgb == 3) { + if (is_rgb) { for (i=0; i < z->s->img_x; ++i) { out[0] = y[i]; out[1] = coutput[1][i]; @@ -3402,6 +3953,28 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp } else { z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); } + } else if (z->s->img_n == 4) { + if (z->app14_color_transform == 0) { // CMYK + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(coutput[0][i], m); + out[1] = stbi__blinn_8x8(coutput[1][i], m); + out[2] = stbi__blinn_8x8(coutput[2][i], m); + out[3] 
= 255; + out += n; + } + } else if (z->app14_color_transform == 2) { // YCCK + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + out[0] = stbi__blinn_8x8(255 - out[0], m); + out[1] = stbi__blinn_8x8(255 - out[1], m); + out[2] = stbi__blinn_8x8(255 - out[2], m); + out += n; + } + } else { // YCbCr + alpha? Ignore the fourth channel for now + z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); + } } else for (i=0; i < z->s->img_x; ++i) { out[0] = out[1] = out[2] = y[i]; @@ -3409,25 +3982,56 @@ static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp out += n; } } else { - stbi_uc *y = coutput[0]; - if (n == 1) - for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; - else - for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; + if (is_rgb) { + if (n == 1) + for (i=0; i < z->s->img_x; ++i) + *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + else { + for (i=0; i < z->s->img_x; ++i, out += 2) { + out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); + out[1] = 255; + } + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { + for (i=0; i < z->s->img_x; ++i) { + stbi_uc m = coutput[3][i]; + stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); + stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); + stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); + out[0] = stbi__compute_y(r, g, b); + out[1] = 255; + out += n; + } + } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { + for (i=0; i < z->s->img_x; ++i) { + out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); + out[1] = 255; + out += n; + } + } else { + stbi_uc *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; } + } } } stbi__cleanup_jpeg(z); *out_x = z->s->img_x; *out_y = z->s->img_y; - if (comp) *comp = z->s->img_n; // report 
original components, not output + if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output return output; } } -static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { unsigned char* result; stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__errpuc("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + STBI_NOTUSED(ri); j->s = s; stbi__setup_jpeg(j); result = load_jpeg_image(j, x,y,comp,req_comp); @@ -3438,11 +4042,14 @@ static unsigned char *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *com static int stbi__jpeg_test(stbi__context *s) { int r; - stbi__jpeg j; - j.s = s; - stbi__setup_jpeg(&j); - r = stbi__decode_jpeg_header(&j, STBI__SCAN_type); + stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); + j->s = s; + stbi__setup_jpeg(j); + r = stbi__decode_jpeg_header(j, STBI__SCAN_type); stbi__rewind(s); + STBI_FREE(j); return r; } @@ -3454,7 +4061,7 @@ static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) } if (x) *x = j->s->img_x; if (y) *y = j->s->img_y; - if (comp) *comp = j->s->img_n; + if (comp) *comp = j->s->img_n >= 3 ? 
3 : 1; return 1; } @@ -3462,6 +4069,8 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) { int result; stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); + if (!j) return stbi__err("outofmem", "Out of memory"); + memset(j, 0, sizeof(stbi__jpeg)); j->s = s; result = stbi__jpeg_info_raw(j, x, y, comp); STBI_FREE(j); @@ -3481,6 +4090,7 @@ static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) // fast-way is faster to check than jpeg huffman, but slow way is slower #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) +#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet // zlib-style huffman encoding // (jpegs packs from left, zlib from right, so can't share code) @@ -3490,8 +4100,8 @@ typedef struct stbi__uint16 firstcode[16]; int maxcode[17]; stbi__uint16 firstsymbol[16]; - stbi_uc size[288]; - stbi__uint16 value[288]; + stbi_uc size[STBI__ZNSYMS]; + stbi__uint16 value[STBI__ZNSYMS]; } stbi__zhuffman; stbi_inline static int stbi__bitreverse16(int n) @@ -3511,7 +4121,7 @@ stbi_inline static int stbi__bit_reverse(int v, int bits) return stbi__bitreverse16(v) >> (16-bits); } -static int stbi__zbuild_huffman(stbi__zhuffman *z, stbi_uc *sizelist, int num) +static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) { int i,k=0; int code, next_code[16], sizes[17]; @@ -3568,6 +4178,7 @@ typedef struct { stbi_uc *zbuffer, *zbuffer_end; int num_bits; + int hit_zeof_once; stbi__uint32 code_buffer; char *zout; @@ -3578,16 +4189,23 @@ typedef struct stbi__zhuffman z_length, z_distance; } stbi__zbuf; +stbi_inline static int stbi__zeof(stbi__zbuf *z) +{ + return (z->zbuffer >= z->zbuffer_end); +} + stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) { - if (z->zbuffer >= z->zbuffer_end) return 0; - return *z->zbuffer++; + return stbi__zeof(z) ? 
0 : *z->zbuffer++; } static void stbi__fill_bits(stbi__zbuf *z) { do { - STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); + if (z->code_buffer >= (1U << z->num_bits)) { + z->zbuffer = z->zbuffer_end; /* treat this as EOF so we fail. */ + return; + } z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; z->num_bits += 8; } while (z->num_bits <= 24); @@ -3612,10 +4230,11 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) for (s=STBI__ZFAST_BITS+1; ; ++s) if (k < z->maxcode[s]) break; - if (s == 16) return -1; // invalid code! + if (s >= 16) return -1; // invalid code! // code size is s, so: b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; - STBI_ASSERT(z->size[b] == s); + if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere! + if (z->size[b] != s) return -1; // was originally an assert, but report failure instead. a->code_buffer >>= s; a->num_bits -= s; return z->value[b]; @@ -3624,7 +4243,23 @@ static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) { int b,s; - if (a->num_bits < 16) stbi__fill_bits(a); + if (a->num_bits < 16) { + if (stbi__zeof(a)) { + if (!a->hit_zeof_once) { + // This is the first time we hit eof, insert 16 extra padding btis + // to allow us to keep going; if we actually consume any of them + // though, that is invalid data. This is caught later. + a->hit_zeof_once = 1; + a->num_bits += 16; // add 16 implicit zero bits + } else { + // We already inserted our extra 16 padding bits and are again + // out, this stream is actually prematurely terminated. 
+ return -1; + } + } else { + stbi__fill_bits(a); + } + } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { s = b >> 9; @@ -3638,13 +4273,16 @@ stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; - int cur, limit, old_limit; + unsigned int cur, limit, old_limit; z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); - cur = (int) (z->zout - z->zout_start); - limit = old_limit = (int) (z->zout_end - z->zout_start); - while (cur + n > limit) + cur = (unsigned int) (z->zout - z->zout_start); + limit = old_limit = (unsigned) (z->zout_end - z->zout_start); + if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); + while (cur + n > limit) { + if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); limit *= 2; + } q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); STBI_NOTUSED(old_limit); if (q == NULL) return stbi__err("outofmem", "Out of memory"); @@ -3654,18 +4292,18 @@ static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room return 1; } -static int stbi__zlength_base[31] = { +static const int stbi__zlength_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; -static int stbi__zlength_extra[31]= +static const int stbi__zlength_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; -static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; -static int stbi__zdist_extra[32] = +static const int stbi__zdist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; static int stbi__parse_huffman_block(stbi__zbuf *a) @@ -3685,17 +4323,25 @@ 
static int stbi__parse_huffman_block(stbi__zbuf *a) int len,dist; if (z == 256) { a->zout = zout; + if (a->hit_zeof_once && a->num_bits < 16) { + // The first time we hit zeof, we inserted 16 extra zero bits into our bit + // buffer so the decoder can just do its speculative decoding. But if we + // actually consumed any of those bits (which is the case when num_bits < 16), + // the stream actually read past the end so it is malformed. + return stbi__err("unexpected end","Corrupt PNG"); + } return 1; } + if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data z -= 257; len = stbi__zlength_base[z]; if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); z = stbi__zhuffman_decode(a, &a->z_distance); - if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); + if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data dist = stbi__zdist_base[z]; if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { + if (len > a->zout_end - zout) { if (!stbi__zexpand(a, zout, len)) return 0; zout = a->zout; } @@ -3712,7 +4358,7 @@ static int stbi__parse_huffman_block(stbi__zbuf *a) static int stbi__compute_huffman_codes(stbi__zbuf *a) { - static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; stbi__zhuffman z_codelength; stbi_uc lencodes[286+32+137];//padding for maximum single op stbi_uc codelength_sizes[19]; @@ -3721,6 +4367,7 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a) int hlit = stbi__zreceive(a,5) + 257; int hdist = stbi__zreceive(a,5) + 1; int hclen = stbi__zreceive(a,4) + 4; + int ntot = hlit + hdist; 
memset(codelength_sizes, 0, sizeof(codelength_sizes)); for (i=0; i < hclen; ++i) { @@ -3730,27 +4377,30 @@ static int stbi__compute_huffman_codes(stbi__zbuf *a) if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; n = 0; - while (n < hlit + hdist) { + while (n < ntot) { int c = stbi__zhuffman_decode(a, &z_codelength); if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); if (c < 16) lencodes[n++] = (stbi_uc) c; - else if (c == 16) { - c = stbi__zreceive(a,2)+3; - memset(lencodes+n, lencodes[n-1], c); - n += c; - } else if (c == 17) { - c = stbi__zreceive(a,3)+3; - memset(lencodes+n, 0, c); - n += c; - } else { - STBI_ASSERT(c == 18); - c = stbi__zreceive(a,7)+11; - memset(lencodes+n, 0, c); + else { + stbi_uc fill = 0; + if (c == 16) { + c = stbi__zreceive(a,2)+3; + if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); + fill = lencodes[n-1]; + } else if (c == 17) { + c = stbi__zreceive(a,3)+3; + } else if (c == 18) { + c = stbi__zreceive(a,7)+11; + } else { + return stbi__err("bad codelengths", "Corrupt PNG"); + } + if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); + memset(lencodes+n, fill, c); n += c; } } - if (n != hlit+hdist) return stbi__err("bad codelengths","Corrupt PNG"); + if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; return 1; @@ -3769,7 +4419,7 @@ static int stbi__parse_uncompressed_block(stbi__zbuf *a) a->code_buffer >>= 8; a->num_bits -= 8; } - STBI_ASSERT(a->num_bits == 0); + if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG"); // now fill header the normal way while (k < 4) header[k++] = stbi__zget8(a); @@ -3791,6 +4441,7 @@ static int stbi__parse_zlib_header(stbi__zbuf *a) int cm = cmf & 15; /* int cinfo = cmf >> 4; */ int flg = stbi__zget8(a); + if (stbi__zeof(a)) return stbi__err("bad zlib 
header","Corrupt PNG"); // zlib spec if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png @@ -3798,9 +4449,24 @@ static int stbi__parse_zlib_header(stbi__zbuf *a) return 1; } -// @TODO: should statically initialize these for optimal thread safety -static stbi_uc stbi__zdefault_length[288], stbi__zdefault_distance[32]; -static void stbi__init_zdefaults(void) +static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = +{ + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, + 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 +}; +static const stbi_uc stbi__zdefault_distance[32] = +{ + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 +}; +/* +Init algorithm: { int i; // use <= to match clearly with spec for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; @@ -3810,6 +4476,7 @@ static void stbi__init_zdefaults(void) for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; } +*/ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { @@ -3818,6 +4485,7 @@ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; + a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); @@ -3828,8 +4496,7 @@ static int 
stbi__parse_zlib(stbi__zbuf *a, int parse_header) } else { if (type == 1) { // use fixed code lengths - if (!stbi__zdefault_distance[31]) stbi__init_zdefaults(); - if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; + if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; } else { if (!stbi__compute_huffman_codes(a)) return 0; @@ -3953,7 +4620,7 @@ static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) static int stbi__check_png_header(stbi__context *s) { - static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; + static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; int i; for (i=0; i < 8; ++i) if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); @@ -3974,9 +4641,8 @@ enum { STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first + // synthetic filter used for first scanline to avoid needing a dummy row of 0s + STBI__F_avg_first }; static stbi_uc first_row_filter[5] = @@ -3985,29 +4651,56 @@ static stbi_uc first_row_filter[5] = STBI__F_sub, STBI__F_none, STBI__F_avg_first, - STBI__F_paeth_first + STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int stbi__paeth(int a, int b, int c) { - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; + // This formulation looks very different from the reference in the PNG spec, but is + // actually equivalent and has favorable data dependencies and admits straightforward + // generation of branch-free code, which helps performance significantly. + int thresh = c*3 - (a + b); + int lo = a < b ? a : b; + int hi = a < b ? b : a; + int t0 = (hi <= thresh) ? lo : c; + int t1 = (thresh <= lo) ? 
hi : t0; + return t1; } -static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; +static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; + +// adds an extra all-255 alpha channel +// dest == src is legal +// img_n must be 1 or 3 +static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n) +{ + int i; + // must process data backwards since we allow dest==src + if (img_n == 1) { + for (i=x-1; i >= 0; --i) { + dest[i*2+1] = 255; + dest[i*2+0] = src[i]; + } + } else { + STBI_ASSERT(img_n == 3); + for (i=x-1; i >= 0; --i) { + dest[i*4+3] = 255; + dest[i*4+2] = src[i*3+2]; + dest[i*4+1] = src[i*3+1]; + dest[i*4+0] = src[i*3+0]; + } + } +} // create the png data from post-deflated data static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) { - int bytes = (depth == 16? 2 : 1); + int bytes = (depth == 16 ? 2 : 1); stbi__context *s = a->s; stbi__uint32 i,j,stride = x*out_n*bytes; stbi__uint32 img_len, img_width_bytes; + stbi_uc *filter_buf; + int all_ok = 1; int k; int img_n = s->img_n; // copy it into a local for later @@ -4016,211 +4709,167 @@ static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 r int width = x; STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); - a->out = (stbi_uc *) stbi__malloc(x * y * output_bytes); // extra bytes to write off the end into + a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into if (!a->out) return stbi__err("outofmem", "Out of memory"); + // note: error exits here don't need to clean up a->out individually, + // stbi__do_png always does on error. 
+ if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG"); img_width_bytes = (((img_n * x * depth) + 7) >> 3); + if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG"); img_len = (img_width_bytes + 1) * y; - if (s->img_x == x && s->img_y == y) { - if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG"); - } else { // interlaced: - if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, + // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), + // so just check for raw_len < img_len always. + if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); + + // Allocate two scan lines worth of filter workspace buffer. + filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0); + if (!filter_buf) return stbi__err("outofmem", "Out of memory"); + + // Filtering for low-bit-depth images + if (depth < 8) { + filter_bytes = 1; + width = img_width_bytes; } for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior = cur - stride; + // cur/prior filter buffers alternate + stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes; + stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes; + stbi_uc *dest = a->out + stride*j; + int nk = width * filter_bytes; int filter = *raw++; - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - STBI_ASSERT(img_width_bytes <= x); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; + // check filter type + if (filter > 4) { + all_ok = stbi__err("invalid filter","Corrupt PNG"); + break; } // if first row, use special filter that doesn't sample previous row if (j == 0) filter = first_row_filter[filter]; - // handle 
first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; + // perform actual filtering + switch (filter) { + case STBI__F_none: + memcpy(cur, raw, nk); + break; + case STBI__F_sub: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); + break; + case STBI__F_up: + for (k = 0; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); + break; + case STBI__F_avg: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); + break; + case STBI__F_paeth: + for (k = 0; k < filter_bytes; ++k) + cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0) + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes])); + break; + case STBI__F_avg_first: + memcpy(cur, raw, filter_bytes); + for (k = filter_bytes; k < nk; ++k) + cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); + break; } - // 
this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. - case STBI__F_none: memcpy(cur, raw, nk); break; - CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); break; - CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); break; - CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); break; - CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); break; - CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); break; - } - #undef CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - CASE(STBI__F_none) cur[k] = raw[k]; break; - CASE(STBI__F_sub) cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); break; - CASE(STBI__F_up) cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - CASE(STBI__F_avg) cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); break; - CASE(STBI__F_paeth) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); break; - CASE(STBI__F_avg_first) cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); break; - CASE(STBI__F_paeth_first) cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); break; - } - #undef CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. 
- if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } + raw += nk; - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop + // expand decoded bits in cur to dest, also adding an extra alpha channel if desired + if (depth < 8) { stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range + stbi_uc *in = cur; + stbi_uc *out = dest; + stbi_uc inb = 0; + stbi__uint32 nsmp = x*img_n; - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - + // expand bits to bytes first if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); + for (i=0; i < nsmp; ++i) { + if ((i & 1) == 0) inb = *in++; + *out++ = scale * (inb >> 4); + inb <<= 4; } - if (k > 0) *cur++ = scale * ((*in >> 4) ); } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); + for (i=0; i < nsmp; ++i) { + if ((i & 3) == 0) inb = *in++; + *out++ = scale * (inb >> 6); + inb <<= 2; } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); + } else { + STBI_ASSERT(depth == 1); + for (i=0; i < nsmp; ++i) { + if ((i & 7) == 0) inb = *in++; + *out++ = scale * (inb >> 7); + inb <<= 1; } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; + + // insert alpha=255 values if desired + if (img_n != out_n) + stbi__create_png_alpha_expand8(dest, dest, x, img_n); + } else if (depth == 8) { + if (img_n == out_n) + memcpy(dest, cur, x*img_n); + 
else + stbi__create_png_alpha_expand8(dest, cur, x, img_n); + } else if (depth == 16) { + // convert the image data from big-endian to platform-native + stbi__uint16 *dest16 = (stbi__uint16*)dest; + stbi__uint32 nsmp = x*img_n; + + if (img_n == out_n) { + for (i = 0; i < nsmp; ++i, ++dest16, cur += 2) + *dest16 = (cur[0] << 8) | cur[1]; + } else { + STBI_ASSERT(img_n+1 == out_n); if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; + for (i = 0; i < x; ++i, dest16 += 2, cur += 2) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = 0xffff; } } else { STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; + for (i = 0; i < x; ++i, dest16 += 4, cur += 6) { + dest16[0] = (cur[0] << 8) | cur[1]; + dest16[1] = (cur[2] << 8) | cur[3]; + dest16[2] = (cur[4] << 8) | cur[5]; + dest16[3] = 0xffff; } } } } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. - stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } } + STBI_FREE(filter_buf); + if (!all_ok) return 0; + return 1; } static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) { + int bytes = (depth == 16 ? 
2 : 1); + int out_bytes = out_n * bytes; stbi_uc *final; int p; if (!interlaced) return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); // de-interlacing - final = (stbi_uc *) stbi__malloc(a->s->img_x * a->s->img_y * out_n); + final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); + if (!final) return stbi__err("outofmem", "Out of memory"); for (p=0; p < 7; ++p) { int xorig[] = { 0,4,0,2,0,1,0 }; int yorig[] = { 0,0,4,0,2,0,1 }; @@ -4240,8 +4889,8 @@ static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint3 for (i=0; i < x; ++i) { int out_y = j*yspc[p]+yorig[p]; int out_x = i*xspc[p]+xorig[p]; - memcpy(final + out_y*a->s->img_x*out_n + out_x*out_n, - a->out + (j*x+i)*out_n, out_n); + memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, + a->out + (j*x+i)*out_bytes, out_bytes); } } STBI_FREE(a->out); @@ -4309,7 +4958,7 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; stbi_uc *p, *temp_out, *orig = a->out; - p = (stbi_uc *) stbi__malloc(pixel_count * pal_img_n); + p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); if (p == NULL) return stbi__err("outofmem", "Out of memory"); // between here and free(out) below, exitting would leak @@ -4341,39 +4990,46 @@ static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int return 1; } -static int stbi__reduce_png(stbi__png *p) -{ - int i; - int img_len = p->s->img_x * p->s->img_y * p->s->img_out_n; - stbi_uc *reduced; - stbi__uint16 *orig = (stbi__uint16*)p->out; - - if (p->depth != 16) return 1; // don't need to do anything if not 16-bit data - - reduced = (stbi_uc *)stbi__malloc(img_len); - if (p == NULL) return stbi__err("outofmem", "Out of memory"); - - for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is a decent approx of 16->8 bit 
scaling +static int stbi__unpremultiply_on_load_global = 0; +static int stbi__de_iphone_flag_global = 0; - p->out = reduced; - STBI_FREE(orig); +STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +{ + stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; +} - return 1; +STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi__de_iphone_flag_global = flag_true_if_should_convert; } -static int stbi__unpremultiply_on_load = 0; -static int stbi__de_iphone_flag = 0; +#ifndef STBI_THREAD_LOCAL +#define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global +#define stbi__de_iphone_flag stbi__de_iphone_flag_global +#else +static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; +static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; -STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) +STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) { - stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; + stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; + stbi__unpremultiply_on_load_set = 1; } -STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) { - stbi__de_iphone_flag = flag_true_if_should_convert; + stbi__de_iphone_flag_local = flag_true_if_should_convert; + stbi__de_iphone_flag_set = 1; } +#define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \ + ? stbi__unpremultiply_on_load_local \ + : stbi__unpremultiply_on_load_global) +#define stbi__de_iphone_flag (stbi__de_iphone_flag_set \ + ? 
stbi__de_iphone_flag_local \ + : stbi__de_iphone_flag_global) +#endif // STBI_THREAD_LOCAL + static void stbi__de_iphone(stbi__png *z) { stbi__context *s = z->s; @@ -4395,9 +5051,10 @@ static void stbi__de_iphone(stbi__png *z) stbi_uc a = p[3]; stbi_uc t = p[0]; if (a) { - p[0] = p[2] * 255 / a; - p[1] = p[1] * 255 / a; - p[2] = t * 255 / a; + stbi_uc half = a / 2; + p[0] = (p[2] * 255 + half) / a; + p[1] = (p[1] * 255 + half) / a; + p[2] = ( t * 255 + half) / a; } else { p[0] = p[2]; p[2] = t; @@ -4416,12 +5073,12 @@ static void stbi__de_iphone(stbi__png *z) } } -#define STBI__PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) +#define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { stbi_uc palette[1024], pal_img_n=0; - stbi_uc has_trans=0, tc[3]; + stbi_uc has_trans=0, tc[3]={0}; stbi__uint16 tc16[3]; stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; int first=1,k,interlace=0, color=0, is_iphone=0; @@ -4447,11 +5104,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); first = 0; if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); - s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); - s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); + s->img_x = stbi__get32be(s); + s->img_y = stbi__get32be(s); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); color 
= stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); - if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); + if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); @@ -4460,14 +5119,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (!pal_img_n) { s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); - if (scan == STBI__SCAN_header) return 1; } else { // if paletted, then pal_n is our final components, and // img_n is # components to decompress/filter. s->img_n = 1; if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); - // if SCAN_header, have to scan to see if we have a tRNS } + // even with SCAN_header, have to scan to see if we have a tRNS break; } @@ -4499,10 +5157,14 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); has_trans = 1; + // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
+ if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = stbi__get16be(s); // copy the values as-is + for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning + tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger + for (k = 0; k < s->img_n && k < 3; ++k) + tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; @@ -4511,7 +5173,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) case STBI__PNG_TYPE('I','D','A','T'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); - if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } + if (scan == STBI__SCAN_header) { + // header scan definitely stops at first IDAT + if (pal_img_n) + s->img_n = pal_img_n; + return 1; + } + if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); if ((int)(ioff + c.length) < (int)ioff) return 0; if (ioff + c.length > idata_limit) { stbi__uint32 idata_limit_old = idata_limit; @@ -4560,8 +5228,13 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) if (req_comp >= 3) s->img_out_n = req_comp; if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) return 0; + } else if (has_trans) { + // non-paletted image with tRNS -> source image has (constant) alpha + ++s->img_n; } STBI_FREE(z->expanded); z->expanded = NULL; + // end of PNG chunk, read and skip CRC + stbi__get32be(s); return 1; } @@ -4587,20 +5260,24 @@ static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) } } -static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp) +static 
void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) { - unsigned char *result=NULL; + void *result=NULL; if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { - if (p->depth == 16) { - if (!stbi__reduce_png(p)) { - return result; - } - } + if (p->depth <= 8) + ri->bits_per_channel = 8; + else if (p->depth == 16) + ri->bits_per_channel = 16; + else + return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); result = p->out; p->out = NULL; if (req_comp && req_comp != p->s->img_out_n) { - result = stbi__convert_format(result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + if (ri->bits_per_channel == 8) + result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); + else + result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); p->s->img_out_n = req_comp; if (result == NULL) return result; } @@ -4615,11 +5292,11 @@ static unsigned char *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req return result; } -static unsigned char *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi__png p; p.s = s; - return stbi__do_png(&p, x,y,comp,req_comp); + return stbi__do_png(&p, x,y,comp,req_comp, ri); } static int stbi__png_test(stbi__context *s) @@ -4642,11 +5319,24 @@ static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) return 1; } -static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) +{ + stbi__png p; + p.s = s; + return stbi__png_info_raw(&p, x, y, comp); +} + +static int stbi__png_is16(stbi__context *s) { stbi__png p; p.s = s; - return 
stbi__png_info_raw(&p, x, y, comp); + if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) + return 0; + if (p.depth != 16) { + stbi__rewind(p.s); + return 0; + } + return 1; } #endif @@ -4681,11 +5371,11 @@ static int stbi__high_bit(unsigned int z) { int n=0; if (z == 0) return -1; - if (z >= 0x10000) n += 16, z >>= 16; - if (z >= 0x00100) n += 8, z >>= 8; - if (z >= 0x00010) n += 4, z >>= 4; - if (z >= 0x00004) n += 2, z >>= 2; - if (z >= 0x00002) n += 1, z >>= 1; + if (z >= 0x10000) { n += 16; z >>= 16; } + if (z >= 0x00100) { n += 8; z >>= 8; } + if (z >= 0x00010) { n += 4; z >>= 4; } + if (z >= 0x00004) { n += 2; z >>= 2; } + if (z >= 0x00002) { n += 1;/* >>= 1;*/ } return n; } @@ -4699,29 +5389,62 @@ static int stbi__bitcount(unsigned int a) return a & 0xff; } -static int stbi__shiftsigned(int v, int shift, int bits) -{ - int result; - int z=0; - - if (shift < 0) v <<= -shift; - else v >>= shift; - result = v; - - z = bits; - while (z < 8) { - result += v >> z; - z += bits; - } - return result; +// extract an arbitrarily-aligned N-bit value (N=bits) +// from v, and then make it 8-bits long and fractionally +// extend it to full full range. 
+static int stbi__shiftsigned(unsigned int v, int shift, int bits) +{ + static unsigned int mul_table[9] = { + 0, + 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, + 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, + }; + static unsigned int shift_table[9] = { + 0, 0,0,1,0,2,4,6,0, + }; + if (shift < 0) + v <<= -shift; + else + v >>= shift; + STBI_ASSERT(v < 256); + v >>= (8-bits); + STBI_ASSERT(bits >= 0 && bits <= 8); + return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; } typedef struct { int bpp, offset, hsz; unsigned int mr,mg,mb,ma, all_a; + int extra_read; } stbi__bmp_data; +static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) +{ + // BI_BITFIELDS specifies masks explicitly, don't override + if (compress == 3) + return 1; + + if (compress == 0) { + if (info->bpp == 16) { + info->mr = 31u << 10; + info->mg = 31u << 5; + info->mb = 31u << 0; + } else if (info->bpp == 32) { + info->mr = 0xffu << 16; + info->mg = 0xffu << 8; + info->mb = 0xffu << 0; + info->ma = 0xffu << 24; + info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 + } else { + // otherwise, use defaults, which is all-0 + info->mr = info->mg = info->mb = info->ma = 0; + } + return 1; + } + return 0; // error +} + static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) { int hsz; @@ -4732,7 +5455,10 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) info->offset = stbi__get32le(s); info->hsz = hsz = stbi__get32le(s); info->mr = info->mg = info->mb = info->ma = 0; - + info->extra_read = 14; + + if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); if (hsz == 12) { s->img_x = stbi__get16le(s); @@ -4743,10 +5469,11 @@ static void *stbi__bmp_parse_header(stbi__context *s, 
stbi__bmp_data *info) } if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); info->bpp = stbi__get16le(s); - if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit"); if (hsz != 12) { int compress = stbi__get32le(s); if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); + if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes + if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel stbi__get32le(s); // discard sizeof stbi__get32le(s); // discard hres stbi__get32le(s); // discard vres @@ -4761,21 +5488,12 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) } if (info->bpp == 16 || info->bpp == 32) { if (compress == 0) { - if (info->bpp == 32) { - info->mr = 0xffu << 16; - info->mg = 0xffu << 8; - info->mb = 0xffu << 0; - info->ma = 0xffu << 24; - info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 - } else { - info->mr = 31u << 10; - info->mg = 31u << 5; - info->mb = 31u << 0; - } + stbi__bmp_set_mask_defaults(info, compress); } else if (compress == 3) { info->mr = stbi__get32le(s); info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); + info->extra_read += 12; // not documented, but generated by photoshop and handled by mspaint if (info->mr == info->mg && info->mg == info->mb) { // ?!?!? 
@@ -4785,6 +5503,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) return stbi__errpuc("bad BMP", "bad BMP"); } } else { + // V4/V5 header int i; if (hsz != 108 && hsz != 124) return stbi__errpuc("bad BMP", "bad BMP"); @@ -4792,6 +5511,8 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); info->ma = stbi__get32le(s); + if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs + stbi__bmp_set_mask_defaults(info, compress); stbi__get32le(s); // discard color space for (i=0; i < 12; ++i) stbi__get32le(s); // discard color space parameters @@ -4807,7 +5528,7 @@ static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) } -static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; unsigned int mr=0,mg=0,mb=0,ma=0, all_a; @@ -4815,14 +5536,18 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int int psize=0,i,j,width; int flip_vertically, pad, target; stbi__bmp_data info; + STBI_NOTUSED(ri); - info.all_a = 255; + info.all_a = 255; if (stbi__bmp_parse_header(s, &info) == NULL) return NULL; // error code already set flip_vertically = ((int) s->img_y) > 0; s->img_y = abs((int) s->img_y); + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + mr = info.mr; mg = info.mg; mb = info.mb; @@ -4831,19 +5556,45 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int if (info.hsz == 12) { if (info.bpp < 24) - psize = (info.offset - 14 - 24) / 3; + psize = (info.offset - info.extra_read - 24) / 3; } else { if (info.bpp < 16) - psize = (info.offset - 14 - info.hsz) >> 2; + 
psize = (info.offset - info.extra_read - info.hsz) >> 2; + } + if (psize == 0) { + // accept some number of extra bytes after the header, but if the offset points either to before + // the header ends or implies a large amount of extra data, reject the file as malformed + int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original); + int header_limit = 1024; // max we actually read is below 256 bytes currently. + int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. + if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { + return stbi__errpuc("bad header", "Corrupt BMP"); + } + // we established that bytes_read_so_far is positive and sensible. + // the first half of this test rejects offsets that are either too small positives, or + // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn + // ensures the number computed in the second half of the test can't overflow. + if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { + return stbi__errpuc("bad offset", "Corrupt BMP"); + } else { + stbi__skip(s, info.offset - bytes_read_so_far); + } } - s->img_n = ma ? 4 : 3; + if (info.bpp == 24 && ma == 0xff000000) + s->img_n = 3; + else + s->img_n = ma ? 
4 : 3; if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 target = req_comp; else target = s->img_n; // if they want monochrome, we'll post-convert - out = (stbi_uc *) stbi__malloc(target * s->img_x * s->img_y); + // sanity-check size + if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) + return stbi__errpuc("too large", "Corrupt BMP"); + + out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (info.bpp < 16) { int z=0; @@ -4855,36 +5606,56 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int if (info.hsz != 12) stbi__get8(s); pal[i][3] = 255; } - stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); - if (info.bpp == 4) width = (s->img_x + 1) >> 1; + stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); + if (info.bpp == 1) width = (s->img_x + 7) >> 3; + else if (info.bpp == 4) width = (s->img_x + 1) >> 1; else if (info.bpp == 8) width = s->img_x; else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } pad = (-width)&3; - for (j=0; j < (int) s->img_y; ++j) { - for (i=0; i < (int) s->img_x; i += 2) { - int v=stbi__get8(s),v2=0; - if (info.bpp == 4) { - v2 = v & 15; - v >>= 4; + if (info.bpp == 1) { + for (j=0; j < (int) s->img_y; ++j) { + int bit_offset = 7, v = stbi__get8(s); + for (i=0; i < (int) s->img_x; ++i) { + int color = (v>>bit_offset)&0x1; + out[z++] = pal[color][0]; + out[z++] = pal[color][1]; + out[z++] = pal[color][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + if((--bit_offset) < 0) { + bit_offset = 7; + v = stbi__get8(s); + } } - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; - if (i+1 == (int) s->img_x) break; - v = (info.bpp == 8) ? 
stbi__get8(s) : v2; - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; + stbi__skip(s, pad); + } + } else { + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=stbi__get8(s),v2=0; + if (info.bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (info.bpp == 8) ? stbi__get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + stbi__skip(s, pad); } - stbi__skip(s, pad); } } else { int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; int z = 0; int easy=0; - stbi__skip(s, info.offset - 14 - info.hsz); + stbi__skip(s, info.offset - info.extra_read - info.hsz); if (info.bpp == 24) width = 3 * s->img_x; else if (info.bpp == 16) width = 2*s->img_x; else /* bpp = 32 and pad = 0 */ width=0; @@ -4902,6 +5673,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); + if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } } for (j=0; j < (int) s->img_y; ++j) { if (easy) { @@ -4919,7 +5691,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int int bpp = info.bpp; for (i=0; i < (int) s->img_x; ++i) { stbi__uint32 v = (bpp == 16 ? 
(stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); - int a; + unsigned int a; out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); @@ -4931,7 +5703,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int stbi__skip(s, pad); } } - + // if alpha channel is all 0s, replace with all 255s if (target == 4 && all_a == 0) for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) @@ -4943,7 +5715,7 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int stbi_uc *p1 = out + j *s->img_x*target; stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; for (i=0; i < (int) s->img_x*target; ++i) { - t = p1[i], p1[i] = p2[i], p2[i] = t; + t = p1[i]; p1[i] = p2[i]; p2[i] = t; } } } @@ -4967,14 +5739,14 @@ static stbi_uc *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) { // only RGB or RGBA (incl. 
16bit) or grey allowed - if(is_rgb16) *is_rgb16 = 0; + if (is_rgb16) *is_rgb16 = 0; switch(bits_per_pixel) { case 8: return STBI_grey; case 16: if(is_grey) return STBI_grey_alpha; - // else: fall-through + // fallthrough case 15: if(is_rgb16) *is_rgb16 = 1; - return STBI_rgb; - case 24: // fall-through + return STBI_rgb; + case 24: // fallthrough case 32: return bits_per_pixel/8; default: return 0; } @@ -5077,18 +5849,18 @@ static int stbi__tga_test(stbi__context *s) } // read 16bit value and convert to 24bit RGB -void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) +static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) { - stbi__uint16 px = stbi__get16le(s); + stbi__uint16 px = (stbi__uint16)stbi__get16le(s); stbi__uint16 fiveBitMask = 31; // we have 3 channels with 5bits each int r = (px >> 10) & fiveBitMask; int g = (px >> 5) & fiveBitMask; int b = px & fiveBitMask; // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later - out[0] = (r * 255)/31; - out[1] = (g * 255)/31; - out[2] = (b * 255)/31; + out[0] = (stbi_uc)((r * 255)/31); + out[1] = (stbi_uc)((g * 255)/31); + out[2] = (stbi_uc)((b * 255)/31); // some people claim that the most significant bit might be used for alpha // (possibly if an alpha-bit is set in the "image descriptor byte") @@ -5096,7 +5868,7 @@ void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) // so let's treat all 15 and 16bit TGAs as RGB with no alpha. 
} -static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { // read in the TGA header stuff int tga_offset = stbi__get8(s); @@ -5118,10 +5890,16 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int unsigned char *tga_data; unsigned char *tga_palette = NULL; int i, j; - unsigned char raw_data[4]; + unsigned char raw_data[4] = {0}; int RLE_count = 0; int RLE_repeating = 0; int read_next_pixel = 1; + STBI_NOTUSED(ri); + STBI_NOTUSED(tga_x_origin); // @TODO + STBI_NOTUSED(tga_y_origin); // @TODO + + if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); // do a tiny bit of precessing if ( tga_image_type >= 8 ) @@ -5143,7 +5921,10 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int *y = tga_height; if (comp) *comp = tga_comp; - tga_data = (unsigned char*)stbi__malloc( (size_t)tga_width * tga_height * tga_comp ); + if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) + return stbi__errpuc("too large", "Corrupt TGA"); + + tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); // skip to the data's starting position (offset usually = 0) @@ -5159,10 +5940,15 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int // do I need to load a palette? if ( tga_indexed) { + if (tga_palette_len == 0) { /* you have to have at least one entry! */ + STBI_FREE(tga_data); + return stbi__errpuc("bad palette", "Corrupt TGA"); + } + // any data to skip? 
(offset usually = 0) stbi__skip(s, tga_palette_start ); // load the palette - tga_palette = (unsigned char*)stbi__malloc( tga_palette_len * tga_comp ); + tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); if (!tga_palette) { STBI_FREE(tga_data); return stbi__errpuc("outofmem", "Out of memory"); @@ -5282,6 +6068,7 @@ static stbi_uc *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int // Microsoft's C compilers happy... [8^( tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0; + STBI_NOTUSED(tga_palette_start); // OK, done return tga_data; } @@ -5298,14 +6085,53 @@ static int stbi__psd_test(stbi__context *s) return r; } -static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) +{ + int count, nleft, len; + + count = 0; + while ((nleft = pixelCount - count) > 0) { + len = stbi__get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + if (len > nleft) return 0; // corrupt data + count += len; + while (len) { + *p = stbi__get8(s); + p += 4; + len--; + } + } else if (len > 128) { + stbi_uc val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) 
+ len = 257 - len; + if (len > nleft) return 0; // corrupt data + val = stbi__get8(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + + return 1; +} + +static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) { - int pixelCount; + int pixelCount; int channelCount, compression; - int channel, i, count, len; + int channel, i; int bitdepth; int w,h; stbi_uc *out; + STBI_NOTUSED(ri); // Check identifier if (stbi__get32be(s) != 0x38425053) // "8BPS" @@ -5327,6 +6153,9 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int h = stbi__get32be(s); w = stbi__get32be(s); + if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + // Make sure the depth is 8 bits. bitdepth = stbi__get16be(s); if (bitdepth != 8 && bitdepth != 16) @@ -5362,8 +6191,18 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int if (compression > 1) return stbi__errpuc("bad compression", "PSD has an unknown compression format"); + // Check size + if (!stbi__mad3sizes_valid(4, w, h, 0)) + return stbi__errpuc("too large", "Corrupt PSD"); + // Create the destination image. - out = (stbi_uc *) stbi__malloc(4 * w*h); + + if (!compression && bitdepth == 16 && bpc == 16) { + out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); + ri->bits_per_channel = 16; + } else + out = (stbi_uc *) stbi__malloc(4 * w*h); + if (!out) return stbi__errpuc("outofmem", "Out of memory"); pixelCount = w*h; @@ -5380,7 +6219,7 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int // Else if n is 128, noop. // Endloop - // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, // which we're going to just skip. 
stbi__skip(s, h * channelCount * 2 ); @@ -5395,82 +6234,86 @@ static stbi_uc *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int *p = (channel == 3 ? 255 : 0); } else { // Read the RLE data. - count = 0; - while (count < pixelCount) { - len = stbi__get8(s); - if (len == 128) { - // No-op. - } else if (len < 128) { - // Copy next len+1 bytes literally. - len++; - count += len; - while (len) { - *p = stbi__get8(s); - p += 4; - len--; - } - } else if (len > 128) { - stbi_uc val; - // Next -len+1 bytes in the dest are replicated from next source byte. - // (Interpret len as a negative 8-bit int.) - len ^= 0x0FF; - len += 2; - val = stbi__get8(s); - count += len; - while (len) { - *p = val; - p += 4; - len--; - } - } + if (!stbi__psd_decode_rle(s, p, pixelCount)) { + STBI_FREE(out); + return stbi__errpuc("corrupt", "bad RLE data"); } } } } else { // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) - // where each channel consists of an 8-bit value for each pixel in the image. + // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. // Read the data by channel. for (channel = 0; channel < 4; channel++) { - stbi_uc *p; - - p = out + channel; if (channel >= channelCount) { // Fill this channel with default data. - stbi_uc val = channel == 3 ? 255 : 0; - for (i = 0; i < pixelCount; i++, p += 4) - *p = val; - } else { - // Read the data. - if (bitdepth == 16) { - for (i = 0; i < pixelCount; i++, p += 4) - *p = (stbi_uc) (stbi__get16be(s) >> 8); + if (bitdepth == 16 && bpc == 16) { + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + stbi__uint16 val = channel == 3 ? 65535 : 0; + for (i = 0; i < pixelCount; i++, q += 4) + *q = val; } else { + stbi_uc *p = out+channel; + stbi_uc val = channel == 3 ? 
255 : 0; for (i = 0; i < pixelCount; i++, p += 4) - *p = stbi__get8(s); + *p = val; + } + } else { + if (ri->bits_per_channel == 16) { // output bpc + stbi__uint16 *q = ((stbi__uint16 *) out) + channel; + for (i = 0; i < pixelCount; i++, q += 4) + *q = (stbi__uint16) stbi__get16be(s); + } else { + stbi_uc *p = out+channel; + if (bitdepth == 16) { // input bpc + for (i = 0; i < pixelCount; i++, p += 4) + *p = (stbi_uc) (stbi__get16be(s) >> 8); + } else { + for (i = 0; i < pixelCount; i++, p += 4) + *p = stbi__get8(s); + } } } } } + // remove weird white matte from PSD if (channelCount >= 4) { - for (i=0; i < w*h; ++i) { - unsigned char *pixel = out + 4*i; - if (pixel[3] != 0 && pixel[3] != 255) { - // remove weird white matte from PSD - float a = pixel[3] / 255.0f; - float ra = 1.0f / a; - float inv_a = 255.0f * (1 - ra); - pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); - pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); - pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + if (ri->bits_per_channel == 16) { + for (i=0; i < w*h; ++i) { + stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; + if (pixel[3] != 0 && pixel[3] != 65535) { + float a = pixel[3] / 65535.0f; + float ra = 1.0f / a; + float inv_a = 65535.0f * (1 - ra); + pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); + pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); + pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); + } + } + } else { + for (i=0; i < w*h; ++i) { + unsigned char *pixel = out + 4*i; + if (pixel[3] != 0 && pixel[3] != 255) { + float a = pixel[3] / 255.0f; + float ra = 1.0f / a; + float inv_a = 255.0f * (1 - ra); + pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); + pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); + pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); + } } } } + // convert to desired output format if (req_comp && req_comp != 4) { - out = stbi__convert_format(out, 4, req_comp, w, h); + if (ri->bits_per_channel == 16) + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 
4, req_comp, w, h); + else + out = stbi__convert_format(out, 4, req_comp, w, h); if (out == NULL) return out; // stbi__convert_format frees input on failure } @@ -5654,25 +6497,33 @@ static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *c return result; } -static stbi_uc *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp) +static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) { stbi_uc *result; - int i, x,y; + int i, x,y, internal_comp; + STBI_NOTUSED(ri); + + if (!comp) comp = &internal_comp; for (i=0; i<92; ++i) stbi__get8(s); x = stbi__get16be(s); y = stbi__get16be(s); + + if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); - if ((1 << 28) / x < y) return stbi__errpuc("too large", "Image too large to decode"); + if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); stbi__get32be(s); //skip `ratio' stbi__get16be(s); //skip `fields' stbi__get16be(s); //skip `pad' // intermediate buffer is RGBA - result = (stbi_uc *) stbi__malloc(x*y*4); + result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); + if (!result) return stbi__errpuc("outofmem", "Out of memory"); memset(result, 0xff, x*y*4); if (!stbi__pic_load_core(s,x,y,comp, result)) { @@ -5709,11 +6560,13 @@ typedef struct typedef struct { int w,h; - stbi_uc *out, *old_out; // output buffer (always 4 components) - int flags, bgindex, ratio, transparent, eflags, delay; + stbi_uc *out; // output buffer (always 4 components) + stbi_uc *background; // The current "background" as far as a gif is concerned + stbi_uc *history; + int flags, bgindex, ratio, transparent, eflags; stbi_uc pal[256][4]; stbi_uc lpal[256][4]; - stbi__gif_lzw codes[4096]; + 
stbi__gif_lzw codes[8192]; stbi_uc *color_table; int parse, step; int lflags; @@ -5721,6 +6574,7 @@ typedef struct int max_x, max_y; int cur_x, cur_y; int line_size; + int delay; } stbi__gif; static int stbi__gif_test_raw(stbi__context *s) @@ -5769,6 +6623,9 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in g->ratio = stbi__get8(s); g->transparent = -1; + if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments if (is_info) return 1; @@ -5782,6 +6639,7 @@ static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_in static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) { stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); + if (!g) return stbi__err("outofmem", "Out of memory"); if (!stbi__gif_header(s, g, comp, 1)) { STBI_FREE(g); stbi__rewind( s ); @@ -5796,6 +6654,7 @@ static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) { stbi_uc *p, *c; + int idx; // recurse to decode the prefixes, since the linked-list is backwards, // and working backwards through an interleaved image would be nasty @@ -5804,10 +6663,12 @@ static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) if (g->cur_y >= g->max_y) return; - p = &g->out[g->cur_x + g->cur_y]; - c = &g->color_table[g->codes[code].suffix * 4]; + idx = g->cur_x + g->cur_y; + p = &g->out[idx]; + g->history[idx / 4] = 1; - if (c[3] >= 128) { + c = &g->color_table[g->codes[code].suffix * 4]; + if (c[3] > 128) { // don't render transparent pixels; p[0] = c[2]; p[1] = c[1]; p[2] = c[0]; @@ -5881,11 +6742,16 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) stbi__skip(s,len); return g->out; } else if (code 
<= avail) { - if (first) return stbi__errpuc("no clear code", "Corrupt GIF"); + if (first) { + return stbi__errpuc("no clear code", "Corrupt GIF"); + } if (oldcode >= 0) { p = &g->codes[avail++]; - if (avail > 4096) return stbi__errpuc("too many codes", "Corrupt GIF"); + if (avail > 8192) { + return stbi__errpuc("too many codes", "Corrupt GIF"); + } + p->prefix = (stbi__int16) oldcode; p->first = g->codes[oldcode].first; p->suffix = (code == avail) ? p->first : g->codes[code].first; @@ -5907,59 +6773,77 @@ static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) } } -static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1) -{ - int x, y; - stbi_uc *c = g->pal[g->bgindex]; - for (y = y0; y < y1; y += 4 * g->w) { - for (x = x0; x < x1; x += 4) { - stbi_uc *p = &g->out[y + x]; - p[0] = c[2]; - p[1] = c[1]; - p[2] = c[0]; - p[3] = 0; - } - } -} - // this function is designed to support animated gifs, although stb_image doesn't support it -static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp) +// two back is the image from two frames ago, used for a very specific disposal format +static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) { - int i; - stbi_uc *prev_out = 0; + int dispose; + int first_frame; + int pi; + int pcount; + STBI_NOTUSED(req_comp); - if (g->out == 0 && !stbi__gif_header(s, g, comp,0)) - return 0; // stbi__g_failure_reason set by stbi__gif_header + // on first frame, any non-written pixels get the background colour (non-transparent) + first_frame = 0; + if (g->out == 0) { + if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header + if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) + return stbi__errpuc("too large", "GIF image is too large"); + pcount = g->w * g->h; + g->out = (stbi_uc *) stbi__malloc(4 * pcount); + g->background = (stbi_uc *) stbi__malloc(4 * pcount); + 
g->history = (stbi_uc *) stbi__malloc(pcount); + if (!g->out || !g->background || !g->history) + return stbi__errpuc("outofmem", "Out of memory"); + + // image is treated as "transparent" at the start - ie, nothing overwrites the current background; + // background colour is only used for pixels that are not rendered first frame, after that "background" + // color refers to the color that was there the previous frame. + memset(g->out, 0x00, 4 * pcount); + memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) + memset(g->history, 0x00, pcount); // pixels that were affected previous frame + first_frame = 1; + } else { + // second frame - how do we dispose of the previous one? + dispose = (g->eflags & 0x1C) >> 2; + pcount = g->w * g->h; - prev_out = g->out; - g->out = (stbi_uc *) stbi__malloc(4 * g->w * g->h); - if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); + if ((dispose == 3) && (two_back == 0)) { + dispose = 2; // if I don't have an image to revert back to, default to the old background + } - switch ((g->eflags & 0x1C) >> 2) { - case 0: // unspecified (also always used on 1st frame) - stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h); - break; - case 1: // do not dispose - if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h); - g->old_out = prev_out; - break; - case 2: // dispose to background - if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h); - stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y); - break; - case 3: // dispose to previous - if (g->old_out) { - for (i = g->start_y; i < g->max_y; i += 4 * g->w) - memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x); + if (dispose == 3) { // use previous graphic + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); + } } - break; + } else if (dispose == 2) { + // restore what was changed last frame to background before that frame; + 
for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi]) { + memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); + } + } + } else { + // This is a non-disposal case eithe way, so just + // leave the pixels as is, and they will become the new background + // 1: do not dispose + // 0: not specified. + } + + // background is what out is after the undoing of the previou frame; + memcpy( g->background, g->out, 4 * g->w * g->h ); } + // clear my history; + memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame + for (;;) { - switch (stbi__get8(s)) { + int tag = stbi__get8(s); + switch (tag) { case 0x2C: /* Image Descriptor */ { - int prev_trans = -1; stbi__int32 x, y, w, h; stbi_uc *o; @@ -5978,6 +6862,13 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i g->cur_x = g->start_x; g->cur_y = g->start_y; + // if the width of the specified rectangle is 0, that means + // we may not see *any* pixels or the image is malformed; + // to make sure this is caught, move the current y down to + // max_y (which is what out_gif_code checks). + if (w == 0) + g->cur_y = g->max_y; + g->lflags = stbi__get8(s); if (g->lflags & 0x40) { @@ -5992,19 +6883,24 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); g->color_table = (stbi_uc *) g->lpal; } else if (g->flags & 0x80) { - if (g->transparent >= 0 && (g->eflags & 0x01)) { - prev_trans = g->pal[g->transparent][3]; - g->pal[g->transparent][3] = 0; - } g->color_table = (stbi_uc *) g->pal; } else return stbi__errpuc("missing color table", "Corrupt GIF"); o = stbi__process_gif_raster(s, g); - if (o == NULL) return NULL; - - if (prev_trans != -1) - g->pal[g->transparent][3] = (stbi_uc) prev_trans; + if (!o) return NULL; + + // if this was the first frame, + pcount = g->w * g->h; + if (first_frame && (g->bgindex > 0)) { + // if first frame, any pixel not drawn to gets the background color + for (pi = 0; pi < pcount; ++pi) { + if (g->history[pi] == 0) { + g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; + memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); + } + } + } return o; } @@ -6012,19 +6908,35 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i case 0x21: // Comment Extension. { int len; - if (stbi__get8(s) == 0xF9) { // Graphic Control Extension. + int ext = stbi__get8(s); + if (ext == 0xF9) { // Graphic Control Extension. len = stbi__get8(s); if (len == 4) { g->eflags = stbi__get8(s); - g->delay = stbi__get16le(s); - g->transparent = stbi__get8(s); + g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. 
+ + // unset old transparent + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 255; + } + if (g->eflags & 0x01) { + g->transparent = stbi__get8(s); + if (g->transparent >= 0) { + g->pal[g->transparent][3] = 0; + } + } else { + // don't need transparent + stbi__skip(s, 1); + g->transparent = -1; + } } else { stbi__skip(s, len); break; } } - while ((len = stbi__get8(s)) != 0) + while ((len = stbi__get8(s)) != 0) { stbi__skip(s, len); + } break; } @@ -6035,27 +6947,130 @@ static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, i return stbi__errpuc("unknown code", "Corrupt GIF"); } } +} - STBI_NOTUSED(req_comp); +static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays) +{ + STBI_FREE(g->out); + STBI_FREE(g->history); + STBI_FREE(g->background); + + if (out) STBI_FREE(out); + if (delays && *delays) STBI_FREE(*delays); + return stbi__errpuc("outofmem", "Out of memory"); } -static stbi_uc *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp) +{ + if (stbi__gif_test(s)) { + int layers = 0; + stbi_uc *u = 0; + stbi_uc *out = 0; + stbi_uc *two_back = 0; + stbi__gif g; + int stride; + int out_size = 0; + int delays_size = 0; + + STBI_NOTUSED(out_size); + STBI_NOTUSED(delays_size); + + memset(&g, 0, sizeof(g)); + if (delays) { + *delays = 0; + } + + do { + u = stbi__gif_load_next(s, &g, comp, req_comp, two_back); + if (u == (stbi_uc *) s) u = 0; // end of animated gif marker + + if (u) { + *x = g.w; + *y = g.h; + ++layers; + stride = g.w * g.h * 4; + + if (out) { + void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride ); + if (!tmp) + return stbi__load_gif_main_outofmem(&g, out, delays); + else { + out = (stbi_uc*) tmp; + out_size = layers * stride; + } + + if (delays) { + int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers ); + 
if (!new_delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + *delays = new_delays; + delays_size = layers * sizeof(int); + } + } else { + out = (stbi_uc*)stbi__malloc( layers * stride ); + if (!out) + return stbi__load_gif_main_outofmem(&g, out, delays); + out_size = layers * stride; + if (delays) { + *delays = (int*) stbi__malloc( layers * sizeof(int) ); + if (!*delays) + return stbi__load_gif_main_outofmem(&g, out, delays); + delays_size = layers * sizeof(int); + } + } + memcpy( out + ((layers - 1) * stride), u, stride ); + if (layers >= 2) { + two_back = out - 2 * stride; + } + + if (delays) { + (*delays)[layers - 1U] = g.delay; + } + } + } while (u != 0); + + // free temp buffer; + STBI_FREE(g.out); + STBI_FREE(g.history); + STBI_FREE(g.background); + + // do the final conversion after loading everything; + if (req_comp && req_comp != 4) + out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h); + + *z = layers; + return out; + } else { + return stbi__errpuc("not GIF", "Image was not as a gif type."); + } +} + +static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *u = 0; - stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); - memset(g, 0, sizeof(*g)); + stbi__gif g; + memset(&g, 0, sizeof(g)); + STBI_NOTUSED(ri); - u = stbi__gif_load_next(s, g, comp, req_comp); + u = stbi__gif_load_next(s, &g, comp, req_comp, 0); if (u == (stbi_uc *) s) u = 0; // end of animated gif marker if (u) { - *x = g->w; - *y = g->h; + *x = g.w; + *y = g.h; + + // moved conversion to after successful load so that the same + // can be done for multiple frames. if (req_comp && req_comp != 4) - u = stbi__convert_format(u, 4, req_comp, g->w, g->h); + u = stbi__convert_format(u, 4, req_comp, g.w, g.h); + } else if (g.out) { + // if there was an error and we allocated an image buffer, free it! 
+ STBI_FREE(g.out); } - else if (g->out) - STBI_FREE(g->out); - STBI_FREE(g); + + // free buffers needed for multiple frame loading; + STBI_FREE(g.history); + STBI_FREE(g.background); + return u; } @@ -6069,20 +7084,24 @@ static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) // Radiance RGBE HDR loader // originally by Nicolas Schulz #ifndef STBI_NO_HDR -static int stbi__hdr_test_core(stbi__context *s) +static int stbi__hdr_test_core(stbi__context *s, const char *signature) { - const char *signature = "#?RADIANCE\n"; int i; for (i=0; signature[i]; ++i) if (stbi__get8(s) != signature[i]) - return 0; + return 0; + stbi__rewind(s); return 1; } static int stbi__hdr_test(stbi__context* s) { - int r = stbi__hdr_test_core(s); + int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); stbi__rewind(s); + if(!r) { + r = stbi__hdr_test_core(s, "#?RGBE\n"); + stbi__rewind(s); + } return r; } @@ -6136,7 +7155,7 @@ static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) } } -static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { char buffer[STBI__HDR_BUFLEN]; char *token; @@ -6147,10 +7166,12 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re int len; unsigned char count, value; int i, j, k, c1,c2, z; - + const char *headerToken; + STBI_NOTUSED(ri); // Check identifier - if (strcmp(stbi__hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + headerToken = stbi__hdr_gettoken(s,buffer); + if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) return stbi__errpf("not HDR", "Corrupt HDR image"); // Parse header @@ -6173,14 +7194,22 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re token += 3; width = (int) strtol(token, NULL, 10); + if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); 
+ if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)"); + *x = width; *y = height; if (comp) *comp = 3; if (req_comp == 0) req_comp = 3; + if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) + return stbi__errpf("too large", "HDR image is too large"); + // Read data - hdr_data = (float *) stbi__malloc(height * width * req_comp * sizeof(float)); + hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); + if (!hdr_data) + return stbi__errpf("outofmem", "Out of memory"); // Load image data // image data is stored as some number of sca @@ -6219,20 +7248,29 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re len <<= 8; len |= stbi__get8(s); if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } - if (scanline == NULL) scanline = (stbi_uc *) stbi__malloc(width * 4); + if (scanline == NULL) { + scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0); + if (!scanline) { + STBI_FREE(hdr_data); + return stbi__errpf("outofmem", "Out of memory"); + } + } for (k = 0; k < 4; ++k) { + int nleft; i = 0; - while (i < width) { + while ((nleft = width - i) > 0) { count = stbi__get8(s); if (count > 128) { // Run value = stbi__get8(s); count -= 128; + if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } for (z = 0; z < count; ++z) scanline[i++ * 4 + k] = value; } else { // Dump + if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } for (z = 0; z < count; ++z) scanline[i++ * 4 + k] = stbi__get8(s); } @@ -6241,7 +7279,8 @@ static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int re for (i=0; i < width; ++i) stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); } - STBI_FREE(scanline); 
+ if (scanline) + STBI_FREE(scanline); } return hdr_data; @@ -6252,6 +7291,11 @@ static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) char buffer[STBI__HDR_BUFLEN]; char *token; int valid = 0; + int dummy; + + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; if (stbi__hdr_test(s) == 0) { stbi__rewind( s ); @@ -6293,14 +7337,20 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) void *p; stbi__bmp_data info; - info.all_a = 255; + info.all_a = 255; p = stbi__bmp_parse_header(s, &info); - stbi__rewind( s ); - if (p == NULL) + if (p == NULL) { + stbi__rewind( s ); return 0; - *x = s->img_x; - *y = s->img_y; - *comp = info.ma ? 4 : 3; + } + if (x) *x = s->img_x; + if (y) *y = s->img_y; + if (comp) { + if (info.bpp == 24 && info.ma == 0xff000000) + *comp = 3; + else + *comp = info.ma ? 4 : 3; + } return 1; } #endif @@ -6308,7 +7358,10 @@ static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) #ifndef STBI_NO_PSD static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) { - int channelCount; + int channelCount, dummy, depth; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; if (stbi__get32be(s) != 0x38425053) { stbi__rewind( s ); return 0; @@ -6325,7 +7378,8 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) } *y = stbi__get32be(s); *x = stbi__get32be(s); - if (stbi__get16be(s) != 8) { + depth = stbi__get16be(s); + if (depth != 8 && depth != 16) { stbi__rewind( s ); return 0; } @@ -6336,14 +7390,45 @@ static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) *comp = 4; return 1; } + +static int stbi__psd_is16(stbi__context *s) +{ + int channelCount, depth; + if (stbi__get32be(s) != 0x38425053) { + stbi__rewind( s ); + return 0; + } + if (stbi__get16be(s) != 1) { + stbi__rewind( s ); + return 0; + } + stbi__skip(s, 6); + channelCount = stbi__get16be(s); + if (channelCount < 0 || channelCount > 16) { + stbi__rewind( s ); + return 
0; + } + STBI_NOTUSED(stbi__get32be(s)); + STBI_NOTUSED(stbi__get32be(s)); + depth = stbi__get16be(s); + if (depth != 16) { + stbi__rewind( s ); + return 0; + } + return 1; +} #endif #ifndef STBI_NO_PIC static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) { - int act_comp=0,num_packets=0,chained; + int act_comp=0,num_packets=0,chained,dummy; stbi__pic_packet packets[10]; + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { stbi__rewind(s); return 0; @@ -6403,7 +7488,6 @@ static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) // Known limitations: // Does not support comments in the header section // Does not support ASCII image data (formats P2 and P3) -// Does not support 16-bit-per-channel #ifndef STBI_NO_PNM @@ -6419,21 +7503,38 @@ static int stbi__pnm_test(stbi__context *s) return 1; } -static stbi_uc *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp) +static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; - if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) + STBI_NOTUSED(ri); + + ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); + if (ri->bits_per_channel == 0) return 0; + + if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); + *x = s->img_x; *y = s->img_y; - *comp = s->img_n; + if (comp) *comp = s->img_n; + + if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) + return stbi__errpuc("too large", "PNM too large"); - out = (stbi_uc *) stbi__malloc(s->img_n * s->img_x * s->img_y); + out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); if (!out) return stbi__errpuc("outofmem", 
"Out of memory"); - stbi__getn(s, out, s->img_n * s->img_x * s->img_y); + if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { + STBI_FREE(out); + return stbi__errpuc("bad PNM", "PNM file truncated"); + } if (req_comp && req_comp != s->img_n) { - out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + if (ri->bits_per_channel == 16) { + out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); + } else { + out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); + } if (out == NULL) return out; // stbi__convert_format frees input on failure } return out; @@ -6470,6 +7571,8 @@ static int stbi__pnm_getinteger(stbi__context *s, char *c) while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { value = value*10 + (*c - '0'); *c = (char) stbi__get8(s); + if((value > 214748364) || (value == 214748364 && *c > '7')) + return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); } return value; @@ -6477,16 +7580,20 @@ static int stbi__pnm_getinteger(stbi__context *s, char *c) static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) { - int maxv; + int maxv, dummy; char c, p, t; - stbi__rewind( s ); + if (!x) x = &dummy; + if (!y) y = &dummy; + if (!comp) comp = &dummy; + + stbi__rewind(s); // Get identifier p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { - stbi__rewind( s ); + stbi__rewind(s); return 0; } @@ -6496,17 +7603,29 @@ static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) stbi__pnm_skip_whitespace(s, &c); *x = stbi__pnm_getinteger(s, &c); // read width + if(*x == 0) + return stbi__err("invalid width", "PPM image header had zero or overflowing width"); stbi__pnm_skip_whitespace(s, &c); *y = stbi__pnm_getinteger(s, &c); // read height + if (*y == 0) + return stbi__err("invalid width", "PPM image header had zero or overflowing width"); 
stbi__pnm_skip_whitespace(s, &c); maxv = stbi__pnm_getinteger(s, &c); // read max value - - if (maxv > 255) - return stbi__err("max value > 255", "PPM image not 8-bit"); + if (maxv > 65535) + return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); + else if (maxv > 255) + return 16; else - return 1; + return 8; +} + +static int stbi__pnm_is16(stbi__context *s) +{ + if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) + return 1; + return 0; } #endif @@ -6552,6 +7671,22 @@ static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) return stbi__err("unknown image type", "Image not of any known type, or corrupt"); } +static int stbi__is_16_main(stbi__context *s) +{ + #ifndef STBI_NO_PNG + if (stbi__png_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PSD + if (stbi__psd_is16(s)) return 1; + #endif + + #ifndef STBI_NO_PNM + if (stbi__pnm_is16(s)) return 1; + #endif + return 0; +} + #ifndef STBI_NO_STDIO STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) { @@ -6573,6 +7708,27 @@ STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) fseek(f,pos,SEEK_SET); return r; } + +STBIDEF int stbi_is_16_bit(char const *filename) +{ + FILE *f = stbi__fopen(filename, "rb"); + int result; + if (!f) return stbi__err("can't fopen", "Unable to open file"); + result = stbi_is_16_bit_from_file(f); + fclose(f); + return result; +} + +STBIDEF int stbi_is_16_bit_from_file(FILE *f) +{ + int r; + stbi__context s; + long pos = ftell(f); + stbi__start_file(&s, f); + r = stbi__is_16_main(&s); + fseek(f,pos,SEEK_SET); + return r; +} #endif // !STBI_NO_STDIO STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) @@ -6589,10 +7745,44 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int return stbi__info_main(&s,x,y,comp); } +STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) +{ + stbi__context s; + stbi__start_mem(&s,buffer,len); + return 
stbi__is_16_main(&s); +} + +STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) +{ + stbi__context s; + stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); + return stbi__is_16_main(&s); +} + #endif // STB_IMAGE_IMPLEMENTATION /* revision history: + 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs + 2.19 (2018-02-11) fix warning + 2.18 (2018-01-30) fix warnings + 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug + 1-bit BMP + *_is_16_bit api + avoid warnings + 2.16 (2017-07-23) all functions have 16-bit variants; + STBI_NO_STDIO works again; + compilation fixes; + fix rounding in unpremultiply; + optimize vertical flip; + disable raw_len validation; + documentation fixes + 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; + warning fixes; disable run-time SSE detection on gcc; + uniform handling of optional "return" values; + thread-safe initialization of zlib tables + 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs + 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) allocate large structures on the stack remove white matting for transparent PSD @@ -6752,4 +7942,47 @@ STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int on 'test' only check type, not whether we support this variant 0.50 (2006-11-19) first released version -*/ \ No newline at end of file +*/ + + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/zenovis/stbi/include/tinyexr.h b/zenovis/stbi/include/tinyexr.h deleted file mode 100644 index 20adfeffbb..0000000000 --- a/zenovis/stbi/include/tinyexr.h +++ /dev/null @@ -1,13315 +0,0 @@ -/* -Copyright (c) 2014 - 2019, Syoyo Fujita and many contributors. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Syoyo Fujita nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// TinyEXR contains some OpenEXR code, which is licensed under ------------ - -/////////////////////////////////////////////////////////////////////////// -// -// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas -// Digital Ltd. LLC -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Industrial Light & Magic nor the names of -// its contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////// - -// End of OpenEXR license ------------------------------------------------- - -#ifndef TINYEXR_H_ -#define TINYEXR_H_ - -// -// -// Do this: -// #define TINYEXR_IMPLEMENTATION -// before you include this file in *one* C or C++ file to create the -// implementation. -// -// // i.e. it should look like this: -// #include ... -// #include ... -// #include ... -// #define TINYEXR_IMPLEMENTATION -// #include "tinyexr.h" -// -// - -#include // for size_t -#include // guess stdint.h is available(C99) - -#ifdef __cplusplus -extern "C" { -#endif - -// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib -// required if this flas is 0. -#ifndef TINYEXR_USE_MINIZ -#define TINYEXR_USE_MINIZ (1) -#endif - -// Disable PIZ comporession when applying cpplint. -#ifndef TINYEXR_USE_PIZ -#define TINYEXR_USE_PIZ (1) -#endif - -#ifndef TINYEXR_USE_ZFP -#define TINYEXR_USE_ZFP (0) // TinyEXR extension. 
-// http://computation.llnl.gov/projects/floating-point-compression -#endif - -#define TINYEXR_SUCCESS (0) -#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) -#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) -#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) -#define TINYEXR_ERROR_INVALID_DATA (-4) -#define TINYEXR_ERROR_INVALID_FILE (-5) -#define TINYEXR_ERROR_INVALID_PARAMETER (-6) -#define TINYEXR_ERROR_CANT_OPEN_FILE (-7) -#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8) -#define TINYEXR_ERROR_INVALID_HEADER (-9) -#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10) -#define TINYEXR_ERROR_CANT_WRITE_FILE (-11) -#define TINYEXR_ERROR_SERIALZATION_FAILED (-12) - -// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } - -// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 -#define TINYEXR_PIXELTYPE_UINT (0) -#define TINYEXR_PIXELTYPE_HALF (1) -#define TINYEXR_PIXELTYPE_FLOAT (2) - -#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) -#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) - -#define TINYEXR_COMPRESSIONTYPE_NONE (0) -#define TINYEXR_COMPRESSIONTYPE_RLE (1) -#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) -#define TINYEXR_COMPRESSIONTYPE_ZIP (3) -#define TINYEXR_COMPRESSIONTYPE_PIZ (4) -#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension - -#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) -#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) -#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) - -#define TINYEXR_TILE_ONE_LEVEL (0) -#define TINYEXR_TILE_MIPMAP_LEVELS (1) -#define TINYEXR_TILE_RIPMAP_LEVELS (2) - -#define TINYEXR_TILE_ROUND_DOWN (0) -#define TINYEXR_TILE_ROUND_UP (1) - -typedef struct _EXRVersion { - int version; // this must be 2 - int tiled; // tile format image - int long_name; // long name attribute - int non_image; // deep image(EXR 2.0) - int multipart; // multi-part(EXR 2.0) -} EXRVersion; - -typedef struct _EXRAttribute { - char name[256]; // name and type are up to 255 chars long. 
- char type[256]; - unsigned char *value; // uint8_t* - int size; - int pad0; -} EXRAttribute; - -typedef struct _EXRChannelInfo { - char name[256]; // less than 255 bytes long - int pixel_type; - int x_sampling; - int y_sampling; - unsigned char p_linear; - unsigned char pad[3]; -} EXRChannelInfo; - -typedef struct _EXRTile { - int offset_x; - int offset_y; - int level_x; - int level_y; - - int width; // actual width in a tile. - int height; // actual height int a tile. - - unsigned char **images; // image[channels][pixels] -} EXRTile; - -typedef struct _EXRHeader { - float pixel_aspect_ratio; - int line_order; - int data_window[4]; - int display_window[4]; - float screen_window_center[2]; - float screen_window_width; - - int chunk_count; - - // Properties for tiled format(`tiledesc`). - int tiled; - int tile_size_x; - int tile_size_y; - int tile_level_mode; - int tile_rounding_mode; - - int long_name; - int non_image; - int multipart; - unsigned int header_len; - - // Custom attributes(exludes required attributes(e.g. `channels`, - // `compression`, etc) - int num_custom_attributes; - EXRAttribute *custom_attributes; // array of EXRAttribute. size = - // `num_custom_attributes`. - - EXRChannelInfo *channels; // [num_channels] - - int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for - // each channel. This is overwritten with `requested_pixel_types` when - // loading. - int num_channels; - - int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) - int *requested_pixel_types; // Filled initially by - // ParseEXRHeaderFrom(Meomory|File), then users - // can edit it(only valid for HALF pixel type - // channel) - -} EXRHeader; - -typedef struct _EXRMultiPartHeader { - int num_headers; - EXRHeader *headers; - -} EXRMultiPartHeader; - -typedef struct _EXRImage { - EXRTile *tiles; // Tiled pixel data. The application must reconstruct image - // from tiles manually. NULL if scanline format. 
- unsigned char **images; // image[channels][pixels]. NULL if tiled format. - - int width; - int height; - int num_channels; - - // Properties for tile format. - int num_tiles; - -} EXRImage; - -typedef struct _EXRMultiPartImage { - int num_images; - EXRImage *images; - -} EXRMultiPartImage; - -typedef struct _DeepImage { - const char **channel_names; - float ***image; // image[channels][scanlines][samples] - int **offset_table; // offset_table[scanline][offsets] - int num_channels; - int width; - int height; - int pad0; -} DeepImage; - -// @deprecated { to be removed. } -// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel -// alpha) or RGB(A) channels. -// Application must free image data as returned by `out_rgba` -// Result image format is: float x RGBA x width x hight -// Returns negative value and may set error string in `err` when there's an -// error -extern int LoadEXR(float **out_rgba, int *width, int *height, - const char *filename, const char **err); - -// @deprecated { to be removed. } -// Simple wrapper API for ParseEXRHeaderFromFile. -// checking given file is a EXR file(by just look up header) -// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for -// others -extern int IsEXR(const char *filename); - -// @deprecated { to be removed. } -// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels. -// components must be 1(Grayscale), 3(RGB) or 4(RGBA). -// Input image format is: `float x width x height`, or `float x RGB(A) x width x -// hight` -// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero -// value. -// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. -// Use ZIP compression by default. 
-// Returns negative value and may set error string in `err` when there's an -// error -extern int SaveEXR(const float *data, const int width, const int height, - const int components, const int save_as_fp16, - const char *filename, const char **err); - -// Initialize EXRHeader struct -extern void InitEXRHeader(EXRHeader *exr_header); - -// Initialize EXRImage struct -extern void InitEXRImage(EXRImage *exr_image); - -// Free's internal data of EXRHeader struct -extern int FreeEXRHeader(EXRHeader *exr_header); - -// Free's internal data of EXRImage struct -extern int FreeEXRImage(EXRImage *exr_image); - -// Free's error message -extern void FreeEXRErrorMessage(const char *msg); - -// Parse EXR version header of a file. -extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); - -// Parse EXR version header from memory-mapped EXR data. -extern int ParseEXRVersionFromMemory(EXRVersion *version, - const unsigned char *memory, size_t size); - -// Parse single-part OpenEXR header from a file and initialize `EXRHeader`. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, - const char *filename, const char **err); - -// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRHeaderFromMemory(EXRHeader *header, - const EXRVersion *version, - const unsigned char *memory, size_t size, - const char **err); - -// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` -// array. 
-// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, - int *num_headers, - const EXRVersion *version, - const char *filename, - const char **err); - -// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` -// array -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, - int *num_headers, - const EXRVersion *version, - const unsigned char *memory, - size_t size, const char **err); - -// Loads single-part OpenEXR image from a file. -// Application must setup `ParseEXRHeaderFromFile` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, - const char *filename, const char **err); - -// Loads single-part OpenEXR image from a memory. -// Application must setup `EXRHeader` with -// `ParseEXRHeaderFromMemory` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, - const unsigned char *memory, - const size_t size, const char **err); - -// Loads multi-part OpenEXR image from a file. -// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this -// function. 
-// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRMultipartImageFromFile(EXRImage *images, - const EXRHeader **headers, - unsigned int num_parts, - const char *filename, - const char **err); - -// Loads multi-part OpenEXR image from a memory. -// Application must setup `EXRHeader*` array with -// `ParseEXRMultipartHeaderFromMemory` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRMultipartImageFromMemory(EXRImage *images, - const EXRHeader **headers, - unsigned int num_parts, - const unsigned char *memory, - const size_t size, const char **err); - -// Saves multi-channel, single-frame OpenEXR image to a file. -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int SaveEXRImageToFile(const EXRImage *image, - const EXRHeader *exr_header, const char *filename, - const char **err); - -// Saves multi-channel, single-frame OpenEXR image to a memory. -// Image is compressed using EXRImage.compression value. -// Return the number of bytes if success. -// Return zero and will set error string in `err` when there's an -// error. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern size_t SaveEXRImageToMemory(const EXRImage *image, - const EXRHeader *exr_header, - unsigned char **memory, const char **err); - -// Loads single-frame OpenEXR deep image. 
-// Application must free memory of variables in DeepImage(image, offset_table) -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadDeepEXR(DeepImage *out_image, const char *filename, - const char **err); - -// NOT YET IMPLEMENTED: -// Saves single-frame OpenEXR deep image. -// Returns negative value and may set error string in `err` when there's an -// error -// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, -// const char **err); - -// NOT YET IMPLEMENTED: -// Loads multi-part OpenEXR deep image. -// Application must free memory of variables in DeepImage(image, offset_table) -// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const -// char *filename, -// const char **err); - -// For emscripten. -// Loads single-frame OpenEXR image from memory. Assume EXR image contains -// RGB(A) channels. -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, - const unsigned char *memory, size_t size, - const char **err); - -#ifdef __cplusplus -} -#endif - -#endif // TINYEXR_H_ - -#ifdef TINYEXR_IMPLEMENTATION -#ifndef TINYEXR_IMPLEMENTATION_DEIFNED -#define TINYEXR_IMPLEMENTATION_DEIFNED - -#include -#include -#include -#include -#include -#include - -//#include // debug - -#include -#include -#include - -#if __cplusplus > 199711L -// C++11 -#include -#endif // __cplusplus > 199711L - -#ifdef _OPENMP -#include -#endif - -#if TINYEXR_USE_MINIZ -#else -// Issue #46. 
Please include your own zlib-compatible API header before -// including `tinyexr.h` -//#include "zlib.h" -#endif - -#if TINYEXR_USE_ZFP -#include "zfp.h" -#endif - -namespace tinyexr { - -#if __cplusplus > 199711L -// C++11 -typedef uint64_t tinyexr_uint64; -typedef int64_t tinyexr_int64; -#else -// Although `long long` is not a standard type pre C++11, assume it is defined -// as a compiler's extension. -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#endif -typedef unsigned long long tinyexr_uint64; -typedef long long tinyexr_int64; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#endif - -#if TINYEXR_USE_MINIZ - -namespace miniz { - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wpadded" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++11-extensions" -#pragma clang diagnostic ignored "-Wconversion" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wundef" - -#if __has_warning("-Wcomma") -#pragma clang diagnostic ignored "-Wcomma" -#endif - -#if __has_warning("-Wmacro-redefined") -#pragma clang diagnostic ignored "-Wmacro-redefined" -#endif - -#if __has_warning("-Wcast-qual") -#pragma clang diagnostic ignored "-Wcast-qual" -#endif - -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#if __has_warning("-Wtautological-constant-compare") -#pragma clang diagnostic ignored "-Wtautological-constant-compare" -#endif - -#endif - -/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP - reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich , last updated Oct. 
13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: - http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. For example, to disable the - archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO - (see the list below for more macros). - - * Change History - 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major - release with Zip64 support (almost there!): - - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug - (thanks kahmyong.moon@hp.com) which could cause locate files to not find - files. This bug - would only have occured in earlier versions if you explicitly used this - flag, OR if you used mz_zip_extract_archive_file_to_heap() or - mz_zip_add_mem_to_archive_file_in_place() - (which used this flag). If you can't switch to v1.15 but want to fix - this bug, just remove the uses of this flag from both helper funcs (and of - course don't use the flag). - - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when - pUser_read_buf is not NULL and compressed size is > uncompressed size - - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract - compressed data from directory entries, to account for weird zipfiles which - contain zero-size compressed data on dir entries. - Hopefully this fix won't cause any issues on weird zip archives, - because it assumes the low 16-bits of zip external attributes are DOS - attributes (which I believe they always are in practice). - - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the - internal attributes, just the filename and external attributes - - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed - - Added cmake support for Linux builds which builds all the examples, - tested with clang v3.3 and gcc v4.6. 
- - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti - - Merged MZ_FORCEINLINE fix from hdeanclark - - Fix include before config #ifdef, thanks emil.brink - - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping - (super useful for OpenGL apps), and explicit control over the compression - level (so you can - set it to 1 for real-time compression). - - Merged in some compiler fixes from paulharris's github repro. - - Retested this build under Windows (VS 2010, including static analysis), - tcc 0.9.26, gcc v4.6 and clang v3.3. - - Added example6.c, which dumps an image of the mandelbrot set to a PNG - file. - - Modified example2 to help test the - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. - - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix - possible src file fclose() leak if alignment bytes+local header file write - faiiled - - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): - Was pushing the wrong central dir header offset, appears harmless in this - release, but it became a problem in the zip64 branch - 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, - #include (thanks fermtect). - 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix - mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. - - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and - re-ran a randomized regression test on ~500k files. - - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. - - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze - (static analysis) option and fixed all warnings (except for the silly - "Use of the comma-operator in a tested expression.." analysis warning, - which I purposely use to work around a MSVC compiler warning). - - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and - tested Linux executables. The codeblocks workspace is compatible with - Linux+Win32/x64. 
- - Added miniz_tester solution/project, which is a useful little app - derived from LZHAM's tester app that I use as part of the regression test. - - Ran miniz.c and tinfl.c through another series of regression testing on - ~500,000 files and archives. - - Modified example5.c so it purposely disables a bunch of high-level - functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the - MINIZ_NO_STDIO bug report.) - - Fix ftell() usage in examples so they exit with an error on files which - are too large (a limitation of the examples, not miniz itself). - 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple - minor level_and_flags issues in the archive API's. - level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce - Dawson for the feedback/bug report. - 5/28/11 v1.11 - Added statement from unlicense.org - 5/27/11 v1.10 - Substantial compressor optimizations: - - Level 1 is now ~4x faster than before. The L1 compressor's throughput - now varies between 70-110MB/sec. on a - - Core i7 (actual throughput varies depending on the type of data, and x64 - vs. x86). - - Improved baseline L2-L9 compression perf. Also, greatly improved - compression perf. issues on some file types. - - Refactored the compression code for better readability and - maintainability. - - Added level 10 compression level (L10 has slightly better ratio than - level 9, but could have a potentially large - drop in throughput on some files). - 5/15/11 v1.09 - Initial stable release. - - * Low-level Deflate/Inflate implementation notes: - - Compression: Use the "tdefl" API's. The compressor supports raw, static, - and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. - It performs and compresses - approximately as well as zlib. - - Decompression: Use the "tinfl" API's. The entire decompressor is - implemented as a single function - coroutine: see tinfl_decompress(). 
It supports decompression into a 32KB - (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. - - The low-level tdefl/tinfl API's do not make any use of dynamic memory - allocation. - - * zlib-style API notes: - - miniz.c implements a fairly large subset of zlib. There's enough - functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly - routines. - Supports raw deflate streams or standard zlib streams with adler-32 - checking. - - Limitations: - The callback API's are not implemented yet. No support for gzip headers or - zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing - and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. - - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, - originally written by - Alex Evans. Supports 1-4 bytes/pixel images. - - * ZIP archive API notes: - - The ZIP archive API's where designed with simplicity and efficiency in - mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file - information, read files from - existing archives, create new archives, append new files to existing - archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, - on disk (using stdio.h), - or you can specify custom file read/write callbacks. 
- - - Archive reading: Just call this function to read a single file from a - disk archive: - - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const - char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - - For more complex cases, use the "mz_zip_reader" functions. Upon opening an - archive, the entire central - directory is located and read as-is into memory, and subsequent file access - only occurs when reading individual files. - - - Archives file scanning: The simple way is to use this function to scan a - loaded archive for a specific file: - - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - - The locate operation can optionally check file comments too, which (as one - example) can be used to identify - multiple versions of the same file in an archive. This function uses a - simple linear search through the central - directory, so it's not very fast. - - Alternately, you can iterate through all the files in an archive (using - mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). - - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer - immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. The - central directory image is written - all at once at the end of the archive file when the archive is finalized. - - The archive writer can optionally align each file's local header and file - data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, - the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives - written using either feature are still - readable by any ZIP tool. 
- - - Archive appending: The simple way to add a single file to an archive is - to call this function: - - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, - const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 - comment_size, mz_uint level_and_flags); - - The archive will be created if it doesn't already exist, otherwise it'll be - appended to. - Note the appending is done in-place and is not an atomic operation, so if - something goes wrong - during the operation it's possible the archive could be left without a - central directory (although the local - file headers and file data will be fine, so the archive will be - recoverable). - - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, - cloning only those bits you want to - preserve into a new archive using using the - mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and - rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or - heap memory. - - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using - mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an - updated central directory to the - original archive. (This is basically what - mz_zip_add_mem_to_archive_file_in_place() does.) There's a - possibility that the archive's central directory could be lost with this - method if anything goes wrong, though. - - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle - unencrypted, stored or deflated files. - Requires streams capable of seeking. - - * This is a header file library, like stb_image.c. 
To get only a header file, - either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then - include miniz.c from it. - - * Important: For best perf. be sure to customize the below macros for your - target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 - - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before - including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be - able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ - -#ifndef MINIZ_HEADER_INCLUDED -#define MINIZ_HEADER_INCLUDED - -//#include - -// Defines to completely disable specific portions of miniz.c: -// If all macros here are defined the only functionality remaining will be -// CRC-32, adler-32, tinfl, and tdefl. - -// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on -// stdio for file I/O. -//#define MINIZ_NO_STDIO - -// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able -// to get the current time, or -// get/set file times, and the C run-time funcs that get/set times won't be -// called. -// The current downside is the times written to your archives will be from 1979. -#define MINIZ_NO_TIME - -// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. -#define MINIZ_NO_ARCHIVE_APIS - -// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive -// API's. -//#define MINIZ_NO_ARCHIVE_WRITING_APIS - -// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression -// API's. -//#define MINIZ_NO_ZLIB_APIS - -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent -// conflicts against stock zlib. -//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. 
-// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom -// user alloc/free/realloc -// callbacks to the zlib and archive API's, and a few stand-alone helper API's -// which don't provide custom user -// functions (such as tdefl_compress_mem_to_heap() and -// tinfl_decompress_mem_to_heap()) won't work. -//#define MINIZ_NO_MALLOC - -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc -// on Linux -#define MINIZ_NO_TIME -#endif - -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -//#include -#endif - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ - defined(__i386) || defined(__i486__) || defined(__i486) || \ - defined(i386) || defined(__ia64__) || defined(__x86_64__) -// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. -#define MINIZ_X86_OR_X64_CPU 1 -#endif - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. -#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#if MINIZ_X86_OR_X64_CPU -// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient -// integer loads and stores from unaligned addresses. -//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \ - 0 // disable to suppress compiler warnings -#endif - -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \ - defined(_LP64) || defined(__LP64__) || defined(__ia64__) || \ - defined(__x86_64__) -// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are -// reasonably fast (and don't involve compiler generated calls to helper -// functions). -#define MINIZ_HAS_64BIT_REGISTERS 1 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API Definitions. 
- -// For more compatibility with zlib, miniz.c uses unsigned long for some -// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! -typedef unsigned long mz_ulong; - -// mz_free() internally uses the MZ_FREE() macro (which by default calls free() -// unless you've modified the MZ_MALLOC macro) to release a block allocated from -// the heap. -void mz_free(void *p); - -#define MZ_ADLER32_INIT (1) -// mz_adler32() returns the initial adler-32 value to use when called with -// ptr==NULL. -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); - -#define MZ_CRC32_INIT (0) -// mz_crc32() returns the initial CRC-32 value to use when called with -// ptr==NULL. -mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); - -// Compression strategies. -enum { - MZ_DEFAULT_STRATEGY = 0, - MZ_FILTERED = 1, - MZ_HUFFMAN_ONLY = 2, - MZ_RLE = 3, - MZ_FIXED = 4 -}; - -// Method -#define MZ_DEFLATED 8 - -#ifndef MINIZ_NO_ZLIB_APIS - -// Heap allocation callbacks. -// Note that mz_alloc_func parameter types purpsosely differ from zlib's: -// items/size is size_t, not unsigned long. -typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void *opaque, void *address); -typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, - size_t size); - -#define MZ_VERSION "9.1.15" -#define MZ_VERNUM 0x91F0 -#define MZ_VER_MAJOR 9 -#define MZ_VER_MINOR 1 -#define MZ_VER_REVISION 15 -#define MZ_VER_SUBREVISION 0 - -// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The -// other values are for advanced use (refer to the zlib docs). -enum { - MZ_NO_FLUSH = 0, - MZ_PARTIAL_FLUSH = 1, - MZ_SYNC_FLUSH = 2, - MZ_FULL_FLUSH = 3, - MZ_FINISH = 4, - MZ_BLOCK = 5 -}; - -// Return status codes. MZ_PARAM_ERROR is non-standard. 
-enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; - -// Compression levels: 0-9 are the standard zlib-style levels, 10 is best -// possible compression (not zlib compatible, and may be very slow), -// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; - -// Window bits -#define MZ_DEFAULT_WINDOW_BITS 15 - -struct mz_internal_state; - -// Compression/decompression stream struct. -typedef struct mz_stream_s { - const unsigned char *next_in; // pointer to next byte to read - unsigned int avail_in; // number of bytes available at next_in - mz_ulong total_in; // total number of bytes consumed so far - - unsigned char *next_out; // pointer to next byte to write - unsigned int avail_out; // number of bytes that can be written to next_out - mz_ulong total_out; // total number of bytes produced so far - - char *msg; // error msg (unused) - struct mz_internal_state *state; // internal state, allocated by zalloc/zfree - - mz_alloc_func - zalloc; // optional heap allocation function (defaults to malloc) - mz_free_func zfree; // optional heap free function (defaults to free) - void *opaque; // heap alloc function user pointer - - int data_type; // data_type (unused) - mz_ulong adler; // adler32 of the source or uncompressed data - mz_ulong reserved; // not used -} mz_stream; - -typedef mz_stream *mz_streamp; - -// Returns the version string of miniz.c. -const char *mz_version(void); - -// mz_deflateInit() initializes a compressor with default options: -// Parameters: -// pStream must point to an initialized mz_stream struct. -// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. 
-// level 1 enables a specially optimized compression function that's been -// optimized purely for performance, not ratio. -// (This special func. is currently only enabled when -// MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if the input parameters are bogus. -// MZ_MEM_ERROR on out of memory. -int mz_deflateInit(mz_streamp pStream, int level); - -// mz_deflateInit2() is like mz_deflate(), except with more control: -// Additional parameters: -// method must be MZ_DEFLATED -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with -// zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no -// header or footer) -// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy); - -// Quickly resets a compressor without having to reallocate anything. Same as -// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). -int mz_deflateReset(mz_streamp pStream); - -// mz_deflate() compresses the input to output, consuming as much of the input -// and producing as much output as possible. -// Parameters: -// pStream is the stream to read from and write to. You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or -// MZ_FINISH. -// Return values: -// MZ_OK on success (when flushing, or if more input is needed but not -// available, and/or there's more output to be written but the output buffer -// is full). -// MZ_STREAM_END if all input has been consumed and all output bytes have been -// written. Don't call mz_deflate() on the stream anymore. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if one of the parameters is invalid. 
-// MZ_BUF_ERROR if no forward progress is possible because the input and/or -// output buffers are empty. (Fill up the input buffer or free up some output -// space and try again.) -int mz_deflate(mz_streamp pStream, int flush); - -// mz_deflateEnd() deinitializes a compressor: -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. -int mz_deflateEnd(mz_streamp pStream); - -// mz_deflateBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by deflate(), assuming flush is set to only -// MZ_NO_FLUSH or MZ_FINISH. -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); - -// Single-call compression functions mz_compress() and mz_compress2(): -// Returns MZ_OK on success, or one of the error codes from mz_deflate() on -// failure. -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level); - -// mz_compressBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by calling mz_compress(). -mz_ulong mz_compressBound(mz_ulong source_len); - -// Initializes a decompressor. -int mz_inflateInit(mz_streamp pStream); - -// mz_inflateInit2() is like mz_inflateInit() with an additional option that -// controls the window size and whether or not the stream has been wrapped with -// a zlib header/footer: -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -// -MZ_DEFAULT_WINDOW_BITS (raw deflate). -int mz_inflateInit2(mz_streamp pStream, int window_bits); - -// Decompresses the input stream to the output, consuming only as much of the -// input as needed, and writing as much to the output as possible. -// Parameters: -// pStream is the stream to read from and write to. 
You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. -// On the first call, if flush is MZ_FINISH it's assumed the input and output -// buffers are both sized large enough to decompress the entire stream in a -// single call (this is slightly faster). -// MZ_FINISH implies that there are no more source bytes available beside -// what's already in the input buffer, and that the output buffer is large -// enough to hold the rest of the decompressed data. -// Return values: -// MZ_OK on success. Either more input is needed but not available, and/or -// there's more output to be written but the output buffer is full. -// MZ_STREAM_END if all needed input has been consumed and all output bytes -// have been written. For zlib streams, the adler-32 of the decompressed data -// has also been verified. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_DATA_ERROR if the deflate stream is invalid. -// MZ_PARAM_ERROR if one of the parameters is invalid. -// MZ_BUF_ERROR if no forward progress is possible because the input buffer is -// empty but the inflater needs more input to continue, or if the output -// buffer is not large enough. Call mz_inflate() again -// with more input data, or with more room in the output buffer (except when -// using single call decompression, described above). -int mz_inflate(mz_streamp pStream, int flush); - -// Deinitializes a decompressor. -int mz_inflateEnd(mz_streamp pStream); - -// Single-call decompression. -// Returns MZ_OK on success, or one of the error codes from mz_inflate() on -// failure. -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); - -// Returns a string description of the specified error code, or NULL if the -// error code is invalid. 
-const char *mz_error(int err); - -// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used -// as a drop-in replacement for the subset of zlib that miniz.c supports. -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you -// use zlib in the same project. -#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void *voidpf; -typedef uLong uLongf; -typedef void *voidp; -typedef void *const voidpc; -#define Z_NULL 0 -#define Z_NO_FLUSH MZ_NO_FLUSH -#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH -#define Z_SYNC_FLUSH MZ_SYNC_FLUSH -#define Z_FULL_FLUSH MZ_FULL_FLUSH -#define Z_FINISH MZ_FINISH -#define Z_BLOCK MZ_BLOCK -#define Z_OK MZ_OK -#define Z_STREAM_END MZ_STREAM_END -#define Z_NEED_DICT MZ_NEED_DICT -#define Z_ERRNO MZ_ERRNO -#define Z_STREAM_ERROR MZ_STREAM_ERROR -#define Z_DATA_ERROR MZ_DATA_ERROR -#define Z_MEM_ERROR MZ_MEM_ERROR -#define Z_BUF_ERROR MZ_BUF_ERROR -#define Z_VERSION_ERROR MZ_VERSION_ERROR -#define Z_PARAM_ERROR MZ_PARAM_ERROR -#define Z_NO_COMPRESSION MZ_NO_COMPRESSION -#define Z_BEST_SPEED MZ_BEST_SPEED -#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION -#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION -#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY -#define Z_FILTERED MZ_FILTERED -#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY -#define Z_RLE MZ_RLE -#define Z_FIXED MZ_FIXED -#define Z_DEFLATED MZ_DEFLATED -#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS -#define alloc_func mz_alloc_func -#define free_func mz_free_func -#define internal_state mz_internal_state -#define z_stream mz_stream -#define deflateInit mz_deflateInit -#define deflateInit2 mz_deflateInit2 -#define deflateReset mz_deflateReset -#define deflate mz_deflate -#define deflateEnd mz_deflateEnd -#define deflateBound mz_deflateBound -#define compress mz_compress -#define compress2 
mz_compress2 -#define compressBound mz_compressBound -#define inflateInit mz_inflateInit -#define inflateInit2 mz_inflateInit2 -#define inflate mz_inflate -#define inflateEnd mz_inflateEnd -#define uncompress mz_uncompress -#define crc32 mz_crc32 -#define adler32 mz_adler32 -#define MAX_WBITS 15 -#define MAX_MEM_LEVEL 9 -#define zError mz_error -#define ZLIB_VERSION MZ_VERSION -#define ZLIB_VERNUM MZ_VERNUM -#define ZLIB_VER_MAJOR MZ_VER_MAJOR -#define ZLIB_VER_MINOR MZ_VER_MINOR -#define ZLIB_VER_REVISION MZ_VER_REVISION -#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION -#define zlibVersion mz_version -#define zlib_version mz_version() -#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Types and macros - -typedef unsigned char mz_uint8; -typedef signed short mz_int16; -typedef unsigned short mz_uint16; -typedef unsigned int mz_uint32; -typedef unsigned int mz_uint; -typedef long long mz_int64; -typedef unsigned long long mz_uint64; -typedef int mz_bool; - -#define MZ_FALSE (0) -#define MZ_TRUE (1) - -// An attempt to work around MSVC's spammy "warning C4127: conditional -// expression is constant" message. 
-#ifdef _MSC_VER -#define MZ_MACRO_END while (0, 0) -#else -#define MZ_MACRO_END while (0) -#endif - -// ------------------- ZIP archive reading/writing - -#ifndef MINIZ_NO_ARCHIVE_APIS - -enum { - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 -}; - -typedef struct { - mz_uint32 m_file_index; - mz_uint32 m_central_dir_ofs; - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; -#ifndef MINIZ_NO_TIME - time_t m_time; -#endif - mz_uint32 m_crc32; - mz_uint64 m_comp_size; - mz_uint64 m_uncomp_size; - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - mz_uint64 m_local_header_ofs; - mz_uint32 m_comment_size; - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; -} mz_zip_archive_file_stat; - -typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n); - -struct mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; - -typedef struct mz_zip_archive_tag { - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; - mz_uint m_total_files; - mz_zip_mode m_zip_mode; - - mz_uint m_file_offset_alignment; - - mz_alloc_func m_pAlloc; - mz_free_func m_pFree; - mz_realloc_func m_pRealloc; - void *m_pAlloc_opaque; - - mz_file_read_func m_pRead; - mz_file_write_func m_pWrite; - void *m_pIO_opaque; - - mz_zip_internal_state *m_pState; - -} mz_zip_archive; - -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 -} mz_zip_flags; 
- -// ZIP archive reading - -// Inits a ZIP archive reader. -// These functions read and validate the archive's central directory. -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags); -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags); -#endif - -// Returns the total number of files in the archive. -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); - -// Returns detailed information about an archive file entry. -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat); - -// Determines if an archive file entry is a directory entry. -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index); -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index); - -// Retrieves the filename of an archive file entry. -// Returns the number of bytes written to pFilename, or if filename_buf_size is -// 0 this function returns the number of bytes needed to fully store the -// filename. -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size); - -// Attempts to locates a file in the archive's central directory. -// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH -// Returns -1 if the file cannot be found. -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - -// Extracts a archive file to a memory buffer using no memory allocation. 
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size); -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); - -// Extracts a archive file to a memory buffer. -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags); - -// Extracts a archive file to a dynamically allocated heap buffer. -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags); -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags); - -// Extracts a archive file using a callback function to output the file's data. -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); - -#ifndef MINIZ_NO_STDIO -// Extracts a archive file to a disk file and sets its last accessed and -// modified times. -// This function only extracts files, not archive directory records. 
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags); -#endif - -// Ends archive reading, freeing all allocations, and closing the input archive -// file if mz_zip_reader_init_file() was used. -mz_bool mz_zip_reader_end(mz_zip_archive *pZip); - -// ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -// Inits a ZIP archive writer. -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning); -#endif - -// Converts a ZIP archive reader object into a writer object, to allow efficient -// in-place file appends to occur on an existing archive. -// For archives opened using mz_zip_reader_init_file, pFilename must be the -// archive's filename so it can be reopened for writing. If the file can't be -// reopened, mz_zip_reader_end() will be called. -// For archives opened using mz_zip_reader_init_mem, the memory block must be -// growable using the realloc callback (which defaults to realloc unless you've -// overridden it). -// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's -// user provided m_pWrite function cannot be NULL. -// Note: In-place archive modification is not recommended unless you know what -// you're doing, because if execution stops or something goes wrong before -// the archive is finalized the file's central directory will be hosed. -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char *pFilename); - -// Adds the contents of a memory buffer to an archive. 
These functions record -// the current local time into the archive. -// To add a directory entry, call this method with an archive name ending in a -// forwardslash with empty buffer. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags); -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32); - -#ifndef MINIZ_NO_STDIO -// Adds the contents of a disk file to an archive. This function also records -// the disk file's modified time into the archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, mz_uint level_and_flags); -#endif - -// Adds a file to an archive by fully cloning the data from another archive. -// This function fully clones the source file's compressed data (no -// recompression), along with its full filename, extra data, and comment fields. -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index); - -// Finalizes the archive by writing the central directory records followed by -// the end of central directory record. -// After an archive is finalized, the only valid call on the mz_zip_archive -// struct is mz_zip_writer_end(). -// An archive must be manually finalized by calling this function for it to be -// valid. 
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize); - -// Ends archive writing, freeing all allocations, and closing the output file if -// mz_zip_writer_init_file() was used. -// Note for the archive to be valid, it must have been finalized before ending. -mz_bool mz_zip_writer_end(mz_zip_archive *pZip); - -// Misc. high-level helper functions: - -// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) -// appends a memory blob to a ZIP archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags); - -// Reads a single file from an archive into a heap block. -// Returns NULL on failure. -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -// ------------------- Low-level Decompression API Definitions - -// Decompression flags used by tinfl_decompress(). -// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and -// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the -// input is a raw deflate stream. -// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available -// beyond the end of the supplied input buffer. If clear, the input buffer -// contains all remaining input. -// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large -// enough to hold the entire decompressed stream. 
If clear, the output buffer is -// at least the size of the dictionary (typically 32KB). -// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the -// decompressed bytes. -enum { - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -// High level decompression functions: -// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data -// to decompress. -// On return: -// Function returns a pointer to the decompressed data, or NULL on failure. -// *pOut_len will be set to the decompressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must call mz_free() on the returned block when it's no longer -// needed. -void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block -// in memory. -// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes -// written on success. -#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// tinfl_decompress_mem_to_callback() decompresses a block in memory to an -// internal 32KB buffer, and a user provided callback function will be called to -// flush the buffer. -// Returns 1 on success or 0 on failure. 
-typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; - -// Max size of LZ dictionary. -#define TINFL_LZ_DICT_SIZE 32768 - -// Return status. -typedef enum { - TINFL_STATUS_BAD_PARAM = -3, - TINFL_STATUS_ADLER32_MISMATCH = -2, - TINFL_STATUS_FAILED = -1, - TINFL_STATUS_DONE = 0, - TINFL_STATUS_NEEDS_MORE_INPUT = 1, - TINFL_STATUS_HAS_MORE_OUTPUT = 2 -} tinfl_status; - -// Initializes the decompressor to its initial state. -#define tinfl_init(r) \ - do { \ - (r)->m_state = 0; \ - } \ - MZ_MACRO_END -#define tinfl_get_adler32(r) (r)->m_check_adler32 - -// Main low-level decompressor coroutine function. This is the only function -// actually needed for decompression. All the other functions are just -// high-level helpers for improved usability. -// This is a universal API, i.e. it can be used as a building block to build any -// desired higher level decompression API. In the limit case, it can be called -// once per every byte input or output. -tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags); - -// Internal/private bits follow. 
-enum { - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; - -typedef struct { - mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], - m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; -} tinfl_huff_table; - -#if MINIZ_HAS_64BIT_REGISTERS -#define TINFL_USE_64BIT_BITBUF 1 -#endif - -#if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (64) -#else -typedef mz_uint32 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (32) -#endif - -struct tinfl_decompressor_tag { - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, - m_check_adler32, m_dist, m_counter, m_num_extra, - m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; - mz_uint8 m_raw_header[4], - m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; - -// ------------------- Low-level Compression API Definitions - -// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly -// slower, and raw/dynamic blocks will be output more frequently). -#define TDEFL_LESS_MEMORY 0 - -// tdefl_init() compression flags logically OR'd together (low 12 bits contain -// the max. number of probes per dictionary search): -// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes -// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap -// compression), 4095=Huffman+LZ (slowest/best compression). -enum { - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; - -// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before -// the deflate data, and the Adler-32 of the source data at the end. Otherwise, -// you'll get raw deflate data. 
-// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even -// when not writing zlib headers). -// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more -// efficient lazy parsing. -// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's -// initialization time to the minimum, but the output may vary from run to run -// given the same input (depending on the contents of memory). -// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) -// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. -// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. -// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. -// The low 12 bits are reserved to control the max # of hash probes per -// dictionary lookup (see TDEFL_MAX_PROBES_MASK). -enum { - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -// High level compression functions: -// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of source block to compress. -// flags: The max match finder probes (default is 128) logically OR'd against -// the above flags. Higher probes are slower but improve compression. -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. -// *pOut_len will be set to the compressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must free() the returned block when it's no longer needed. 
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tdefl_compress_mem_to_mem() compresses a block in memory to another block in -// memory. -// Returns 0 on failure. -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// Compresses an image to a compressed PNG file in memory. -// On entry: -// pImage, w, h, and num_chans describe the image to compress. num_chans may be -// 1, 2, 3, or 4. -// The image pitch in bytes per scanline will be w*num_chans. The leftmost -// pixel on the top scanline is stored first in memory. -// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL -// If flip is true, the image will be flipped on the Y axis (useful for OpenGL -// apps). -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. -// *pLen_out will be set to the size of the PNG image file. -// The caller must mz_free() the returned heap block (which will typically be -// larger than *pLen_out) when it's no longer needed. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip); -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out); - -// Output stream interface. The compressor uses this interface to write -// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, - void *pUser); - -// tdefl_compress_mem_to_output() compresses a block to an output stream. The -// above helpers use this function internally. 
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -enum { - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; - -// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed -// output block (using static/fixed Huffman codes). -#if TDEFL_LESS_MEMORY -enum { - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#else -enum { - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#endif - -// The low-level tdefl functions below may be used directly if the above helper -// functions aren't flexible enough. The low-level functions don't make any heap -// allocations, unlike the above helper functions. -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; - -// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; - -// tdefl's compression state structure. 
-typedef struct { - tdefl_put_buf_func_ptr m_pPut_buf_func; - void *m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, - m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, - m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, - m_wants_to_finish; - tdefl_status m_prev_return_status; - const void *m_pIn_buf; - void *m_pOut_buf; - size_t *m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8 *m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -// Initializes the compressor. -// There is no corresponding deinit() function because the tdefl API's do not -// dynamically allocate memory. -// pBut_buf_func: If NULL, output data will be supplied to the specified -// callback. In this case, the user should call the tdefl_compress_buffer() API -// for compression. -// If pBut_buf_func is NULL the user should always call the tdefl_compress() -// API. -// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, -// etc.) 
-tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -// Compresses a block of data, consuming as much of the specified input buffer -// as possible, and writing as much compressed data to the specified output -// buffer as possible. -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush); - -// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a -// non-NULL tdefl_put_buf_func_ptr. -// tdefl_compress_buffer() always consumes the entire input buffer. -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush); - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); -mz_uint32 tdefl_get_adler32(tdefl_compressor *d); - -// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't -// defined, because it uses some of its macros. -#ifndef MINIZ_NO_ZLIB_APIS -// Create tdefl_compress() flags given zlib-style compression parameters. -// level may range from [0,10] (where 10 is absolute max compression, but may be -// much slower on some files) -// window_bits may be -15 (raw deflate) or 15 (zlib) -// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, -// MZ_RLE, or MZ_FIXED -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy); -#endif // #ifndef MINIZ_NO_ZLIB_APIS - -#ifdef __cplusplus -} -#endif - -#endif // MINIZ_HEADER_INCLUDED - -// ------------------- End of Header: Implementation follows. (If you only want -// the header, define MINIZ_HEADER_FILE_ONLY.) - -#ifndef MINIZ_HEADER_FILE_ONLY - -typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; -typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; -typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; - -//#include -//#include - -#define MZ_ASSERT(x) assert(x) - -#ifdef MINIZ_NO_MALLOC -#define MZ_MALLOC(x) NULL -#define MZ_FREE(x) (void)x, ((void)0) -#define MZ_REALLOC(p, x) NULL -#else -#define MZ_MALLOC(x) malloc(x) -#define MZ_FREE(x) free(x) -#define MZ_REALLOC(p, x) realloc(p, x) -#endif - -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) -#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) -#else -#define MZ_READ_LE16(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) -#define MZ_READ_LE32(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) -#endif - -#ifdef _MSC_VER -#define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) -#else -#define MZ_FORCEINLINE inline -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API's - -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { - mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); - size_t block_len = buf_len % 5552; - if (!ptr) return MZ_ADLER32_INIT; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - return (s2 << 16) + s1; -} - -// Karl Malbrain's compact 
CRC-32. See "A compact CCITT crc16 and crc32 C -// implementation that balances processor cache usage against speed": -// http://www.geocities.com/malbrain/ -mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { - static const mz_uint32 s_crc32[16] = { - 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, - 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, - 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; - mz_uint32 crcu32 = (mz_uint32)crc; - if (!ptr) return MZ_CRC32_INIT; - crcu32 = ~crcu32; - while (buf_len--) { - mz_uint8 b = *ptr++; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; - } - return ~crcu32; -} - -void mz_free(void *p) { MZ_FREE(p); } - -#ifndef MINIZ_NO_ZLIB_APIS - -static void *def_alloc_func(void *opaque, size_t items, size_t size) { - (void)opaque, (void)items, (void)size; - return MZ_MALLOC(items * size); -} -static void def_free_func(void *opaque, void *address) { - (void)opaque, (void)address; - MZ_FREE(address); -} -// static void *def_realloc_func(void *opaque, void *address, size_t items, -// size_t size) { -// (void)opaque, (void)address, (void)items, (void)size; -// return MZ_REALLOC(address, items * size); -//} - -const char *mz_version(void) { return MZ_VERSION; } - -int mz_deflateInit(mz_streamp pStream, int level) { - return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, - MZ_DEFAULT_STRATEGY); -} - -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy) { - tdefl_compressor *pComp; - mz_uint comp_flags = - TDEFL_COMPUTE_ADLER32 | - tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); - - if (!pStream) return MZ_STREAM_ERROR; - if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || - ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != MZ_DEFAULT_WINDOW_BITS))) - return 
MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = MZ_ADLER32_INIT; - pStream->msg = NULL; - pStream->reserved = 0; - pStream->total_in = 0; - pStream->total_out = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, - sizeof(tdefl_compressor)); - if (!pComp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pComp; - - if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { - mz_deflateEnd(pStream); - return MZ_PARAM_ERROR; - } - - return MZ_OK; -} - -int mz_deflateReset(mz_streamp pStream) { - if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || - (!pStream->zfree)) - return MZ_STREAM_ERROR; - pStream->total_in = pStream->total_out = 0; - tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, - ((tdefl_compressor *)pStream->state)->m_flags); - return MZ_OK; -} - -int mz_deflate(mz_streamp pStream, int flush) { - size_t in_bytes, out_bytes; - mz_ulong orig_total_in, orig_total_out; - int mz_status = MZ_OK; - - if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || - (!pStream->next_out)) - return MZ_STREAM_ERROR; - if (!pStream->avail_out) return MZ_BUF_ERROR; - - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - - if (((tdefl_compressor *)pStream->state)->m_prev_return_status == - TDEFL_STATUS_DONE) - return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; - - orig_total_in = pStream->total_in; - orig_total_out = pStream->total_out; - for (;;) { - tdefl_status defl_status; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - - defl_status = tdefl_compress((tdefl_compressor *)pStream->state, - pStream->next_in, &in_bytes, pStream->next_out, - &out_bytes, (tdefl_flush)flush); - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); - - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (defl_status < 0) { - mz_status = MZ_STREAM_ERROR; - break; - } else if (defl_status == TDEFL_STATUS_DONE) { - mz_status = MZ_STREAM_END; - break; - } else if (!pStream->avail_out) - break; - else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { - if ((flush) || (pStream->total_in != orig_total_in) || - (pStream->total_out != orig_total_out)) - break; - return MZ_BUF_ERROR; // Can't make forward progress without some input. - } - } - return mz_status; -} - -int mz_deflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { - (void)pStream; - // This is really over conservative. (And lame, but it's actually pretty - // tricky to compute a true upper bound given the way tdefl's blocking works.) - return MZ_MAX(128 + (source_len * 110) / 100, - 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); -} - -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level) { - int status; - mz_stream stream; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). 
- if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_deflateInit(&stream, level); - if (status != MZ_OK) return status; - - status = mz_deflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_deflateEnd(&stream); - return (status == MZ_OK) ? MZ_BUF_ERROR : status; - } - - *pDest_len = stream.total_out; - return mz_deflateEnd(&stream); -} - -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - return mz_compress2(pDest, pDest_len, pSource, source_len, - MZ_DEFAULT_COMPRESSION); -} - -mz_ulong mz_compressBound(mz_ulong source_len) { - return mz_deflateBound(NULL, source_len); -} - -typedef struct { - tinfl_decompressor m_decomp; - mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; - int m_window_bits; - mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; - tinfl_status m_last_status; -} inflate_state; - -int mz_inflateInit2(mz_streamp pStream, int window_bits) { - inflate_state *pDecomp; - if (!pStream) return MZ_STREAM_ERROR; - if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != MZ_DEFAULT_WINDOW_BITS)) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = 0; - pStream->msg = NULL; - pStream->total_in = 0; - pStream->total_out = 0; - pStream->reserved = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, - sizeof(inflate_state)); - if (!pDecomp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pDecomp; - - tinfl_init(&pDecomp->m_decomp); - pDecomp->m_dict_ofs = 0; - pDecomp->m_dict_avail = 0; - pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; - pDecomp->m_first_call = 1; - pDecomp->m_has_flushed = 0; - pDecomp->m_window_bits = 
window_bits; - - return MZ_OK; -} - -int mz_inflateInit(mz_streamp pStream) { - return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); -} - -int mz_inflate(mz_streamp pStream, int flush) { - inflate_state *pState; - mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; - size_t in_bytes, out_bytes, orig_avail_in; - tinfl_status status; - - if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) - return MZ_STREAM_ERROR; - - pState = (inflate_state *)pStream->state; - if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; - orig_avail_in = pStream->avail_in; - - first_call = pState->m_first_call; - pState->m_first_call = 0; - if (pState->m_last_status < 0) return MZ_DATA_ERROR; - - if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; - pState->m_has_flushed |= (flush == MZ_FINISH); - - if ((flush == MZ_FINISH) && (first_call)) { - // MZ_FINISH on the first call implies that the input and output buffers are - // large enough to hold the entire compressed/decompressed file. 
- decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, - pStream->next_out, pStream->next_out, &out_bytes, - decomp_flags); - pState->m_last_status = status; - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (status < 0) - return MZ_DATA_ERROR; - else if (status != TINFL_STATUS_DONE) { - pState->m_last_status = TINFL_STATUS_FAILED; - return MZ_BUF_ERROR; - } - return MZ_STREAM_END; - } - // flush != MZ_FINISH then we must assume there's more input. - if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; - - if (pState->m_dict_avail) { - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - return ((pState->m_last_status == TINFL_STATUS_DONE) && - (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; - } - - for (;;) { - in_bytes = pStream->avail_in; - out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; - - status = tinfl_decompress( - &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, - pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); - pState->m_last_status = status; - - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - - pState->m_dict_avail = (mz_uint)out_bytes; - - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - - if (status < 0) - return MZ_DATA_ERROR; // Stream is corrupted (there could be some - // uncompressed data left in the output dictionary - - // oh well). - else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) - return MZ_BUF_ERROR; // Signal caller that we can't make forward progress - // without supplying more input or by setting flush - // to MZ_FINISH. - else if (flush == MZ_FINISH) { - // The output buffer MUST be large to hold the remaining uncompressed data - // when flush==MZ_FINISH. - if (status == TINFL_STATUS_DONE) - return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; - // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's - // at least 1 more byte on the way. If there's no more room left in the - // output buffer then something is wrong. - else if (!pStream->avail_out) - return MZ_BUF_ERROR; - } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || - (!pStream->avail_out) || (pState->m_dict_avail)) - break; - } - - return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; -} - -int mz_inflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - mz_stream stream; - int status; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). - if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_inflateInit(&stream); - if (status != MZ_OK) return status; - - status = mz_inflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_inflateEnd(&stream); - return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR - : status; - } - *pDest_len = stream.total_out; - - return mz_inflateEnd(&stream); -} - -const char *mz_error(int err) { - static struct { - int m_err; - const char *m_pDesc; - } s_error_descs[] = {{MZ_OK, ""}, - {MZ_STREAM_END, "stream end"}, - {MZ_NEED_DICT, "need dictionary"}, - {MZ_ERRNO, "file error"}, - {MZ_STREAM_ERROR, "stream error"}, - {MZ_DATA_ERROR, "data error"}, - {MZ_MEM_ERROR, "out of memory"}, - {MZ_BUF_ERROR, "buf error"}, - {MZ_VERSION_ERROR, "version error"}, - {MZ_PARAM_ERROR, "parameter error"}}; - mz_uint i; - for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) - if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; - return NULL; -} - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Low-level Decompression (completely independent from all -// compression API's) - -#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) -#define TINFL_MEMSET(p, c, l) memset(p, c, l) - -#define TINFL_CR_BEGIN \ - switch (r->m_state) { \ - case 0: -#define TINFL_CR_RETURN(state_index, result) \ - do 
{ \ - status = result; \ - r->m_state = state_index; \ - goto common_exit; \ - case state_index:; \ - } \ - MZ_MACRO_END -#define TINFL_CR_RETURN_FOREVER(state_index, result) \ - do { \ - for (;;) { \ - TINFL_CR_RETURN(state_index, result); \ - } \ - } \ - MZ_MACRO_END -#define TINFL_CR_FINISH } - -// TODO: If the caller has indicated that there's no more input, and we attempt -// to read beyond the input buf, then something is wrong with the input because -// the inflator never -// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of -// the stream with 0's in this scenario. -#define TINFL_GET_BYTE(state_index, c) \ - do { \ - if (pIn_buf_cur >= pIn_buf_end) { \ - for (;;) { \ - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ - TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ - if (pIn_buf_cur < pIn_buf_end) { \ - c = *pIn_buf_cur++; \ - break; \ - } \ - } else { \ - c = 0; \ - break; \ - } \ - } \ - } else \ - c = *pIn_buf_cur++; \ - } \ - MZ_MACRO_END - -#define TINFL_NEED_BITS(state_index, n) \ - do { \ - mz_uint c; \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < (mz_uint)(n)) -#define TINFL_SKIP_BITS(state_index, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END -#define TINFL_GET_BITS(state_index, b, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - b = bit_buf & ((1 << (n)) - 1); \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END - -// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes -// remaining in the input buffer falls below 2. -// It reads just enough bytes from the input stream that are needed to decode -// the next Huffman code (and absolutely no more). 
It works by trying to fully -// decode a -// Huffman code by using whatever bits are currently present in the bit buffer. -// If this fails, it reads another byte, and tries again until it succeeds or -// until the -// bit buffer contains >=15 bits (deflate's max. Huffman code size). -#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ - do { \ - temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ - if (temp >= 0) { \ - code_len = temp >> 9; \ - if ((code_len) && (num_bits >= code_len)) break; \ - } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while ((temp < 0) && (num_bits >= (code_len + 1))); \ - if (temp >= 0) break; \ - } \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < 15); - -// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex -// than you would initially expect because the zlib API expects the decompressor -// to never read -// beyond the final byte of the deflate stream. (In other words, when this macro -// wants to read another byte from the input, it REALLY needs another byte in -// order to fully -// decode the next Huffman code.) Handling this properly is particularly -// important on raw deflate (non-zlib) streams, which aren't followed by a byte -// aligned adler-32. -// The slow path is only executed at the very end of the input buffer. 
-#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ - do { \ - int temp; \ - mz_uint code_len, c; \ - if (num_bits < 15) { \ - if ((pIn_buf_end - pIn_buf_cur) < 2) { \ - TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ - } else { \ - bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | \ - (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ - pIn_buf_cur += 2; \ - num_bits += 16; \ - } \ - } \ - if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \ - 0) \ - code_len = temp >> 9, temp &= 511; \ - else { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while (temp < 0); \ - } \ - sym = temp; \ - bit_buf >>= code_len; \ - num_bits -= code_len; \ - } \ - MZ_MACRO_END - -tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags) { - static const int s_length_base[31] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, - 4, 4, 5, 5, 5, 5, 0, 0, 0}; - static const int s_dist_base[32] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, - 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, - 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; - static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - static const mz_uint8 s_length_dezigzag[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - static const int s_min_table_sizes[3] = {257, 1, 4}; - - tinfl_status status = TINFL_STATUS_FAILED; - mz_uint32 num_bits, dist, counter, num_extra; - tinfl_bit_buf_t bit_buf; - const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const 
pIn_buf_end = - pIn_buf_next + *pIn_buf_size; - mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = - pOut_buf_next + *pOut_buf_size; - size_t out_buf_size_mask = - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) - ? (size_t)-1 - : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, - dist_from_out_buf_start; - - // Ensure the output buffer's size is a power of 2, unless the output buffer - // is large enough to hold the entire output file (in which case it doesn't - // matter). - if (((out_buf_size_mask + 1) & out_buf_size_mask) || - (pOut_buf_next < pOut_buf_start)) { - *pIn_buf_size = *pOut_buf_size = 0; - return TINFL_STATUS_BAD_PARAM; - } - - num_bits = r->m_num_bits; - bit_buf = r->m_bit_buf; - dist = r->m_dist; - counter = r->m_counter; - num_extra = r->m_num_extra; - dist_from_out_buf_start = r->m_dist_from_out_buf_start; - TINFL_CR_BEGIN - - bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; - r->m_z_adler32 = r->m_check_adler32 = 1; - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_GET_BYTE(1, r->m_zhdr0); - TINFL_GET_BYTE(2, r->m_zhdr1); - counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || - (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); - if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) - counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || - ((out_buf_size_mask + 1) < - (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); - if (counter) { - TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); - } - } - - do { - TINFL_GET_BITS(3, r->m_final, 3); - r->m_type = r->m_final >> 1; - if (r->m_type == 0) { - TINFL_SKIP_BITS(5, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - if (num_bits) - TINFL_GET_BITS(6, r->m_raw_header[counter], 8); - else - TINFL_GET_BYTE(7, r->m_raw_header[counter]); - } - if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != - (mz_uint)(0xFFFF ^ - (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { - 
TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); - } - while ((counter) && (num_bits)) { - TINFL_GET_BITS(51, dist, 8); - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)dist; - counter--; - } - while (counter) { - size_t n; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); - } - while (pIn_buf_cur >= pIn_buf_end) { - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { - TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); - } else { - TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); - } - } - n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), - (size_t)(pIn_buf_end - pIn_buf_cur)), - counter); - TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); - pIn_buf_cur += n; - pOut_buf_cur += n; - counter -= (mz_uint)n; - } - } else if (r->m_type == 3) { - TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); - } else { - if (r->m_type == 1) { - mz_uint8 *p = r->m_tables[0].m_code_size; - mz_uint i; - r->m_table_sizes[0] = 288; - r->m_table_sizes[1] = 32; - TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - } else { - for (counter = 0; counter < 3; counter++) { - TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); - r->m_table_sizes[counter] += s_min_table_sizes[counter]; - } - MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); - for (counter = 0; counter < r->m_table_sizes[2]; counter++) { - mz_uint s; - TINFL_GET_BITS(14, s, 3); - r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; - } - r->m_table_sizes[2] = 19; - } - for (; (int)r->m_type >= 0; r->m_type--) { - int tree_next, tree_cur; - tinfl_huff_table *pTable; - mz_uint i, j, used_syms, total, sym_index, next_code[17], - total_syms[16]; - pTable = &r->m_tables[r->m_type]; - MZ_CLEAR_OBJ(total_syms); - MZ_CLEAR_OBJ(pTable->m_look_up); - 
MZ_CLEAR_OBJ(pTable->m_tree); - for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) - total_syms[pTable->m_code_size[i]]++; - used_syms = 0, total = 0; - next_code[0] = next_code[1] = 0; - for (i = 1; i <= 15; ++i) { - used_syms += total_syms[i]; - next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); - } - if ((65536 != total) && (used_syms > 1)) { - TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); - } - for (tree_next = -1, sym_index = 0; - sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { - mz_uint rev_code = 0, l, cur_code, - code_size = pTable->m_code_size[sym_index]; - if (!code_size) continue; - cur_code = next_code[code_size]++; - for (l = code_size; l > 0; l--, cur_code >>= 1) - rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= TINFL_FAST_LOOKUP_BITS) { - mz_int16 k = (mz_int16)((code_size << 9) | sym_index); - while (rev_code < TINFL_FAST_LOOKUP_SIZE) { - pTable->m_look_up[rev_code] = k; - rev_code += (1 << code_size); - } - continue; - } - if (0 == - (tree_cur = pTable->m_look_up[rev_code & - (TINFL_FAST_LOOKUP_SIZE - 1)])) { - pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = - (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } - rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); - for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { - tree_cur -= ((rev_code >>= 1) & 1); - if (!pTable->m_tree[-tree_cur - 1]) { - pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } else - tree_cur = pTable->m_tree[-tree_cur - 1]; - } - tree_cur -= ((rev_code >>= 1) & 1); - pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; - } - if (r->m_type == 2) { - for (counter = 0; - counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { - mz_uint s; - TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); - if (dist < 16) { - r->m_len_codes[counter++] = (mz_uint8)dist; - continue; - } - if ((dist == 16) && (!counter)) { - TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); - } - num_extra = 
"\02\03\07"[dist - 16]; - TINFL_GET_BITS(18, s, num_extra); - s += "\03\03\013"[dist - 16]; - TINFL_MEMSET(r->m_len_codes + counter, - (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); - counter += s; - } - if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { - TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); - } - TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, - r->m_table_sizes[0]); - TINFL_MEMCPY(r->m_tables[1].m_code_size, - r->m_len_codes + r->m_table_sizes[0], - r->m_table_sizes[1]); - } - } - for (;;) { - mz_uint8 *pSrc; - for (;;) { - if (((pIn_buf_end - pIn_buf_cur) < 4) || - ((pOut_buf_end - pOut_buf_cur) < 2)) { - TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); - if (counter >= 256) break; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)counter; - } else { - int sym2; - mz_uint code_len; -#if TINFL_USE_64BIT_BITBUF - if (num_bits < 30) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 4; - num_bits += 32; - } -#else - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - counter = sym2; - bit_buf >>= code_len; - num_bits -= code_len; - if (counter & 256) break; - -#if !TINFL_USE_64BIT_BITBUF - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + 
((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - bit_buf >>= code_len; - num_bits -= code_len; - - pOut_buf_cur[0] = (mz_uint8)counter; - if (sym2 & 256) { - pOut_buf_cur++; - counter = sym2; - break; - } - pOut_buf_cur[1] = (mz_uint8)sym2; - pOut_buf_cur += 2; - } - } - if ((counter &= 511) == 256) break; - - num_extra = s_length_extra[counter - 257]; - counter = s_length_base[counter - 257]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(25, extra_bits, num_extra); - counter += extra_bits; - } - - TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); - num_extra = s_dist_extra[dist]; - dist = s_dist_base[dist]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(27, extra_bits, num_extra); - dist += extra_bits; - } - - dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; - if ((dist > dist_from_out_buf_start) && - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { - TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); - } - - pSrc = pOut_buf_start + - ((dist_from_out_buf_start - dist) & out_buf_size_mask); - - if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { - while (counter--) { - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = - pOut_buf_start[(dist_from_out_buf_start++ - dist) & - out_buf_size_mask]; - } - continue; - } -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - else if ((counter >= 9) && (counter <= dist)) { - const mz_uint8 *pSrc_end = pSrc + (counter & ~7); - do { - ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; - ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; - pOut_buf_cur += 8; - } while ((pSrc += 8) < pSrc_end); - if ((counter &= 7) < 3) { - if (counter) { - pOut_buf_cur[0] = pSrc[0]; - if (counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - continue; - } - } -#endif - do { - pOut_buf_cur[0] = pSrc[0]; - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur[2] = pSrc[2]; - pOut_buf_cur += 3; - pSrc += 
3; - } while ((int)(counter -= 3) > 2); - if ((int)counter > 0) { - pOut_buf_cur[0] = pSrc[0]; - if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - } - } - } while (!(r->m_final & 1)); - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_SKIP_BITS(32, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - mz_uint s; - if (num_bits) - TINFL_GET_BITS(41, s, 8); - else - TINFL_GET_BYTE(42, s); - r->m_z_adler32 = (r->m_z_adler32 << 8) | s; - } - } - TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); - TINFL_CR_FINISH - -common_exit: - r->m_num_bits = num_bits; - r->m_bit_buf = bit_buf; - r->m_dist = dist; - r->m_counter = counter; - r->m_num_extra = num_extra; - r->m_dist_from_out_buf_start = dist_from_out_buf_start; - *pIn_buf_size = pIn_buf_cur - pIn_buf_next; - *pOut_buf_size = pOut_buf_cur - pOut_buf_next; - if ((decomp_flags & - (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && - (status >= 0)) { - const mz_uint8 *ptr = pOut_buf_next; - size_t buf_len = *pOut_buf_size; - mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, - s2 = r->m_check_adler32 >> 16; - size_t block_len = buf_len % 5552; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - r->m_check_adler32 = (s2 << 16) + s1; - if ((status == TINFL_STATUS_DONE) && - (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && - (r->m_check_adler32 != r->m_z_adler32)) - status = TINFL_STATUS_ADLER32_MISMATCH; - } - return status; -} - -// Higher level helper functions. 
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tinfl_decompressor decomp; - void *pBuf = NULL, *pNew_buf; - size_t src_buf_ofs = 0, out_buf_capacity = 0; - *pOut_len = 0; - tinfl_init(&decomp); - for (;;) { - size_t src_buf_size = src_buf_len - src_buf_ofs, - dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; - tinfl_status status = tinfl_decompress( - &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, - (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, - &dst_buf_size, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - src_buf_ofs += src_buf_size; - *pOut_len += dst_buf_size; - if (status == TINFL_STATUS_DONE) break; - new_out_buf_capacity = out_buf_capacity * 2; - if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; - pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); - if (!pNew_buf) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - pBuf = pNew_buf; - out_buf_capacity = new_out_buf_capacity; - } - return pBuf; -} - -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tinfl_decompressor decomp; - tinfl_status status; - tinfl_init(&decomp); - status = - tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, - (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED - : out_buf_len; -} - -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - int result = 0; - tinfl_decompressor decomp; - mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); - size_t in_buf_ofs = 0, dict_ofs = 0; - if (!pDict) return TINFL_STATUS_FAILED; - tinfl_init(&decomp); - for (;;) { - size_t in_buf_size = *pIn_buf_size - in_buf_ofs, - dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; - tinfl_status status = - tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, - &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, - (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); - in_buf_ofs += in_buf_size; - if ((dst_buf_size) && - (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) - break; - if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { - result = (status == TINFL_STATUS_DONE); - break; - } - dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); - } - MZ_FREE(pDict); - *pIn_buf_size = in_buf_ofs; - return result; -} - -// ------------------- Low-level Compression (independent from all decompression -// API's) - -// Purposely making these tables static for faster init and thread safety. 
-static const mz_uint16 s_tdefl_len_sym[256] = { - 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, - 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, - 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, - 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, - 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, - 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, - 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, - 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, - 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, - 285}; - -static const mz_uint8 s_tdefl_len_extra[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0}; - -static const mz_uint8 s_tdefl_small_dist_sym[512] = { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, - 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; - -static const mz_uint8 s_tdefl_small_dist_extra[512] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; - -static const 
mz_uint8 s_tdefl_large_dist_sym[128] = { - 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, - 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; - -static const mz_uint8 s_tdefl_large_dist_extra[128] = { - 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13}; - -// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted -// values. 
-typedef struct { - mz_uint16 m_key, m_sym_index; -} tdefl_sym_freq; -static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, - tdefl_sym_freq *pSyms0, - tdefl_sym_freq *pSyms1) { - mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; - tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; - MZ_CLEAR_OBJ(hist); - for (i = 0; i < num_syms; i++) { - mz_uint freq = pSyms0[i].m_key; - hist[freq & 0xFF]++; - hist[256 + ((freq >> 8) & 0xFF)]++; - } - while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) - total_passes--; - for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { - const mz_uint32 *pHist = &hist[pass << 8]; - mz_uint offsets[256], cur_ofs = 0; - for (i = 0; i < 256; i++) { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - } - for (i = 0; i < num_syms; i++) - pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = - pCur_syms[i]; - { - tdefl_sym_freq *t = pCur_syms; - pCur_syms = pNew_syms; - pNew_syms = t; - } - } - return pCur_syms; -} - -// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, -// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
-static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) { - int root, leaf, next, avbl, used, dpth; - if (n == 0) - return; - else if (n == 1) { - A[0].m_key = 1; - return; - } - A[0].m_key += A[1].m_key; - root = 0; - leaf = 2; - for (next = 1; next < n - 1; next++) { - if (leaf >= n || A[root].m_key < A[leaf].m_key) { - A[next].m_key = A[root].m_key; - A[root++].m_key = (mz_uint16)next; - } else - A[next].m_key = A[leaf++].m_key; - if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) { - A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); - A[root++].m_key = (mz_uint16)next; - } else - A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); - } - A[n - 2].m_key = 0; - for (next = n - 3; next >= 0; next--) - A[next].m_key = A[A[next].m_key].m_key + 1; - avbl = 1; - used = dpth = 0; - root = n - 2; - next = n - 1; - while (avbl > 0) { - while (root >= 0 && (int)A[root].m_key == dpth) { - used++; - root--; - } - while (avbl > used) { - A[next--].m_key = (mz_uint16)(dpth); - avbl--; - } - avbl = 2 * used; - dpth++; - used = 0; - } -} - -// Limits canonical Huffman code table's max code size. 
-enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; -static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, - int code_list_len, - int max_code_size) { - int i; - mz_uint32 total = 0; - if (code_list_len <= 1) return; - for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) - pNum_codes[max_code_size] += pNum_codes[i]; - for (i = max_code_size; i > 0; i--) - total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); - while (total != (1UL << max_code_size)) { - pNum_codes[max_code_size]--; - for (i = max_code_size - 1; i > 0; i--) - if (pNum_codes[i]) { - pNum_codes[i]--; - pNum_codes[i + 1] += 2; - break; - } - total--; - } -} - -static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, - int table_len, int code_size_limit, - int static_table) { - int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; - mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; - MZ_CLEAR_OBJ(num_codes); - if (static_table) { - for (i = 0; i < table_len; i++) - num_codes[d->m_huff_code_sizes[table_num][i]]++; - } else { - tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], - *pSyms; - int num_used_syms = 0; - const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; - for (i = 0; i < table_len; i++) - if (pSym_count[i]) { - syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; - syms0[num_used_syms++].m_sym_index = (mz_uint16)i; - } - - pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); - tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); - - for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; - - tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, - code_size_limit); - - MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); - MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); - for (i = 1, j = num_used_syms; i <= code_size_limit; i++) - for (l = num_codes[i]; l > 0; l--) - d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); - } - - next_code[1] = 0; - 
for (j = 0, i = 2; i <= code_size_limit; i++) - next_code[i] = j = ((j + num_codes[i - 1]) << 1); - - for (i = 0; i < table_len; i++) { - mz_uint rev_code = 0, code, code_size; - if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; - code = next_code[code_size]++; - for (l = code_size; l > 0; l--, code >>= 1) - rev_code = (rev_code << 1) | (code & 1); - d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; - } -} - -#define TDEFL_PUT_BITS(b, l) \ - do { \ - mz_uint bits = b; \ - mz_uint len = l; \ - MZ_ASSERT(bits <= ((1U << len) - 1U)); \ - d->m_bit_buffer |= (bits << d->m_bits_in); \ - d->m_bits_in += len; \ - while (d->m_bits_in >= 8) { \ - if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ - *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ - d->m_bit_buffer >>= 8; \ - d->m_bits_in -= 8; \ - } \ - } \ - MZ_MACRO_END - -#define TDEFL_RLE_PREV_CODE_SIZE() \ - { \ - if (rle_repeat_count) { \ - if (rle_repeat_count < 3) { \ - d->m_huff_count[2][prev_code_size] = (mz_uint16)( \ - d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ - while (rle_repeat_count--) \ - packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ - } else { \ - d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 16; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_repeat_count - 3); \ - } \ - rle_repeat_count = 0; \ - } \ - } - -#define TDEFL_RLE_ZERO_CODE_SIZE() \ - { \ - if (rle_z_count) { \ - if (rle_z_count < 3) { \ - d->m_huff_count[2][0] = \ - (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ - while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ - } else if (rle_z_count <= 10) { \ - d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 17; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 3); \ - } else { \ - d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 
1); \ - packed_code_sizes[num_packed_code_sizes++] = 18; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 11); \ - } \ - rle_z_count = 0; \ - } \ - } - -static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -static void tdefl_start_dynamic_block(tdefl_compressor *d) { - int num_lit_codes, num_dist_codes, num_bit_lengths; - mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, - rle_repeat_count, packed_code_sizes_index; - mz_uint8 - code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - prev_code_size = 0xFF; - - d->m_huff_count[0][256] = 1; - - tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); - tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); - - for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) - if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; - for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) - if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; - - memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); - memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], - num_dist_codes); - total_code_sizes_to_pack = num_lit_codes + num_dist_codes; - num_packed_code_sizes = 0; - rle_z_count = 0; - rle_repeat_count = 0; - - memset(&d->m_huff_count[2][0], 0, - sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); - for (i = 0; i < total_code_sizes_to_pack; i++) { - mz_uint8 code_size = code_sizes_to_pack[i]; - if (!code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - if (++rle_z_count == 138) { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - if (code_size != prev_code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - d->m_huff_count[2][code_size] = - (mz_uint16)(d->m_huff_count[2][code_size] + 1); - 
packed_code_sizes[num_packed_code_sizes++] = code_size; - } else if (++rle_repeat_count == 6) { - TDEFL_RLE_PREV_CODE_SIZE(); - } - } - prev_code_size = code_size; - } - if (rle_repeat_count) { - TDEFL_RLE_PREV_CODE_SIZE(); - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - - tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); - - TDEFL_PUT_BITS(2, 2); - - TDEFL_PUT_BITS(num_lit_codes - 257, 5); - TDEFL_PUT_BITS(num_dist_codes - 1, 5); - - for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) - if (d->m_huff_code_sizes - [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) - break; - num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); - TDEFL_PUT_BITS(num_bit_lengths - 4, 4); - for (i = 0; (int)i < num_bit_lengths; i++) - TDEFL_PUT_BITS( - d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); - - for (packed_code_sizes_index = 0; - packed_code_sizes_index < num_packed_code_sizes;) { - mz_uint code = packed_code_sizes[packed_code_sizes_index++]; - MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); - TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); - if (code >= 16) - TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], - "\02\03\07"[code - 16]); - } -} - -static void tdefl_start_static_block(tdefl_compressor *d) { - mz_uint i; - mz_uint8 *p = &d->m_huff_code_sizes[0][0]; - - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - - memset(d->m_huff_code_sizes[1], 5, 32); - - tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); - tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); - - TDEFL_PUT_BITS(1, 2); -} - -static const mz_uint mz_bitmasks[17] = { - 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, - 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \ - MINIZ_HAS_64BIT_REGISTERS -static 
mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - mz_uint8 *pOutput_buf = d->m_pOutput_buf; - mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; - mz_uint64 bit_buffer = d->m_bit_buffer; - mz_uint bits_in = d->m_bits_in; - -#define TDEFL_PUT_BITS_FAST(b, l) \ - { \ - bit_buffer |= (((mz_uint64)(b)) << bits_in); \ - bits_in += (l); \ - } - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - - if (flags & 1) { - mz_uint s0, s1, n0, n1, sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = *(const mz_uint16 *)(pLZ_codes + 1); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - // This sequence coaxes MSVC into using cmov's vs. jmp's. - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - n0 = s_tdefl_small_dist_extra[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[match_dist >> 8]; - n1 = s_tdefl_large_dist_extra[match_dist >> 8]; - sym = (match_dist < 512) ? s0 : s1; - num_extra_bits = (match_dist < 512) ? 
n0 : n1; - - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], - d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], - num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - } - } - } - - if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; - - *(mz_uint64 *)pOutput_buf = bit_buffer; - pOutput_buf += (bits_in >> 3); - bit_buffer >>= (bits_in & ~7); - bits_in &= 7; - } - -#undef TDEFL_PUT_BITS_FAST - - d->m_pOutput_buf = pOutput_buf; - d->m_bits_in = 0; - d->m_bit_buffer = 0; - - while (bits_in) { - mz_uint32 n = MZ_MIN(bits_in, 16); - TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); - bit_buffer >>= n; - bits_in -= n; - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#else -static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - if (flags & 1) { - mz_uint sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - if (match_dist < 512) { - sym = s_tdefl_small_dist_sym[match_dist]; - num_extra_bits = s_tdefl_small_dist_extra[match_dist]; - } else { - sym = s_tdefl_large_dist_sym[match_dist >> 8]; - num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; - } - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - } - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && - // MINIZ_HAS_64BIT_REGISTERS - -static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { - if (static_block) - tdefl_start_static_block(d); - else - tdefl_start_dynamic_block(d); - return tdefl_compress_lz_codes(d); -} - -static int tdefl_flush_block(tdefl_compressor *d, int flush) { - mz_uint saved_bit_buf, saved_bits_in; - mz_uint8 *pSaved_output_buf; - mz_bool comp_block_succeeded = MZ_FALSE; - int n, use_raw_block = - ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && - (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; - mz_uint8 *pOutput_buf_start = - ((d->m_pPut_buf_func == NULL) && - ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) - ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) - : d->m_output_buf; - - d->m_pOutput_buf = pOutput_buf_start; - d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; - - MZ_ASSERT(!d->m_output_flush_remaining); - d->m_output_flush_ofs = 0; - d->m_output_flush_remaining = 0; - - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); - d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); - - if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { - TDEFL_PUT_BITS(0x78, 8); - TDEFL_PUT_BITS(0x01, 8); - } - - TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); - - pSaved_output_buf = d->m_pOutput_buf; - saved_bit_buf = d->m_bit_buffer; - saved_bits_in = d->m_bits_in; - - if (!use_raw_block) - comp_block_succeeded = - tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || - (d->m_total_lz_bytes < 48)); - - // If the block gets expanded, forget the current contents of the output - // buffer and send a raw block instead. - if (((use_raw_block) || - ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= - d->m_total_lz_bytes))) && - ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { - mz_uint i; - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - TDEFL_PUT_BITS(0, 2); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { - TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); - } - for (i = 0; i < d->m_total_lz_bytes; ++i) { - TDEFL_PUT_BITS( - d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], - 8); - } - } - // Check for the extremely unlikely (if not impossible) case of the compressed - // block not fitting into the output buffer when using dynamic codes. 
- else if (!comp_block_succeeded) { - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - tdefl_compress_block(d, MZ_TRUE); - } - - if (flush) { - if (flush == TDEFL_FINISH) { - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { - mz_uint i, a = d->m_adler32; - for (i = 0; i < 4; i++) { - TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); - a <<= 8; - } - } - } else { - mz_uint i, z = 0; - TDEFL_PUT_BITS(0, 3); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, z ^= 0xFFFF) { - TDEFL_PUT_BITS(z & 0xFFFF, 16); - } - } - } - - MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); - - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; - d->m_total_lz_bytes = 0; - d->m_block_index++; - - if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { - if (d->m_pPut_buf_func) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) - return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); - } else if (pOutput_buf_start == d->m_output_buf) { - int bytes_to_copy = (int)MZ_MIN( - (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, - bytes_to_copy); - d->m_out_buf_ofs += bytes_to_copy; - if ((n -= bytes_to_copy) != 0) { - d->m_output_flush_ofs = bytes_to_copy; - d->m_output_flush_remaining = n; - } - } else { - d->m_out_buf_ofs += n; - } - } - - return d->m_output_flush_remaining; -} - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES -#define TDEFL_READ_UNALIGNED_WORD(p) *(const 
mz_uint16 *)(p) -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; - mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), - s01 = TDEFL_READ_UNALIGNED_WORD(s); - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - q = (const mz_uint16 *)(d->m_dict + probe_pos); - if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; - p = s; - probe_len = 32; - do { - } while ( - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - if (!probe_len) { - *pMatch_dist = dist; - *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); - break; - } else if ((probe_len = ((mz_uint)(p - s) * 2) + - (mz_uint)(*(const mz_uint8 *)p == - *(const mz_uint8 *)q)) > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == - max_match_len) - break; - c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + 
match_len - 1]); - } - } -} -#else -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint8 *s = d->m_dict + pos, *p, *q; - mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if ((d->m_dict[probe_pos + match_len] == c0) && \ - (d->m_dict[probe_pos + match_len - 1] == c1)) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - p = s; - q = d->m_dict + probe_pos; - for (probe_len = 0; probe_len < max_match_len; probe_len++) - if (*p++ != *q++) break; - if (probe_len > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = probe_len) == max_match_len) return; - c0 = d->m_dict[pos + match_len]; - c1 = d->m_dict[pos + match_len - 1]; - } - } -} -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -static mz_bool tdefl_compress_fast(tdefl_compressor *d) { - // Faster, minimally featured LZRW1-style match+parse loop with better - // register utilization. Intended for applications where raw throughput is - // valued more highly than ratio. 
- mz_uint lookahead_pos = d->m_lookahead_pos, - lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, - total_lz_bytes = d->m_total_lz_bytes, - num_flags_left = d->m_num_flags_left; - mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; - mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - - while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { - const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; - mz_uint dst_pos = - (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); - d->m_src_buf_left -= num_bytes_to_process; - lookahead_size += num_bytes_to_process; - - while (num_bytes_to_process) { - mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); - memcpy(d->m_dict + dst_pos, d->m_pSrc, n); - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, - MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); - d->m_pSrc += n; - dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; - num_bytes_to_process -= n; - } - - dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); - if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) - break; - - while (lookahead_size >= 4) { - mz_uint cur_match_dist, cur_match_len = 1; - mz_uint8 *pCur_dict = d->m_dict + cur_pos; - mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; - mz_uint hash = - (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & - TDEFL_LEVEL1_HASH_SIZE_MASK; - mz_uint probe_pos = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)lookahead_pos; - - if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= - dict_size) && - ((*(const mz_uint32 *)(d->m_dict + - (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & - 0xFFFFFF) == first_trigram)) { - const mz_uint16 *p = (const mz_uint16 *)pCur_dict; - const mz_uint16 *q = 
(const mz_uint16 *)(d->m_dict + probe_pos); - mz_uint32 probe_len = 32; - do { - } while ((TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + - (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); - if (!probe_len) - cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; - - if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || - ((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U))) { - cur_match_len = 1; - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } else { - mz_uint32 s0, s1; - cur_match_len = MZ_MIN(cur_match_len, lookahead_size); - - MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 1) && - (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); - - cur_match_dist--; - - pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); - *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; - pLZ_code_buf += 3; - *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); - - s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; - s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; - d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; - - d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - - TDEFL_MIN_MATCH_LEN]]++; - } - } else { - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } - - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - total_lz_bytes += cur_match_len; - lookahead_pos += cur_match_len; - dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; - MZ_ASSERT(lookahead_size >= cur_match_len); - lookahead_size -= cur_match_len; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? 
MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - - while (lookahead_size) { - mz_uint8 lit = d->m_dict[cur_pos]; - - total_lz_bytes++; - *pLZ_code_buf++ = lit; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - d->m_huff_count[0][lit]++; - - lookahead_pos++; - dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - lookahead_size--; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - } - - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - return MZ_TRUE; -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - -static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, - mz_uint8 lit) { - d->m_total_lz_bytes++; - *d->m_pLZ_code_buf++ = lit; - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - d->m_huff_count[0][lit]++; -} - -static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, - mz_uint match_len, - mz_uint match_dist) { - mz_uint32 s0, s1; - - 
MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && - (match_dist <= TDEFL_LZ_DICT_SIZE)); - - d->m_total_lz_bytes += match_len; - - d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); - - match_dist -= 1; - d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); - d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); - d->m_pLZ_code_buf += 3; - - *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; - d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; - - if (match_len >= TDEFL_MIN_MATCH_LEN) - d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; -} - -static mz_bool tdefl_compress_normal(tdefl_compressor *d) { - const mz_uint8 *pSrc = d->m_pSrc; - size_t src_buf_left = d->m_src_buf_left; - tdefl_flush flush = d->m_flush; - - while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { - mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; - // Update dictionary and hash chains. Keeps the lookahead size equal to - // TDEFL_MAX_MATCH_LEN. 
- if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK, - ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; - mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); - const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; - src_buf_left -= num_bytes_to_process; - d->m_lookahead_size += num_bytes_to_process; - while (pSrc != pSrc_end) { - mz_uint8 c = *pSrc++; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - ins_pos++; - } - } else { - while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { - mz_uint8 c = *pSrc++; - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK; - src_buf_left--; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { - mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; - mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << (TDEFL_LZ_HASH_SHIFT * 2)) ^ - (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - c) & - (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - } - } - } - d->m_dict_size = - MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); - if ((!flush) && (d->m_lookahead_size < 
TDEFL_MAX_MATCH_LEN)) break; - - // Simple lazy/greedy parsing state machine. - len_to_move = 1; - cur_match_dist = 0; - cur_match_len = - d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); - cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { - if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { - mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; - cur_match_len = 0; - while (cur_match_len < d->m_lookahead_size) { - if (d->m_dict[cur_pos + cur_match_len] != c) break; - cur_match_len++; - } - if (cur_match_len < TDEFL_MIN_MATCH_LEN) - cur_match_len = 0; - else - cur_match_dist = 1; - } - } else { - tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, - d->m_lookahead_size, &cur_match_dist, &cur_match_len); - } - if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U)) || - (cur_pos == cur_match_dist) || - ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { - cur_match_dist = cur_match_len = 0; - } - if (d->m_saved_match_len) { - if (cur_match_len > d->m_saved_match_len) { - tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); - if (cur_match_len >= 128) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - d->m_saved_match_len = 0; - len_to_move = cur_match_len; - } else { - d->m_saved_lit = d->m_dict[cur_pos]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - } else { - tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); - len_to_move = d->m_saved_match_len - 1; - d->m_saved_match_len = 0; - } - } else if (!cur_match_dist) - tdefl_record_literal(d, - d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); - else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || - (cur_match_len >= 128)) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - len_to_move = cur_match_len; - } else { - d->m_saved_lit = 
d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - // Move the lookahead forward by len_to_move bytes. - d->m_lookahead_pos += len_to_move; - MZ_ASSERT(d->m_lookahead_size >= len_to_move); - d->m_lookahead_size -= len_to_move; - d->m_dict_size = - MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); - // Check if it's time to flush the current LZ codes to the internal output - // buffer. - if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || - ((d->m_total_lz_bytes > 31 * 1024) && - (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= - d->m_total_lz_bytes) || - (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { - int n; - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - } - } - - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - return MZ_TRUE; -} - -static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { - if (d->m_pIn_buf_size) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - } - - if (d->m_pOut_buf_size) { - size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, - d->m_output_flush_remaining); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, - d->m_output_buf + d->m_output_flush_ofs, n); - d->m_output_flush_ofs += (mz_uint)n; - d->m_output_flush_remaining -= (mz_uint)n; - d->m_out_buf_ofs += n; - - *d->m_pOut_buf_size = d->m_out_buf_ofs; - } - - return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE - : TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush) { - if (!d) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return TDEFL_STATUS_BAD_PARAM; - } - - d->m_pIn_buf = pIn_buf; - d->m_pIn_buf_size = pIn_buf_size; - d->m_pOut_buf = pOut_buf; - d->m_pOut_buf_size = pOut_buf_size; - d->m_pSrc = (const mz_uint8 *)(pIn_buf); - d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; - d->m_out_buf_ofs = 0; - d->m_flush = flush; - - if (((d->m_pPut_buf_func != NULL) == - ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || - (d->m_prev_return_status != TDEFL_STATUS_OKAY) || - (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || - (pIn_buf_size && *pIn_buf_size && !pIn_buf) || - (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); - } - d->m_wants_to_finish |= (flush == TDEFL_FINISH); - - if ((d->m_output_flush_remaining) || (d->m_finished)) - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && - ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && - ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | - TDEFL_RLE_MATCHES)) == 0)) { - if (!tdefl_compress_fast(d)) return d->m_prev_return_status; - } else -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - { - if (!tdefl_compress_normal(d)) return d->m_prev_return_status; - } - - if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && - (pIn_buf)) - d->m_adler32 = - (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, - d->m_pSrc - (const mz_uint8 *)pIn_buf); - - if ((flush) && (!d->m_lookahead_size) && 
(!d->m_src_buf_left) && - (!d->m_output_flush_remaining)) { - if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; - d->m_finished = (flush == TDEFL_FINISH); - if (flush == TDEFL_FULL_FLUSH) { - MZ_CLEAR_OBJ(d->m_hash); - MZ_CLEAR_OBJ(d->m_next); - d->m_dict_size = 0; - } - } - - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); -} - -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush) { - MZ_ASSERT(d->m_pPut_buf_func); - return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); -} - -tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - d->m_pPut_buf_func = pPut_buf_func; - d->m_pPut_buf_user = pPut_buf_user; - d->m_flags = (mz_uint)(flags); - d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; - d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; - d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; - if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); - d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = - d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; - d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = - d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_pOutput_buf = d->m_output_buf; - d->m_pOutput_buf_end = d->m_output_buf; - d->m_prev_return_status = TDEFL_STATUS_OKAY; - d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; - d->m_adler32 = 1; - d->m_pIn_buf = NULL; - d->m_pOut_buf = NULL; - d->m_pIn_buf_size = NULL; - d->m_pOut_buf_size = NULL; - d->m_flush = TDEFL_NO_FLUSH; - d->m_pSrc = NULL; - d->m_src_buf_left = 0; - d->m_out_buf_ofs = 0; - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - 
memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - return TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { - return d->m_prev_return_status; -} - -mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } - -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - tdefl_compressor *pComp; - mz_bool succeeded; - if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; - pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - if (!pComp) return MZ_FALSE; - succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == - TDEFL_STATUS_OKAY); - succeeded = - succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == - TDEFL_STATUS_DONE); - MZ_FREE(pComp); - return succeeded; -} - -typedef struct { - size_t m_size, m_capacity; - mz_uint8 *m_pBuf; - mz_bool m_expandable; -} tdefl_output_buffer; - -static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, - void *pUser) { - tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; - size_t new_size = p->m_size + len; - if (new_size > p->m_capacity) { - size_t new_capacity = p->m_capacity; - mz_uint8 *pNew_buf; - if (!p->m_expandable) return MZ_FALSE; - do { - new_capacity = MZ_MAX(128U, new_capacity << 1U); - } while (new_size > new_capacity); - pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); - if (!pNew_buf) return MZ_FALSE; - p->m_pBuf = pNew_buf; - p->m_capacity = new_capacity; - } - memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); - p->m_size = new_size; - return MZ_TRUE; -} - -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_len) - return MZ_FALSE; - else - *pOut_len = 0; - out_buf.m_expandable = MZ_TRUE; - if 
(!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return NULL; - *pOut_len = out_buf.m_size; - return out_buf.m_pBuf; -} - -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_buf) return 0; - out_buf.m_pBuf = (mz_uint8 *)pOut_buf; - out_buf.m_capacity = out_buf_len; - if (!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return 0; - return out_buf.m_size; -} - -#ifndef MINIZ_NO_ZLIB_APIS -static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, - 128, 256, 512, 768, 1500}; - -// level may actually range from [0,10] (10 is a "hidden" max level, where we -// want a bit more compression and it's fine if throughput to fall off a cliff -// on some files). -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy) { - mz_uint comp_flags = - s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | - ((level <= 3) ? 
TDEFL_GREEDY_PARSING_FLAG : 0); - if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; - - if (!level) - comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; - else if (strategy == MZ_FILTERED) - comp_flags |= TDEFL_FILTER_MATCHES; - else if (strategy == MZ_HUFFMAN_ONLY) - comp_flags &= ~TDEFL_MAX_PROBES_MASK; - else if (strategy == MZ_FIXED) - comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; - else if (strategy == MZ_RLE) - comp_flags |= TDEFL_RLE_MATCHES; - - return comp_flags; -} -#endif // MINIZ_NO_ZLIB_APIS - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4204) // nonstandard extension used : non-constant - // aggregate initializer (also supported by GNU - // C and C99, so no big deal) -#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is - // deprecated. Instead, use the ISO C and C++ - // conformant name: _strdup. -#endif - -// Simple PNG writer function by Alex Evans, 2011. Released into the public -// domain: https://gist.github.com/908299, more context at -// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. -// This is actually a modification of Alex's original code so PNG files -// generated by this function pass pngcheck. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip) { - // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was - // defined. 
- static const mz_uint s_tdefl_png_num_probes[11] = { - 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; - tdefl_compressor *pComp = - (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - tdefl_output_buffer out_buf; - int i, bpl = w * num_chans, y, z; - mz_uint32 c; - *pLen_out = 0; - if (!pComp) return NULL; - MZ_CLEAR_OBJ(out_buf); - out_buf.m_expandable = MZ_TRUE; - out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); - if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { - MZ_FREE(pComp); - return NULL; - } - // write dummy header - for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); - // compress image data - tdefl_init( - pComp, tdefl_output_buffer_putter, &out_buf, - s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); - for (y = 0; y < h; ++y) { - tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); - tdefl_compress_buffer(pComp, - (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, - bpl, TDEFL_NO_FLUSH); - } - if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != - TDEFL_STATUS_DONE) { - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - // write real header - *pLen_out = out_buf.m_size - 41; - { - static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; - mz_uint8 pnghdr[41] = {0x89, - 0x50, - 0x4e, - 0x47, - 0x0d, - 0x0a, - 0x1a, - 0x0a, - 0x00, - 0x00, - 0x00, - 0x0d, - 0x49, - 0x48, - 0x44, - 0x52, - 0, - 0, - (mz_uint8)(w >> 8), - (mz_uint8)w, - 0, - 0, - (mz_uint8)(h >> 8), - (mz_uint8)h, - 8, - chans[num_chans], - 0, - 0, - 0, - 0, - 0, - 0, - 0, - (mz_uint8)(*pLen_out >> 24), - (mz_uint8)(*pLen_out >> 16), - (mz_uint8)(*pLen_out >> 8), - (mz_uint8)*pLen_out, - 0x49, - 0x44, - 0x41, - 0x54}; - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); - for (i = 0; i < 4; ++i, c <<= 8) - ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); - memcpy(out_buf.m_pBuf, pnghdr, 41); - } - // write footer (IDAT CRC-32, followed by IEND chunk) - if 
(!tdefl_output_buffer_putter( - "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { - *pLen_out = 0; - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, - *pLen_out + 4); - for (i = 0; i < 4; ++i, c <<= 8) - (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); - // compute final size of file, grab compressed data buffer and return - *pLen_out += 57; - MZ_FREE(pComp); - return out_buf.m_pBuf; -} -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out) { - // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we - // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's - // where #defined out) - return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, - pLen_out, 6, MZ_FALSE); -} - -// ------------------- .ZIP archive reading - -#ifndef MINIZ_NO_ARCHIVE_APIS -#error "No arvhive APIs" - -#ifdef MINIZ_NO_STDIO -#define MZ_FILE void * -#else -#include -#include - -#if defined(_MSC_VER) || defined(__MINGW64__) -static FILE *mz_fopen(const char *pFilename, const char *pMode) { - FILE *pFile = NULL; - fopen_s(&pFile, pFilename, pMode); - return pFile; -} -static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { - FILE *pFile = NULL; - if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; - return pFile; -} -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN mz_fopen -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 _ftelli64 -#define MZ_FSEEK64 _fseeki64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN mz_freopen -#define MZ_DELETE_FILE remove -#elif defined(__MINGW32__) -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose 
-#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__TINYC__) -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftell -#define MZ_FSEEK64 fseek -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen64(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT stat64 -#define MZ_FILE_STAT stat64 -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) -#define MZ_DELETE_FILE remove -#else -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello -#define MZ_FSEEK64 fseeko -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#endif // #ifdef _MSC_VER -#endif // #ifdef MINIZ_NO_STDIO - -#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) - -// Various ZIP archive enums. To completely avoid cross platform compiler -// alignment and platform endian issues, miniz.c doesn't use structs for any of -// this stuff. 
-enum { - // ZIP archive identifiers and record sizes - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, - MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, - // Central directory header record offsets - MZ_ZIP_CDH_SIG_OFS = 0, - MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, - MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, - MZ_ZIP_CDH_BIT_FLAG_OFS = 8, - MZ_ZIP_CDH_METHOD_OFS = 10, - MZ_ZIP_CDH_FILE_TIME_OFS = 12, - MZ_ZIP_CDH_FILE_DATE_OFS = 14, - MZ_ZIP_CDH_CRC32_OFS = 16, - MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, - MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, - MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, - MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, - MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, - MZ_ZIP_CDH_DISK_START_OFS = 34, - MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, - MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, - MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, - // Local directory header offsets - MZ_ZIP_LDH_SIG_OFS = 0, - MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, - MZ_ZIP_LDH_BIT_FLAG_OFS = 6, - MZ_ZIP_LDH_METHOD_OFS = 8, - MZ_ZIP_LDH_FILE_TIME_OFS = 10, - MZ_ZIP_LDH_FILE_DATE_OFS = 12, - MZ_ZIP_LDH_CRC32_OFS = 14, - MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, - MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, - MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, - MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, - // End of central directory offsets - MZ_ZIP_ECDH_SIG_OFS = 0, - MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, - MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, - MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, - MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, - MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, - MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, - MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, -}; - -typedef struct { - void *m_p; - size_t m_size, m_capacity; - mz_uint m_element_size; -} mz_zip_array; - -struct mz_zip_internal_state_tag { - mz_zip_array m_central_dir; - mz_zip_array m_central_dir_offsets; - mz_zip_array m_sorted_central_dir_offsets; - MZ_FILE *m_pFile; - void *m_pMem; - size_t 
m_mem_size; - size_t m_mem_capacity; -}; - -#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ - (array_ptr)->m_element_size = element_size -#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ - ((element_type *)((array_ptr)->m_p))[index] - -static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, - mz_zip_array *pArray) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); - memset(pArray, 0, sizeof(mz_zip_array)); -} - -static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t min_new_capacity, - mz_uint growing) { - void *pNew_p; - size_t new_capacity = min_new_capacity; - MZ_ASSERT(pArray->m_element_size); - if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; - if (growing) { - new_capacity = MZ_MAX(1, pArray->m_capacity); - while (new_capacity < min_new_capacity) new_capacity *= 2; - } - if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, - pArray->m_element_size, new_capacity))) - return MZ_FALSE; - pArray->m_p = pNew_p; - pArray->m_capacity = new_capacity; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_capacity, - mz_uint growing) { - if (new_capacity > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) - return MZ_FALSE; - } - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_size, - mz_uint growing) { - if (new_size > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) - return MZ_FALSE; - } - pArray->m_size = new_size; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t n) { - return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive 
*pZip, - mz_zip_array *pArray, - const void *pElements, - size_t n) { - size_t orig_size = pArray->m_size; - if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) - return MZ_FALSE; - memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, - pElements, n * pArray->m_element_size); - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { - struct tm tm; - memset(&tm, 0, sizeof(tm)); - tm.tm_isdst = -1; - tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; - tm.tm_mon = ((dos_date >> 5) & 15) - 1; - tm.tm_mday = dos_date & 31; - tm.tm_hour = (dos_time >> 11) & 31; - tm.tm_min = (dos_time >> 5) & 63; - tm.tm_sec = (dos_time << 1) & 62; - return mktime(&tm); -} - -static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef _MSC_VER - struct tm tm_struct; - struct tm *tm = &tm_struct; - errno_t err = localtime_s(tm, &time); - if (err) { - *pDOS_date = 0; - *pDOS_time = 0; - return; - } -#else - struct tm *tm = localtime(&time); -#endif - *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + - ((tm->tm_sec) >> 1)); - *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + - ((tm->tm_mon + 1) << 5) + tm->tm_mday); -} -#endif - -#ifndef MINIZ_NO_STDIO -static mz_bool mz_zip_get_file_modified_time(const char *pFilename, - mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef MINIZ_NO_TIME - (void)pFilename; - *pDOS_date = *pDOS_time = 0; -#else - struct MZ_FILE_STAT_STRUCT file_stat; - // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 - // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. 
- if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; - mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); -#endif // #ifdef MINIZ_NO_TIME - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, - time_t modified_time) { - struct utimbuf t; - t.actime = access_time; - t.modtime = modified_time; - return !utime(pFilename, &t); -} -#endif // #ifndef MINIZ_NO_TIME -#endif // #ifndef MINIZ_NO_STDIO - -static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, - mz_uint32 flags) { - (void)flags; - if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_READING; - pZip->m_archive_size = 0; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool -mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, - mz_uint l_index, mz_uint r_index) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, 
mz_uint32, r_index)); - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), - r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; - pL++; - pR++; - } - return (pL == pE) ? (l_len < r_len) : (l < r); -} - -#define MZ_SWAP_UINT32(a, b) \ - do { \ - mz_uint32 t = a; \ - a = b; \ - b = t; \ - } \ - MZ_MACRO_END - -// Heap sort of lowercased filenames, used to help accelerate plain central -// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), -// but it could allocate memory.) -static void mz_zip_reader_sort_central_dir_offsets_by_filename( - mz_zip_archive *pZip) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - int start = (size - 2) >> 1, end; - while (start >= 0) { - int child, root = start; - for (;;) { - if ((child = (root << 1) + 1) >= size) break; - child += - (((child + 1) < size) && - (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1]))); - if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - start--; - } - - end = size - 1; - while (end > 0) { - int child, root = 0; - MZ_SWAP_UINT32(pIndices[end], pIndices[0]); - for (;;) { - if ((child = (root << 1) + 1) >= end) break; - child += - (((child + 1) < end) && - mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1])); - if 
(!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - end--; - } -} - -static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, - mz_uint32 flags) { - mz_uint cdir_size, num_this_disk, cdir_disk_index; - mz_uint64 cdir_ofs; - mz_int64 cur_file_ofs; - const mz_uint8 *p; - mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; - mz_uint8 *pBuf = (mz_uint8 *)buf_u32; - mz_bool sort_central_dir = - ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); - // Basic sanity checks - reject files which are too small, and check the first - // 4 bytes of the file to make sure a local header is there. - if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - // Find the end of central directory record by scanning the file from the end - // towards the beginning. - cur_file_ofs = - MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); - for (;;) { - int i, - n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) - return MZ_FALSE; - for (i = n - 4; i >= 0; --i) - if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; - if (i >= 0) { - cur_file_ofs += i; - break; - } - if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= - (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) - return MZ_FALSE; - cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); - } - // Read and verify the end of central directory record. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || - ((pZip->m_total_files = - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) - return MZ_FALSE; - - num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); - cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); - if (((num_this_disk | cdir_disk_index) != 0) && - ((num_this_disk != 1) || (cdir_disk_index != 1))) - return MZ_FALSE; - - if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < - pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - - cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); - if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; - - pZip->m_central_directory_file_ofs = cdir_ofs; - - if (pZip->m_total_files) { - mz_uint i, n; - - // Read the entire central directory into a heap block, and allocate another - // heap block to hold the unsorted central dir file record offsets, and - // another to hold the sorted indices. - if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, - MZ_FALSE)) || - (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, - pZip->m_total_files, MZ_FALSE))) - return MZ_FALSE; - - if (sort_central_dir) { - if (!mz_zip_array_resize(pZip, - &pZip->m_pState->m_sorted_central_dir_offsets, - pZip->m_total_files, MZ_FALSE)) - return MZ_FALSE; - } - - if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, - pZip->m_pState->m_central_dir.m_p, - cdir_size) != cdir_size) - return MZ_FALSE; - - // Now create an index into the central directory file records, do some - // basic sanity checking on each record, and check for zip64 entries (which - // are not yet supported). 
- p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; - for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { - mz_uint total_header_size, comp_size, decomp_size, disk_index; - if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || - (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) - return MZ_FALSE; - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - i) = - (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); - if (sort_central_dir) - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, - mz_uint32, i) = i; - comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && - (decomp_size != comp_size)) || - (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || - (comp_size == 0xFFFFFFFF)) - return MZ_FALSE; - disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); - if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; - if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) - return MZ_FALSE; - if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > - n) - return MZ_FALSE; - n -= total_header_size; - p += total_header_size; - } - } - - if (sort_central_dir) - mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); - - return MZ_TRUE; -} - -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags) { - if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -static size_t mz_zip_mem_read_func(void 
*pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - size_t s = (file_ofs >= pZip->m_archive_size) - ? 0 - : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); - memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); - return s; -} - -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags) { - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - pZip->m_pRead = mz_zip_mem_read_func; - pZip->m_pIO_opaque = pZip; -#ifdef __cplusplus - pZip->m_pState->m_pMem = const_cast(pMem); -#else - pZip->m_pState->m_pMem = (void *)pMem; -#endif - pZip->m_pState->m_mem_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags) { - mz_uint64 file_size; - MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); - if (!pFile) return MZ_FALSE; - if (MZ_FSEEK64(pFile, 0, SEEK_END)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - file_size = MZ_FTELL64(pFile); - if (!mz_zip_reader_init_internal(pZip, flags)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - pZip->m_pRead = mz_zip_file_read_func; - pZip->m_pIO_opaque = pZip; - pZip->m_pState->m_pFile = pFile; - pZip->m_archive_size = file_size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} -#endif 
// #ifndef MINIZ_NO_STDIO - -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { - return pZip ? pZip->m_total_files : 0; -} - -static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh( - mz_zip_archive *pZip, mz_uint file_index) { - if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return NULL; - return &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); -} - -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint m_bit_flag; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - return (m_bit_flag & 1); -} - -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint filename_len, external_attr; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - - // First see if the filename ends with a '/' character. - filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_len) { - if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') - return MZ_TRUE; - } - - // Bugfix: This code was also checking if the internal attribute was non-zero, - // which wasn't correct. - // Most/all zip writers (hopefully) set DOS file/directory attributes in the - // low 16-bits, so check for the DOS directory flag and ignore the source OS - // ID in the created by field. - // FIXME: Remove this check? Is it necessary - we already check the filename. 
- external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - if ((external_attr & 0x10) != 0) return MZ_TRUE; - - return MZ_FALSE; -} - -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if ((!p) || (!pStat)) return MZ_FALSE; - - // Unpack the central directory record. - pStat->m_file_index = file_index; - pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); - pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); - pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); - pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); -#ifndef MINIZ_NO_TIME - pStat->m_time = - mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), - MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); -#endif - pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); - pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); - pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - - // Copy as much of the filename and comment as possible. 
- n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); - memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pStat->m_filename[n] = '\0'; - - n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); - pStat->m_comment_size = n; - memcpy(pStat->m_comment, - p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), - n); - pStat->m_comment[n] = '\0'; - - return MZ_TRUE; -} - -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) { - if (filename_buf_size) pFilename[0] = '\0'; - return 0; - } - n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_buf_size) { - n = MZ_MIN(n, filename_buf_size - 1); - memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pFilename[n] = '\0'; - } - return n + 1; -} - -static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, - const char *pB, - mz_uint len, - mz_uint flags) { - mz_uint i; - if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); - for (i = 0; i < len; ++i) - if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; - return MZ_TRUE; -} - -static MZ_FORCEINLINE int mz_zip_reader_filename_compare( - const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, - mz_uint r_len) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) 
break; - pL++; - pR++; - } - return (pL == pE) ? (int)(l_len - r_len) : (l - r); -} - -static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, - const char *pFilename) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - const mz_uint filename_len = (mz_uint)strlen(pFilename); - int l = 0, h = size - 1; - while (l <= h) { - int m = (l + h) >> 1, file_index = pIndices[m], - comp = - mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, - file_index, pFilename, filename_len); - if (!comp) - return file_index; - else if (comp < 0) - l = m + 1; - else - h = m - 1; - } - return -1; -} - -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags) { - mz_uint file_index; - size_t name_len, comment_len; - if ((!pZip) || (!pZip->m_pState) || (!pName) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return -1; - if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && - (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) - return mz_zip_reader_locate_file_binary_search(pZip, pName); - name_len = strlen(pName); - if (name_len > 0xFFFF) return -1; - comment_len = pComment ? 
strlen(pComment) : 0; - if (comment_len > 0xFFFF) return -1; - for (file_index = 0; file_index < pZip->m_total_files; file_index++) { - const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); - mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); - const char *pFilename = - (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - if (filename_len < name_len) continue; - if (comment_len) { - mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), - file_comment_len = - MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); - const char *pFile_comment = pFilename + filename_len + file_extra_len; - if ((file_comment_len != comment_len) || - (!mz_zip_reader_string_equal(pComment, pFile_comment, - file_comment_len, flags))) - continue; - } - if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { - int ofs = filename_len - 1; - do { - if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || - (pFilename[ofs] == ':')) - break; - } while (--ofs >= 0); - ofs++; - pFilename += ofs; - filename_len -= ofs; - } - if ((filename_len == name_len) && - (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) - return file_index; - } - return -1; -} - -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size) { - int status = TINFL_STATUS_DONE; - mz_uint64 needed_size, cur_file_ofs, comp_remaining, - out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; - mz_zip_archive_file_stat file_stat; - void *pRead_buf; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - tinfl_decompressor inflator; - - if ((buf_size) && (!pBuf)) return MZ_FALSE; - - if 
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory (I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Ensure supplied output buffer is large enough. - needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size - : file_stat.m_uncomp_size; - if (buf_size < needed_size) return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - (size_t)needed_size) != needed_size) - return MZ_FALSE; - return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); - } - - // Decompress the file either directly from memory or from a file input - // buffer. - tinfl_init(&inflator); - - if (pZip->m_pState->m_pMem) { - // Read directly from the archive in memory. - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else if (pUser_read_buf) { - // Use a user provided read buffer. - if (!user_read_buf_size) return MZ_FALSE; - pRead_buf = (mz_uint8 *)pUser_read_buf; - read_buf_size = user_read_buf_size; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } else { - // Temporarily allocate a read buffer. - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (read_buf_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) -#endif - return MZ_FALSE; - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - do { - size_t in_buf_size, - out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, 
&in_buf_size, - (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | - (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - out_buf_ofs += out_buf_size; - } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); - - if (status == TINFL_STATUS_DONE) { - // Make sure the entire file was decompressed, and check its CRC. - if ((out_buf_ofs != file_stat.m_uncomp_size) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, pUser_read_buf, - user_read_buf_size); -} - -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags) { - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, NULL, 0); -} - -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags) { - return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, - buf_size, flags, NULL, 0); -} - -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags) { - mz_uint64 comp_size, uncomp_size, alloc_size; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - void *pBuf; - - if (pSize) *pSize = 0; - if (!p) return NULL; - 
- comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - - alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#endif - return NULL; - if (NULL == - (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) - return NULL; - - if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, - flags)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return NULL; - } - - if (pSize) *pSize = (size_t)alloc_size; - return pBuf; -} - -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) { - if (pSize) *pSize = 0; - return MZ_FALSE; - } - return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); -} - -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int status = TINFL_STATUS_DONE; - mz_uint file_crc32 = MZ_CRC32_INIT; - mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, - out_buf_ofs = 0, cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void *pRead_buf = NULL; - void *pWrite_buf = NULL; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory 
(I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - // Decompress the file either directly from memory or from a file input - // buffer. - if (pZip->m_pState->m_pMem) { - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else { - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pState->m_pMem) { -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#endif - return MZ_FALSE; - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) - status = TINFL_STATUS_FAILED; - else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, - (size_t)file_stat.m_comp_size); - cur_file_ofs += file_stat.m_comp_size; - out_buf_ofs += file_stat.m_comp_size; - comp_remaining = 0; - } else { - while (comp_remaining) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - - if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = (mz_uint32)mz_crc32( - file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); - - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - out_buf_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - } - } - } else { - tinfl_decompressor inflator; - tinfl_init(&inflator); - - if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - TINFL_LZ_DICT_SIZE))) - status = TINFL_STATUS_FAILED; - else { - do { - mz_uint8 *pWrite_buf_cur = - (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - size_t in_buf_size, - out_buf_size = - TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; 
- break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, - (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, - comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - - if (out_buf_size) { - if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != - out_buf_size) { - status = TINFL_STATUS_FAILED; - break; - } - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); - if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { - status = TINFL_STATUS_FAILED; - break; - } - } - } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || - (status == TINFL_STATUS_HAS_MORE_OUTPUT)); - } - } - - if ((status == TINFL_STATUS_DONE) && - (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { - // Make sure the entire file was decompressed, and check its CRC. 
- if ((out_buf_ofs != file_stat.m_uncomp_size) || - (file_crc32 != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, - flags); -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, - const void *pBuf, size_t n) { - (void)ofs; - return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); -} - -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, - mz_uint flags) { - mz_bool status; - mz_zip_archive_file_stat file_stat; - MZ_FILE *pFile; - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - pFile = MZ_FOPEN(pDst_filename, "wb"); - if (!pFile) return MZ_FALSE; - status = mz_zip_reader_extract_to_callback( - pZip, file_index, mz_zip_file_write_callback, pFile, flags); - if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; -#ifndef MINIZ_NO_TIME - if (status) - mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); -#endif - return status; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - - if (pZip->m_pState) { - mz_zip_internal_state *pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - 
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - } - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags) { - int file_index = - mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); -} -#endif - -// ------------------- .ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); -} -static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); - p[2] = (mz_uint8)(v >> 16); - p[3] = (mz_uint8)(v >> 24); -} -#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) -#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) - -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { - if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || - (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (pZip->m_file_offset_alignment) { - // Ensure user specified file offset alignment is a power of 2. 
- if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) - return MZ_FALSE; - } - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_archive_size = existing_size; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); -#ifdef _MSC_VER - if ((!n) || - ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#else - if ((!n) || - ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#endif - return 0; - if (new_size > pState->m_mem_capacity) { - void *pNew_block; - size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); - while (new_capacity < new_size) new_capacity *= 2; - if (NULL == (pNew_block = pZip->m_pRealloc( - pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) - return 0; - pState->m_pMem = pNew_block; - pState->m_mem_capacity = new_capacity; - } - memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); - pState->m_mem_size = (size_t)new_size; - return n; -} - -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t 
size_to_reserve_at_beginning, - size_t initial_allocation_size) { - pZip->m_pWrite = mz_zip_heap_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, - size_to_reserve_at_beginning))) { - if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_mem_capacity = initial_allocation_size; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning) { - MZ_FILE *pFile; - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_pFile = pFile; - if (size_to_reserve_at_beginning) { - mz_uint64 cur_ofs = 0; - char buf[4096]; - MZ_CLEAR_OBJ(buf); - do { - size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - cur_ofs += n; - size_to_reserve_at_beginning -= n; - } while (size_to_reserve_at_beginning); - } - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char 
*pFilename) { - mz_zip_internal_state *pState; - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - // No sense in trying to write to an archive that's already at the support max - // size - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if (pState->m_pFile) { -#ifdef MINIZ_NO_STDIO - pFilename; - return MZ_FALSE; -#else - // Archive is being read from stdio - try to reopen as writable. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - if (!pFilename) return MZ_FALSE; - pZip->m_pWrite = mz_zip_file_write_func; - if (NULL == - (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { - // The mz_zip_archive is now in a bogus state because pState->m_pFile is - // NULL, so just close it. - mz_zip_reader_end(pZip); - return MZ_FALSE; - } -#endif // #ifdef MINIZ_NO_STDIO - } else if (pState->m_pMem) { - // Archive lives in a memory block. Assume it's from the heap that we can - // resize using the realloc callback. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - pState->m_mem_capacity = pState->m_mem_size; - pZip->m_pWrite = mz_zip_heap_write_func; - } - // Archive is being read via a user provided read function - make sure the - // user has specified a write function too. - else if (!pZip->m_pWrite) - return MZ_FALSE; - - // Start writing new files at the archive's current central directory - // location. 
- pZip->m_archive_size = pZip->m_central_directory_file_ofs; - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_central_directory_file_ofs = 0; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags) { - return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, - level_and_flags, 0, 0); -} - -typedef struct { - mz_zip_archive *m_pZip; - mz_uint64 m_cur_archive_file_ofs; - mz_uint64 m_comp_size; -} mz_zip_writer_add_state; - -static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, - void *pUser) { - mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; - if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, - pState->m_cur_archive_file_ofs, pBuf, - len) != len) - return MZ_FALSE; - pState->m_cur_archive_file_ofs += len; - pState->m_comp_size += len; - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_local_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_central_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, - mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, - mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, - mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_add_to_central_dir( - mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, - const void *pExtra, mz_uint16 extra_size, const void *pComment, - mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, - mz_uint32 ext_attributes) { - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; - size_t orig_central_dir_size = pState->m_central_dir.m_size; - mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - - // No zip64 support yet - if ((local_header_ofs > 0xFFFFFFFF) || - (((mz_uint64)pState->m_central_dir.m_size + - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + - comment_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_central_dir_header( - pZip, central_dir_header, filename_size, extra_size, comment_size, - uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, - dos_date, local_header_ofs, ext_attributes)) - return MZ_FALSE; - - if ((!mz_zip_array_push_back(pZip, 
&pState->m_central_dir, central_dir_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, - filename_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, - extra_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, - comment_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, - ¢ral_dir_ofs, 1))) { - // Try to push the central directory array back into its original state. - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { - // Basic ZIP archive filename validity checks: Valid filenames cannot start - // with a forward slash, cannot contain a drive letter, and cannot use - // DOS-style backward slashes. - if (*pArchive_name == '/') return MZ_FALSE; - while (*pArchive_name) { - if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; - pArchive_name++; - } - return MZ_TRUE; -} - -static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment( - mz_zip_archive *pZip) { - mz_uint32 n; - if (!pZip->m_file_offset_alignment) return 0; - n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); - return (pZip->m_file_offset_alignment - n) & - (pZip->m_file_offset_alignment - 1); -} - -static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, - mz_uint64 cur_file_ofs, mz_uint32 n) { - char buf[4096]; - memset(buf, 0, MZ_MIN(sizeof(buf), n)); - while (n) { - mz_uint32 s = MZ_MIN(sizeof(buf), n); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) - return MZ_FALSE; - cur_file_ofs += s; - n -= s; - } - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, 
mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32) { - mz_uint16 method = 0, dos_time = 0, dos_date = 0; - mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; - mz_uint64 local_dir_header_ofs = pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - tdefl_compressor *pComp = NULL; - mz_bool store_data_uncompressed; - mz_zip_internal_state *pState; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - store_data_uncompressed = - ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || - (!pArchive_name) || ((comment_size) && (!pComment)) || - (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) - return MZ_FALSE; - // No zip64 support yet - if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - -#ifndef MINIZ_NO_TIME - { - time_t cur_time; - time(&cur_time); - mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); - } -#endif // #ifndef MINIZ_NO_TIME - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { - // Set DOS Subdirectory attribute bit. 
- ext_attributes |= 0x10; - // Subdirectories cannot contain data. - if ((buf_size) || (uncomp_size)) return MZ_FALSE; - } - - // Try to do any allocations before writing to the archive, so if an - // allocation fails the file remains unmodified. (A good idea if we're doing - // an in-place modification.) - if ((!mz_zip_array_ensure_room( - pZip, &pState->m_central_dir, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || - (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) - return MZ_FALSE; - - if ((!store_data_uncompressed) && (buf_size)) { - if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) - return MZ_FALSE; - } - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { - uncomp_crc32 = - (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); - uncomp_size = buf_size; - if (uncomp_size <= 3) { - level = 0; - store_data_uncompressed = MZ_TRUE; - } - } - - if (store_data_uncompressed) { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, - buf_size) != buf_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - cur_archive_file_ofs += buf_size; - comp_size = buf_size; - - if 
(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; - } else if (buf_size) { - mz_zip_writer_add_state state; - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) || - (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != - TDEFL_STATUS_DONE)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pComp = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; - mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; - mz_uint64 local_dir_header_ofs = 
pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, - comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - MZ_FILE *pSrc_file = NULL; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || - ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) - return MZ_FALSE; - - pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); - if (!pSrc_file) return MZ_FALSE; - MZ_FSEEK64(pSrc_file, 0, SEEK_END); - uncomp_size = MZ_FTELL64(pSrc_file); - MZ_FSEEK64(pSrc_file, 0, SEEK_SET); - - if (uncomp_size > 0xFFFFFFFF) { - // No zip64 support yet - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - if (uncomp_size <= 3) level = 0; - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - 
MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (uncomp_size) { - mz_uint64 uncomp_remaining = uncomp_size; - void *pRead_buf = - pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); - if (!pRead_buf) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - if (!level) { - while (uncomp_remaining) { - mz_uint n = - (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); - if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || - (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, - n) != n)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - uncomp_crc32 = - (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); - uncomp_remaining -= n; - cur_archive_file_ofs += n; - } - comp_size = uncomp_size; - } else { - mz_bool result = MZ_FALSE; - mz_zip_writer_add_state state; - tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); - if (!pComp) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - for (;;) { - size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, - (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - tdefl_status status; - - if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) - break; - - uncomp_crc32 = (mz_uint32)mz_crc32( - 
uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); - uncomp_remaining -= in_buf_size; - - status = tdefl_compress_buffer( - pComp, pRead_buf, in_buf_size, - uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); - if (status == TDEFL_STATUS_DONE) { - result = MZ_TRUE; - break; - } else if (status != TDEFL_STATUS_OKAY) - break; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - - if (!result) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - } - - MZ_FCLOSE(pSrc_file); - pSrc_file = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index) { - mz_uint n, bit_flags, num_alignment_padding_bytes; - mz_uint64 comp_bytes_remaining, local_dir_header_ofs; - mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 
*pLocal_header = (mz_uint8 *)local_header_u32; - mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - size_t orig_central_dir_size; - mz_zip_internal_state *pState; - void *pBuf; - const mz_uint8 *pSrc_central_header; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - if (NULL == - (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) - return MZ_FALSE; - pState = pZip->m_pState; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > - 0xFFFFFFFF)) - return MZ_FALSE; - - cur_src_file_ofs = - MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - cur_dst_file_ofs = pZip->m_archive_size; - - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, - pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, - num_alignment_padding_bytes)) - return MZ_FALSE; - cur_dst_file_ofs += num_alignment_padding_bytes; - local_dir_header_ofs = cur_dst_file_ofs; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - comp_bytes_remaining = - n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); 
- - if (NULL == (pBuf = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, - (size_t)MZ_MAX(sizeof(mz_uint32) * 4, - MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, - comp_bytes_remaining))))) - return MZ_FALSE; - - while (comp_bytes_remaining) { - n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_src_file_ofs += n; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_dst_file_ofs += n; - - comp_bytes_remaining -= n; - } - - bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); - if (bit_flags & 8) { - // Copy data descriptor - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 
4 : 3); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - cur_src_file_ofs += n; - cur_dst_file_ofs += n; - } - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - - // no zip64 support yet - if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; - - orig_central_dir_size = pState->m_central_dir.m_size; - - memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, - local_dir_header_ofs); - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) - return MZ_FALSE; - - n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); - if (!mz_zip_array_push_back( - pZip, &pState->m_central_dir, - pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; - n = (mz_uint32)orig_central_dir_size; - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - pZip->m_total_files++; - pZip->m_archive_size = cur_dst_file_ofs; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_uint64 central_dir_ofs, central_dir_size; - mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - - pState = pZip->m_pState; - - // no zip64 support yet - if ((pZip->m_total_files > 0xFFFF) || - ((pZip->m_archive_size + pState->m_central_dir.m_size + - 
MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - central_dir_ofs = 0; - central_dir_size = 0; - if (pZip->m_total_files) { - // Write central directory - central_dir_ofs = pZip->m_archive_size; - central_dir_size = pState->m_central_dir.m_size; - pZip->m_central_directory_file_ofs = central_dir_ofs; - if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, - pState->m_central_dir.m_p, - (size_t)central_dir_size) != central_dir_size) - return MZ_FALSE; - pZip->m_archive_size += central_dir_size; - } - - // Write end of central directory record - MZ_CLEAR_OBJ(hdr); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, - pZip->m_total_files); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, - sizeof(hdr)) != sizeof(hdr)) - return MZ_FALSE; -#ifndef MINIZ_NO_STDIO - if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_archive_size += sizeof(hdr); - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize) { - if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; - if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; - if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; - - *pBuf = pZip->m_pState->m_pMem; - *pSize = pZip->m_pState->m_mem_size; - pZip->m_pState->m_pMem = NULL; - pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_end(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_bool status = MZ_TRUE; - if ((!pZip) || 
(!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) - return MZ_FALSE; - - pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); - pState->m_pMem = NULL; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - return status; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_bool status, created_new_archive = MZ_FALSE; - mz_zip_archive zip_archive; - struct MZ_FILE_STAT_STRUCT file_stat; - MZ_CLEAR_OBJ(zip_archive); - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || - ((comment_size) && (!pComment)) || - ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { - // Create a new archive. - if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) - return MZ_FALSE; - created_new_archive = MZ_TRUE; - } else { - // Append to an existing archive. 
- if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return MZ_FALSE; - if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { - mz_zip_reader_end(&zip_archive); - return MZ_FALSE; - } - } - status = - mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, - pComment, comment_size, level_and_flags, 0, 0); - // Always finalize, even if adding failed for some reason, so we have a valid - // central directory. (This may not always succeed, but we can try.) - if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; - if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; - if ((!status) && (created_new_archive)) { - // It's a new archive and something went wrong, so just delete it. - int ignoredStatus = MZ_DELETE_FILE(pZip_filename); - (void)ignoredStatus; - } - return status; -} - -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint flags) { - int file_index; - mz_zip_archive zip_archive; - void *p = NULL; - - if (pSize) *pSize = 0; - - if ((!pZip_filename) || (!pArchive_name)) return NULL; - - MZ_CLEAR_OBJ(zip_archive); - if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return NULL; - - if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, - flags)) >= 0) - p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); - - mz_zip_reader_end(&zip_archive); - return p; -} - -#endif // #ifndef MINIZ_NO_STDIO - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -#ifdef __cplusplus -} -#endif - -#endif // MINIZ_HEADER_FILE_ONLY - -/* - This is free and unencumbered software released into the public domain. 
- - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - - For more information, please refer to -*/ - -// ---------------------- end of miniz ---------------------------------------- - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#ifdef _MSC_VER -#pragma warning(pop) -#endif -} // namespace miniz -#else - -// Reuse MINIZ_LITTE_ENDIAN macro - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. 
-#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#endif // TINYEXR_USE_MINIZ - -// static bool IsBigEndian(void) { -// union { -// unsigned int i; -// char c[4]; -// } bint = {0x01020304}; -// -// return bint.c[0] == 1; -//} - -static void SetErrorMessage(const std::string &msg, const char **err) { - if (err) { -#ifdef _WIN32 - (*err) = _strdup(msg.c_str()); -#else - (*err) = strdup(msg.c_str()); -#endif - } -} - -static const int kEXRVersionSize = 8; - -static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; -} - -static void swap2(unsigned short *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - unsigned short tmp = *val; - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[1]; - dst[1] = src[0]; -#endif -} - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-function" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -#endif -static void cpy4(int *dst_val, const int *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} - -static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} - -static void cpy4(float *dst_val, const float *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#ifdef __GNUC__ 
-#pragma GCC diagnostic pop -#endif - -static void swap4(unsigned int *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - unsigned int tmp = *val; - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; -#endif -} - -#if 0 -static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; -} -#endif - -static void swap8(tinyexr::tinyexr_uint64 *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - tinyexr::tinyexr_uint64 tmp = (*val); - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; -#endif -} - -// https://gist.github.com/rygorous/2156668 -// Reuse MINIZ_LITTLE_ENDIAN flag from miniz. 
-union FP32 { - unsigned int u; - float f; - struct { -#if MINIZ_LITTLE_ENDIAN - unsigned int Mantissa : 23; - unsigned int Exponent : 8; - unsigned int Sign : 1; -#else - unsigned int Sign : 1; - unsigned int Exponent : 8; - unsigned int Mantissa : 23; -#endif - } s; -}; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpadded" -#endif - -union FP16 { - unsigned short u; - struct { -#if MINIZ_LITTLE_ENDIAN - unsigned int Mantissa : 10; - unsigned int Exponent : 5; - unsigned int Sign : 1; -#else - unsigned int Sign : 1; - unsigned int Exponent : 5; - unsigned int Mantissa : 10; -#endif - } s; -}; - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -static FP32 half_to_float(FP16 h) { - static const FP32 magic = {113 << 23}; - static const unsigned int shifted_exp = 0x7c00 - << 13; // exponent mask after shift - FP32 o; - - o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits - unsigned int exp_ = shifted_exp & o.u; // just the exponent - o.u += (127 - 15) << 23; // exponent adjust - - // handle exponent special cases - if (exp_ == shifted_exp) // Inf/NaN? - o.u += (128 - 16) << 23; // extra exp adjust - else if (exp_ == 0) // Zero/Denormal? - { - o.u += 1 << 23; // extra exp adjust - o.f -= magic.f; // renormalize - } - - o.u |= (h.u & 0x8000U) << 16U; // sign bit - return o; -} - -static FP16 float_to_half_full(FP32 f) { - FP16 o = {0}; - - // Based on ISPC reference code (with minor modifications) - if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) - o.s.Exponent = 0; - else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) - { - o.s.Exponent = 31; - o.s.Mantissa = f.s.Mantissa ? 
0x200 : 0; // NaN->qNaN and Inf->Inf - } else // Normalized number - { - // Exponent unbias the single, then bias the halfp - int newexp = f.s.Exponent - 127 + 15; - if (newexp >= 31) // Overflow, return signed infinity - o.s.Exponent = 31; - else if (newexp <= 0) // Underflow - { - if ((14 - newexp) <= 24) // Mantissa might be non-zero - { - unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit - o.s.Mantissa = mant >> (14 - newexp); - if ((mant >> (13 - newexp)) & 1) // Check for rounding - o.u++; // Round, might overflow into exp bit, but this is OK - } - } else { - o.s.Exponent = static_cast(newexp); - o.s.Mantissa = f.s.Mantissa >> 13; - if (f.s.Mantissa & 0x1000) // Check for rounding - o.u++; // Round, might overflow to inf, this is OK - } - } - - o.s.Sign = f.s.Sign; - return o; -} - -// NOTE: From OpenEXR code -// #define IMF_INCREASING_Y 0 -// #define IMF_DECREASING_Y 1 -// #define IMF_RAMDOM_Y 2 -// -// #define IMF_NO_COMPRESSION 0 -// #define IMF_RLE_COMPRESSION 1 -// #define IMF_ZIPS_COMPRESSION 2 -// #define IMF_ZIP_COMPRESSION 3 -// #define IMF_PIZ_COMPRESSION 4 -// #define IMF_PXR24_COMPRESSION 5 -// #define IMF_B44_COMPRESSION 6 -// #define IMF_B44A_COMPRESSION 7 - -#ifdef __clang__ -#pragma clang diagnostic push - -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#endif - -static const char *ReadString(std::string *s, const char *ptr, size_t len) { - // Read untile NULL(\0). 
- const char *p = ptr; - const char *q = ptr; - while ((size_t(q - ptr) < len) && (*q) != 0) { - q++; - } - - if (size_t(q - ptr) >= len) { - (*s) = std::string(); - return NULL; - } - - (*s) = std::string(p, q); - - return q + 1; // skip '\0' -} - -static bool ReadAttribute(std::string *name, std::string *type, - std::vector *data, size_t *marker_size, - const char *marker, size_t size) { - size_t name_len = strnlen(marker, size); - if (name_len == size) { - // String does not have a terminating character. - return false; - } - *name = std::string(marker, name_len); - - marker += name_len + 1; - size -= name_len + 1; - - size_t type_len = strnlen(marker, size); - if (type_len == size) { - return false; - } - *type = std::string(marker, type_len); - - marker += type_len + 1; - size -= type_len + 1; - - if (size < sizeof(uint32_t)) { - return false; - } - - uint32_t data_len; - memcpy(&data_len, marker, sizeof(uint32_t)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (data_len == 0) { - if ((*type).compare("string") == 0) { - // Accept empty string attribute. 
- - marker += sizeof(uint32_t); - size -= sizeof(uint32_t); - - *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t); - - data->resize(1); - (*data)[0] = '\0'; - - return true; - } else { - return false; - } - } - - marker += sizeof(uint32_t); - size -= sizeof(uint32_t); - - if (size < data_len) { - return false; - } - - data->resize(static_cast(data_len)); - memcpy(&data->at(0), marker, static_cast(data_len)); - - *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; - return true; -} - -static void WriteAttributeToMemory(std::vector *out, - const char *name, const char *type, - const unsigned char *data, int len) { - out->insert(out->end(), name, name + strlen(name) + 1); - out->insert(out->end(), type, type + strlen(type) + 1); - - int outLen = len; - tinyexr::swap4(reinterpret_cast(&outLen)); - out->insert(out->end(), reinterpret_cast(&outLen), - reinterpret_cast(&outLen) + sizeof(int)); - out->insert(out->end(), data, data + len); -} - -typedef struct { - std::string name; // less than 255 bytes long - int pixel_type; - int x_sampling; - int y_sampling; - unsigned char p_linear; - unsigned char pad[3]; -} ChannelInfo; - -typedef struct HeaderInfo { - std::vector channels; - std::vector attributes; - - int data_window[4]; - int line_order; - int display_window[4]; - float screen_window_center[2]; - float screen_window_width; - float pixel_aspect_ratio; - - int chunk_count; - - // Tiled format - int tile_size_x; - int tile_size_y; - int tile_level_mode; - int tile_rounding_mode; - - unsigned int header_len; - - int compression_type; - - void clear() { - channels.clear(); - attributes.clear(); - - data_window[0] = 0; - data_window[1] = 0; - data_window[2] = 0; - data_window[3] = 0; - line_order = 0; - display_window[0] = 0; - display_window[1] = 0; - display_window[2] = 0; - display_window[3] = 0; - screen_window_center[0] = 0.0f; - screen_window_center[1] = 0.0f; - screen_window_width = 0.0f; - pixel_aspect_ratio = 0.0f; - - 
chunk_count = 0; - - // Tiled format - tile_size_x = 0; - tile_size_y = 0; - tile_level_mode = 0; - tile_rounding_mode = 0; - - header_len = 0; - compression_type = 0; - } -} HeaderInfo; - -static bool ReadChannelInfo(std::vector &channels, - const std::vector &data) { - const char *p = reinterpret_cast(&data.at(0)); - - for (;;) { - if ((*p) == 0) { - break; - } - ChannelInfo info; - - tinyexr_int64 data_len = static_cast(data.size()) - - (p - reinterpret_cast(data.data())); - if (data_len < 0) { - return false; - } - - p = ReadString(&info.name, p, size_t(data_len)); - if ((p == NULL) && (info.name.empty())) { - // Buffer overrun. Issue #51. - return false; - } - - const unsigned char *data_end = - reinterpret_cast(p) + 16; - if (data_end >= (data.data() + data.size())) { - return false; - } - - memcpy(&info.pixel_type, p, sizeof(int)); - p += 4; - info.p_linear = static_cast(p[0]); // uchar - p += 1 + 3; // reserved: uchar[3] - memcpy(&info.x_sampling, p, sizeof(int)); // int - p += 4; - memcpy(&info.y_sampling, p, sizeof(int)); // int - p += 4; - - tinyexr::swap4(reinterpret_cast(&info.pixel_type)); - tinyexr::swap4(reinterpret_cast(&info.x_sampling)); - tinyexr::swap4(reinterpret_cast(&info.y_sampling)); - - channels.push_back(info); - } - - return true; -} - -static void WriteChannelInfo(std::vector &data, - const std::vector &channels) { - size_t sz = 0; - - // Calculate total size. 
- for (size_t c = 0; c < channels.size(); c++) { - sz += strlen(channels[c].name.c_str()) + 1; // +1 for \0 - sz += 16; // 4 * int - } - data.resize(sz + 1); - - unsigned char *p = &data.at(0); - - for (size_t c = 0; c < channels.size(); c++) { - memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str())); - p += strlen(channels[c].name.c_str()); - (*p) = '\0'; - p++; - - int pixel_type = channels[c].pixel_type; - int x_sampling = channels[c].x_sampling; - int y_sampling = channels[c].y_sampling; - tinyexr::swap4(reinterpret_cast(&pixel_type)); - tinyexr::swap4(reinterpret_cast(&x_sampling)); - tinyexr::swap4(reinterpret_cast(&y_sampling)); - - memcpy(p, &pixel_type, sizeof(int)); - p += sizeof(int); - - (*p) = channels[c].p_linear; - p += 4; - - memcpy(p, &x_sampling, sizeof(int)); - p += sizeof(int); - - memcpy(p, &y_sampling, sizeof(int)); - p += sizeof(int); - } - - (*p) = '\0'; -} - -static void CompressZip(unsigned char *dst, - tinyexr::tinyexr_uint64 &compressedSize, - const unsigned char *src, unsigned long src_size) { - std::vector tmpBuf(src_size); - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfZipCompressor.cpp - // - - // - // Reorder the pixel data. - // - - const char *srcPtr = reinterpret_cast(src); - - { - char *t1 = reinterpret_cast(&tmpBuf.at(0)); - char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; - const char *stop = srcPtr + src_size; - - for (;;) { - if (srcPtr < stop) - *(t1++) = *(srcPtr++); - else - break; - - if (srcPtr < stop) - *(t2++) = *(srcPtr++); - else - break; - } - } - - // - // Predictor. 
- // - - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + src_size; - int p = t[-1]; - - while (t < stop) { - int d = int(t[0]) - p + (128 + 256); - p = t[0]; - t[0] = static_cast(d); - ++t; - } - } - -#if TINYEXR_USE_MINIZ - // - // Compress the data using miniz - // - - miniz::mz_ulong outSize = miniz::mz_compressBound(src_size); - int ret = miniz::mz_compress( - dst, &outSize, static_cast(&tmpBuf.at(0)), - src_size); - assert(ret == miniz::MZ_OK); - (void)ret; - - compressedSize = outSize; -#else - uLong outSize = compressBound(static_cast(src_size)); - int ret = compress(dst, &outSize, static_cast(&tmpBuf.at(0)), - src_size); - assert(ret == Z_OK); - - compressedSize = outSize; -#endif - - // Use uncompressed data when compressed data is larger than uncompressed. - // (Issue 40) - if (compressedSize >= src_size) { - compressedSize = src_size; - memcpy(dst, src, src_size); - } -} - -static bool DecompressZip(unsigned char *dst, - unsigned long *uncompressed_size /* inout */, - const unsigned char *src, unsigned long src_size) { - if ((*uncompressed_size) == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - return true; - } - std::vector tmpBuf(*uncompressed_size); - -#if TINYEXR_USE_MINIZ - int ret = - miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (miniz::MZ_OK != ret) { - return false; - } -#else - int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (Z_OK != ret) { - return false; - } -#endif - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfZipCompressor.cpp - // - - // Predictor. - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); - - while (t < stop) { - int d = int(t[-1]) + int(t[0]) - 128; - t[0] = static_cast(d); - ++t; - } - } - - // Reorder the pixel data. 
- { - const char *t1 = reinterpret_cast(&tmpBuf.at(0)); - const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + - (*uncompressed_size + 1) / 2; - char *s = reinterpret_cast(dst); - char *stop = s + (*uncompressed_size); - - for (;;) { - if (s < stop) - *(s++) = *(t1++); - else - break; - - if (s < stop) - *(s++) = *(t2++); - else - break; - } - } - - return true; -} - -// RLE code from OpenEXR -------------------------------------- - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wsign-conversion" -#endif - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4204) // nonstandard extension used : non-constant - // aggregate initializer (also supported by GNU - // C and C99, so no big deal) -#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is - // deprecated. Instead, use the ISO C and C++ - // conformant name: _strdup. -#endif - -const int MIN_RUN_LENGTH = 3; -const int MAX_RUN_LENGTH = 127; - -// -// Compress an array of bytes, using run-length encoding, -// and return the length of the compressed data. 
-// - -static int rleCompress(int inLength, const char in[], signed char out[]) { - const char *inEnd = in + inLength; - const char *runStart = in; - const char *runEnd = in + 1; - signed char *outWrite = out; - - while (runStart < inEnd) { - while (runEnd < inEnd && *runStart == *runEnd && - runEnd - runStart - 1 < MAX_RUN_LENGTH) { - ++runEnd; - } - - if (runEnd - runStart >= MIN_RUN_LENGTH) { - // - // Compressable run - // - - *outWrite++ = static_cast(runEnd - runStart) - 1; - *outWrite++ = *(reinterpret_cast(runStart)); - runStart = runEnd; - } else { - // - // Uncompressable run - // - - while (runEnd < inEnd && - ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || - (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && - runEnd - runStart < MAX_RUN_LENGTH) { - ++runEnd; - } - - *outWrite++ = static_cast(runStart - runEnd); - - while (runStart < runEnd) { - *outWrite++ = *(reinterpret_cast(runStart++)); - } - } - - ++runEnd; - } - - return static_cast(outWrite - out); -} - -// -// Uncompress an array of bytes compressed with rleCompress(). -// Returns the length of the oncompressed data, or 0 if the -// length of the uncompressed data would be more than maxLength. -// - -static int rleUncompress(int inLength, int maxLength, const signed char in[], - char out[]) { - char *outStart = out; - - while (inLength > 0) { - if (*in < 0) { - int count = -(static_cast(*in++)); - inLength -= count + 1; - - // Fixes #116: Add bounds check to in buffer. 
- if ((0 > (maxLength -= count)) || (inLength < 0)) return 0; - - memcpy(out, in, count); - out += count; - in += count; - } else { - int count = *in++; - inLength -= 2; - - if (0 > (maxLength -= count + 1)) return 0; - - memset(out, *reinterpret_cast(in), count + 1); - out += count + 1; - - in++; - } - } - - return static_cast(out - outStart); -} - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -// End of RLE code from OpenEXR ----------------------------------- - -static void CompressRle(unsigned char *dst, - tinyexr::tinyexr_uint64 &compressedSize, - const unsigned char *src, unsigned long src_size) { - std::vector tmpBuf(src_size); - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfRleCompressor.cpp - // - - // - // Reorder the pixel data. - // - - const char *srcPtr = reinterpret_cast(src); - - { - char *t1 = reinterpret_cast(&tmpBuf.at(0)); - char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; - const char *stop = srcPtr + src_size; - - for (;;) { - if (srcPtr < stop) - *(t1++) = *(srcPtr++); - else - break; - - if (srcPtr < stop) - *(t2++) = *(srcPtr++); - else - break; - } - } - - // - // Predictor. - // - - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + src_size; - int p = t[-1]; - - while (t < stop) { - int d = int(t[0]) - p + (128 + 256); - p = t[0]; - t[0] = static_cast(d); - ++t; - } - } - - // outSize will be (srcSiz * 3) / 2 at max. - int outSize = rleCompress(static_cast(src_size), - reinterpret_cast(&tmpBuf.at(0)), - reinterpret_cast(dst)); - assert(outSize > 0); - - compressedSize = static_cast(outSize); - - // Use uncompressed data when compressed data is larger than uncompressed. 
- // (Issue 40) - if (compressedSize >= src_size) { - compressedSize = src_size; - memcpy(dst, src, src_size); - } -} - -static bool DecompressRle(unsigned char *dst, - const unsigned long uncompressed_size, - const unsigned char *src, unsigned long src_size) { - if (uncompressed_size == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - return true; - } - - // Workaround for issue #112. - // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`. - if (src_size <= 2) { - return false; - } - - std::vector tmpBuf(uncompressed_size); - - int ret = rleUncompress(static_cast(src_size), - static_cast(uncompressed_size), - reinterpret_cast(src), - reinterpret_cast(&tmpBuf.at(0))); - if (ret != static_cast(uncompressed_size)) { - return false; - } - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfRleCompressor.cpp - // - - // Predictor. - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; - - while (t < stop) { - int d = int(t[-1]) + int(t[0]) - 128; - t[0] = static_cast(d); - ++t; - } - } - - // Reorder the pixel data. 
- { - const char *t1 = reinterpret_cast(&tmpBuf.at(0)); - const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + - (uncompressed_size + 1) / 2; - char *s = reinterpret_cast(dst); - char *stop = s + uncompressed_size; - - for (;;) { - if (s < stop) - *(s++) = *(t1++); - else - break; - - if (s < stop) - *(s++) = *(t2++); - else - break; - } - } - - return true; -} - -#if TINYEXR_USE_PIZ - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wpadded" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++11-extensions" -#pragma clang diagnostic ignored "-Wconversion" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" - -#if __has_warning("-Wcast-qual") -#pragma clang diagnostic ignored "-Wcast-qual" -#endif - -#endif - -// -// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp -// -// ----------------------------------------------------------------- -// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas -// Digital Ltd. LLC) -// (3 clause BSD license) -// - -struct PIZChannelData { - unsigned short *start; - unsigned short *end; - int nx; - int ny; - int ys; - int size; -}; - -//----------------------------------------------------------------------------- -// -// 16-bit Haar Wavelet encoding and decoding -// -// The source code in this file is derived from the encoding -// and decoding routines written by Christian Rouet for his -// PIZ image file format. -// -//----------------------------------------------------------------------------- - -// -// Wavelet basis functions without modulo arithmetic; they produce -// the best compression ratios when the wavelet-transformed data are -// Huffman-encoded, but the wavelet transform works only for 14-bit -// data (untransformed data values must be less than (1 << 14)). 
-// - -inline void wenc14(unsigned short a, unsigned short b, unsigned short &l, - unsigned short &h) { - short as = static_cast(a); - short bs = static_cast(b); - - short ms = (as + bs) >> 1; - short ds = as - bs; - - l = static_cast(ms); - h = static_cast(ds); -} - -inline void wdec14(unsigned short l, unsigned short h, unsigned short &a, - unsigned short &b) { - short ls = static_cast(l); - short hs = static_cast(h); - - int hi = hs; - int ai = ls + (hi & 1) + (hi >> 1); - - short as = static_cast(ai); - short bs = static_cast(ai - hi); - - a = static_cast(as); - b = static_cast(bs); -} - -// -// Wavelet basis functions with modulo arithmetic; they work with full -// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't -// compress the data quite as well. -// - -const int NBITS = 16; -const int A_OFFSET = 1 << (NBITS - 1); -const int M_OFFSET = 1 << (NBITS - 1); -const int MOD_MASK = (1 << NBITS) - 1; - -inline void wenc16(unsigned short a, unsigned short b, unsigned short &l, - unsigned short &h) { - int ao = (a + A_OFFSET) & MOD_MASK; - int m = ((ao + b) >> 1); - int d = ao - b; - - if (d < 0) m = (m + M_OFFSET) & MOD_MASK; - - d &= MOD_MASK; - - l = static_cast(m); - h = static_cast(d); -} - -inline void wdec16(unsigned short l, unsigned short h, unsigned short &a, - unsigned short &b) { - int m = l; - int d = h; - int bb = (m - (d >> 1)) & MOD_MASK; - int aa = (d + bb - A_OFFSET) & MOD_MASK; - b = static_cast(bb); - a = static_cast(aa); -} - -// -// 2D Wavelet encoding: -// - -static void wav2Encode( - unsigned short *in, // io: values are transformed in place - int nx, // i : x size - int ox, // i : x offset - int ny, // i : y size - int oy, // i : y offset - unsigned short mx) // i : maximum in[x][y] value -{ - bool w14 = (mx < (1 << 14)); - int n = (nx > ny) ? 
ny : nx; - int p = 1; // == 1 << level - int p2 = 2; // == 1 << (level+1) - - // - // Hierachical loop on smaller dimension n - // - - while (p2 <= n) { - unsigned short *py = in; - unsigned short *ey = in + oy * (ny - p2); - int oy1 = oy * p; - int oy2 = oy * p2; - int ox1 = ox * p; - int ox2 = ox * p2; - unsigned short i00, i01, i10, i11; - - // - // Y loop - // - - for (; py <= ey; py += oy2) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - // - // X loop - // - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - unsigned short *p10 = px + oy1; - unsigned short *p11 = p10 + ox1; - - // - // 2D wavelet encoding - // - - if (w14) { - wenc14(*px, *p01, i00, i01); - wenc14(*p10, *p11, i10, i11); - wenc14(i00, i10, *px, *p10); - wenc14(i01, i11, *p01, *p11); - } else { - wenc16(*px, *p01, i00, i01); - wenc16(*p10, *p11, i10, i11); - wenc16(i00, i10, *px, *p10); - wenc16(i01, i11, *p01, *p11); - } - } - - // - // Encode (1D) odd column (still in Y loop) - // - - if (nx & p) { - unsigned short *p10 = px + oy1; - - if (w14) - wenc14(*px, *p10, i00, *p10); - else - wenc16(*px, *p10, i00, *p10); - - *px = i00; - } - } - - // - // Encode (1D) odd line (must loop in X) - // - - if (ny & p) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - - if (w14) - wenc14(*px, *p01, i00, *p01); - else - wenc16(*px, *p01, i00, *p01); - - *px = i00; - } - } - - // - // Next level - // - - p = p2; - p2 <<= 1; - } -} - -// -// 2D Wavelet decoding: -// - -static void wav2Decode( - unsigned short *in, // io: values are transformed in place - int nx, // i : x size - int ox, // i : x offset - int ny, // i : y size - int oy, // i : y offset - unsigned short mx) // i : maximum in[x][y] value -{ - bool w14 = (mx < (1 << 14)); - int n = (nx > ny) ? 
ny : nx; - int p = 1; - int p2; - - // - // Search max level - // - - while (p <= n) p <<= 1; - - p >>= 1; - p2 = p; - p >>= 1; - - // - // Hierarchical loop on smaller dimension n - // - - while (p >= 1) { - unsigned short *py = in; - unsigned short *ey = in + oy * (ny - p2); - int oy1 = oy * p; - int oy2 = oy * p2; - int ox1 = ox * p; - int ox2 = ox * p2; - unsigned short i00, i01, i10, i11; - - // - // Y loop - // - - for (; py <= ey; py += oy2) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - // - // X loop - // - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - unsigned short *p10 = px + oy1; - unsigned short *p11 = p10 + ox1; - - // - // 2D wavelet decoding - // - - if (w14) { - wdec14(*px, *p10, i00, i10); - wdec14(*p01, *p11, i01, i11); - wdec14(i00, i01, *px, *p01); - wdec14(i10, i11, *p10, *p11); - } else { - wdec16(*px, *p10, i00, i10); - wdec16(*p01, *p11, i01, i11); - wdec16(i00, i01, *px, *p01); - wdec16(i10, i11, *p10, *p11); - } - } - - // - // Decode (1D) odd column (still in Y loop) - // - - if (nx & p) { - unsigned short *p10 = px + oy1; - - if (w14) - wdec14(*px, *p10, i00, *p10); - else - wdec16(*px, *p10, i00, *p10); - - *px = i00; - } - } - - // - // Decode (1D) odd line (must loop in X) - // - - if (ny & p) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - - if (w14) - wdec14(*px, *p01, i00, *p01); - else - wdec16(*px, *p01, i00, *p01); - - *px = i00; - } - } - - // - // Next level - // - - p2 = p; - p >>= 1; - } -} - -//----------------------------------------------------------------------------- -// -// 16-bit Huffman compression and decompression. -// -// The source code in this file is derived from the 8-bit -// Huffman compression and decompression routines written -// by Christian Rouet for his PIZ image file format. 
-// -//----------------------------------------------------------------------------- - -// Adds some modification for tinyexr. - -const int HUF_ENCBITS = 16; // literal (value) bit length -const int HUF_DECBITS = 14; // decoding bit size (>= 8) - -const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size -const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size -const int HUF_DECMASK = HUF_DECSIZE - 1; - -struct HufDec { // short code long code - //------------------------------- - int len : 8; // code length 0 - int lit : 24; // lit p size - int *p; // 0 lits -}; - -inline long long hufLength(long long code) { return code & 63; } - -inline long long hufCode(long long code) { return code >> 6; } - -inline void outputBits(int nBits, long long bits, long long &c, int &lc, - char *&out) { - c <<= nBits; - lc += nBits; - - c |= bits; - - while (lc >= 8) *out++ = static_cast((c >> (lc -= 8))); -} - -inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { - while (lc < nBits) { - c = (c << 8) | *(reinterpret_cast(in++)); - lc += 8; - } - - lc -= nBits; - return (c >> lc) & ((1 << nBits) - 1); -} - -// -// ENCODING TABLE BUILDING & (UN)PACKING -// - -// -// Build a "canonical" Huffman code table: -// - for each (uncompressed) symbol, hcode contains the length -// of the corresponding code (in the compressed data) -// - canonical codes are computed and stored in hcode -// - the rules for constructing canonical codes are as follows: -// * shorter codes (if filled with zeroes to the right) -// have a numerically higher value than longer codes -// * for codes with the same length, numerical values -// increase with numerical symbol values -// - because the canonical code table can be constructed from -// symbol lengths alone, the code table can be transmitted -// without sending the actual code values -// - see http://www.compressconsult.com/huffman/ -// - -static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) { - long long 
n[59]; - - // - // For each i from 0 through 58, count the - // number of different codes of length i, and - // store the count in n[i]. - // - - for (int i = 0; i <= 58; ++i) n[i] = 0; - - for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1; - - // - // For each i from 58 through 1, compute the - // numerically lowest code with length i, and - // store that code in n[i]. - // - - long long c = 0; - - for (int i = 58; i > 0; --i) { - long long nc = ((c + n[i]) >> 1); - n[i] = c; - c = nc; - } - - // - // hcode[i] contains the length, l, of the - // code for symbol i. Assign the next available - // code of length l to the symbol and store both - // l and the code in hcode[i]. - // - - for (int i = 0; i < HUF_ENCSIZE; ++i) { - int l = static_cast(hcode[i]); - - if (l > 0) hcode[i] = l | (n[l]++ << 6); - } -} - -// -// Compute Huffman codes (based on frq input) and store them in frq: -// - code structure is : [63:lsb - 6:msb] | [5-0: bit length]; -// - max code length is 58 bits; -// - codes outside the range [im-iM] have a null length (unused values); -// - original frequencies are destroyed; -// - encoding tables are used by hufEncode() and hufBuildDecTable(); -// - -struct FHeapCompare { - bool operator()(long long *a, long long *b) { return *a > *b; } -}; - -static void hufBuildEncTable( - long long *frq, // io: input frequencies [HUF_ENCSIZE], output table - int *im, // o: min frq index - int *iM) // o: max frq index -{ - // - // This function assumes that when it is called, array frq - // indicates the frequency of all possible symbols in the data - // that are to be Huffman-encoded. (frq[i] contains the number - // of occurrences of symbol i in the data.) 
- // - // The loop below does three things: - // - // 1) Finds the minimum and maximum indices that point - // to non-zero entries in frq: - // - // frq[im] != 0, and frq[i] == 0 for all i < im - // frq[iM] != 0, and frq[i] == 0 for all i > iM - // - // 2) Fills array fHeap with pointers to all non-zero - // entries in frq. - // - // 3) Initializes array hlink such that hlink[i] == i - // for all array entries. - // - - std::vector hlink(HUF_ENCSIZE); - std::vector fHeap(HUF_ENCSIZE); - - *im = 0; - - while (!frq[*im]) (*im)++; - - int nf = 0; - - for (int i = *im; i < HUF_ENCSIZE; i++) { - hlink[i] = i; - - if (frq[i]) { - fHeap[nf] = &frq[i]; - nf++; - *iM = i; - } - } - - // - // Add a pseudo-symbol, with a frequency count of 1, to frq; - // adjust the fHeap and hlink array accordingly. Function - // hufEncode() uses the pseudo-symbol for run-length encoding. - // - - (*iM)++; - frq[*iM] = 1; - fHeap[nf] = &frq[*iM]; - nf++; - - // - // Build an array, scode, such that scode[i] contains the number - // of bits assigned to symbol i. Conceptually this is done by - // constructing a tree whose leaves are the symbols with non-zero - // frequency: - // - // Make a heap that contains all symbols with a non-zero frequency, - // with the least frequent symbol on top. - // - // Repeat until only one symbol is left on the heap: - // - // Take the two least frequent symbols off the top of the heap. - // Create a new node that has first two nodes as children, and - // whose frequency is the sum of the frequencies of the first - // two nodes. Put the new node back into the heap. - // - // The last node left on the heap is the root of the tree. For each - // leaf node, the distance between the root and the leaf is the length - // of the code for the corresponding symbol. - // - // The loop below doesn't actually build the tree; instead we compute - // the distances of the leaves from the root on the fly. 
When a new - // node is added to the heap, then that node's descendants are linked - // into a single linear list that starts at the new node, and the code - // lengths of the descendants (that is, their distance from the root - // of the tree) are incremented by one. - // - - std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - std::vector scode(HUF_ENCSIZE); - memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); - - while (nf > 1) { - // - // Find the indices, mm and m, of the two smallest non-zero frq - // values in fHeap, add the smallest frq to the second-smallest - // frq, and remove the smallest frq value from fHeap. - // - - int mm = fHeap[0] - frq; - std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - --nf; - - int m = fHeap[0] - frq; - std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - frq[m] += frq[mm]; - std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - // - // The entries in scode are linked into lists with the - // entries in hlink serving as "next" pointers and with - // the end of a list marked by hlink[j] == j. - // - // Traverse the lists that start at scode[m] and scode[mm]. - // For each element visited, increment the length of the - // corresponding code by one bit. (If we visit scode[j] - // during the traversal, then the code for symbol j becomes - // one bit longer.) - // - // Merge the lists that start at scode[m] and scode[mm] - // into a single list that starts at scode[m]. - // - - // - // Add a bit to all codes in the first list. - // - - for (int j = m;; j = hlink[j]) { - scode[j]++; - - assert(scode[j] <= 58); - - if (hlink[j] == j) { - // - // Merge the two lists. - // - - hlink[j] = mm; - break; - } - } - - // - // Add a bit to all codes in the second list - // - - for (int j = mm;; j = hlink[j]) { - scode[j]++; - - assert(scode[j] <= 58); - - if (hlink[j] == j) break; - } - } - - // - // Build a canonical Huffman code table, replacing the code - // lengths in scode with (code, code length) pairs. 
Copy the - // code table from scode into frq. - // - - hufCanonicalCodeTable(scode.data()); - memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); -} - -// -// Pack an encoding table: -// - only code lengths, not actual codes, are stored -// - runs of zeroes are compressed as follows: -// -// unpacked packed -// -------------------------------- -// 1 zero 0 (6 bits) -// 2 zeroes 59 -// 3 zeroes 60 -// 4 zeroes 61 -// 5 zeroes 62 -// n zeroes (6 or more) 63 n-6 (6 + 8 bits) -// - -const int SHORT_ZEROCODE_RUN = 59; -const int LONG_ZEROCODE_RUN = 63; -const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; -const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN; - -static void hufPackEncTable( - const long long *hcode, // i : encoding table [HUF_ENCSIZE] - int im, // i : min hcode index - int iM, // i : max hcode index - char **pcode) // o: ptr to packed table (updated) -{ - char *p = *pcode; - long long c = 0; - int lc = 0; - - for (; im <= iM; im++) { - int l = hufLength(hcode[im]); - - if (l == 0) { - int zerun = 1; - - while ((im < iM) && (zerun < LONGEST_LONG_RUN)) { - if (hufLength(hcode[im + 1]) > 0) break; - im++; - zerun++; - } - - if (zerun >= 2) { - if (zerun >= SHORTEST_LONG_RUN) { - outputBits(6, LONG_ZEROCODE_RUN, c, lc, p); - outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p); - } else { - outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p); - } - continue; - } - } - - outputBits(6, l, c, lc, p); - } - - if (lc > 0) *p++ = (unsigned char)(c << (8 - lc)); - - *pcode = p; -} - -// -// Unpack an encoding table packed by hufPackEncTable(): -// - -static bool hufUnpackEncTable( - const char **pcode, // io: ptr to packed table (updated) - int ni, // i : input size (in bytes) - int im, // i : min hcode index - int iM, // i : max hcode index - long long *hcode) // o: encoding table [HUF_ENCSIZE] -{ - memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE); - - const char *p = *pcode; - long long c = 0; - int lc = 0; - - for (; im <= iM; 
im++) { - if (p - *pcode >= ni) { - return false; - } - - long long l = hcode[im] = getBits(6, c, lc, p); // code length - - if (l == (long long)LONG_ZEROCODE_RUN) { - if (p - *pcode > ni) { - return false; - } - - int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN; - - if (im + zerun > iM + 1) { - return false; - } - - while (zerun--) hcode[im++] = 0; - - im--; - } else if (l >= (long long)SHORT_ZEROCODE_RUN) { - int zerun = l - SHORT_ZEROCODE_RUN + 2; - - if (im + zerun > iM + 1) { - return false; - } - - while (zerun--) hcode[im++] = 0; - - im--; - } - } - - *pcode = const_cast(p); - - hufCanonicalCodeTable(hcode); - - return true; -} - -// -// DECODING TABLE BUILDING -// - -// -// Clear a newly allocated decoding table so that it contains only zeroes. -// - -static void hufClearDecTable(HufDec *hdecod) // io: (allocated by caller) -// decoding table [HUF_DECSIZE] -{ - for (int i = 0; i < HUF_DECSIZE; i++) { - hdecod[i].len = 0; - hdecod[i].lit = 0; - hdecod[i].p = NULL; - } - // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE); -} - -// -// Build a decoding hash table based on the encoding table hcode: -// - short codes (<= HUF_DECBITS) are resolved with a single table access; -// - long code entry allocations are not optimized, because long codes are -// unfrequent; -// - decoding tables are used by hufDecode(); -// - -static bool hufBuildDecTable(const long long *hcode, // i : encoding table - int im, // i : min index in hcode - int iM, // i : max index in hcode - HufDec *hdecod) // o: (allocated by caller) -// decoding table [HUF_DECSIZE] -{ - // - // Init hashtable & loop on all codes. - // Assumes that hufClearDecTable(hdecod) has already been called. - // - - for (; im <= iM; im++) { - long long c = hufCode(hcode[im]); - int l = hufLength(hcode[im]); - - if (c >> l) { - // - // Error: c is supposed to be an l-bit code, - // but c contains a value that is greater - // than the largest l-bit number. 
- // - - // invalidTableEntry(); - return false; - } - - if (l > HUF_DECBITS) { - // - // Long code: add a secondary entry - // - - HufDec *pl = hdecod + (c >> (l - HUF_DECBITS)); - - if (pl->len) { - // - // Error: a short code has already - // been stored in table entry *pl. - // - - // invalidTableEntry(); - return false; - } - - pl->lit++; - - if (pl->p) { - int *p = pl->p; - pl->p = new int[pl->lit]; - - for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i]; - - delete[] p; - } else { - pl->p = new int[1]; - } - - pl->p[pl->lit - 1] = im; - } else if (l) { - // - // Short code: init all primary entries - // - - HufDec *pl = hdecod + (c << (HUF_DECBITS - l)); - - for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) { - if (pl->len || pl->p) { - // - // Error: a short code or a long code has - // already been stored in table entry *pl. - // - - // invalidTableEntry(); - return false; - } - - pl->len = l; - pl->lit = im; - } - } - } - - return true; -} - -// -// Free the long code entries of a decoding table built by hufBuildDecTable() -// - -static void hufFreeDecTable(HufDec *hdecod) // io: Decoding table -{ - for (int i = 0; i < HUF_DECSIZE; i++) { - if (hdecod[i].p) { - delete[] hdecod[i].p; - hdecod[i].p = 0; - } - } -} - -// -// ENCODING -// - -inline void outputCode(long long code, long long &c, int &lc, char *&out) { - outputBits(hufLength(code), hufCode(code), c, lc, out); -} - -inline void sendCode(long long sCode, int runCount, long long runCode, - long long &c, int &lc, char *&out) { - // - // Output a run of runCount instances of the symbol sCount. - // Output the symbols explicitly, or if that is shorter, output - // the sCode symbol once followed by a runCode symbol and runCount - // expressed as an 8-bit number. 
- // - - if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) { - outputCode(sCode, c, lc, out); - outputCode(runCode, c, lc, out); - outputBits(8, runCount, c, lc, out); - } else { - while (runCount-- >= 0) outputCode(sCode, c, lc, out); - } -} - -// -// Encode (compress) ni values based on the Huffman encoding table hcode: -// - -static int hufEncode // return: output size (in bits) - (const long long *hcode, // i : encoding table - const unsigned short *in, // i : uncompressed input buffer - const int ni, // i : input buffer size (in bytes) - int rlc, // i : rl code - char *out) // o: compressed output buffer -{ - char *outStart = out; - long long c = 0; // bits not yet written to out - int lc = 0; // number of valid bits in c (LSB) - int s = in[0]; - int cs = 0; - - // - // Loop on input values - // - - for (int i = 1; i < ni; i++) { - // - // Count same values or send code - // - - if (s == in[i] && cs < 255) { - cs++; - } else { - sendCode(hcode[s], cs, hcode[rlc], c, lc, out); - cs = 0; - } - - s = in[i]; - } - - // - // Send remaining code - // - - sendCode(hcode[s], cs, hcode[rlc], c, lc, out); - - if (lc) *out = (c << (8 - lc)) & 0xff; - - return (out - outStart) * 8 + lc; -} - -// -// DECODING -// - -// -// In order to force the compiler to inline them, -// getChar() and getCode() are implemented as macros -// instead of "inline" functions. 
-// - -#define getChar(c, lc, in) \ - { \ - c = (c << 8) | *(unsigned char *)(in++); \ - lc += 8; \ - } - -#if 0 -#define getCode(po, rlc, c, lc, in, out, ob, oe) \ - { \ - if (po == rlc) { \ - if (lc < 8) getChar(c, lc, in); \ - \ - lc -= 8; \ - \ - unsigned char cs = (c >> lc); \ - \ - if (out + cs > oe) return false; \ - \ - /* TinyEXR issue 78 */ \ - unsigned short s = out[-1]; \ - \ - while (cs-- > 0) *out++ = s; \ - } else if (out < oe) { \ - *out++ = po; \ - } else { \ - return false; \ - } \ - } -#else -static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, - const char *in_end, unsigned short *&out, - const unsigned short *ob, const unsigned short *oe) { - (void)ob; - if (po == rlc) { - if (lc < 8) { - /* TinyEXR issue 78 */ - if ((in + 1) >= in_end) { - return false; - } - - getChar(c, lc, in); - } - - lc -= 8; - - unsigned char cs = (c >> lc); - - if (out + cs > oe) return false; - - // Bounds check for safety - // Issue 100. - if ((out - 1) < ob) return false; - unsigned short s = out[-1]; - - while (cs-- > 0) *out++ = s; - } else if (out < oe) { - *out++ = po; - } else { - return false; - } - return true; -} -#endif - -// -// Decode (uncompress) ni bits based on encoding & decoding tables: -// - -static bool hufDecode(const long long *hcode, // i : encoding table - const HufDec *hdecod, // i : decoding table - const char *in, // i : compressed input buffer - int ni, // i : input size (in bits) - int rlc, // i : run-length code - int no, // i : expected output size (in bytes) - unsigned short *out) // o: uncompressed output buffer -{ - long long c = 0; - int lc = 0; - unsigned short *outb = out; // begin - unsigned short *oe = out + no; // end - const char *ie = in + (ni + 7) / 8; // input byte size - - // - // Loop on input bytes - // - - while (in < ie) { - getChar(c, lc, in); - - // - // Access decoding table - // - - while (lc >= HUF_DECBITS) { - const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; - - if (pl.len) 
{ - // - // Get short code - // - - lc -= pl.len; - // std::cout << "lit = " << pl.lit << std::endl; - // std::cout << "rlc = " << rlc << std::endl; - // std::cout << "c = " << c << std::endl; - // std::cout << "lc = " << lc << std::endl; - // std::cout << "in = " << in << std::endl; - // std::cout << "out = " << out << std::endl; - // std::cout << "oe = " << oe << std::endl; - if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - } else { - if (!pl.p) { - return false; - } - // invalidCode(); // wrong code - - // - // Search long code - // - - int j; - - for (j = 0; j < pl.lit; j++) { - int l = hufLength(hcode[pl.p[j]]); - - while (lc < l && in < ie) // get more bits - getChar(c, lc, in); - - if (lc >= l) { - if (hufCode(hcode[pl.p[j]]) == - ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { - // - // Found : get long code - // - - lc -= l; - if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - break; - } - } - } - - if (j == pl.lit) { - return false; - // invalidCode(); // Not found - } - } - } - } - - // - // Get remaining (short) codes - // - - int i = (8 - ni) & 7; - c >>= i; - lc -= i; - - while (lc > 0) { - const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; - - if (pl.len) { - lc -= pl.len; - if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - } else { - return false; - // invalidCode(); // wrong (long) code - } - } - - if (out - outb != no) { - return false; - } - // notEnoughData (); - - return true; -} - -static void countFrequencies(std::vector &freq, - const unsigned short data[/*n*/], int n) { - for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; - - for (int i = 0; i < n; ++i) ++freq[data[i]]; -} - -static void writeUInt(char buf[4], unsigned int i) { - unsigned char *b = (unsigned char *)buf; - - b[0] = i; - b[1] = i >> 8; - b[2] = i >> 16; - b[3] = i >> 24; -} - -static unsigned int readUInt(const char buf[4]) { - const unsigned char *b = (const 
unsigned char *)buf; - - return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | - ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); -} - -// -// EXTERNAL INTERFACE -// - -static int hufCompress(const unsigned short raw[], int nRaw, - char compressed[]) { - if (nRaw == 0) return 0; - - std::vector freq(HUF_ENCSIZE); - - countFrequencies(freq, raw, nRaw); - - int im = 0; - int iM = 0; - hufBuildEncTable(freq.data(), &im, &iM); - - char *tableStart = compressed + 20; - char *tableEnd = tableStart; - hufPackEncTable(freq.data(), im, iM, &tableEnd); - int tableLength = tableEnd - tableStart; - - char *dataStart = tableEnd; - int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); - int data_length = (nBits + 7) / 8; - - writeUInt(compressed, im); - writeUInt(compressed + 4, iM); - writeUInt(compressed + 8, tableLength); - writeUInt(compressed + 12, nBits); - writeUInt(compressed + 16, 0); // room for future extensions - - return dataStart + data_length - compressed; -} - -static bool hufUncompress(const char compressed[], int nCompressed, - std::vector *raw) { - if (nCompressed == 0) { - if (raw->size() != 0) return false; - - return false; - } - - int im = readUInt(compressed); - int iM = readUInt(compressed + 4); - // int tableLength = readUInt (compressed + 8); - int nBits = readUInt(compressed + 12); - - if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; - - const char *ptr = compressed + 20; - - // - // Fast decoder needs at least 2x64-bits of compressed data, and - // needs to be run-able on this platform. 
Otherwise, fall back - // to the original decoder - // - - // if (FastHufDecoder::enabled() && nBits > 128) - //{ - // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); - // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); - //} - // else - { - std::vector freq(HUF_ENCSIZE); - std::vector hdec(HUF_DECSIZE); - - hufClearDecTable(&hdec.at(0)); - - hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, - &freq.at(0)); - - { - if (nBits > 8 * (nCompressed - (ptr - compressed))) { - return false; - } - - hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); - hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(), - raw->data()); - } - // catch (...) - //{ - // hufFreeDecTable (hdec); - // throw; - //} - - hufFreeDecTable(&hdec.at(0)); - } - - return true; -} - -// -// Functions to compress the range of values in the pixel data -// - -const int USHORT_RANGE = (1 << 16); -const int BITMAP_SIZE = (USHORT_RANGE >> 3); - -static void bitmapFromData(const unsigned short data[/*nData*/], int nData, - unsigned char bitmap[BITMAP_SIZE], - unsigned short &minNonZero, - unsigned short &maxNonZero) { - for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; - - for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); - - bitmap[0] &= ~1; // zero is not explicitly stored in - // the bitmap; we assume that the - // data always contain zeroes - minNonZero = BITMAP_SIZE - 1; - maxNonZero = 0; - - for (int i = 0; i < BITMAP_SIZE; ++i) { - if (bitmap[i]) { - if (minNonZero > i) minNonZero = i; - if (maxNonZero < i) maxNonZero = i; - } - } -} - -static unsigned short forwardLutFromBitmap( - const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { - int k = 0; - - for (int i = 0; i < USHORT_RANGE; ++i) { - if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) - lut[i] = k++; - else - lut[i] = 0; - } - - return k - 1; // maximum value stored in lut[], -} // i.e. 
number of ones in bitmap minus 1 - -static unsigned short reverseLutFromBitmap( - const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { - int k = 0; - - for (int i = 0; i < USHORT_RANGE; ++i) { - if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; - } - - int n = k - 1; - - while (k < USHORT_RANGE) lut[k++] = 0; - - return n; // maximum k where lut[k] is non-zero, -} // i.e. number of ones in bitmap minus 1 - -static void applyLut(const unsigned short lut[USHORT_RANGE], - unsigned short data[/*nData*/], int nData) { - for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; -} - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif // __clang__ - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, - const unsigned char *inPtr, size_t inSize, - const std::vector &channelInfo, - int data_width, int num_lines) { - std::vector bitmap(BITMAP_SIZE); - unsigned short minNonZero; - unsigned short maxNonZero; - -#if !MINIZ_LITTLE_ENDIAN - // @todo { PIZ compression on BigEndian architecture. } - assert(0); - return false; -#endif - - // Assume `inSize` is multiple of 2 or 4. 
- std::vector tmpBuffer(inSize / sizeof(unsigned short)); - - std::vector channelData(channelInfo.size()); - unsigned short *tmpBufferEnd = &tmpBuffer.at(0); - - for (size_t c = 0; c < channelData.size(); c++) { - PIZChannelData &cd = channelData[c]; - - cd.start = tmpBufferEnd; - cd.end = cd.start; - - cd.nx = data_width; - cd.ny = num_lines; - // cd.ys = c.channel().ySampling; - - size_t pixelSize = sizeof(int); // UINT and FLOAT - if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - pixelSize = sizeof(short); - } - - cd.size = static_cast(pixelSize / sizeof(short)); - - tmpBufferEnd += cd.nx * cd.ny * cd.size; - } - - const unsigned char *ptr = inPtr; - for (int y = 0; y < num_lines; ++y) { - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - // if (modp (y, cd.ys) != 0) - // continue; - - size_t n = static_cast(cd.nx * cd.size); - memcpy(cd.end, ptr, n * sizeof(unsigned short)); - ptr += n * sizeof(unsigned short); - cd.end += n; - } - } - - bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), - bitmap.data(), minNonZero, maxNonZero); - - std::vector lut(USHORT_RANGE); - unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), lut.data()); - applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); - - // - // Store range compression info in _outBuffer - // - - char *buf = reinterpret_cast(outPtr); - - memcpy(buf, &minNonZero, sizeof(unsigned short)); - buf += sizeof(unsigned short); - memcpy(buf, &maxNonZero, sizeof(unsigned short)); - buf += sizeof(unsigned short); - - if (minNonZero <= maxNonZero) { - memcpy(buf, reinterpret_cast(&bitmap[0] + minNonZero), - maxNonZero - minNonZero + 1); - buf += maxNonZero - minNonZero + 1; - } - - // - // Apply wavelet encoding - // - - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - for (int j = 0; j < cd.size; ++j) { - wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, - maxValue); - } 
- } - - // - // Apply Huffman encoding; append the result to _outBuffer - // - - // length header(4byte), then huff data. Initialize length header with zero, - // then later fill it by `length`. - char *lengthPtr = buf; - int zero = 0; - memcpy(buf, &zero, sizeof(int)); - buf += sizeof(int); - - int length = - hufCompress(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), buf); - memcpy(lengthPtr, &length, sizeof(int)); - - (*outSize) = static_cast( - (reinterpret_cast(buf) - outPtr) + - static_cast(length)); - - // Use uncompressed data when compressed data is larger than uncompressed. - // (Issue 40) - if ((*outSize) >= inSize) { - (*outSize) = static_cast(inSize); - memcpy(outPtr, inPtr, inSize); - } - return true; -} - -static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, - size_t tmpBufSize, size_t inLen, int num_channels, - const EXRChannelInfo *channels, int data_width, - int num_lines) { - if (inLen == tmpBufSize) { - // Data is not compressed(Issue 40). - memcpy(outPtr, inPtr, inLen); - return true; - } - - std::vector bitmap(BITMAP_SIZE); - unsigned short minNonZero; - unsigned short maxNonZero; - -#if !MINIZ_LITTLE_ENDIAN - // @todo { PIZ compression on BigEndian architecture. 
} - assert(0); - return false; -#endif - - memset(bitmap.data(), 0, BITMAP_SIZE); - - const unsigned char *ptr = inPtr; - // minNonZero = *(reinterpret_cast(ptr)); - tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); - // maxNonZero = *(reinterpret_cast(ptr + 2)); - tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); - ptr += 4; - - if (maxNonZero >= BITMAP_SIZE) { - return false; - } - - if (minNonZero <= maxNonZero) { - memcpy(reinterpret_cast(&bitmap[0] + minNonZero), ptr, - maxNonZero - minNonZero + 1); - ptr += maxNonZero - minNonZero + 1; - } - - std::vector lut(USHORT_RANGE); - memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); - unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); - - // - // Huffman decoding - // - - int length; - - // length = *(reinterpret_cast(ptr)); - tinyexr::cpy4(&length, reinterpret_cast(ptr)); - ptr += sizeof(int); - - if (size_t((ptr - inPtr) + length) > inLen) { - return false; - } - - std::vector tmpBuffer(tmpBufSize); - hufUncompress(reinterpret_cast(ptr), length, &tmpBuffer); - - // - // Wavelet decoding - // - - std::vector channelData(static_cast(num_channels)); - - unsigned short *tmpBufferEnd = &tmpBuffer.at(0); - - for (size_t i = 0; i < static_cast(num_channels); ++i) { - const EXRChannelInfo &chan = channels[i]; - - size_t pixelSize = sizeof(int); // UINT and FLOAT - if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { - pixelSize = sizeof(short); - } - - channelData[i].start = tmpBufferEnd; - channelData[i].end = channelData[i].start; - channelData[i].nx = data_width; - channelData[i].ny = num_lines; - // channelData[i].ys = 1; - channelData[i].size = static_cast(pixelSize / sizeof(short)); - - tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; - } - - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - for (int j = 0; j < cd.size; ++j) { - wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, - maxValue); - } 
- } - - // - // Expand the pixel data to their original range - // - - applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSize)); - - for (int y = 0; y < num_lines; y++) { - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - // if (modp (y, cd.ys) != 0) - // continue; - - size_t n = static_cast(cd.nx * cd.size); - memcpy(outPtr, cd.end, static_cast(n * sizeof(unsigned short))); - outPtr += n * sizeof(unsigned short); - cd.end += n; - } - } - - return true; -} -#endif // TINYEXR_USE_PIZ - -#if TINYEXR_USE_ZFP -struct ZFPCompressionParam { - double rate; - int precision; - double tolerance; - int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* - - ZFPCompressionParam() { - type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; - rate = 2.0; - precision = 0; - tolerance = 0.0f; - } -}; - -bool FindZFPCompressionParam(ZFPCompressionParam *param, - const EXRAttribute *attributes, - int num_attributes) { - bool foundType = false; - - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) && - (attributes[i].size == 1)) { - param->type = static_cast(attributes[i].value[0]); - - foundType = true; - } - } - - if (!foundType) { - return false; - } - - if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && - (attributes[i].size == 8)) { - param->rate = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && - (attributes[i].size == 4)) { - param->rate = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && - 
(attributes[i].size == 8)) { - param->tolerance = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else { - assert(0); - } - - return false; -} - -// Assume pixel format is FLOAT for all channels. -static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, - int num_channels, const unsigned char *src, - unsigned long src_size, - const ZFPCompressionParam ¶m) { - size_t uncompressed_size = dst_width * dst_num_lines * num_channels; - - if (uncompressed_size == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - } - - zfp_stream *zfp = NULL; - zfp_field *field = NULL; - - assert((dst_width % 4) == 0); - assert((dst_num_lines % 4) == 0); - - if ((dst_width & 3U) || (dst_num_lines & 3U)) { - return false; - } - - field = - zfp_field_2d(reinterpret_cast(const_cast(src)), - zfp_type_float, dst_width, dst_num_lines * num_channels); - zfp = zfp_stream_open(NULL); - - if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2, - /* write random access */ 0); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - zfp_stream_set_precision(zfp, param.precision, zfp_type_float); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); - } else { - assert(0); - } - - size_t buf_size = zfp_stream_maximum_size(zfp, field); - std::vector buf(buf_size); - memcpy(&buf.at(0), src, src_size); - - bitstream *stream = stream_open(&buf.at(0), buf_size); - zfp_stream_set_bit_stream(zfp, stream); - zfp_stream_rewind(zfp); - - size_t image_size = dst_width * dst_num_lines; - - for (int c = 0; c < num_channels; c++) { - // decompress 4x4 pixel block. 
- for (int y = 0; y < dst_num_lines; y += 4) { - for (int x = 0; x < dst_width; x += 4) { - float fblock[16]; - zfp_decode_block_float_2(zfp, fblock); - for (int j = 0; j < 4; j++) { - for (int i = 0; i < 4; i++) { - dst[c * image_size + ((y + j) * dst_width + (x + i))] = - fblock[j * 4 + i]; - } - } - } - } - } - - zfp_field_free(field); - zfp_stream_close(zfp); - stream_close(stream); - - return true; -} - -// Assume pixel format is FLOAT for all channels. -bool CompressZfp(std::vector *outBuf, unsigned int *outSize, - const float *inPtr, int width, int num_lines, int num_channels, - const ZFPCompressionParam ¶m) { - zfp_stream *zfp = NULL; - zfp_field *field = NULL; - - assert((width % 4) == 0); - assert((num_lines % 4) == 0); - - if ((width & 3U) || (num_lines & 3U)) { - return false; - } - - // create input array. - field = zfp_field_2d(reinterpret_cast(const_cast(inPtr)), - zfp_type_float, width, num_lines * num_channels); - - zfp = zfp_stream_open(NULL); - - if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - zfp_stream_set_precision(zfp, param.precision, zfp_type_float); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); - } else { - assert(0); - } - - size_t buf_size = zfp_stream_maximum_size(zfp, field); - - outBuf->resize(buf_size); - - bitstream *stream = stream_open(&outBuf->at(0), buf_size); - zfp_stream_set_bit_stream(zfp, stream); - zfp_field_free(field); - - size_t image_size = width * num_lines; - - for (int c = 0; c < num_channels; c++) { - // compress 4x4 pixel block. 
- for (int y = 0; y < num_lines; y += 4) { - for (int x = 0; x < width; x += 4) { - float fblock[16]; - for (int j = 0; j < 4; j++) { - for (int i = 0; i < 4; i++) { - fblock[j * 4 + i] = - inPtr[c * image_size + ((y + j) * width + (x + i))]; - } - } - zfp_encode_block_float_2(zfp, fblock); - } - } - } - - zfp_stream_flush(zfp); - (*outSize) = zfp_stream_compressed_size(zfp); - - zfp_stream_close(zfp); - - return true; -} - -#endif - -// -// ----------------------------------------------------------------- -// - -// TODO(syoyo): Refactor function arguments. -static bool DecodePixelData(/* out */ unsigned char **out_images, - const int *requested_pixel_types, - const unsigned char *data_ptr, size_t data_len, - int compression_type, int line_order, int width, - int height, int x_stride, int y, int line_no, - int num_lines, size_t pixel_data_size, - size_t num_attributes, - const EXRAttribute *attributes, size_t num_channels, - const EXRChannelInfo *channels, - const std::vector &channel_offset_list) { - if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ -#if TINYEXR_USE_PIZ - if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) { - // Invalid input #90 - return false; - } - - // Allocate original data size. - std::vector outBuf(static_cast( - static_cast(width * num_lines) * pixel_data_size)); - size_t tmpBufLen = outBuf.size(); - - bool ret = tinyexr::DecompressPiz( - reinterpret_cast(&outBuf.at(0)), data_ptr, tmpBufLen, - data_len, static_cast(num_channels), channels, width, num_lines); - - if (!ret) { - return false; - } - - // For PIZ_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... 
for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... - for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - FP16 hf; - - // hf.u = line_ptr[u]; - // use `cpy` to avoid unaligned memory access when compiler's - // optimization is on. - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - size_t offset = 0; - if (line_order == 0) { - offset = (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - offset = static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - image += offset; - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 
0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast(&outBuf.at( - v * pixel_data_size * static_cast(x_stride) + - channel_offset_list[c] * static_cast(x_stride))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - } - } -#else - assert(0 && "PIZ is enabled in this build"); - return false; -#endif - - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || - compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = static_cast(outBuf.size()); - assert(dstLen > 0); - if (!tinyexr::DecompressZip( - reinterpret_cast(&outBuf.at(0)), &dstLen, data_ptr, - static_cast(data_len))) { - return false; - } - - // For ZIP_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... 
for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... - for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * static_cast(pixel_data_size) * - static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - tinyexr::FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - size_t offset = 0; - if (line_order == 0) { - offset = (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - offset = (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - image += offset; - - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - 
static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = static_cast(outBuf.size()); - if (dstLen == 0) { - return false; - } - - if (!tinyexr::DecompressRle(reinterpret_cast(&outBuf.at(0)), - dstLen, data_ptr, - static_cast(data_len))) { - return false; - } - - // For RLE_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... 
- for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * static_cast(pixel_data_size) * - static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - tinyexr::FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - 
static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - tinyexr::ZFPCompressionParam zfp_compression_param; - if (!FindZFPCompressionParam(&zfp_compression_param, attributes, - num_attributes)) { - assert(0); - return false; - } - - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = outBuf.size(); - assert(dstLen > 0); - tinyexr::DecompressZfp(reinterpret_cast(&outBuf.at(0)), width, - num_lines, num_channels, data_ptr, - static_cast(data_len), - zfp_compression_param); - - // For ZFP_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... 
- for (size_t c = 0; c < static_cast(num_channels); c++) { - assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } -#else - (void)attributes; - (void)num_attributes; - (void)num_channels; - assert(0); - return false; -#endif - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { - for (size_t c = 0; c < num_channels; c++) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - const unsigned short *line_ptr = - reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *outLine = - reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - for (int u = 0; u < width; u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - outLine[u] = hf.u; - } - } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - float *outLine = 
reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - if (reinterpret_cast(line_ptr + width) > - (data_ptr + data_len)) { - // Insufficient data size - return false; - } - - for (int u = 0; u < width; u++) { - tinyexr::FP16 hf; - - // address may not be aliged. use byte-wise copy for safety.#76 - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - tinyexr::FP32 f32 = half_to_float(hf); - - outLine[u] = f32.f; - } - } else { - assert(0); - return false; - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - const float *line_ptr = reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - float *outLine = reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - if (reinterpret_cast(line_ptr + width) > - (data_ptr + data_len)) { - // Insufficient data size - return false; - } - - for (int u = 0; u < width; u++) { - float val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - outLine[u] = val; - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - const unsigned int *line_ptr = reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - unsigned int *outLine = - reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - for (int u = 0; u < width; u++) { - if (reinterpret_cast(line_ptr + u) >= - (data_ptr + data_len)) { - // Corrupsed data? 
- return false; - } - - unsigned int val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - outLine[u] = val; - } - } - } - } - } - - return true; -} - -static void DecodeTiledPixelData( - unsigned char **out_images, int *width, int *height, - const int *requested_pixel_types, const unsigned char *data_ptr, - size_t data_len, int compression_type, int line_order, int data_width, - int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, - int tile_size_y, size_t pixel_data_size, size_t num_attributes, - const EXRAttribute *attributes, size_t num_channels, - const EXRChannelInfo *channels, - const std::vector &channel_offset_list) { - assert(tile_offset_x * tile_size_x < data_width); - assert(tile_offset_y * tile_size_y < data_height); - - // Compute actual image size in a tile. - if ((tile_offset_x + 1) * tile_size_x >= data_width) { - (*width) = data_width - (tile_offset_x * tile_size_x); - } else { - (*width) = tile_size_x; - } - - if ((tile_offset_y + 1) * tile_size_y >= data_height) { - (*height) = data_height - (tile_offset_y * tile_size_y); - } else { - (*height) = tile_size_y; - } - - // Image size = tile size. 
- DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, - compression_type, line_order, (*width), tile_size_y, - /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, - (*height), pixel_data_size, num_attributes, attributes, - num_channels, channels, channel_offset_list); -} - -static bool ComputeChannelLayout(std::vector *channel_offset_list, - int *pixel_data_size, size_t *channel_offset, - int num_channels, - const EXRChannelInfo *channels) { - channel_offset_list->resize(static_cast(num_channels)); - - (*pixel_data_size) = 0; - (*channel_offset) = 0; - - for (size_t c = 0; c < static_cast(num_channels); c++) { - (*channel_offset_list)[c] = (*channel_offset); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - (*pixel_data_size) += sizeof(unsigned short); - (*channel_offset) += sizeof(unsigned short); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - (*pixel_data_size) += sizeof(float); - (*channel_offset) += sizeof(float); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - (*pixel_data_size) += sizeof(unsigned int); - (*channel_offset) += sizeof(unsigned int); - } else { - // ??? - return false; - } - } - return true; -} - -static unsigned char **AllocateImage(int num_channels, - const EXRChannelInfo *channels, - const int *requested_pixel_types, - int data_width, int data_height) { - unsigned char **images = - reinterpret_cast(static_cast( - malloc(sizeof(float *) * static_cast(num_channels)))); - - for (size_t c = 0; c < static_cast(num_channels); c++) { - size_t data_len = - static_cast(data_width) * static_cast(data_height); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - // pixel_data_size += sizeof(unsigned short); - // channel_offset += sizeof(unsigned short); - // Alloc internal image for half type. 
- if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - images[c] = - reinterpret_cast(static_cast( - malloc(sizeof(unsigned short) * data_len))); - } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(float) * data_len))); - } else { - assert(0); - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - // pixel_data_size += sizeof(float); - // channel_offset += sizeof(float); - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(float) * data_len))); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - // pixel_data_size += sizeof(unsigned int); - // channel_offset += sizeof(unsigned int); - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(unsigned int) * data_len))); - } else { - assert(0); - } - } - - return images; -} - -static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, - const EXRVersion *version, std::string *err, - const unsigned char *buf, size_t size) { - const char *marker = reinterpret_cast(&buf[0]); - - if (empty_header) { - (*empty_header) = false; - } - - if (version->multipart) { - if (size > 0 && marker[0] == '\0') { - // End of header list. 
- if (empty_header) { - (*empty_header) = true; - } - return TINYEXR_SUCCESS; - } - } - - // According to the spec, the header of every OpenEXR file must contain at - // least the following attributes: - // - // channels chlist - // compression compression - // dataWindow box2i - // displayWindow box2i - // lineOrder lineOrder - // pixelAspectRatio float - // screenWindowCenter v2f - // screenWindowWidth float - bool has_channels = false; - bool has_compression = false; - bool has_data_window = false; - bool has_display_window = false; - bool has_line_order = false; - bool has_pixel_aspect_ratio = false; - bool has_screen_window_center = false; - bool has_screen_window_width = false; - - info->data_window[0] = 0; - info->data_window[1] = 0; - info->data_window[2] = 0; - info->data_window[3] = 0; - info->line_order = 0; // @fixme - info->display_window[0] = 0; - info->display_window[1] = 0; - info->display_window[2] = 0; - info->display_window[3] = 0; - info->screen_window_center[0] = 0.0f; - info->screen_window_center[1] = 0.0f; - info->screen_window_width = -1.0f; - info->pixel_aspect_ratio = -1.0f; - - info->tile_size_x = -1; - info->tile_size_y = -1; - info->tile_level_mode = -1; - info->tile_rounding_mode = -1; - - info->attributes.clear(); - - // Read attributes - size_t orig_size = size; - for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { - if (0 == size) { - if (err) { - (*err) += "Insufficient data size for attributes.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } else if (marker[0] == '\0') { - size--; - break; - } - - std::string attr_name; - std::string attr_type; - std::vector data; - size_t marker_size; - if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, - marker, size)) { - if (err) { - (*err) += "Failed to read attribute.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - marker += marker_size; - size -= marker_size; - - if (version->tiled && attr_name.compare("tiles") == 0) { - unsigned int x_size, 
y_size; - unsigned char tile_mode; - assert(data.size() == 9); - memcpy(&x_size, &data.at(0), sizeof(int)); - memcpy(&y_size, &data.at(4), sizeof(int)); - tile_mode = data[8]; - tinyexr::swap4(&x_size); - tinyexr::swap4(&y_size); - - info->tile_size_x = static_cast(x_size); - info->tile_size_y = static_cast(y_size); - - // mode = levelMode + roundingMode * 16 - info->tile_level_mode = tile_mode & 0x3; - info->tile_rounding_mode = (tile_mode >> 4) & 0x1; - - } else if (attr_name.compare("compression") == 0) { - bool ok = false; - if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { - ok = true; - } - - if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { -#if TINYEXR_USE_PIZ - ok = true; -#else - if (err) { - (*err) = "PIZ compression is not supported."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; -#endif - } - - if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - ok = true; -#else - if (err) { - (*err) = "ZFP compression is not supported."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; -#endif - } - - if (!ok) { - if (err) { - (*err) = "Unknown compression type."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - info->compression_type = static_cast(data[0]); - has_compression = true; - - } else if (attr_name.compare("channels") == 0) { - // name: zero-terminated string, from 1 to 255 bytes long - // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 - // pLinear: unsigned char, possible values are 0 and 1 - // reserved: three chars, should be zero - // xSampling: int - // ySampling: int - - if (!ReadChannelInfo(info->channels, data)) { - if (err) { - (*err) += "Failed to parse channel info.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (info->channels.size() < 1) { - if (err) { - (*err) += "# of channels is zero.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - has_channels = true; - - } else if (attr_name.compare("dataWindow") == 0) { - if (data.size() >= 16) { - memcpy(&info->data_window[0], &data.at(0), sizeof(int)); - 
memcpy(&info->data_window[1], &data.at(4), sizeof(int)); - memcpy(&info->data_window[2], &data.at(8), sizeof(int)); - memcpy(&info->data_window[3], &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&info->data_window[0])); - tinyexr::swap4(reinterpret_cast(&info->data_window[1])); - tinyexr::swap4(reinterpret_cast(&info->data_window[2])); - tinyexr::swap4(reinterpret_cast(&info->data_window[3])); - has_data_window = true; - } - } else if (attr_name.compare("displayWindow") == 0) { - if (data.size() >= 16) { - memcpy(&info->display_window[0], &data.at(0), sizeof(int)); - memcpy(&info->display_window[1], &data.at(4), sizeof(int)); - memcpy(&info->display_window[2], &data.at(8), sizeof(int)); - memcpy(&info->display_window[3], &data.at(12), sizeof(int)); - tinyexr::swap4( - reinterpret_cast(&info->display_window[0])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[1])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[2])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[3])); - - has_display_window = true; - } - } else if (attr_name.compare("lineOrder") == 0) { - if (data.size() >= 1) { - info->line_order = static_cast(data[0]); - has_line_order = true; - } - } else if (attr_name.compare("pixelAspectRatio") == 0) { - if (data.size() >= sizeof(float)) { - memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->pixel_aspect_ratio)); - has_pixel_aspect_ratio = true; - } - } else if (attr_name.compare("screenWindowCenter") == 0) { - if (data.size() >= 8) { - memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); - memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_center[0])); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_center[1])); - has_screen_window_center = true; - } - } else if (attr_name.compare("screenWindowWidth") == 0) { - if (data.size() >= sizeof(float)) { - 
memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_width)); - - has_screen_window_width = true; - } - } else if (attr_name.compare("chunkCount") == 0) { - if (data.size() >= sizeof(int)) { - memcpy(&info->chunk_count, &data.at(0), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&info->chunk_count)); - } - } else { - // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) - if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { - EXRAttribute attrib; -#ifdef _MSC_VER - strncpy_s(attrib.name, attr_name.c_str(), 255); - strncpy_s(attrib.type, attr_type.c_str(), 255); -#else - strncpy(attrib.name, attr_name.c_str(), 255); - strncpy(attrib.type, attr_type.c_str(), 255); -#endif - attrib.name[255] = '\0'; - attrib.type[255] = '\0'; - attrib.size = static_cast(data.size()); - attrib.value = static_cast(malloc(data.size())); - memcpy(reinterpret_cast(attrib.value), &data.at(0), - data.size()); - info->attributes.push_back(attrib); - } - } - } - - // Check if required attributes exist - { - std::stringstream ss_err; - - if (!has_compression) { - ss_err << "\"compression\" attribute not found in the header." - << std::endl; - } - - if (!has_channels) { - ss_err << "\"channels\" attribute not found in the header." << std::endl; - } - - if (!has_line_order) { - ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; - } - - if (!has_display_window) { - ss_err << "\"displayWindow\" attribute not found in the header." - << std::endl; - } - - if (!has_data_window) { - ss_err << "\"dataWindow\" attribute not found in the header or invalid." - << std::endl; - } - - if (!has_pixel_aspect_ratio) { - ss_err << "\"pixelAspectRatio\" attribute not found in the header." - << std::endl; - } - - if (!has_screen_window_width) { - ss_err << "\"screenWindowWidth\" attribute not found in the header." 
- << std::endl; - } - - if (!has_screen_window_center) { - ss_err << "\"screenWindowCenter\" attribute not found in the header." - << std::endl; - } - - if (!(ss_err.str().empty())) { - if (err) { - (*err) += ss_err.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - } - - info->header_len = static_cast(orig_size - size); - - return TINYEXR_SUCCESS; -} - -// C++ HeaderInfo to C EXRHeader conversion. -static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { - exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; - exr_header->screen_window_center[0] = info.screen_window_center[0]; - exr_header->screen_window_center[1] = info.screen_window_center[1]; - exr_header->screen_window_width = info.screen_window_width; - exr_header->chunk_count = info.chunk_count; - exr_header->display_window[0] = info.display_window[0]; - exr_header->display_window[1] = info.display_window[1]; - exr_header->display_window[2] = info.display_window[2]; - exr_header->display_window[3] = info.display_window[3]; - exr_header->data_window[0] = info.data_window[0]; - exr_header->data_window[1] = info.data_window[1]; - exr_header->data_window[2] = info.data_window[2]; - exr_header->data_window[3] = info.data_window[3]; - exr_header->line_order = info.line_order; - exr_header->compression_type = info.compression_type; - - exr_header->tile_size_x = info.tile_size_x; - exr_header->tile_size_y = info.tile_size_y; - exr_header->tile_level_mode = info.tile_level_mode; - exr_header->tile_rounding_mode = info.tile_rounding_mode; - - exr_header->num_channels = static_cast(info.channels.size()); - - exr_header->channels = static_cast(malloc( - sizeof(EXRChannelInfo) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { -#ifdef _MSC_VER - strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); -#else - strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); -#endif - // manually add 
'\0' for safety. - exr_header->channels[c].name[255] = '\0'; - - exr_header->channels[c].pixel_type = info.channels[c].pixel_type; - exr_header->channels[c].p_linear = info.channels[c].p_linear; - exr_header->channels[c].x_sampling = info.channels[c].x_sampling; - exr_header->channels[c].y_sampling = info.channels[c].y_sampling; - } - - exr_header->pixel_types = static_cast( - malloc(sizeof(int) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - exr_header->pixel_types[c] = info.channels[c].pixel_type; - } - - // Initially fill with values of `pixel_types` - exr_header->requested_pixel_types = static_cast( - malloc(sizeof(int) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; - } - - exr_header->num_custom_attributes = static_cast(info.attributes.size()); - - if (exr_header->num_custom_attributes > 0) { - // TODO(syoyo): Report warning when # of attributes exceeds - // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` - if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { - exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; - } - - exr_header->custom_attributes = static_cast(malloc( - sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); - - for (size_t i = 0; i < info.attributes.size(); i++) { - memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, - 256); - memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, - 256); - exr_header->custom_attributes[i].size = info.attributes[i].size; - // Just copy poiner - exr_header->custom_attributes[i].value = info.attributes[i].value; - } - - } else { - exr_header->custom_attributes = NULL; - } - - exr_header->header_len = info.header_len; -} - -static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, - const std::vector &offsets, - const unsigned 
char *head, const size_t size, - std::string *err) { - int num_channels = exr_header->num_channels; - - int num_scanline_blocks = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanline_blocks = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanline_blocks = 16; - } - - int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1; - int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1; - - if ((data_width < 0) || (data_height < 0)) { - if (err) { - std::stringstream ss; - ss << "Invalid data width or data height: " << data_width << ", " - << data_height << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Do not allow too large data_width and data_height. header invalid? - { - const int threshold = 1024 * 8192; // heuristics - if ((data_width > threshold) || (data_height > threshold)) { - if (err) { - std::stringstream ss; - ss << "data_with or data_height too large. data_width: " << data_width - << ", " - << "data_height = " << data_height << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - } - - size_t num_blocks = offsets.size(); - - std::vector channel_offset_list; - int pixel_data_size = 0; - size_t channel_offset = 0; - if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, - &channel_offset, num_channels, - exr_header->channels)) { - if (err) { - (*err) += "Failed to compute channel layout.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - bool invalid_data = false; // TODO(LTE): Use atomic lock for MT safety. 
- - if (exr_header->tiled) { - // value check - if (exr_header->tile_size_x < 0) { - if (err) { - std::stringstream ss; - ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n"; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - - if (exr_header->tile_size_y < 0) { - if (err) { - std::stringstream ss; - ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n"; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - - size_t num_tiles = offsets.size(); // = # of blocks - - exr_image->tiles = static_cast( - calloc(sizeof(EXRTile), static_cast(num_tiles))); - - for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { - // Allocate memory for each tile. - exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( - num_channels, exr_header->channels, exr_header->requested_pixel_types, - exr_header->tile_size_x, exr_header->tile_size_y); - - // 16 byte: tile coordinates - // 4 byte : data size - // ~ : data(uncompressed or compressed) - if (offsets[tile_idx] + sizeof(int) * 5 > size) { - if (err) { - (*err) += "Insufficient data size.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - size_t data_size = size_t(size - (offsets[tile_idx] + sizeof(int) * 5)); - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[tile_idx]); - - int tile_coordinates[4]; - memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[0])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[1])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[2])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[3])); - - // @todo{ LoD } - if (tile_coordinates[2] != 0) { - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } - if (tile_coordinates[3] != 0) { - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } - - int data_len; - memcpy(&data_len, data_ptr + 16, - sizeof(int)); // 16 = sizeof(tile_coordinates) - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (data_len < 4 || 
size_t(data_len) > data_size) { - if (err) { - (*err) += "Insufficient data length.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Move to data addr: 20 = 16 + 4; - data_ptr += 20; - - tinyexr::DecodeTiledPixelData( - exr_image->tiles[tile_idx].images, - &(exr_image->tiles[tile_idx].width), - &(exr_image->tiles[tile_idx].height), - exr_header->requested_pixel_types, data_ptr, - static_cast(data_len), exr_header->compression_type, - exr_header->line_order, data_width, data_height, tile_coordinates[0], - tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y, - static_cast(pixel_data_size), - static_cast(exr_header->num_custom_attributes), - exr_header->custom_attributes, - static_cast(exr_header->num_channels), exr_header->channels, - channel_offset_list); - - exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; - exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; - exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; - exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; - - exr_image->num_tiles = static_cast(num_tiles); - } - } else { // scanline format - - // Don't allow too large image(256GB * pixel_data_size or more). Workaround - // for #104. - size_t total_data_len = - size_t(data_width) * size_t(data_height) * size_t(num_channels); - const bool total_data_len_overflown = sizeof(void*) == 8 ? 
(total_data_len >= 0x4000000000) : false; - if ((total_data_len == 0) || total_data_len_overflown ) { - if (err) { - std::stringstream ss; - ss << "Image data size is zero or too large: width = " << data_width - << ", height = " << data_height << ", channels = " << num_channels - << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - - exr_image->images = tinyexr::AllocateImage( - num_channels, exr_header->channels, exr_header->requested_pixel_types, - data_width, data_height); - -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int y = 0; y < static_cast(num_blocks); y++) { - size_t y_idx = static_cast(y); - - if (offsets[y_idx] + sizeof(int) * 2 > size) { - invalid_data = true; - } else { - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(uncompressed or compressed) - size_t data_size = size_t(size - (offsets[y_idx] + sizeof(int) * 2)); - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[y_idx]); - - int line_no; - memcpy(&line_no, data_ptr, sizeof(int)); - int data_len; - memcpy(&data_len, data_ptr + 4, sizeof(int)); - tinyexr::swap4(reinterpret_cast(&line_no)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (size_t(data_len) > data_size) { - invalid_data = true; - - } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) { - // Too large value. Assume this is invalid - // 2**20 = 1048576 = heuristic value. - invalid_data = true; - } else if (data_len == 0) { - // TODO(syoyo): May be ok to raise the threshold for example `data_len - // < 4` - invalid_data = true; - } else { - // line_no may be negative. 
- int end_line_no = (std::min)(line_no + num_scanline_blocks, - (exr_header->data_window[3] + 1)); - - int num_lines = end_line_no - line_no; - - if (num_lines <= 0) { - invalid_data = true; - } else { - // Move to data addr: 8 = 4 + 4; - data_ptr += 8; - - // Adjust line_no with data_window.bmin.y - - // overflow check - tinyexr_int64 lno = static_cast(line_no) - static_cast(exr_header->data_window[1]); - if (lno > std::numeric_limits::max()) { - line_no = -1; // invalid - } else if (lno < -std::numeric_limits::max()) { - line_no = -1; // invalid - } else { - line_no -= exr_header->data_window[1]; - } - - if (line_no < 0) { - invalid_data = true; - } else { - if (!tinyexr::DecodePixelData( - exr_image->images, exr_header->requested_pixel_types, - data_ptr, static_cast(data_len), - exr_header->compression_type, exr_header->line_order, - data_width, data_height, data_width, y, line_no, - num_lines, static_cast(pixel_data_size), - static_cast(exr_header->num_custom_attributes), - exr_header->custom_attributes, - static_cast(exr_header->num_channels), - exr_header->channels, channel_offset_list)) { - invalid_data = true; - } - } - } - } - } - } // omp parallel - } - - if (invalid_data) { - if (err) { - std::stringstream ss; - (*err) += "Invalid data found when decoding pixels.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Overwrite `pixel_type` with `requested_pixel_type`. 
- { - for (int c = 0; c < exr_header->num_channels; c++) { - exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; - } - } - - { - exr_image->num_channels = num_channels; - - exr_image->width = data_width; - exr_image->height = data_height; - } - - return TINYEXR_SUCCESS; -} - -static bool ReconstructLineOffsets( - std::vector *offsets, size_t n, - const unsigned char *head, const unsigned char *marker, const size_t size) { - assert(head < marker); - assert(offsets->size() == n); - - for (size_t i = 0; i < n; i++) { - size_t offset = static_cast(marker - head); - // Offset should not exceed whole EXR file/data size. - if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) { - return false; - } - - int y; - unsigned int data_len; - - memcpy(&y, marker, sizeof(int)); - memcpy(&data_len, marker + 4, sizeof(unsigned int)); - - if (data_len >= size) { - return false; - } - - tinyexr::swap4(reinterpret_cast(&y)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - (*offsets)[i] = offset; - - marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) - } - - return true; -} - -static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, - const unsigned char *head, - const unsigned char *marker, const size_t size, - const char **err) { - if (exr_image == NULL || exr_header == NULL || head == NULL || - marker == NULL || (size <= tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - int num_scanline_blocks = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanline_blocks = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanline_blocks = 16; - } - - int data_width = exr_header->data_window[2] - exr_header->data_window[0]; - if (data_width >= std::numeric_limits::max()) { - // 
Issue 63 - tinyexr::SetErrorMessage("Invalid data width value", err); - return TINYEXR_ERROR_INVALID_DATA; - } - data_width++; - - int data_height = exr_header->data_window[3] - exr_header->data_window[1]; - if (data_height >= std::numeric_limits::max()) { - tinyexr::SetErrorMessage("Invalid data height value", err); - return TINYEXR_ERROR_INVALID_DATA; - } - data_height++; - - if ((data_width < 0) || (data_height < 0)) { - tinyexr::SetErrorMessage("data width or data height is negative.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - // Do not allow too large data_width and data_height. header invalid? - { - const int threshold = 1024 * 8192; // heuristics - if (data_width > threshold) { - tinyexr::SetErrorMessage("data width too large.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - if (data_height > threshold) { - tinyexr::SetErrorMessage("data height too large.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - - // Read offset tables. - size_t num_blocks = 0; - - if (exr_header->chunk_count > 0) { - // Use `chunkCount` attribute. 
- num_blocks = static_cast(exr_header->chunk_count); - } else if (exr_header->tiled) { - // @todo { LoD } - size_t num_x_tiles = static_cast(data_width) / - static_cast(exr_header->tile_size_x); - if (num_x_tiles * static_cast(exr_header->tile_size_x) < - static_cast(data_width)) { - num_x_tiles++; - } - size_t num_y_tiles = static_cast(data_height) / - static_cast(exr_header->tile_size_y); - if (num_y_tiles * static_cast(exr_header->tile_size_y) < - static_cast(data_height)) { - num_y_tiles++; - } - - num_blocks = num_x_tiles * num_y_tiles; - } else { - num_blocks = static_cast(data_height) / - static_cast(num_scanline_blocks); - if (num_blocks * static_cast(num_scanline_blocks) < - static_cast(data_height)) { - num_blocks++; - } - } - - std::vector offsets(num_blocks); - - for (size_t y = 0; y < num_blocks; y++) { - tinyexr::tinyexr_uint64 offset; - // Issue #81 - if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { - tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); - tinyexr::swap8(&offset); - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - marker += sizeof(tinyexr::tinyexr_uint64); // = 8 - offsets[y] = offset; - } - - // If line offsets are invalid, we try to reconstruct it. - // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. - for (size_t y = 0; y < num_blocks; y++) { - if (offsets[y] <= 0) { - // TODO(syoyo) Report as warning? - // if (err) { - // stringstream ss; - // ss << "Incomplete lineOffsets." 
<< std::endl; - // (*err) += ss.str(); - //} - bool ret = - ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); - if (ret) { - // OK - break; - } else { - tinyexr::SetErrorMessage( - "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - } - - { - std::string e; - int ret = DecodeChunk(exr_image, exr_header, offsets, head, size, &e); - - if (ret != TINYEXR_SUCCESS) { - if (!e.empty()) { - tinyexr::SetErrorMessage(e, err); - } - - // release memory(if exists) - if ((exr_header->num_channels > 0) && exr_image && exr_image->images) { - for (size_t c = 0; c < size_t(exr_header->num_channels); c++) { - if (exr_image->images[c]) { - free(exr_image->images[c]); - exr_image->images[c] = NULL; - } - } - free(exr_image->images); - exr_image->images = NULL; - } - } - - return ret; - } -} - -} // namespace tinyexr - -int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, - const char **err) { - if (out_rgba == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRVersion exr_version; - EXRImage exr_image; - EXRHeader exr_header; - InitEXRHeader(&exr_header); - InitEXRImage(&exr_image); - - { - int ret = ParseEXRVersionFromFile(&exr_version, filename); - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage("Invalid EXR header.", err); - return ret; - } - - if (exr_version.multipart || exr_version.non_image) { - tinyexr::SetErrorMessage( - "Loading multipart or DeepImage is not supported in LoadEXR() API", - err); - return TINYEXR_ERROR_INVALID_DATA; // @fixme. - } - } - - { - int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); - if (ret != TINYEXR_SUCCESS) { - FreeEXRHeader(&exr_header); - return ret; - } - } - - // Read HALF channel as FLOAT. 
- for (int i = 0; i < exr_header.num_channels; i++) { - if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { - exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; - } - } - - { - int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); - if (ret != TINYEXR_SUCCESS) { - FreeEXRHeader(&exr_header); - return ret; - } - } - - // RGBA - int idxR = -1; - int idxG = -1; - int idxB = -1; - int idxA = -1; - for (int c = 0; c < exr_header.num_channels; c++) { - if (strcmp(exr_header.channels[c].name, "R") == 0) { - idxR = c; - } else if (strcmp(exr_header.channels[c].name, "G") == 0) { - idxG = c; - } else if (strcmp(exr_header.channels[c].name, "B") == 0) { - idxB = c; - } else if (strcmp(exr_header.channels[c].name, "A") == 0) { - idxA = c; - } - } - - if (exr_header.num_channels == 1) { - // Grayscale channel only. - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[0][srcIdx]; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - const float val = reinterpret_cast(exr_image.images)[0][i]; - (*out_rgba)[4 * i + 0] = val; - (*out_rgba)[4 * i + 1] = val; - (*out_rgba)[4 * i + 2] = val; - (*out_rgba)[4 * i + 3] = val; - } - } - } else { - // Assume RGB(A) - - if (idxR == -1) { - tinyexr::SetErrorMessage("R channel not found", err); - - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxG == -1) { - tinyexr::SetErrorMessage("G channel not found", err); - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxB == -1) { - tinyexr::SetErrorMessage("B channel not found", err); - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[idxR][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[idxG][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[idxB][srcIdx]; - if (idxA != -1) { - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[idxA][srcIdx]; - } else { - (*out_rgba)[4 * idx + 3] = 1.0; - } - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - (*out_rgba)[4 * i + 0] = - reinterpret_cast(exr_image.images)[idxR][i]; - (*out_rgba)[4 * i + 1] = - reinterpret_cast(exr_image.images)[idxG][i]; - (*out_rgba)[4 * i + 2] = - reinterpret_cast(exr_image.images)[idxB][i]; - if (idxA != -1) { - (*out_rgba)[4 * i + 3] = - reinterpret_cast(exr_image.images)[idxA][i]; - } else { - (*out_rgba)[4 * i + 3] = 1.0; - } - } - } - } - - (*width) = exr_image.width; - (*height) = exr_image.height; - - FreeEXRHeader(&exr_header); - FreeEXRImage(&exr_image); - - return TINYEXR_SUCCESS; -} - -int IsEXR(const char *filename) { - EXRVersion exr_version; - - int ret = ParseEXRVersionFromFile(&exr_version, filename); - if (ret != TINYEXR_SUCCESS) { - return TINYEXR_ERROR_INVALID_HEADER; - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, - const unsigned char *memory, size_t size, - const char **err) { - if (memory == NULL || exr_header == NULL) { - tinyexr::SetErrorMessage( - "Invalid argument. 
`memory` or `exr_header` argument is null in " - "ParseEXRHeaderFromMemory()", - err); - - // Invalid argument - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - tinyexr::SetErrorMessage("Insufficient header/data size.\n", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory + tinyexr::kEXRVersionSize; - size_t marker_size = size - tinyexr::kEXRVersionSize; - - tinyexr::HeaderInfo info; - info.clear(); - - std::string err_str; - int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); - - if (ret != TINYEXR_SUCCESS) { - if (err && !err_str.empty()) { - tinyexr::SetErrorMessage(err_str, err); - } - } - - ConvertHeader(exr_header, info); - - // transfoer `tiled` from version. - exr_header->tiled = version->tiled; - - return ret; -} - -int LoadEXRFromMemory(float **out_rgba, int *width, int *height, - const unsigned char *memory, size_t size, - const char **err) { - if (out_rgba == NULL || memory == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRVersion exr_version; - EXRImage exr_image; - EXRHeader exr_header; - - InitEXRHeader(&exr_header); - - int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage("Failed to parse EXR version", err); - return ret; - } - - ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - // Read HALF channel as FLOAT. 
- for (int i = 0; i < exr_header.num_channels; i++) { - if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { - exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; - } - } - - InitEXRImage(&exr_image); - ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - // RGBA - int idxR = -1; - int idxG = -1; - int idxB = -1; - int idxA = -1; - for (int c = 0; c < exr_header.num_channels; c++) { - if (strcmp(exr_header.channels[c].name, "R") == 0) { - idxR = c; - } else if (strcmp(exr_header.channels[c].name, "G") == 0) { - idxG = c; - } else if (strcmp(exr_header.channels[c].name, "B") == 0) { - idxB = c; - } else if (strcmp(exr_header.channels[c].name, "A") == 0) { - idxA = c; - } - } - - // TODO(syoyo): Refactor removing same code as used in LoadEXR(). - if (exr_header.num_channels == 1) { - // Grayscale channel only. - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[0][srcIdx]; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - const float val = reinterpret_cast(exr_image.images)[0][i]; - (*out_rgba)[4 * i + 0] = val; - (*out_rgba)[4 * i + 1] = val; - (*out_rgba)[4 * i + 2] = val; - (*out_rgba)[4 * i + 3] = val; - } - } - - } else { - // TODO(syoyo): Support non RGBA image. - - if (idxR == -1) { - tinyexr::SetErrorMessage("R channel not found", err); - - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxG == -1) { - tinyexr::SetErrorMessage("G channel not found", err); - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxB == -1) { - tinyexr::SetErrorMessage("B channel not found", err); - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[idxR][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[idxG][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[idxB][srcIdx]; - if (idxA != -1) { - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[idxA][srcIdx]; - } else { - (*out_rgba)[4 * idx + 3] = 1.0; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - (*out_rgba)[4 * i + 0] = - reinterpret_cast(exr_image.images)[idxR][i]; - (*out_rgba)[4 * i + 1] = - reinterpret_cast(exr_image.images)[idxG][i]; - (*out_rgba)[4 * i + 2] = - reinterpret_cast(exr_image.images)[idxB][i]; - if (idxA != -1) { - (*out_rgba)[4 * i + 3] = - reinterpret_cast(exr_image.images)[idxA][i]; - } else { - (*out_rgba)[4 * i + 3] = 1.0; - } - } - } - } - - (*width) = exr_image.width; - (*height) = exr_image.height; - - FreeEXRHeader(&exr_header); - FreeEXRImage(&exr_image); - - return TINYEXR_SUCCESS; -} - -int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, - const char *filename, const char **err) { - if (exr_image == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (filesize < 16) { - tinyexr::SetErrorMessage("File size too short " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - - std::vector buf(filesize); // @todo { 
use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - (void)ret; - } - - return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize, - err); -} - -int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, - const unsigned char *memory, const size_t size, - const char **err) { - if (exr_image == NULL || memory == NULL || - (size < tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory", - err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (exr_header->header_len == 0) { - tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - const unsigned char *head = memory; - const unsigned char *marker = reinterpret_cast( - memory + exr_header->header_len + - 8); // +8 for magic number + version header. - return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, - err); -} - -size_t SaveEXRImageToMemory(const EXRImage *exr_image, - const EXRHeader *exr_header, - unsigned char **memory_out, const char **err) { - if (exr_image == NULL || memory_out == NULL || - exr_header->compression_type < 0) { - tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToMemory", err); - return 0; - } - -#if !TINYEXR_USE_PIZ - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - tinyexr::SetErrorMessage("PIZ compression is not supported in this build", - err); - return 0; - } -#endif - -#if !TINYEXR_USE_ZFP - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - tinyexr::SetErrorMessage("ZFP compression is not supported in this build", - err); - return 0; - } -#endif - -#if TINYEXR_USE_ZFP - for (size_t i = 0; i < static_cast(exr_header->num_channels); i++) { - if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) { - tinyexr::SetErrorMessage("Pixel type must be FLOAT for ZFP compression", - err); - return 0; - } - } 
-#endif - - std::vector memory; - - // Header - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - memory.insert(memory.end(), header, header + 4); - } - - // Version, scanline. - { - char marker[] = {2, 0, 0, 0}; - /* @todo - if (exr_header->tiled) { - marker[1] |= 0x2; - } - if (exr_header->long_name) { - marker[1] |= 0x4; - } - if (exr_header->non_image) { - marker[1] |= 0x8; - } - if (exr_header->multipart) { - marker[1] |= 0x10; - } - */ - memory.insert(memory.end(), marker, marker + 4); - } - - int num_scanlines = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanlines = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanlines = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanlines = 16; - } - - // Write attributes. - std::vector channels; - { - std::vector data; - - for (int c = 0; c < exr_header->num_channels; c++) { - tinyexr::ChannelInfo info; - info.p_linear = 0; - info.pixel_type = exr_header->requested_pixel_types[c]; - info.x_sampling = 1; - info.y_sampling = 1; - info.name = std::string(exr_header->channels[c].name); - channels.push_back(info); - } - - tinyexr::WriteChannelInfo(data, channels); - - tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), - static_cast(data.size())); - } - - { - int comp = exr_header->compression_type; - tinyexr::swap4(reinterpret_cast(&comp)); - tinyexr::WriteAttributeToMemory( - &memory, "compression", "compression", - reinterpret_cast(&comp), 1); - } - - { - int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1}; - tinyexr::swap4(reinterpret_cast(&data[0])); - tinyexr::swap4(reinterpret_cast(&data[1])); - tinyexr::swap4(reinterpret_cast(&data[2])); - tinyexr::swap4(reinterpret_cast(&data[3])); - tinyexr::WriteAttributeToMemory( - &memory, "dataWindow", "box2i", - reinterpret_cast(data), sizeof(int) * 4); - tinyexr::WriteAttributeToMemory( - &memory, "displayWindow", 
"box2i", - reinterpret_cast(data), sizeof(int) * 4); - } - - { - unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } - tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", - &line_order, 1); - } - - { - float aspectRatio = 1.0f; - tinyexr::swap4(reinterpret_cast(&aspectRatio)); - tinyexr::WriteAttributeToMemory( - &memory, "pixelAspectRatio", "float", - reinterpret_cast(&aspectRatio), sizeof(float)); - } - - { - float center[2] = {0.0f, 0.0f}; - tinyexr::swap4(reinterpret_cast(¢er[0])); - tinyexr::swap4(reinterpret_cast(¢er[1])); - tinyexr::WriteAttributeToMemory( - &memory, "screenWindowCenter", "v2f", - reinterpret_cast(center), 2 * sizeof(float)); - } - - { - float w = static_cast(exr_image->width); - tinyexr::swap4(reinterpret_cast(&w)); - tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float", - reinterpret_cast(&w), - sizeof(float)); - } - - // Custom attributes - if (exr_header->num_custom_attributes > 0) { - for (int i = 0; i < exr_header->num_custom_attributes; i++) { - tinyexr::WriteAttributeToMemory( - &memory, exr_header->custom_attributes[i].name, - exr_header->custom_attributes[i].type, - reinterpret_cast( - exr_header->custom_attributes[i].value), - exr_header->custom_attributes[i].size); - } - } - - { // end of header - unsigned char e = 0; - memory.push_back(e); - } - - int num_blocks = exr_image->height / num_scanlines; - if (num_blocks * num_scanlines < exr_image->height) { - num_blocks++; - } - - std::vector offsets(static_cast(num_blocks)); - - size_t headerSize = memory.size(); - tinyexr::tinyexr_uint64 offset = - headerSize + - static_cast(num_blocks) * - sizeof( - tinyexr::tinyexr_int64); // sizeof(header) + sizeof(offsetTable) - - std::vector > data_list( - static_cast(num_blocks)); - std::vector channel_offset_list( - static_cast(exr_header->num_channels)); - - int pixel_data_size = 0; - size_t channel_offset = 0; - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - 
channel_offset_list[c] = channel_offset; - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - pixel_data_size += sizeof(unsigned short); - channel_offset += sizeof(unsigned short); - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - pixel_data_size += sizeof(float); - channel_offset += sizeof(float); - } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - pixel_data_size += sizeof(unsigned int); - channel_offset += sizeof(unsigned int); - } else { - assert(0); - } - } - -#if TINYEXR_USE_ZFP - tinyexr::ZFPCompressionParam zfp_compression_param; - - // Use ZFP compression parameter from custom attributes(if such a parameter - // exists) - { - bool ret = tinyexr::FindZFPCompressionParam( - &zfp_compression_param, exr_header->custom_attributes, - exr_header->num_custom_attributes); - - if (!ret) { - // Use predefined compression parameter. - zfp_compression_param.type = 0; - zfp_compression_param.rate = 2; - } - } -#endif - -// Use signed int since some OpenMP compiler doesn't allow unsigned type for -// `parallel for` -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i = 0; i < num_blocks; i++) { - size_t ii = static_cast(i); - int start_y = num_scanlines * i; - int endY = (std::min)(num_scanlines * (i + 1), exr_image->height); - int h = endY - start_y; - - std::vector buf( - static_cast(exr_image->width * h * pixel_data_size)); - - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP16 h16; - h16.u = reinterpret_cast( - exr_image->images)[c][(y + start_y) 
* exr_image->width + x]; - - tinyexr::FP32 f32 = half_to_float(h16); - - tinyexr::swap4(reinterpret_cast(&f32.f)); - - // line_ptr[x] = f32.f; - tinyexr::cpy4(line_ptr + x, &(f32.f)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast( - &buf.at(static_cast(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned short val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap2(&val); - - // line_ptr[x] = val; - tinyexr::cpy2(line_ptr + x, &val); - } - } - } else { - assert(0); - } - - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast( - &buf.at(static_cast(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP32 f32; - f32.f = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::FP16 h16; - h16 = float_to_half_full(f32); - - tinyexr::swap2(reinterpret_cast(&h16.u)); - - // line_ptr[x] = h16.u; - tinyexr::cpy2(line_ptr + x, &(h16.u)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - float val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap4(reinterpret_cast(&val)); - - // 
line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } - } else { - assert(0); - } - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned int *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned int val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap4(&val); - - // line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } - } - } - - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(uncompressed) - std::vector header(8); - unsigned int data_len = static_cast(buf.size()); - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), buf.begin(), - buf.begin() + data_len); - - } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { -#if TINYEXR_USE_MINIZ - std::vector block(tinyexr::miniz::mz_compressBound( - static_cast(buf.size()))); -#else - std::vector block( - compressBound(static_cast(buf.size()))); -#endif - tinyexr::tinyexr_uint64 outSize = block.size(); - - tinyexr::CompressZip(&block.at(0), outSize, - reinterpret_cast(&buf.at(0)), - static_cast(buf.size())); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = static_cast(outSize); // truncate - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned 
int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { - // (buf.size() * 3) / 2 would be enough. - std::vector block((buf.size() * 3) / 2); - - tinyexr::tinyexr_uint64 outSize = block.size(); - - tinyexr::CompressRle(&block.at(0), outSize, - reinterpret_cast(&buf.at(0)), - static_cast(buf.size())); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = static_cast(outSize); // truncate - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { -#if TINYEXR_USE_PIZ - unsigned int bufLen = - 8192 + static_cast( - 2 * static_cast( - buf.size())); // @fixme { compute good bound. 
} - std::vector block(bufLen); - unsigned int outSize = static_cast(block.size()); - - CompressPiz(&block.at(0), &outSize, - reinterpret_cast(&buf.at(0)), - buf.size(), channels, exr_image->width, h); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - -#else - assert(0); -#endif - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - std::vector block; - unsigned int outSize; - - tinyexr::CompressZfp( - &block, &outSize, reinterpret_cast(&buf.at(0)), - exr_image->width, h, exr_header->num_channels, zfp_compression_param); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - -#else - assert(0); -#endif - } else { - assert(0); - } - } // omp parallel - - for (size_t i = 0; i < static_cast(num_blocks); i++) { - offsets[i] = offset; - tinyexr::swap8(reinterpret_cast(&offsets[i])); - offset += data_list[i].size(); - } - - size_t totalSize = static_cast(offset); - { - memory.insert( - memory.end(), reinterpret_cast(&offsets.at(0)), - reinterpret_cast(&offsets.at(0)) + - sizeof(tinyexr::tinyexr_uint64) * 
static_cast(num_blocks)); - } - - if (memory.size() == 0) { - tinyexr::SetErrorMessage("Output memory size is zero", err); - return 0; - } - - (*memory_out) = static_cast(malloc(totalSize)); - memcpy((*memory_out), &memory.at(0), memory.size()); - unsigned char *memory_ptr = *memory_out + memory.size(); - - for (size_t i = 0; i < static_cast(num_blocks); i++) { - memcpy(memory_ptr, &data_list[i].at(0), data_list[i].size()); - memory_ptr += data_list[i].size(); - } - - return totalSize; // OK -} - -int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, - const char *filename, const char **err) { - if (exr_image == NULL || filename == NULL || - exr_header->compression_type < 0) { - tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#if !TINYEXR_USE_PIZ - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - tinyexr::SetErrorMessage("PIZ compression is not supported in this build", - err); - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } -#endif - -#if !TINYEXR_USE_ZFP - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - tinyexr::SetErrorMessage("ZFP compression is not supported in this build", - err); - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } -#endif - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "wb"); -#else - FILE *fp = fopen(filename, "wb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot write a file", err); - return TINYEXR_ERROR_CANT_WRITE_FILE; - } - - unsigned char *mem = NULL; - size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err); - if (mem_size == 0) { - return TINYEXR_ERROR_SERIALZATION_FAILED; - } - - size_t written_size = 0; - if ((mem_size > 0) && mem) { - written_size = fwrite(mem, 1, mem_size, fp); - } - free(mem); - - fclose(fp); - - if (written_size != mem_size) { - tinyexr::SetErrorMessage("Cannot write a file", err); - return TINYEXR_ERROR_CANT_WRITE_FILE; - } - - return 
TINYEXR_SUCCESS; -} - -int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { - if (deep_image == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _MSC_VER - FILE *fp = NULL; - errno_t errcode = fopen_s(&fp, filename, "rb"); - if ((0 != errcode) || (!fp)) { - tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), - err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } -#else - FILE *fp = fopen(filename, "rb"); - if (!fp) { - tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), - err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } -#endif - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (filesize == 0) { - fclose(fp); - tinyexr::SetErrorMessage("File size is zero : " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - (void)ret; - } - fclose(fp); - - const char *head = &buf[0]; - const char *marker = &buf[0]; - - // Header check. - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - - if (memcmp(marker, header, 4) != 0) { - tinyexr::SetErrorMessage("Invalid magic number", err); - return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; - } - marker += 4; - } - - // Version, scanline. - { - // ver 2.0, scanline, deep bit on(0x800) - // must be [2, 0, 0, 0] - if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { - tinyexr::SetErrorMessage("Unsupported version or scanline", err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - marker += 4; - } - - int dx = -1; - int dy = -1; - int dw = -1; - int dh = -1; - int num_scanline_blocks = 1; // 16 for ZIP compression. 
- int compression_type = -1; - int num_channels = -1; - std::vector channels; - - // Read attributes - size_t size = filesize - tinyexr::kEXRVersionSize; - for (;;) { - if (0 == size) { - return TINYEXR_ERROR_INVALID_DATA; - } else if (marker[0] == '\0') { - marker++; - size--; - break; - } - - std::string attr_name; - std::string attr_type; - std::vector data; - size_t marker_size; - if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, - marker, size)) { - std::stringstream ss; - ss << "Failed to parse attribute\n"; - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_INVALID_DATA; - } - marker += marker_size; - size -= marker_size; - - if (attr_name.compare("compression") == 0) { - compression_type = data[0]; - if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { - std::stringstream ss; - ss << "Unsupported compression type : " << compression_type; - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } - - } else if (attr_name.compare("channels") == 0) { - // name: zero-terminated string, from 1 to 255 bytes long - // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 - // pLinear: unsigned char, possible values are 0 and 1 - // reserved: three chars, should be zero - // xSampling: int - // ySampling: int - - if (!tinyexr::ReadChannelInfo(channels, data)) { - tinyexr::SetErrorMessage("Failed to parse channel info", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - num_channels = static_cast(channels.size()); - - if (num_channels < 1) { - tinyexr::SetErrorMessage("Invalid channels format", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - } else if (attr_name.compare("dataWindow") == 0) { - memcpy(&dx, &data.at(0), sizeof(int)); - memcpy(&dy, &data.at(4), sizeof(int)); - memcpy(&dw, &data.at(8), sizeof(int)); - memcpy(&dh, &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&dx)); 
- tinyexr::swap4(reinterpret_cast(&dy)); - tinyexr::swap4(reinterpret_cast(&dw)); - tinyexr::swap4(reinterpret_cast(&dh)); - - } else if (attr_name.compare("displayWindow") == 0) { - int x; - int y; - int w; - int h; - memcpy(&x, &data.at(0), sizeof(int)); - memcpy(&y, &data.at(4), sizeof(int)); - memcpy(&w, &data.at(8), sizeof(int)); - memcpy(&h, &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&x)); - tinyexr::swap4(reinterpret_cast(&y)); - tinyexr::swap4(reinterpret_cast(&w)); - tinyexr::swap4(reinterpret_cast(&h)); - } - } - - assert(dx >= 0); - assert(dy >= 0); - assert(dw >= 0); - assert(dh >= 0); - assert(num_channels >= 1); - - int data_width = dw - dx + 1; - int data_height = dh - dy + 1; - - std::vector image( - static_cast(data_width * data_height * 4)); // 4 = RGBA - - // Read offset tables. - int num_blocks = data_height / num_scanline_blocks; - if (num_blocks * num_scanline_blocks < data_height) { - num_blocks++; - } - - std::vector offsets(static_cast(num_blocks)); - - for (size_t y = 0; y < static_cast(num_blocks); y++) { - tinyexr::tinyexr_int64 offset; - memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); - tinyexr::swap8(reinterpret_cast(&offset)); - marker += sizeof(tinyexr::tinyexr_int64); // = 8 - offsets[y] = offset; - } - -#if TINYEXR_USE_PIZ - if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || - (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { -#else - if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { -#endif - // OK - } else { - tinyexr::SetErrorMessage("Unsupported compression format", err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - deep_image->image = static_cast( - 
malloc(sizeof(float **) * static_cast(num_channels))); - for (int c = 0; c < num_channels; c++) { - deep_image->image[c] = static_cast( - malloc(sizeof(float *) * static_cast(data_height))); - for (int y = 0; y < data_height; y++) { - } - } - - deep_image->offset_table = static_cast( - malloc(sizeof(int *) * static_cast(data_height))); - for (int y = 0; y < data_height; y++) { - deep_image->offset_table[y] = static_cast( - malloc(sizeof(int) * static_cast(data_width))); - } - - for (size_t y = 0; y < static_cast(num_blocks); y++) { - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[y]); - - // int: y coordinate - // int64: packed size of pixel offset table - // int64: packed size of sample data - // int64: unpacked size of sample data - // compressed pixel offset table - // compressed sample data - int line_no; - tinyexr::tinyexr_int64 packedOffsetTableSize; - tinyexr::tinyexr_int64 packedSampleDataSize; - tinyexr::tinyexr_int64 unpackedSampleDataSize; - memcpy(&line_no, data_ptr, sizeof(int)); - memcpy(&packedOffsetTableSize, data_ptr + 4, - sizeof(tinyexr::tinyexr_int64)); - memcpy(&packedSampleDataSize, data_ptr + 12, - sizeof(tinyexr::tinyexr_int64)); - memcpy(&unpackedSampleDataSize, data_ptr + 20, - sizeof(tinyexr::tinyexr_int64)); - - tinyexr::swap4(reinterpret_cast(&line_no)); - tinyexr::swap8( - reinterpret_cast(&packedOffsetTableSize)); - tinyexr::swap8( - reinterpret_cast(&packedSampleDataSize)); - tinyexr::swap8( - reinterpret_cast(&unpackedSampleDataSize)); - - std::vector pixelOffsetTable(static_cast(data_width)); - - // decode pixel offset table. 
- { - unsigned long dstLen = - static_cast(pixelOffsetTable.size() * sizeof(int)); - if (!tinyexr::DecompressZip( - reinterpret_cast(&pixelOffsetTable.at(0)), - &dstLen, data_ptr + 28, - static_cast(packedOffsetTableSize))) { - return false; - } - - assert(dstLen == pixelOffsetTable.size() * sizeof(int)); - for (size_t i = 0; i < static_cast(data_width); i++) { - deep_image->offset_table[y][i] = pixelOffsetTable[i]; - } - } - - std::vector sample_data( - static_cast(unpackedSampleDataSize)); - - // decode sample data. - { - unsigned long dstLen = static_cast(unpackedSampleDataSize); - if (dstLen) { - if (!tinyexr::DecompressZip( - reinterpret_cast(&sample_data.at(0)), &dstLen, - data_ptr + 28 + packedOffsetTableSize, - static_cast(packedSampleDataSize))) { - return false; - } - assert(dstLen == static_cast(unpackedSampleDataSize)); - } - } - - // decode sample - int sampleSize = -1; - std::vector channel_offset_list(static_cast(num_channels)); - { - int channel_offset = 0; - for (size_t i = 0; i < static_cast(num_channels); i++) { - channel_offset_list[i] = channel_offset; - if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT - channel_offset += 4; - } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half - channel_offset += 2; - } else if (channels[i].pixel_type == - TINYEXR_PIXELTYPE_FLOAT) { // float - channel_offset += 4; - } else { - assert(0); - } - } - sampleSize = channel_offset; - } - assert(sampleSize >= 2); - - assert(static_cast( - pixelOffsetTable[static_cast(data_width - 1)] * - sampleSize) == sample_data.size()); - int samples_per_line = static_cast(sample_data.size()) / sampleSize; - - // - // Alloc memory - // - - // - // pixel data is stored as image[channels][pixel_samples] - // - { - tinyexr::tinyexr_uint64 data_offset = 0; - for (size_t c = 0; c < static_cast(num_channels); c++) { - deep_image->image[c][y] = static_cast( - malloc(sizeof(float) * static_cast(samples_per_line))); - - if (channels[c].pixel_type == 0) { 
// UINT - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - unsigned int ui; - unsigned int *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(int))); - tinyexr::cpy4(&ui, src_ptr); - deep_image->image[c][y][x] = static_cast(ui); // @fixme - } - data_offset += - sizeof(unsigned int) * static_cast(samples_per_line); - } else if (channels[c].pixel_type == 1) { // half - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - tinyexr::FP16 f16; - const unsigned short *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(short))); - tinyexr::cpy2(&(f16.u), src_ptr); - tinyexr::FP32 f32 = half_to_float(f16); - deep_image->image[c][y][x] = f32.f; - } - data_offset += sizeof(short) * static_cast(samples_per_line); - } else { // float - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - float f; - const float *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(float))); - tinyexr::cpy4(&f, src_ptr); - deep_image->image[c][y][x] = f; - } - data_offset += sizeof(float) * static_cast(samples_per_line); - } - } - } - } // y - - deep_image->width = data_width; - deep_image->height = data_height; - - deep_image->channel_names = static_cast( - malloc(sizeof(const char *) * static_cast(num_channels))); - for (size_t c = 0; c < static_cast(num_channels); c++) { -#ifdef _WIN32 - deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); -#else - deep_image->channel_names[c] = strdup(channels[c].name.c_str()); -#endif - } - deep_image->num_channels = num_channels; - - return TINYEXR_SUCCESS; -} - -void InitEXRImage(EXRImage *exr_image) { - if (exr_image == NULL) { - return; - } - - exr_image->width = 0; - exr_image->height = 0; - exr_image->num_channels = 0; - - exr_image->images = NULL; - exr_image->tiles = NULL; - - exr_image->num_tiles = 0; -} - -void FreeEXRErrorMessage(const char *msg) { - if (msg) { - free(reinterpret_cast(const_cast(msg))); - } - return; -} 
- -void InitEXRHeader(EXRHeader *exr_header) { - if (exr_header == NULL) { - return; - } - - memset(exr_header, 0, sizeof(EXRHeader)); -} - -int FreeEXRHeader(EXRHeader *exr_header) { - if (exr_header == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (exr_header->channels) { - free(exr_header->channels); - } - - if (exr_header->pixel_types) { - free(exr_header->pixel_types); - } - - if (exr_header->requested_pixel_types) { - free(exr_header->requested_pixel_types); - } - - for (int i = 0; i < exr_header->num_custom_attributes; i++) { - if (exr_header->custom_attributes[i].value) { - free(exr_header->custom_attributes[i].value); - } - } - - if (exr_header->custom_attributes) { - free(exr_header->custom_attributes); - } - - return TINYEXR_SUCCESS; -} - -int FreeEXRImage(EXRImage *exr_image) { - if (exr_image == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - for (int i = 0; i < exr_image->num_channels; i++) { - if (exr_image->images && exr_image->images[i]) { - free(exr_image->images[i]); - } - } - - if (exr_image->images) { - free(exr_image->images); - } - - if (exr_image->tiles) { - for (int tid = 0; tid < exr_image->num_tiles; tid++) { - for (int i = 0; i < exr_image->num_channels; i++) { - if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { - free(exr_image->tiles[tid].images[i]); - } - } - if (exr_image->tiles[tid].images) { - free(exr_image->tiles[tid].images); - } - } - free(exr_image->tiles); - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, - const char *filename, const char **err) { - if (exr_header == NULL || exr_version == NULL || filename == NULL) { - tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile", - err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read 
file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - - if (ret != filesize) { - tinyexr::SetErrorMessage("fread() error on " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - } - - return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize, - err); -} - -int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, - int *num_headers, - const EXRVersion *exr_version, - const unsigned char *memory, size_t size, - const char **err) { - if (memory == NULL || exr_headers == NULL || num_headers == NULL || - exr_version == NULL) { - // Invalid argument - tinyexr::SetErrorMessage( - "Invalid argument for ParseEXRMultipartHeaderFromMemory", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - tinyexr::SetErrorMessage("Data size too short", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory + tinyexr::kEXRVersionSize; - size_t marker_size = size - tinyexr::kEXRVersionSize; - - std::vector infos; - - for (;;) { - tinyexr::HeaderInfo info; - info.clear(); - - std::string err_str; - bool empty_header = false; - int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, - marker, marker_size); - - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage(err_str, err); - return ret; - } - - if (empty_header) { - marker += 1; // skip '\0' - break; - } - - // `chunkCount` must exist in the header. - if (info.chunk_count == 0) { - tinyexr::SetErrorMessage( - "`chunkCount' attribute is not found in the header.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - infos.push_back(info); - - // move to next header. 
- marker += info.header_len; - size -= info.header_len; - } - - // allocate memory for EXRHeader and create array of EXRHeader pointers. - (*exr_headers) = - static_cast(malloc(sizeof(EXRHeader *) * infos.size())); - for (size_t i = 0; i < infos.size(); i++) { - EXRHeader *exr_header = static_cast(malloc(sizeof(EXRHeader))); - - ConvertHeader(exr_header, infos[i]); - - // transfoer `tiled` from version. - exr_header->tiled = exr_version->tiled; - - (*exr_headers)[i] = exr_header; - } - - (*num_headers) = static_cast(infos.size()); - - return TINYEXR_SUCCESS; -} - -int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, - const EXRVersion *exr_version, - const char *filename, const char **err) { - if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || - filename == NULL) { - tinyexr::SetErrorMessage( - "Invalid argument for ParseEXRMultipartHeaderFromFile()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - - if (ret != filesize) { - tinyexr::SetErrorMessage("`fread' error. 
file may be corrupted.", err); - return TINYEXR_ERROR_INVALID_FILE; - } - } - - return ParseEXRMultipartHeaderFromMemory( - exr_headers, num_headers, exr_version, &buf.at(0), filesize, err); -} - -int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, - size_t size) { - if (version == NULL || memory == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory; - - // Header check. - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - - if (memcmp(marker, header, 4) != 0) { - return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; - } - marker += 4; - } - - version->tiled = false; - version->long_name = false; - version->non_image = false; - version->multipart = false; - - // Parse version header. - { - // must be 2 - if (marker[0] != 2) { - return TINYEXR_ERROR_INVALID_EXR_VERSION; - } - - if (version == NULL) { - return TINYEXR_SUCCESS; // May OK - } - - version->version = 2; - - if (marker[1] & 0x2) { // 9th bit - version->tiled = true; - } - if (marker[1] & 0x4) { // 10th bit - version->long_name = true; - } - if (marker[1] & 0x8) { // 11th bit - version->non_image = true; // (deep image) - } - if (marker[1] & 0x10) { // 12th bit - version->multipart = true; - } - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { - if (filename == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t file_size; - // Compute size - fseek(fp, 0, SEEK_END); - file_size = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (file_size < tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_FILE; - } - - unsigned char buf[tinyexr::kEXRVersionSize]; - size_t ret = fread(&buf[0], 1, 
tinyexr::kEXRVersionSize, fp); - fclose(fp); - - if (ret != tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_FILE; - } - - return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); -} - -int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, - const EXRHeader **exr_headers, - unsigned int num_parts, - const unsigned char *memory, - const size_t size, const char **err) { - if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || - memory == NULL || (size <= tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage( - "Invalid argument for LoadEXRMultipartImageFromMemory()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - // compute total header size. - size_t total_header_size = 0; - for (unsigned int i = 0; i < num_parts; i++) { - if (exr_headers[i]->header_len == 0) { - tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - total_header_size += exr_headers[i]->header_len; - } - - const char *marker = reinterpret_cast( - memory + total_header_size + 4 + - 4); // +8 for magic number and version header. - - marker += 1; // Skip empty header. - - // NOTE 1: - // In multipart image, There is 'part number' before chunk data. - // 4 byte : part number - // 4+ : chunk - // - // NOTE 2: - // EXR spec says 'part number' is 'unsigned long' but actually this is - // 'unsigned int(4 bytes)' in OpenEXR implementation... - // http://www.openexr.com/openexrfilelayout.pdf - - // Load chunk offset table. 
- std::vector > chunk_offset_table_list; - for (size_t i = 0; i < static_cast(num_parts); i++) { - std::vector offset_table( - static_cast(exr_headers[i]->chunk_count)); - - for (size_t c = 0; c < offset_table.size(); c++) { - tinyexr::tinyexr_uint64 offset; - memcpy(&offset, marker, 8); - tinyexr::swap8(&offset); - - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; - } - - offset_table[c] = offset + 4; // +4 to skip 'part number' - marker += 8; - } - - chunk_offset_table_list.push_back(offset_table); - } - - // Decode image. - for (size_t i = 0; i < static_cast(num_parts); i++) { - std::vector &offset_table = - chunk_offset_table_list[i]; - - // First check 'part number' is identitical to 'i' - for (size_t c = 0; c < offset_table.size(); c++) { - const unsigned char *part_number_addr = - memory + offset_table[c] - 4; // -4 to move to 'part number' field. - unsigned int part_no; - memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 - tinyexr::swap4(&part_no); - - if (part_no != i) { - tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - - std::string e; - int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table, - memory, size, &e); - if (ret != TINYEXR_SUCCESS) { - if (!e.empty()) { - tinyexr::SetErrorMessage(e, err); - } - return ret; - } - } - - return TINYEXR_SUCCESS; -} - -int LoadEXRMultipartImageFromFile(EXRImage *exr_images, - const EXRHeader **exr_headers, - unsigned int num_parts, const char *filename, - const char **err) { - if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { - tinyexr::SetErrorMessage( - "Invalid argument for LoadEXRMultipartImageFromFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - 
tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - (void)ret; - } - - return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, - &buf.at(0), filesize, err); -} - -int SaveEXR(const float *data, int width, int height, int components, - const int save_as_fp16, const char *outfilename, const char **err) { - if ((components == 1) || components == 3 || components == 4) { - // OK - } else { - std::stringstream ss; - ss << "Unsupported component value : " << components << std::endl; - - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRHeader header; - InitEXRHeader(&header); - - if ((width < 16) && (height < 16)) { - // No compression for small image. - header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; - } else { - header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; - } - - EXRImage image; - InitEXRImage(&image); - - image.num_channels = components; - - std::vector images[4]; - - if (components == 1) { - images[0].resize(static_cast(width * height)); - memcpy(images[0].data(), data, sizeof(float) * size_t(width * height)); - } else { - images[0].resize(static_cast(width * height)); - images[1].resize(static_cast(width * height)); - images[2].resize(static_cast(width * height)); - images[3].resize(static_cast(width * height)); - - // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers - for (size_t i = 0; i < static_cast(width * height); i++) { - images[0][i] = data[static_cast(components) * i + 0]; - images[1][i] = data[static_cast(components) * i + 1]; - images[2][i] = data[static_cast(components) * i + 2]; - if (components == 4) { - images[3][i] = data[static_cast(components) * i + 3]; - } - } - } - - float *image_ptr[4] = {0, 0, 0, 0}; - if (components == 4) { - image_ptr[0] = &(images[3].at(0)); // A - image_ptr[1] = &(images[2].at(0)); // B - image_ptr[2] = &(images[1].at(0)); // G - image_ptr[3] = &(images[0].at(0)); // R - } else if (components == 3) { - image_ptr[0] = &(images[2].at(0)); // B - image_ptr[1] = &(images[1].at(0)); // G - image_ptr[2] = &(images[0].at(0)); // R - } else if (components == 1) { - image_ptr[0] = &(images[0].at(0)); // A - } - - image.images = reinterpret_cast(image_ptr); - image.width = width; - image.height = height; - - header.num_channels = components; - header.channels = static_cast(malloc( - sizeof(EXRChannelInfo) * static_cast(header.num_channels))); - // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
- if (components == 4) { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "A", 255); - strncpy_s(header.channels[1].name, "B", 255); - strncpy_s(header.channels[2].name, "G", 255); - strncpy_s(header.channels[3].name, "R", 255); -#else - strncpy(header.channels[0].name, "A", 255); - strncpy(header.channels[1].name, "B", 255); - strncpy(header.channels[2].name, "G", 255); - strncpy(header.channels[3].name, "R", 255); -#endif - header.channels[0].name[strlen("A")] = '\0'; - header.channels[1].name[strlen("B")] = '\0'; - header.channels[2].name[strlen("G")] = '\0'; - header.channels[3].name[strlen("R")] = '\0'; - } else if (components == 3) { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "B", 255); - strncpy_s(header.channels[1].name, "G", 255); - strncpy_s(header.channels[2].name, "R", 255); -#else - strncpy(header.channels[0].name, "B", 255); - strncpy(header.channels[1].name, "G", 255); - strncpy(header.channels[2].name, "R", 255); -#endif - header.channels[0].name[strlen("B")] = '\0'; - header.channels[1].name[strlen("G")] = '\0'; - header.channels[2].name[strlen("R")] = '\0'; - } else { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "A", 255); -#else - strncpy(header.channels[0].name, "A", 255); -#endif - header.channels[0].name[strlen("A")] = '\0'; - } - - header.pixel_types = static_cast( - malloc(sizeof(int) * static_cast(header.num_channels))); - header.requested_pixel_types = static_cast( - malloc(sizeof(int) * static_cast(header.num_channels))); - for (int i = 0; i < header.num_channels; i++) { - header.pixel_types[i] = - TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image - - if (save_as_fp16 > 0) { - header.requested_pixel_types[i] = - TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format - } else { - header.requested_pixel_types[i] = - TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
- // no precision reduction) - } - } - - int ret = SaveEXRImageToFile(&image, &header, outfilename, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - free(header.channels); - free(header.pixel_types); - free(header.requested_pixel_types); - - return ret; -} - -#ifdef __clang__ -// zero-as-null-ppinter-constant -#pragma clang diagnostic pop -#endif - -#endif // TINYEXR_IMPLEMENTATION_DEIFNED -#endif // TINYEXR_IMPLEMENTATION diff --git a/zenovis/stbi/src/stbi.c b/zenovis/stbi/src/stbi.c index aa7c528a9c..72ee413543 100644 --- a/zenovis/stbi/src/stbi.c +++ b/zenovis/stbi/src/stbi.c @@ -2,3 +2,6 @@ #define STB_IMAGE_WRITE_IMPLEMENTATION #include #include + +#define STB_DXT_IMPLEMENTATION +#include "stb_dxt.h" diff --git a/zenovis/stbi/src/tinyexr.cpp b/zenovis/stbi/src/tinyexr.cpp deleted file mode 100644 index a1a456ac61..0000000000 --- a/zenovis/stbi/src/tinyexr.cpp +++ /dev/null @@ -1,2 +0,0 @@ -#define TINYEXR_IMPLEMENTATION -#include diff --git a/zenovis/xinxinoptix/BCX.h b/zenovis/xinxinoptix/BCX.h new file mode 100644 index 0000000000..05e2a353c4 --- /dev/null +++ b/zenovis/xinxinoptix/BCX.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +template +inline void compress(std::vector &packed, std::vector &block) { + + if constexpr (N == 1) + return stb_compress_bc4_block(packed.data(), (unsigned char*)block.data()); + if constexpr (N == 2) + return stb_compress_bc5_block(packed.data(), (unsigned char*)block.data()); + if constexpr (N == 4) + return stb_compress_dxt_block(packed.data(), (unsigned char*)block.data(), 1, STB_DXT_HIGHQUAL); +} + +template +inline std::vector compressBCx(unsigned char* img, uint32_t nx, uint32_t ny) { + + static std::map sizes { + { 1, 8 }, + { 2, 16}, + { 4, 16} + }; + + const auto size_per_packed = sizes[N]; + + auto count = size_per_packed * (nx/4) * (ny/4); + std::vector result(count); + + tbb::task_group bc_group; + + for (size_t i=0; i block(16 * N, 0); + std::vector 
packed(size_per_packed, 0); + + for (size_t j=0; j(packed, block); + + auto offset = size_per_packed * ((nx/4) * i/4 + j/4); + memcpy(result.data()+offset, packed.data(), packed.size()); + } + + }); // run + } + + bc_group.wait(); + return result; +} + +inline std::vector compressBC4(unsigned char* one_byte_per_pixel, uint32_t nx, uint32_t ny) { + return compressBCx<1>(one_byte_per_pixel, nx, ny); +} + +inline std::vector compressBC5(unsigned char* two_byte_per_pixel, uint32_t nx, uint32_t ny) { + return compressBCx<2>(two_byte_per_pixel, nx, ny); +} + +inline std::vector compressBC3(unsigned char* four_byte_per_pixel, uint32_t nx, uint32_t ny) { + return compressBCx<4>(four_byte_per_pixel, nx, ny); +} \ No newline at end of file diff --git a/zenovis/xinxinoptix/CMakeLists.txt b/zenovis/xinxinoptix/CMakeLists.txt index 43e552e726..0a927a9b76 100644 --- a/zenovis/xinxinoptix/CMakeLists.txt +++ b/zenovis/xinxinoptix/CMakeLists.txt @@ -15,6 +15,8 @@ target_sources(zenovis PRIVATE Shape.h XAS.h + BCX.h + LightTree.cpp LightTree.h LightBounds.cpp diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 310d015263..8a0ee9e52c 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -48,6 +48,7 @@ #include #include +#include "BCX.h" #include "ies/ies.h" #include "zeno/utils/fileio.h" @@ -338,6 +339,7 @@ inline void createRTProgramGroups(OptixDeviceContext &context, OptixModule &_mod } struct cuTexture{ std::string md5; + bool blockCompression = false; cudaArray_t gpuImageArray = nullptr; cudaTextureObject_t texture = 0llu; @@ -367,40 +369,83 @@ inline sutil::Texture loadCubeMap(const std::string& ppm_filename) return loadPPMTexture( ppm_filename, make_float3(1,1,1), nullptr ); } -inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, int ny, int nc) + + + +inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, int ny, int nc, bool blockCompression) { auto texture = std::make_shared(nx, 
ny); - std::vector data; - data.resize(nx*ny); - for(int j=0;j=1?(img[idx*nc + 0]):(unsigned char)0, - nc>=2?(img[idx*nc + 1]):(unsigned char)0, - nc>=3?(img[idx*nc + 2]):(unsigned char)0, - nc>=4?(img[idx*nc + 3]):(unsigned char)0, - }; - } + + std::vector alt; - cudaChannelFormatDesc channelDescriptor = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned); - cudaError_t rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0); - if (rc != cudaSuccess) { - std::cout<<"texture space alloc failed\n"; - return 0; + if (nc == 3) { // cuda doesn't rgb, should be rgba + auto count = nx * ny; + alt.resize(count); + + for (size_t i=0; igpuImageArray, 0, 0, data.data(), - nx * sizeof(unsigned char) * 4, - nx * sizeof(unsigned char) * 4, - ny, - cudaMemcpyHostToDevice); + + if (nx%4 || ny%4) { + blockCompression = false; + } + + cudaError_t rc; + + if (blockCompression == false) { + std::vector xyzw(4, 0); + for (int i=0; igpuImageArray, &channelDescriptor, nx, ny, 0); + if (rc != cudaSuccess) { + std::cout<<"texture space alloc failed\n"; + return 0; + } + // rc = cudaMemcpy2DToArray(texture->gpuImageArray, 0, 0, img, + // nx * sizeof(unsigned char) * nc, + // nx * sizeof(unsigned char) * nc, + // ny, + // cudaMemcpyHostToDevice); + + rc = cudaMemcpyToArray(texture->gpuImageArray, 0, 0, img, sizeof(unsigned char) * nc * nx * ny, cudaMemcpyHostToDevice); + + } else { + + std::vector bc_data; + cudaChannelFormatDesc channelDescriptor; + + if (nc == 1) { + bc_data = compressBC4(img, nx, ny); + channelDescriptor = cudaCreateChannelDesc(); + } else if (nc == 2) { + bc_data = compressBC5(img, nx, ny); + channelDescriptor = cudaCreateChannelDesc(); + } else if (nc == 4) { + bc_data = compressBC3(img, nx, ny); + channelDescriptor = cudaCreateChannelDesc(); + } + + rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0); + + if (rc != cudaSuccess) { + std::cout<<"texture space alloc failed\n"; + return 0; + } + + rc = 
cudaMemcpyToArray(texture->gpuImageArray, 0, 0, bc_data.data(), bc_data.size(), cudaMemcpyHostToDevice); + } + if (rc != cudaSuccess) { std::cout<<"texture data copy failed\n"; cudaFreeArray(texture->gpuImageArray); texture->gpuImageArray = nullptr; return 0; } + cudaResourceDesc resourceDescriptor = { }; resourceDescriptor.resType = cudaResourceTypeArray; resourceDescriptor.res.array.array = texture->gpuImageArray; @@ -422,6 +467,8 @@ inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, in texture->gpuImageArray = nullptr; return 0; } + + texture->blockCompression = blockCompression; return texture; } @@ -721,14 +768,22 @@ namespace detail { } template -inline void addTexture(std::string path, TaskType* task=nullptr) +inline void addTexture(std::string path, bool blockCompression=false, TaskType* task=nullptr) { zeno::log_debug("loading texture :{}", path); std::string native_path = std::filesystem::u8path(path).string(); + + bool should_reload = false; + if (std::filesystem::exists(native_path)) { std::filesystem::file_time_type ftime = std::filesystem::last_write_time(native_path); - if(g_tex.count(path) && g_tex_last_write_time[path] == ftime) { - return; + + if(g_tex_last_write_time[path] == ftime && g_tex.count(path) ) { + + if (blockCompression == g_tex[path]->blockCompression) { + return; + } + should_reload = true; } g_tex_last_write_time[path] = ftime; } @@ -740,7 +795,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr) auto input = readData(native_path); std::string md5Hash = calculateMD5(input); - if (md5_path_mapping.count(md5Hash)) { + if (md5_path_mapping.count(md5Hash) && !should_reload) { g_tex[path] = g_tex[md5_path_mapping[md5Hash]]; zeno::log_info("path {} reuse {} tex", path, md5_path_mapping[md5Hash]); return; @@ -816,19 +871,24 @@ inline void addTexture(std::string path, TaskType* task=nullptr) } nx = std::max(img->userData().get2("w"), 1); ny = std::max(img->userData().get2("h"), 1); - int channels = 
std::max(img->userData().get2("channels"), 3); - nc = 3; + nc = std::max(img->userData().get2("channels"), 1); + + if (nc < 4) { - if (channels == 3) { std::vector ucdata; - ucdata.resize(img->verts.size()*3); - for(int i=0;iverts.size()*3;i++) - { - ucdata[i] = (unsigned char)(((float*)img->verts.data())[i]*255.0); + ucdata.resize(img->verts.size() * nc); + + for(size_t i=0; iverts.size(); i+=1 ) { + + for (int c=0; cverts[i][c] * 255.0); + } } - g_tex[path] = makeCudaTexture(ucdata.data(), nx, ny, 3); - } - else { + g_tex[path] = makeCudaTexture(ucdata.data(), nx, ny, nc, blockCompression); + + } else { + + assert(nc == 4); std::vector data(nx * ny); auto &alpha = img->verts.attr("alpha"); for (auto i = 0; i < nx * ny; i++) { @@ -838,7 +898,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr) data[i].w = (unsigned char)(alpha[i] *255.0); } - g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4); + g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4, blockCompression); } lookupTexture = [&img](uint32_t idx) { @@ -876,7 +936,7 @@ inline void addTexture(std::string path, TaskType* task=nullptr) nx = std::max(nx, 1); ny = std::max(ny, 1); - g_tex[path] = makeCudaTexture(img, nx, ny, nc); + g_tex[path] = makeCudaTexture(img, nx, ny, nc, blockCompression); lookupTexture = [img](uint32_t idx) { return (float)img[idx] / 255; @@ -925,7 +985,7 @@ inline void addSkyTexture(std::string path) { calc_sky_cdf_map(tex, nx, ny, nc, lookupTexture); }; - addTexture(path, &task); + addTexture(path, false, &task); } struct OptixShaderCore { From f619e0ac6d305d89554916681f064f078fafb57c Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 2 Jul 2024 18:19:06 +0800 Subject: [PATCH 084/244] fix build --- projects/CUDA/zpc | 2 +- projects/CuLagrange/pbd/ConstraintsBuilder.cu | 2 +- .../CuLagrange/pbd/constraint_function_kernel/constraint.cuh | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/zpc 
b/projects/CUDA/zpc index 740079464b..282e0b1f19 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 740079464b56bfd84269610509bb5ab941710826 +Subproject commit 282e0b1f197fdf37b1c4a496b502b85d4d24f359 diff --git a/projects/CuLagrange/pbd/ConstraintsBuilder.cu b/projects/CuLagrange/pbd/ConstraintsBuilder.cu index d3f86b7f60..b025c7e676 100644 --- a/projects/CuLagrange/pbd/ConstraintsBuilder.cu +++ b/projects/CuLagrange/pbd/ConstraintsBuilder.cu @@ -470,7 +470,7 @@ virtual void apply() override { rest_scale = rest_scale, eles = proxy({},eles), verts = proxy({},verts)] ZS_LAMBDA(auto ai,const auto& pair) mutable { - eles.tuple(dim_c<2>,"inds",ai) = pair.reinterpret_bits(); + eles.tuple(dim_c<2>,"inds",ai) = pair.template reinterpret_bits(); auto v0 = verts.pack(dim_c<3>,"x",pair[0]); auto v1 = verts.pack(dim_c<3>,"x",pair[1]); eles("r",ai) = (v0 - v1).norm() * rest_scale; diff --git a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh index 3572e62c43..e7c55a7971 100644 --- a/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh +++ b/projects/CuLagrange/pbd/constraint_function_kernel/constraint.cuh @@ -976,6 +976,7 @@ constexpr bool solve_BendTwistConstraint( } // ---------------------------------------------------------------------------------------------- +#if 0 template constexpr bool solve_PerpendiculaBisectorConstraint( const VECTOR3d &p0, SCALER invMass0, @@ -1002,6 +1003,7 @@ constexpr bool solve_PerpendiculaBisectorConstraint( return true; } +#endif // // ---------------------------------------------------------------------------------------------- // template From cb525d66250e9a95c0bc5a383efdc5e811fd3734 Mon Sep 17 00:00:00 2001 From: iaomw Date: Wed, 3 Jul 2024 18:00:16 +0800 Subject: [PATCH 085/244] minor updates --- zenovis/xinxinoptix/BCX.h | 34 ++++++++++++++------------------ zenovis/xinxinoptix/OptiXStuff.h | 10 ++++------ 
2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/zenovis/xinxinoptix/BCX.h b/zenovis/xinxinoptix/BCX.h index 05e2a353c4..1635aa5cc1 100644 --- a/zenovis/xinxinoptix/BCX.h +++ b/zenovis/xinxinoptix/BCX.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -9,26 +10,22 @@ #include template -inline void compress(std::vector &packed, std::vector &block) { +inline void compress(unsigned char* packed, unsigned char* block) { if constexpr (N == 1) - return stb_compress_bc4_block(packed.data(), (unsigned char*)block.data()); + stb_compress_bc4_block(packed, block); if constexpr (N == 2) - return stb_compress_bc5_block(packed.data(), (unsigned char*)block.data()); + stb_compress_bc5_block(packed, block); if constexpr (N == 4) - return stb_compress_dxt_block(packed.data(), (unsigned char*)block.data(), 1, STB_DXT_HIGHQUAL); + stb_compress_dxt_block(packed, block, 1, STB_DXT_HIGHQUAL); } -template +template inline std::vector compressBCx(unsigned char* img, uint32_t nx, uint32_t ny) { - static std::map sizes { - { 1, 8 }, - { 2, 16}, - { 4, 16} - }; + static const char sizes[] = { 0, 8, 16, 8, 16 }; - const auto size_per_packed = sizes[N]; + const auto size_per_packed = sizes[channel]; auto count = size_per_packed * (nx/4) * (ny/4); std::vector result(count); @@ -39,8 +36,7 @@ inline std::vector compressBCx(unsigned char* img, uint32_t nx, u bc_group.run([&, i]{ - std::vector block(16 * N, 0); - std::vector packed(size_per_packed, 0); + std::vector block(16 * byte_per_source_pixel, 0); for (size_t j=0; j compressBCx(unsigned char* img, uint32_t nx, u //auto offset_j = k % 4; auto index = nx * (i+offset_i) + (j); - //raw_block[k] = img[index]; - auto dst_ptr = block.data() + k*N; - auto src_ptr = img + index*N ; - memcpy(dst_ptr, src_ptr, N * 4); + + auto dst_ptr = block.data() + k*byte_per_source_pixel; + auto src_ptr = img + index*byte_per_source_pixel; + memcpy(dst_ptr, src_ptr, byte_per_source_pixel * 4); } - compress(packed, block); auto 
offset = size_per_packed * ((nx/4) * i/4 + j/4); - memcpy(result.data()+offset, packed.data(), packed.size()); + auto packed = result.data()+offset; + compress(packed, block.data()); } }); // run diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 8a0ee9e52c..55e3f65e51 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -378,7 +378,7 @@ inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, in std::vector alt; - if (nc == 3) { // cuda doesn't rgb, should be rgba + if (nc == 3 && !blockCompression) { // cuda doesn't support raw rgb, should be raw rgba or compressed rgb auto count = nx * ny; alt.resize(count); @@ -405,11 +405,6 @@ inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, in std::cout<<"texture space alloc failed\n"; return 0; } - // rc = cudaMemcpy2DToArray(texture->gpuImageArray, 0, 0, img, - // nx * sizeof(unsigned char) * nc, - // nx * sizeof(unsigned char) * nc, - // ny, - // cudaMemcpyHostToDevice); rc = cudaMemcpyToArray(texture->gpuImageArray, 0, 0, img, sizeof(unsigned char) * nc * nx * ny, cudaMemcpyHostToDevice); @@ -427,6 +422,9 @@ inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, in } else if (nc == 4) { bc_data = compressBC3(img, nx, ny); channelDescriptor = cudaCreateChannelDesc(); + } else { + std::cout<<"texture data unsupported \n"; + return 0; } rc = cudaMallocArray(&texture->gpuImageArray, &channelDescriptor, nx, ny, 0); From ca5eefdfa6ee68afcca5904c4f2a846c2bb7f979 Mon Sep 17 00:00:00 2001 From: iaomw Date: Wed, 3 Jul 2024 18:01:03 +0800 Subject: [PATCH 086/244] bc1 rgb --- zenovis/xinxinoptix/BCX.h | 16 ++++++++++++++++ zenovis/xinxinoptix/OptiXStuff.h | 3 +++ 2 files changed, 19 insertions(+) diff --git a/zenovis/xinxinoptix/BCX.h b/zenovis/xinxinoptix/BCX.h index 1635aa5cc1..dfc9bf42a8 100644 --- a/zenovis/xinxinoptix/BCX.h +++ b/zenovis/xinxinoptix/BCX.h @@ -16,6 +16,8 @@ inline void compress(unsigned char* packed, 
unsigned char* block) { stb_compress_bc4_block(packed, block); if constexpr (N == 2) stb_compress_bc5_block(packed, block); + if constexpr (N == 3) + stb_compress_dxt_block(packed, block, 0, STB_DXT_HIGHQUAL); if constexpr (N == 4) stb_compress_dxt_block(packed, block, 1, STB_DXT_HIGHQUAL); } @@ -71,6 +73,20 @@ inline std::vector compressBC5(unsigned char* two_byte_per_pixel, return compressBCx<2>(two_byte_per_pixel, nx, ny); } +inline std::vector compressBC1(unsigned char* three_byte_per_pixel, uint32_t nx, uint32_t ny) { + + auto raw = three_byte_per_pixel; + + auto count = nx * ny; + std::vector alt(count); + + for (size_t i=0; i((unsigned char*)alt.data(), nx, ny); +} + inline std::vector compressBC3(unsigned char* four_byte_per_pixel, uint32_t nx, uint32_t ny) { return compressBCx<4>(four_byte_per_pixel, nx, ny); } \ No newline at end of file diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 55e3f65e51..933129abfa 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -419,6 +419,9 @@ inline std::shared_ptr makeCudaTexture(unsigned char* img, int nx, in } else if (nc == 2) { bc_data = compressBC5(img, nx, ny); channelDescriptor = cudaCreateChannelDesc(); + } else if (nc == 3) { + bc_data = compressBC1(img, nx, ny); + channelDescriptor = cudaCreateChannelDesc(); } else if (nc == 4) { bc_data = compressBC3(img, nx, ny); channelDescriptor = cudaCreateChannelDesc(); From 8c1bbf3a4152f9fc7cd213db9ea2c2d019bb327e Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 4 Jul 2024 15:16:56 +0800 Subject: [PATCH 087/244] TexKey --- zenovis/src/optx/RenderEngineOptx.cpp | 33 ++++---- zenovis/xinxinoptix/OptiXStuff.h | 104 +++++++++++++++--------- zenovis/xinxinoptix/optixPathTracer.cpp | 24 +++--- zenovis/xinxinoptix/xinxinoptixapi.h | 3 +- 4 files changed, 96 insertions(+), 68 deletions(-) diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 3b808a3401..8ef3ee2564 
100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -697,7 +697,7 @@ struct GraphicsManager { if (!path.empty()) { if (OptixUtil::sky_tex.has_value() && OptixUtil::sky_tex.value() != path && OptixUtil::sky_tex.value() != OptixUtil::default_sky_tex ) { - OptixUtil::removeTexture(OptixUtil::sky_tex.value()); + OptixUtil::removeTexture( {OptixUtil::sky_tex.value(), false} ); } OptixUtil::sky_tex = path; @@ -1227,7 +1227,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { // Auto unload unused texure { - std::map realNeedTexPaths; + std::set realNeedTexPaths; for(auto const &[matkey, mtldet] : matMap) { if (mtldet->parameters.find("vol") != std::string::npos || cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 @@ -1248,24 +1248,26 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { realNeedTexPaths.insert( {ld.textureKey, false}); } } - std::vector needToRemoveTexPaths; - for(auto const &[tex, _]: OptixUtil::g_tex) { - if (realNeedTexPaths.count(tex) > 0) { + std::vector needToRemoveTexPaths; + for(auto const &[key, _]: OptixUtil::tex_lut) { + + if (realNeedTexPaths.count(key) > 0) { continue; } - if (OptixUtil::sky_tex.has_value() && tex == OptixUtil::sky_tex.value()) { + if (OptixUtil::sky_tex.has_value() && key.path == OptixUtil::sky_tex.value()) { continue; } - if (tex == OptixUtil::default_sky_tex) { + if (key.path == OptixUtil::default_sky_tex) { continue; } - needToRemoveTexPaths.emplace_back(tex); + needToRemoveTexPaths.emplace_back(key); } for (const auto& need_remove_tex: needToRemoveTexPaths) { OptixUtil::removeTexture(need_remove_tex); } - for (const auto& realNeedTexPath: realNeedTexPaths) { - OptixUtil::addTexture(realNeedTexPath.first, realNeedTexPath.second); + for (const auto& realNeedTexKey: realNeedTexPaths) { + + OptixUtil::addTexture(realNeedTexKey.path, realNeedTexKey.blockCompression); } } for(auto const &[matkey, mtldet] : matMap) @@ -1323,12 +1325,6 @@ struct 
RenderEngineOptx : RenderEngine, zeno::disable_copy { callable.append(shadtpl2.second); //std::cout< shaderTex; - for(auto tex:mtldet->tex2Ds) - { - shaderTex.emplace_back(tex->path); - } - ShaderPrepared shaderP; shaderP.callable = callable; @@ -1336,7 +1332,10 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { shaderP.parameters = mtldet->parameters; shaderP.matid = mtldet->mtlidkey; - shaderP.tex_names = shaderTex; + for(auto tex:mtldet->tex2Ds) + { + shaderP.tex_keys.push_back( {tex->path, tex->blockCompression} ); + } if (isVol) { diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 933129abfa..71fa0ffe17 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -675,7 +675,22 @@ inline std::vector loadIES(const std::string& path, float& coneAngle) return iesData; } -inline std::map> g_tex; +struct TexKey { + std::string path; + bool blockCompression; + + bool operator == (const TexKey& other) const { + return path == other.path && blockCompression == other.blockCompression; + } + + bool operator < (const TexKey& other) const { + auto l = std::tie(this->path, this->blockCompression); + auto r = std::tie(other.path, other.blockCompression); + return l < r; + } +}; + +inline std::map> tex_lut; inline std::map g_tex_last_write_time; inline std::map md5_path_mapping; inline std::optional sky_tex; @@ -771,35 +786,44 @@ namespace detail { template inline void addTexture(std::string path, bool blockCompression=false, TaskType* task=nullptr) { - zeno::log_debug("loading texture :{}", path); std::string native_path = std::filesystem::u8path(path).string(); - bool should_reload = false; + TexKey tex_key {path, blockCompression}; + if (tex_lut.count(tex_key)) { + return; // do nothing + } + + zeno::log_debug("loading texture :{}", path); + + bool should_reload = false; if (std::filesystem::exists(native_path)) { std::filesystem::file_time_type ftime = 
std::filesystem::last_write_time(native_path); - if(g_tex_last_write_time[path] == ftime && g_tex.count(path) ) { - - if (blockCompression == g_tex[path]->blockCompression) { - return; - } - should_reload = true; - } - g_tex_last_write_time[path] = ftime; - } - else { - if(g_tex.count(path)) { + if(g_tex_last_write_time[path] == ftime) { return; } + should_reload = true; + g_tex_last_write_time[path] = ftime; + } else { + zeno::log_info("file {} doesn't exist", path); + return; } + auto input = readData(native_path); std::string md5Hash = calculateMD5(input); - if (md5_path_mapping.count(md5Hash) && !should_reload) { - g_tex[path] = g_tex[md5_path_mapping[md5Hash]]; - zeno::log_info("path {} reuse {} tex", path, md5_path_mapping[md5Hash]); - return; + if ( md5_path_mapping.count(md5Hash) ) { + + auto& alt_path = md5_path_mapping[md5Hash]; + auto alt_key = TexKey { alt_path, blockCompression }; + + if (tex_lut.count(alt_key)) { + + tex_lut[tex_key] = tex_lut[alt_key]; + zeno::log_info("path {} reuse {} tex", path, alt_path); + return; + } } else { md5_path_mapping[md5Hash] = path; @@ -833,7 +857,7 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* } assert(rgba); - g_tex[path] = makeCudaTexture(rgba, nx, ny, nc); + tex_lut[tex_key] = makeCudaTexture(rgba, nx, ny, nc); lookupTexture = [rgba](uint32_t idx) { return rgba[idx]; @@ -867,7 +891,7 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* // Create nodes auto img = outs.get("image"); if (img->verts.size() == 0) { - g_tex[path] = std::make_shared(); + tex_lut[tex_key] = std::make_shared(); return; } nx = std::max(img->userData().get2("w"), 1); @@ -885,7 +909,7 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* ucdata[i*nc+c] = (img->verts[i][c] * 255.0); } } - g_tex[path] = makeCudaTexture(ucdata.data(), nx, ny, nc, blockCompression); + tex_lut[tex_key] = makeCudaTexture(ucdata.data(), nx, ny, nc, blockCompression); } 
else { @@ -899,7 +923,7 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* data[i].w = (unsigned char)(alpha[i] *255.0); } - g_tex[path] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4, blockCompression); + tex_lut[tex_key] = makeCudaTexture((unsigned char *)data.data(), nx, ny, 4, blockCompression); } lookupTexture = [&img](uint32_t idx) { @@ -911,14 +935,14 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* float *img = stbi_loadf(native_path.c_str(), &nx, &ny, &nc, 0); if(!img){ zeno::log_error("loading hdr texture failed:{}", path); - g_tex[path] = std::make_shared(); + tex_lut[tex_key] = std::make_shared(); return; } nx = std::max(nx, 1); ny = std::max(ny, 1); assert(img); - g_tex[path] = makeCudaTexture(img, nx, ny, nc); + tex_lut[tex_key] = makeCudaTexture(img, nx, ny, nc); lookupTexture = [img](uint32_t idx) { return img[idx]; @@ -931,13 +955,13 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* unsigned char *img = stbi_load(native_path.c_str(), &nx, &ny, &nc, 0); if(!img){ zeno::log_error("loading ldr texture failed:{}", path); - g_tex[path] = std::make_shared(); + tex_lut[tex_key] = std::make_shared(); return; } nx = std::max(nx, 1); ny = std::max(ny, 1); - g_tex[path] = makeCudaTexture(img, nx, ny, nc, blockCompression); + tex_lut[tex_key] = makeCudaTexture(img, nx, ny, nc, blockCompression); lookupTexture = [img](uint32_t idx) { return (float)img[idx] / 255; @@ -946,27 +970,29 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* stbi_image_free(img); }; } - g_tex[path]->md5 = md5Hash; + tex_lut[tex_key]->md5 = md5Hash; if constexpr (!detail::is_void::value) { if (task != nullptr) { - (*task)(g_tex[path].get(), nx, ny, nc, lookupTexture); + (*task)(tex_lut[tex_key].get(), nx, ny, nc, lookupTexture); } } cleanupTexture(); } -inline void removeTexture(std::string path) { +inline void removeTexture(const TexKey &key) { + + 
auto& path = key.path; + if (path.size()) { - if (g_tex.count(path)) { - zeno::log_info("removeTexture: {}", path); - std::cout << "removeTexture :" << path << std::endl; - md5_path_mapping.erase(g_tex[path]->md5); + if (tex_lut.count(key)) { + zeno::log_info("removeTexture: {} blockCompresssion: {}", path, key.blockCompression); + md5_path_mapping.erase(tex_lut[key]->md5); } else { zeno::log_error("removeTexture: {} not exists!", path); } - g_tex.erase(path); + tex_lut.erase(key); g_tex_last_write_time.erase(path); } } @@ -1060,7 +1086,7 @@ struct OptixShaderWrapper raii callable_module {}; raii callable_prog_group {}; - std::map m_texs {}; + std::map m_texs {}; bool has_vdb {}; std::string parameters{}; @@ -1111,17 +1137,17 @@ struct OptixShaderWrapper { m_texs.clear(); } - void addTexture(int i, std::string name) + void addTexture(int i, TexKey key) { - m_texs[i] = name; + m_texs[i] = key; } cudaTextureObject_t getTexture(int i) { if(m_texs.find(i)!=m_texs.end()) { - if(g_tex.find(m_texs[i])!=g_tex.end()) + if(tex_lut.find(m_texs[i])!=tex_lut.end()) { - return g_tex[m_texs[i]]->texture; + return tex_lut[m_texs[i]]->texture; } return 0; } diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index f4ccbd84c2..729ad1b7a3 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -2054,7 +2054,7 @@ void show_background(bool enable) { void updatePortalLights(const std::vector& portals) { - auto &tex = OptixUtil::g_tex[OptixUtil::sky_tex.value()]; + auto &tex = OptixUtil::tex_lut[ { OptixUtil::sky_tex.value(), false } ]; auto& pll = state.plights; auto& pls = pll.list; @@ -2471,11 +2471,13 @@ void buildLightTree() { auto scale = val.coneAngle / M_PIf; light.intensity = dat.fluxFixed * scale * scale; } - } + } + + OptixUtil::TexKey tex_key {dat.textureKey, false}; - if ( OptixUtil::g_tex.count(dat.textureKey) > 0 ) { + if ( OptixUtil::tex_lut.count( tex_key ) > 0 ) { - auto& val = 
OptixUtil::g_tex.at(dat.textureKey); + auto& val = OptixUtil::tex_lut.at(tex_key); light.tex = val->texture; light.texGamma = dat.textureGamma; } @@ -2638,13 +2640,13 @@ OptixUtil::_compile_group.run([&shaders, i] () { default: {} } - auto& texs = shaders[i]->tex_names; + auto& texs = shaders[i]->tex_keys; if(texs.size()>0){ std::cout<<"texSize:"<texture == state.params.sky_texture) return; @@ -3989,10 +3991,10 @@ void optixCleanup() { state.params.sky_strength = 1.0f; state.params.sky_texture; - std::vector keys; + std::vector keys; - for (auto& [k, _] : OptixUtil::g_tex) { - if (k != OptixUtil::default_sky_tex) { + for (auto& [k, _] : OptixUtil::tex_lut) { + if (k.path != OptixUtil::default_sky_tex) { keys.push_back(k); } } diff --git a/zenovis/xinxinoptix/xinxinoptixapi.h b/zenovis/xinxinoptix/xinxinoptixapi.h index 51c54320ca..c4741fac27 100644 --- a/zenovis/xinxinoptix/xinxinoptixapi.h +++ b/zenovis/xinxinoptix/xinxinoptixapi.h @@ -12,6 +12,7 @@ #include "zeno/types/LightObject.h" #include "Portal.h" +#include "OptiXStuff.h" enum ShaderMaker { Mesh = 0, @@ -27,7 +28,7 @@ struct ShaderPrepared { std::string callable; std::string parameters; - std::vector tex_names; + std::vector tex_keys; }; namespace xinxinoptix { From 4a431bc84399152604b813128307b81a51ac8054 Mon Sep 17 00:00:00 2001 From: littlemine Date: Thu, 4 Jul 2024 16:43:53 +0800 Subject: [PATCH 088/244] zsparticleperlinnoise --- projects/CUDA/utils/Primitives.cu | 94 +++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/projects/CUDA/utils/Primitives.cu b/projects/CUDA/utils/Primitives.cu index 694bbeefff..0598963937 100644 --- a/projects/CUDA/utils/Primitives.cu +++ b/projects/CUDA/utils/Primitives.cu @@ -14,6 +14,8 @@ #include #include +#include "Noise.cuh" + namespace zeno { /// utilities @@ -74,6 +76,98 @@ float prim_reduce(typename ZenoParticles::particles_t &verts, float e, TransOp t return ret.getVal(); } +struct ZSParticlePerlinNoise : INode { + virtual void apply() 
override { + auto zspars = get_input("zspars"); + auto attrTag = get_input2("Attribute"); + auto opType = get_input2("OpType"); + auto frequency = get_input2("Frequency"); + auto offset = get_input2("Offset"); + auto roughness = get_input2("Roughness"); + auto turbulence = get_input2("Turbulence"); + auto amplitude = get_input2("Amplitude"); + auto attenuation = get_input2("Attenuation"); + auto mean = get_input2("MeanNoise"); + + bool isAccumulate = opType == "accumulate" ? true : false; + + zs::SmallString tag = attrTag; + + auto &tv = zspars->getParticles(); + + if (!tv.hasProperty(tag)) + throw std::runtime_error(fmt::format("Attribute [{}] doesn't exist!", tag)); + const int nchns = tv.getPropertySize(tag); + + auto pol = zs::cuda_exec(); + constexpr auto space = zs::execspace_e::cuda; + + pol(zs::range(tv.size()), + [tvv = zs::proxy({}, tv), tag, nchns, isAccumulate, + frequency = zs::vec::from_array(frequency), offset = zs::vec::from_array(offset), + roughness, turbulence, amplitude, attenuation, + mean = zs::vec::from_array(mean)] __device__(int no) mutable { + auto wcoord = tvv.pack(zs::dim_c<3>, "x", no); + auto pp = frequency * wcoord - offset; + + float scale = amplitude; + + if (nchns == 3) { + // fractal Brownian motion + auto fbm = zs::vec::uniform(0); + for (int i = 0; i < turbulence; ++i, pp *= 2.f, scale *= roughness) { + zs::vec pln{ZSPerlinNoise1::perlin(pp[0], pp[1], pp[2]), + ZSPerlinNoise1::perlin(pp[1], pp[2], pp[0]), + ZSPerlinNoise1::perlin(pp[2], pp[0], pp[1])}; + fbm += scale * pln; + } + auto noise = zs::vec{zs::pow(fbm[0], attenuation), zs::pow(fbm[1], attenuation), + zs::pow(fbm[2], attenuation)} + + mean; + + if (isAccumulate) + tvv.tuple(zs::dim_c<3>, tag, no) = + tvv.pack(zs::dim_c<3>, tag, no) + noise; + else + tvv.tuple(zs::dim_c<3>, tag, no) = noise; + + } else if (nchns == 1) { + float fbm = 0; + for (int i = 0; i < turbulence; ++i, pp *= 2.f, scale *= roughness) { + float pln = ZSPerlinNoise1::perlin(pp[0], pp[1], pp[2]); + 
fbm += scale * pln; + } + auto noise = zs::pow(fbm, attenuation) + mean[0]; + + if (isAccumulate) + tvv(tag, no) += noise; + else + tvv(tag, no) = noise; + } + }); + + set_output("zspars", zspars); + } +}; + +ZENDEFNODE(ZSParticlePerlinNoise, {/* inputs: */ + {"zspars", + {"string", "Attribute", "v"}, + {"enum replace accumulate", "OpType", "accumulate"}, + {"vec3f", "Frequency", "1, 1, 1"}, + {"vec3f", "Offset", "0, 0, 0"}, + {"float", "Roughness", "0.5"}, + {"int", "Turbulence", "4"}, + {"float", "Amplitude", "1.0"}, + {"float", "Attenuation", "1.0"}, + {"vec3f", "MeanNoise", "0, 0, 0"}}, + /* outputs: */ + {"zspars"}, + /* params: */ + {}, + /* category: */ + {"Eulerian"}}); + struct ZSPrimitiveReduction : zeno::INode { struct pass_on { template From 60b9a0e4815573459eb216b72956f5b378ea4b77 Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 4 Jul 2024 16:49:49 +0800 Subject: [PATCH 089/244] fix texture reload --- zenovis/xinxinoptix/OptiXStuff.h | 7 +++---- zenovis/xinxinoptix/optixPathTracer.cpp | 4 ++++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 71fa0ffe17..9d7d0cb777 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -800,10 +800,9 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* if (std::filesystem::exists(native_path)) { std::filesystem::file_time_type ftime = std::filesystem::last_write_time(native_path); - if(g_tex_last_write_time[path] == ftime) { - return; + if(g_tex_last_write_time[path] != ftime) { + should_reload = true; } - should_reload = true; g_tex_last_write_time[path] = ftime; } else { zeno::log_info("file {} doesn't exist", path); @@ -813,7 +812,7 @@ inline void addTexture(std::string path, bool blockCompression=false, TaskType* auto input = readData(native_path); std::string md5Hash = calculateMD5(input); - if ( md5_path_mapping.count(md5Hash) ) { + if ( 
md5_path_mapping.count(md5Hash) && !should_reload) { auto& alt_path = md5_path_mapping[md5Hash]; auto alt_key = TexKey { alt_path, blockCompression }; diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 729ad1b7a3..6910a8647e 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -703,6 +703,8 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path ) ); //CUDA_SYNC_CHECK(); + + timer.tick(); OPTIX_CHECK( optixLaunch( state.pipeline, @@ -714,6 +716,8 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path state.params.tile_h, // launch height 1 // launch depth ) ); + + timer.tock("frametime"); } } output_buffer.unmap(); From 0d4ca4915682ab5c66f62a346de8bd2723dda5c8 Mon Sep 17 00:00:00 2001 From: luzh Date: Thu, 4 Jul 2024 16:56:29 +0800 Subject: [PATCH 090/244] update ver. --- ui/zenoedit/zenoedit.rc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ui/zenoedit/zenoedit.rc b/ui/zenoedit/zenoedit.rc index d0f80a5c53..7522248741 100644 --- a/ui/zenoedit/zenoedit.rc +++ b/ui/zenoedit/zenoedit.rc @@ -48,8 +48,8 @@ END // VS_VERSION_INFO VERSIONINFO - FILEVERSION 1,3,1,618 - PRODUCTVERSION 1,3,1,618 + FILEVERSION 1,3,1,701 + PRODUCTVERSION 1,3,1,701 FILEFLAGSMASK 0x3fL #ifdef _DEBUG FILEFLAGS 0x1L @@ -66,12 +66,12 @@ BEGIN BEGIN VALUE "CompanyName", "ZENUSTECH" VALUE "FileDescription", "Zeno Editor" - VALUE "FileVersion", "1.3.1.618" + VALUE "FileVersion", "1.3.1.701" VALUE "InternalName", "zenoedit.rc" VALUE "LegalCopyright", "Copyright (C) 2023" VALUE "OriginalFilename", "zenoedit.rc" VALUE "ProductName", "Zeno" - VALUE "ProductVersion", "1.3.1.618" + VALUE "ProductVersion", "1.3.1.701" END END BLOCK "VarFileInfo" From abd822a87fc4c5f8e8eaca7592c46db9aa41fafb Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 5 Jul 2024 17:50:23 +0800 Subject: [PATCH 091/244] shortcut --- 
ui/zenoedit/dock/docktabcontent.cpp | 16 ++++++++++++++-- ui/zenoedit/dock/docktabcontent.h | 1 + ui/zenoedit/viewport/cameracontrol.cpp | 6 ++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/ui/zenoedit/dock/docktabcontent.cpp b/ui/zenoedit/dock/docktabcontent.cpp index 8e679b65d0..a46d5fb043 100644 --- a/ui/zenoedit/dock/docktabcontent.cpp +++ b/ui/zenoedit/dock/docktabcontent.cpp @@ -698,6 +698,18 @@ DockContent_View::DockContent_View(bool bGLView, QWidget* parent) , m_background(nullptr) { } +void DockContent_View::keyPressEvent(QKeyEvent *event) { + DockToolbarWidget::keyPressEvent(event); + int uKey = event->key(); + if (uKey == Qt::Key_C) { + auto state = m_depth->checkState(); + m_depth->setCheckState(state == Qt::Checked? Qt::Unchecked : Qt::Checked); + } + else if (uKey == Qt::Key_N) { + auto state = m_FPN->checkState(); + m_FPN->setCheckState(state == Qt::Checked? Qt::Unchecked : Qt::Checked); + } +} void DockContent_View::initToolbar(QHBoxLayout* pToolLayout) { @@ -906,11 +918,11 @@ void DockContent_View::initToolbar(QHBoxLayout* pToolLayout) { pToolLayout->addWidget(new ZLineWidget(false, QColor("#121416"))); - m_depth = new QCheckBox(tr("Depth")); + m_depth = new QCheckBox(tr("Depth[C]")); m_depth->setStyleSheet("color: white;"); m_depth->setCheckState(Qt::Checked); pToolLayout->addWidget(m_depth); - m_FPN = new QCheckBox(tr("FPN")); + m_FPN = new QCheckBox(tr("FPN[N]")); m_FPN->setStyleSheet("color: white;"); pToolLayout->addWidget(m_FPN); m_Reset = new QPushButton(tr("Reset")); diff --git a/ui/zenoedit/dock/docktabcontent.h b/ui/zenoedit/dock/docktabcontent.h index c173ca61fa..f90e995d1c 100644 --- a/ui/zenoedit/dock/docktabcontent.h +++ b/ui/zenoedit/dock/docktabcontent.h @@ -145,6 +145,7 @@ class DockContent_View : public DockToolbarWidget void initToolbar(QHBoxLayout* pToolLayout) override; QWidget *initWidget() override; void initConnections() override; + void keyPressEvent(QKeyEvent* event) override; private: DisplayWidget* 
m_pDisplay; diff --git a/ui/zenoedit/viewport/cameracontrol.cpp b/ui/zenoedit/viewport/cameracontrol.cpp index 6919ba8126..b7611ae6bd 100644 --- a/ui/zenoedit/viewport/cameracontrol.cpp +++ b/ui/zenoedit/viewport/cameracontrol.cpp @@ -771,6 +771,12 @@ bool CameraControl::fakeKeyPressEvent(int uKey) { updatePerspective(); return true; } + if ((uKey & 0xff) == Qt::Key_G && uKey & Qt::AltModifier) { + auto *scene = m_zenovis->getSession()->get_scene(); + scene->camera->reset(); + updatePerspective(); + return true; + } if (!middle_button_pressed) { return false; } From ea81a5bd4666c16ae49ad1581d9118903ec3c57e Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 8 Jul 2024 14:09:20 +0800 Subject: [PATCH 092/244] fix build --- projects/CUDA/CMakeLists.txt | 8 +------- projects/CUDA/zpc | 2 +- zeno/src/extra/CAPI.cpp | 36 ++++++++++++++++++------------------ 3 files changed, 20 insertions(+), 26 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index d740214e32..1ec0614bd6 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -59,8 +59,6 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) if (WIN32) cmake_path(GET ZS_OVERWRITE_PYTHON_INCLUDE_DIR PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") - string (REPLACE ";" " " ZS_PYTHON_LIBS "${ZS_OVERWRITE_PYTHON_LIBRARIES}") - separate_arguments(ZS_PYTHON_LIBS) add_custom_command( TARGET copy_py POST_BUILD @@ -93,11 +91,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) add_custom_command( TARGET copy_py POST_BUILD - COMMAND ${CMAKE_COMMAND} - -DLIB=${lib} - -DDLL_DIR=${PYTHON_ENV_PATH} - -DOUTPUT=$ - -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dll.cmake + COMMAND ${CMAKE_COMMAND} -E copy lib $ ) endforeach() else() diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 282e0b1f19..7958113f75 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 282e0b1f197fdf37b1c4a496b502b85d4d24f359 +Subproject commit 
7958113f7546f80647f74f1fd368aef76d5508ff diff --git a/zeno/src/extra/CAPI.cpp b/zeno/src/extra/CAPI.cpp index 9fd7824f08..40f5b58d2c 100644 --- a/zeno/src/extra/CAPI.cpp +++ b/zeno/src/extra/CAPI.cpp @@ -76,11 +76,11 @@ ZENO_CAPI Zeno_Error Zeno_CreateObjectInt(Zeno_Object *objectRet_, const int *va if (dim_ == 1) *objectRet_ = PyZeno::lutObject.create(std::make_shared(value_[0])); else if (dim_ == 2) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec2i(value_[0], value_[1]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec2i(value_[0], value_[1]))); else if (dim_ == 3) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec3i(value_[0], value_[1], value_[2]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec3i(value_[0], value_[1], value_[2]))); else if (dim_ == 4) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec4i(value_[0], value_[1], value_[2], value_[3]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec4i(value_[0], value_[1], value_[2], value_[3]))); }); } @@ -89,11 +89,11 @@ ZENO_CAPI Zeno_Error Zeno_CreateObjectFloat(Zeno_Object *objectRet_, const float if (dim_ == 1) *objectRet_ = PyZeno::lutObject.create(std::make_shared(value_[0])); else if (dim_ == 2) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec2f(value_[0], value_[1]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec2f(value_[0], value_[1]))); else if (dim_ == 3) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec3f(value_[0], value_[1], value_[2]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec3f(value_[0], value_[1], value_[2]))); else if (dim_ == 4) - *objectRet_ = PyZeno::lutObject.create(std::make_shared(vec4f(value_[0], value_[1], value_[2], value_[3]))); + *objectRet_ = PyZeno::lutObject.create(std::make_shared(zeno::vec4f(value_[0], value_[1], value_[2], value_[3]))); }); } @@ -131,19 +131,19 @@ ZENO_CAPI Zeno_Error 
Zeno_GetObjectLiterialType(Zeno_Object object_, int *typeRe if (auto numptr = dynamic_cast(optr)) { if (numptr->is()) return 11; - if (numptr->is()) + if (numptr->is()) return 12; - if (numptr->is()) + if (numptr->is()) return 13; - if (numptr->is()) + if (numptr->is()) return 14; if (numptr->is()) return 21; - if (numptr->is()) + if (numptr->is()) return 22; - if (numptr->is()) + if (numptr->is()) return 23; - if (numptr->is()) + if (numptr->is()) return 24; } return 0; @@ -161,16 +161,16 @@ ZENO_CAPI Zeno_Error Zeno_GetObjectInt(Zeno_Object object_, int *value_, size_t auto const &val = ptr->get(); value_[0] = val; } else if (dim_ == 2) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; } else if (dim_ == 3) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; value_[2] = val[2]; } else if (dim_ == 4) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; value_[2] = val[2]; @@ -189,16 +189,16 @@ ZENO_CAPI Zeno_Error Zeno_GetObjectFloat(Zeno_Object object_, float *value_, siz auto const &val = ptr->get(); value_[0] = val; } else if (dim_ == 2) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; } else if (dim_ == 3) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; value_[2] = val[2]; } else if (dim_ == 4) { - auto const &val = ptr->get(); + auto const &val = ptr->get(); value_[0] = val[0]; value_[1] = val[1]; value_[2] = val[2]; From fb39e1faca90cce40680e2124886491e620edd1b Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 8 Jul 2024 15:15:07 +0800 Subject: [PATCH 093/244] add zeno:: prefix to vec to avoid confusion --- projects/CUDA/utils/Primitives.cpp | 4 +- projects/CUDA/utils/Primitives.cu | 6 +- projects/CuEulerian/navierstokes/NS_noise.cu | 12 ++-- projects/CuLagrange/mpm/Boundary.cu | 4 
+- projects/CuLagrange/mpm/Generation.cu | 6 +- projects/CuLagrange/mpm/MPMPipeline.cu | 2 +- projects/FastFLIP/whitewater.cpp | 12 ++-- projects/GUI/GUI.cpp | 2 +- projects/ImgCV/ImageComposite.cpp | 8 +-- projects/ImgCV/ImageProcessing.cpp | 16 ++--- projects/ImgCV/ObjectRecog.cpp | 6 +- projects/ImgCV/imgcv.cpp | 10 +-- .../PBD/PBDCloth/PBDDihedralConstraint.cpp | 8 +-- projects/PBD/PBDPreSolve.cpp | 2 +- projects/PBD/PBF/PBF.h | 6 +- projects/PBD/PBF/PBFWorld_Setup.cpp | 6 +- projects/PBD/PBF/PBFWorld_testCube.cpp | 2 +- projects/PBD/PBF/PBF_BVH.h | 6 +- projects/PBD/PBF/testPBFCube.cpp | 2 +- projects/PluginPOC/CMakeLists.txt | 6 +- projects/Rigid/RigidFracture.cpp | 4 +- projects/Rigid/RigidTest.cpp | 6 +- projects/ZenoFX/pnbvhw.cpp | 6 +- projects/cgmesh/PrimitiveBooleanOp.cpp | 2 +- projects/cgmesh/VoronoiFracture.cpp | 18 +++--- projects/zenvdb/MakeVDBGrid.cpp | 2 +- projects/zenvdb/SampleVDBToPrimitive.cpp | 2 +- projects/zenvdb/VDBAddPerlinNoise.cpp | 18 +++--- projects/zenvdb/VDBAddTurbulentNoise.cpp | 22 +++---- projects/zenvdb/VDBCreateLevelsetSphere.cpp | 2 +- .../zenvdb/VDBExplosiveTurbulentNoise.cpp | 62 +++++++++---------- projects/zenvdb/VDBFillClearOps.cpp | 10 +-- projects/zenvdb/VDBInvertSDF.cpp | 2 +- zeno/include/zeno/funcs/PrimitiveIO.h | 2 +- zeno/include/zeno/funcs/PrimitiveTools.h | 2 +- zeno/include/zeno/types/AttrVector.h | 4 +- zeno/src/nodes/AxisNodes.cpp | 8 +-- zeno/src/nodes/CameraNodes.cpp | 18 +++--- zeno/src/nodes/CurveNodes.cpp | 4 +- zeno/src/nodes/InputParams.cpp | 12 ++-- zeno/src/nodes/ProcedrualSkyNode.cpp | 6 +- zeno/src/nodes/color/MakeColor.cpp | 2 +- zeno/src/nodes/mtl/ShaderAttrs.cpp | 4 +- zeno/src/nodes/mtl/ShaderFinalize.cpp | 2 +- zeno/src/nodes/mtl/ShaderTexture.cpp | 12 ++-- zeno/src/nodes/mtl/ShaderUtils.cpp | 6 +- zeno/src/nodes/neo/NumRandom.cpp | 2 +- zeno/src/nodes/neo/PrimBend.cpp | 6 +- zeno/src/nodes/neo/PrimCodecUVs.cpp | 16 ++--- zeno/src/nodes/neo/PrimDuplicate.cpp | 4 +- 
zeno/src/nodes/neo/PrimExtrude.cpp | 6 +- zeno/src/nodes/neo/PrimFlipFaces.cpp | 4 +- zeno/src/nodes/neo/PrimForceTrail.cpp | 2 +- zeno/src/nodes/neo/PrimGenerateONB.cpp | 12 ++-- zeno/src/nodes/neo/PrimPerlinNoise.cpp | 2 +- zeno/src/nodes/neo/PrimProject.cpp | 10 +-- zeno/src/nodes/neo/PrimRandomize.cpp | 2 +- zeno/src/nodes/neo/PrimSepTris.cpp | 36 +++++------ zeno/src/nodes/neo/PrimSimplifyTag.cpp | 2 +- zeno/src/nodes/neo/PrimTranslate.cpp | 4 +- zeno/src/nodes/neo/PrimTwist.cpp | 6 +- zeno/src/nodes/num/NumericMath.cpp | 22 +++---- zeno/src/nodes/prim/MakeGridPrimitive.cpp | 46 +++++++------- zeno/src/nodes/prim/MakeVisualPrimitive.cpp | 6 +- zeno/src/nodes/prim/PrimitiveAttrOp.cpp | 10 +-- zeno/src/nodes/prim/PrimitiveAttribute.cpp | 12 ++-- zeno/src/nodes/prim/PrimitiveBent.cpp | 6 +- zeno/src/nodes/prim/PrimitiveCalcCentroid.cpp | 2 +- zeno/src/nodes/prim/PrimitiveClip.cpp | 4 +- zeno/src/nodes/prim/PrimitiveDuplicate.cpp | 6 +- zeno/src/nodes/prim/PrimitiveHeatmap.cpp | 2 +- zeno/src/nodes/prim/PrimitiveMath.cpp | 14 ++--- zeno/src/nodes/prim/PrimitiveNoiseAttr.cpp | 18 +++--- zeno/src/nodes/prim/PrimitiveNormal.cpp | 2 +- zeno/src/nodes/prim/PrimitivePolygonate.cpp | 6 +- zeno/src/nodes/prim/PrimitiveReduction.cpp | 2 +- zeno/src/nodes/prim/PrimitiveTrace.cpp | 8 +-- zeno/src/nodes/prim/PrimitiveTriangulate.cpp | 6 +- zeno/src/nodes/prim/PrimitiveTwist.cpp | 6 +- zeno/src/nodes/prim/SimpleGeometry.cpp | 8 +-- zeno/src/nodes/prim/TransformPrimitive.cpp | 2 +- zeno/src/nodes/prim/UVProjectFromPlane.cpp | 24 +++---- zeno/src/nodes/prim/WBErode.cpp | 12 ++-- zeno/src/nodes/prim/WBNoise.cpp | 58 ++++++++--------- zeno/src/nodes/prim/WBPrimBend.cpp | 36 +++++------ 85 files changed, 398 insertions(+), 398 deletions(-) diff --git a/projects/CUDA/utils/Primitives.cpp b/projects/CUDA/utils/Primitives.cpp index f5d60cb5e6..57068512e9 100644 --- a/projects/CUDA/utils/Primitives.cpp +++ b/projects/CUDA/utils/Primitives.cpp @@ -3859,7 +3859,7 @@ struct 
QueryClosestPrimitive : zeno::INode { dist, bvhId, lbvh->getNumLeaves(), pid, prim->size()); #endif } else if (has_input("prim")) { - auto p = get_input("prim")->get(); + auto p = get_input("prim")->get(); using vec3 = zs::vec; auto pi = vec3::from_array(p); auto lbvhv = zs::proxy(lbvh); @@ -4122,7 +4122,7 @@ struct ComputeParticlesDirection : INode { zeno::vec3f trans{0, 0, 0}; if (has_input("origin")) { - trans = get_input2("origin"); + trans = get_input2("origin"); } else { std::vector locs[3]; for (int d = 0; d != 3; ++d) { diff --git a/projects/CUDA/utils/Primitives.cu b/projects/CUDA/utils/Primitives.cu index 0598963937..30a5d0d8d7 100644 --- a/projects/CUDA/utils/Primitives.cu +++ b/projects/CUDA/utils/Primitives.cu @@ -81,13 +81,13 @@ struct ZSParticlePerlinNoise : INode { auto zspars = get_input("zspars"); auto attrTag = get_input2("Attribute"); auto opType = get_input2("OpType"); - auto frequency = get_input2("Frequency"); - auto offset = get_input2("Offset"); + auto frequency = get_input2("Frequency"); + auto offset = get_input2("Offset"); auto roughness = get_input2("Roughness"); auto turbulence = get_input2("Turbulence"); auto amplitude = get_input2("Amplitude"); auto attenuation = get_input2("Attenuation"); - auto mean = get_input2("MeanNoise"); + auto mean = get_input2("MeanNoise"); bool isAccumulate = opType == "accumulate" ? 
true : false; diff --git a/projects/CuEulerian/navierstokes/NS_noise.cu b/projects/CuEulerian/navierstokes/NS_noise.cu index 186b42fa64..c0783abcde 100644 --- a/projects/CuEulerian/navierstokes/NS_noise.cu +++ b/projects/CuEulerian/navierstokes/NS_noise.cu @@ -21,13 +21,13 @@ struct ZSGridPerlinNoise : INode { auto zsSPG = get_input("SparseGrid"); auto attrTag = get_input2("GridAttribute"); auto opType = get_input2("OpType"); - auto frequency = get_input2("Frequency"); - auto offset = get_input2("Offset"); + auto frequency = get_input2("Frequency"); + auto offset = get_input2("Offset"); auto roughness = get_input2("Roughness"); auto turbulence = get_input2("Turbulence"); auto amplitude = get_input2("Amplitude"); auto attenuation = get_input2("Attenuation"); - auto mean = get_input2("MeanNoise"); + auto mean = get_input2("MeanNoise"); bool isAccumulate = opType == "accumulate" ? true : false; @@ -115,12 +115,12 @@ struct ZSGridCurlNoise : INode { auto attrTag = get_input2("GridAttribute"); bool isStaggered = get_input2("staggered"); auto opType = get_input2("OpType"); - auto frequency = get_input2("Frequency"); - auto offset = get_input2("Offset"); + auto frequency = get_input2("Frequency"); + auto offset = get_input2("Offset"); auto roughness = get_input2("Roughness"); auto turbulence = get_input2("Turbulence"); auto amplitude = get_input2("Amplitude"); - auto mean = get_input2("MeanNoise"); + auto mean = get_input2("MeanNoise"); bool isAccumulate = opType == "accumulate" ? 
true : false; diff --git a/projects/CuLagrange/mpm/Boundary.cu b/projects/CuLagrange/mpm/Boundary.cu index 897904bc7c..c5e3e4e907 100644 --- a/projects/CuLagrange/mpm/Boundary.cu +++ b/projects/CuLagrange/mpm/Boundary.cu @@ -625,7 +625,7 @@ struct TransformZSLevelSet : INode { using basic_ls_t = typename ZenoLevelSet::basic_ls_t; // translation if (has_input("translation")) { - auto b = get_input("translation")->get(); + auto b = get_input("translation")->get(); match( [&b](basic_ls_t &basicLs) { match( @@ -663,7 +663,7 @@ struct TransformZSLevelSet : INode { } // rotation if (has_input("eulerXYZ")) { - auto yprAngles = get_input("eulerXYZ")->get(); + auto yprAngles = get_input("eulerXYZ")->get(); auto rot = zs::Rotation{yprAngles[0], yprAngles[1], yprAngles[2], zs::degree_c, zs::ypr_c}; match( [&rot](basic_ls_t &basicLs) { diff --git a/projects/CuLagrange/mpm/Generation.cu b/projects/CuLagrange/mpm/Generation.cu index de69bd7376..e40671d9a6 100644 --- a/projects/CuLagrange/mpm/Generation.cu +++ b/projects/CuLagrange/mpm/Generation.cu @@ -1616,11 +1616,11 @@ struct ToZSBoundary : INode { // translation if (has_input("translation")) { - auto b = get_input("translation")->get(); + auto b = get_input("translation")->get(); boundary->b = zs::vec{b[0], b[1], b[2]}; } if (has_input("translation_rate")) { - auto dbdt = get_input("translation_rate")->get(); + auto dbdt = get_input("translation_rate")->get(); boundary->dbdt = zs::vec{dbdt[0], dbdt[1], dbdt[2]}; // fmt::print("dbdt assigned as {}, {}, {}\n", boundary->dbdt[0], // boundary->dbdt[1], boundary->dbdt[2]); @@ -1636,7 +1636,7 @@ struct ToZSBoundary : INode { } // rotation if (has_input("ypr_angles")) { - auto yprAngles = get_input("ypr_angles")->get(); + auto yprAngles = get_input("ypr_angles")->get(); auto rot = zs::Rotation{yprAngles[0], yprAngles[1], yprAngles[2], zs::degree_c, zs::ypr_c}; boundary->R = rot; } diff --git a/projects/CuLagrange/mpm/MPMPipeline.cu b/projects/CuLagrange/mpm/MPMPipeline.cu index 
0d71df4cb3..d8d001a9a2 100644 --- a/projects/CuLagrange/mpm/MPMPipeline.cu +++ b/projects/CuLagrange/mpm/MPMPipeline.cu @@ -840,7 +840,7 @@ struct UpdateZSGrid : INode { auto gravity = get_input2("gravity"); auto accel = zs::vec::zeros(); if (has_input("Accel")) { - auto tmp = get_input("Accel")->get(); + auto tmp = get_input("Accel")->get(); accel = zs::vec{tmp[0], tmp[1], tmp[2]}; } else accel[1] = gravity; diff --git a/projects/FastFLIP/whitewater.cpp b/projects/FastFLIP/whitewater.cpp index 55f2671e16..fd70457796 100644 --- a/projects/FastFLIP/whitewater.cpp +++ b/projects/FastFLIP/whitewater.cpp @@ -37,15 +37,15 @@ struct WhitewaterSource : INode { // pars->verts.values.push_back(vec3f{}); // vel.push_back(); - auto limit_depth = get_input2("LimitDepth"); - auto speed_range = get_input2("SpeedRange"); + auto limit_depth = get_input2("LimitDepth"); + auto speed_range = get_input2("SpeedRange"); auto curv_emit = get_input2("EmitFromCurvature"); auto max_angle = get_input2("MaxVelocityAngle"); - auto curv_range = get_input2("CurvatureRange"); + auto curv_range = get_input2("CurvatureRange"); auto acc_emit = get_input2("EmitFromAcceleration"); - auto acc_range = get_input2("AccelerationRange"); + auto acc_range = get_input2("AccelerationRange"); auto vor_emit = get_input2("EmitFromVorticity"); - auto vor_range = get_input2("VorticityRange"); + auto vor_range = get_input2("VorticityRange"); float dx = static_cast(Velocity->voxelSize()[0]); @@ -210,7 +210,7 @@ struct WhitewaterSolver : INode { auto &Solid_sdf = get_input("SolidSDF")->m_grid; auto TargetVelAttr = get_input2("TargetVelAttr"); - auto gravity = vec_to_other(get_input2("Gravity")); + auto gravity = vec_to_other(get_input2("Gravity")); auto dragModel = get_input2("DragModel"); auto air_drag = get_input2("AirDrag"); auto foam_drag = get_input2("FoamDrag"); diff --git a/projects/GUI/GUI.cpp b/projects/GUI/GUI.cpp index 59577bd339..8ac53a88b3 100644 --- a/projects/GUI/GUI.cpp +++ b/projects/GUI/GUI.cpp @@ 
-154,7 +154,7 @@ ZENO_DEFNODE(ZGL_DrawVboArrays)({ struct ZGL_ClearColor : INode { void apply() override { - auto color = get_input2("color"); + auto color = get_input2("color"); auto alpha = get_input2("alpha"); CHECK_GL(glClearColor(color[0], color[1], color[2], alpha)); CHECK_GL(glClear(GL_COLOR_BUFFER_BIT)); diff --git a/projects/ImgCV/ImageComposite.cpp b/projects/ImgCV/ImageComposite.cpp index 04e6188b5f..96738015e5 100644 --- a/projects/ImgCV/ImageComposite.cpp +++ b/projects/ImgCV/ImageComposite.cpp @@ -240,9 +240,9 @@ struct CompBlur : INode {//TODO::delete virtual void apply() override { auto image = get_input("image"); auto s = get_input2("strength"); - auto ktop = get_input2("kerneltop"); - auto kmid = get_input2("kernelmid"); - auto kbot = get_input2("kernelbot"); + auto ktop = get_input2("kerneltop"); + auto kmid = get_input2("kernelmid"); + auto kbot = get_input2("kernelbot"); auto &ud = image->userData(); int w = ud.get2("w"); int h = ud.get2("h"); @@ -374,7 +374,7 @@ struct CompImport : INode { int nx = ud.has("nx")?ud.get2("nx"):ud.get2("w"); int ny = ud.has("ny")?ud.get2("ny"):ud.get2("h"); auto attrName = get_input2("attrName"); - auto remapRange = get_input2("RemapRange"); + auto remapRange = get_input2("RemapRange"); auto remap = get_input2("Remap"); auto image = std::make_shared(); auto attributesType = get_input2("AttributesType"); diff --git a/projects/ImgCV/ImageProcessing.cpp b/projects/ImgCV/ImageProcessing.cpp index a97b736240..704a5c17b7 100644 --- a/projects/ImgCV/ImageProcessing.cpp +++ b/projects/ImgCV/ImageProcessing.cpp @@ -645,8 +645,8 @@ struct ImageBlur : INode { auto type = get_input2("type"); auto fastgaussian = get_input2("Fast Blur(Gaussian)"); auto sigmaX = get_input2("GaussianSigma"); - auto sigmaColor = get_input2("BilateralSigma")[0]; - auto sigmaSpace = get_input2("BilateralSigma")[1]; + auto sigmaColor = get_input2("BilateralSigma")[0]; + auto sigmaSpace = get_input2("BilateralSigma")[1]; auto &ud = image->userData(); 
int w = ud.get2("w"); int h = ud.get2("h"); @@ -1152,8 +1152,8 @@ ZENDEFNODE(ImageErode, { struct ImageColor : INode { virtual void apply() override { auto image = std::make_shared(); - auto color = get_input2("Color"); - auto size = get_input2("Size"); + auto color = get_input2("Color"); + auto size = get_input2("Size"); auto balpha = get_input2("alpha"); auto vertsize = size[0] * size[1]; image->verts.resize(vertsize); @@ -1192,9 +1192,9 @@ ZENDEFNODE(ImageColor, { struct ImageColor2 : INode { virtual void apply() override { auto image = std::make_shared(); - auto color = get_input2("Color"); + auto color = get_input2("Color"); auto alpha = get_input2("Alpha"); - auto size = get_input2("Size"); + auto size = get_input2("Size"); auto balpha = get_input2("alpha"); auto vertsize = size[0] * size[1]; image->verts.resize(vertsize); @@ -1487,8 +1487,8 @@ ZENDEFNODE(ImageMatting, { struct ImageLevels: INode { void apply() override { std::shared_ptr image = get_input("image"); - auto inputLevels = get_input2("Input Levels"); - auto outputLevels = get_input2("Output Levels"); + auto inputLevels = get_input2("Input Levels"); + auto outputLevels = get_input2("Output Levels"); auto gamma = get_input2("gamma");//range 0.01 - 9.99 auto channel = get_input2("channel"); auto clamp = get_input2("Clamp Output"); diff --git a/projects/ImgCV/ObjectRecog.cpp b/projects/ImgCV/ObjectRecog.cpp index c9ecec049f..2754bd3e7a 100644 --- a/projects/ImgCV/ObjectRecog.cpp +++ b/projects/ImgCV/ObjectRecog.cpp @@ -1298,9 +1298,9 @@ ZENDEFNODE(Image3DAnalyze, { */ struct CreateCameraMatrix : INode { void apply() override { - auto top = get_input2("top"); - auto mid = get_input2("mid"); - auto bot = get_input2("bot"); + auto top = get_input2("top"); + auto mid = get_input2("mid"); + auto bot = get_input2("bot"); cv::Mat cameraMatrix = cv::Mat::eye(3, 3, CV_32F); cameraMatrix.at(0, 0) = top[0]; diff --git a/projects/ImgCV/imgcv.cpp b/projects/ImgCV/imgcv.cpp index 2b62a32155..5f21514a0d 100644 --- 
a/projects/ImgCV/imgcv.cpp +++ b/projects/ImgCV/imgcv.cpp @@ -466,7 +466,7 @@ struct CVImageFillColor : CVINode { void apply() override { auto likeimage = get_input("image"); auto is255 = get_input2("is255"); - auto color = tocvscalar(get_input2("color")); + auto color = tocvscalar(get_input2("color")); auto image = get_input2("inplace") ? likeimage : std::make_shared(likeimage->image.clone()); if (has_input("mask")) { @@ -734,8 +734,8 @@ struct CVImageFillGrad : CVINode { auto scale = get_input2("scale"); auto offset = get_input2("offset"); auto is255 = get_input2("is255"); - auto color1 = tocvscalar(get_input2("color1")); - auto color2 = tocvscalar(get_input2("color2")); + auto color1 = tocvscalar(get_input2("color1")); + auto color2 = tocvscalar(get_input2("color2")); auto image = get_input2("inplace") ? likeimage : std::make_shared(likeimage->image.clone()); vec2i shape(image->image.size[1], image->image.size[0]); @@ -786,7 +786,7 @@ ZENDEFNODE(CVImageFillGrad, { struct CVImageDrawPoly : CVINode { void apply() override { auto image = get_input("image"); - auto color = tocvscalar(get_input2("color")); + auto color = tocvscalar(get_input2("color")); if (!get_input2("inplace")) image = std::make_shared(*image); auto prim = get_input("prim"); @@ -868,7 +868,7 @@ struct CVImagePutText : CVINode { auto antialias = get_input2("antialias"); auto scale = get_input2("scale"); auto is255 = get_input2("is255"); - auto color = tocvscalar(get_input2("color") * (is255 ? 255 : 1)); + auto color = tocvscalar(get_input2("color") * (is255 ? 255 : 1)); cv::Point org(get_input2("X0"), get_input2("Y0")); cv::putText(image->image, text, org, fontFace, scale, color, thickness, antialias ? 
cv::LINE_AA : cv::LINE_8); diff --git a/projects/PBD/PBDCloth/PBDDihedralConstraint.cpp b/projects/PBD/PBDCloth/PBDDihedralConstraint.cpp index d2c51ee496..3e9dfaf9d4 100644 --- a/projects/PBD/PBDCloth/PBDDihedralConstraint.cpp +++ b/projects/PBD/PBDCloth/PBDDihedralConstraint.cpp @@ -71,10 +71,10 @@ struct PBDDihedralConstraint : zeno::INode { auto dihedralCompliance = get_input("dihedralCompliance")->get(); auto dt = get_input("dt")->get(); - auto p1 = get_input("p1")->get(); - auto p2 = get_input("p2")->get(); - auto p3 = get_input("p3")->get(); - auto p4 = get_input("p4")->get(); + auto p1 = get_input("p1")->get(); + auto p2 = get_input("p2")->get(); + auto p3 = get_input("p3")->get(); + auto p4 = get_input("p4")->get(); auto invMass1 = get_input("invMass1")->get(); auto invMass2 = get_input("invMass2")->get(); auto invMass3 = get_input("invMass3")->get(); diff --git a/projects/PBD/PBDPreSolve.cpp b/projects/PBD/PBDPreSolve.cpp index a867674960..d69872ab52 100644 --- a/projects/PBD/PBDPreSolve.cpp +++ b/projects/PBD/PBDPreSolve.cpp @@ -37,7 +37,7 @@ struct PBDPreSolve : zeno::INode { auto dt = get_input("dt")->get(); prim->userData().set("dt", std::make_shared(dt)); - auto externForce = get_input("externForce")->get(); + auto externForce = get_input("externForce")->get(); auto &pos = prim->verts; auto &invMass = prim->verts.attr("invMass"); diff --git a/projects/PBD/PBF/PBF.h b/projects/PBD/PBF/PBF.h index f581af995e..08469c6a1e 100644 --- a/projects/PBD/PBF/PBF.h +++ b/projects/PBD/PBF/PBF.h @@ -77,9 +77,9 @@ struct PBF : INode{ //用户自定义å‚æ•° dt = get_input("dt")->get(); pRadius = get_input("particle_radius")->get(); - bounds_min = get_input("bounds_min")->get(); - bounds_max = get_input("bounds_max")->get(); - gravity = get_input("gravity")->get(); + bounds_min = get_input("bounds_min")->get(); + bounds_max = get_input("bounds_max")->get(); + gravity = get_input("gravity")->get(); rho0 = get_input("rho0")->get(); lambdaEpsilon = 
get_input("lambdaEpsilon")->get(); coeffDq = get_input("coeffDq")->get(); diff --git a/projects/PBD/PBF/PBFWorld_Setup.cpp b/projects/PBD/PBF/PBFWorld_Setup.cpp index ae38934b80..210ff98e47 100644 --- a/projects/PBD/PBF/PBFWorld_Setup.cpp +++ b/projects/PBD/PBF/PBFWorld_Setup.cpp @@ -30,9 +30,9 @@ struct PBFWorld_Setup : INode //用户自定义å‚æ•° data->dt = get_input("dt")->get(); data->radius = get_input("particle_radius")->get(); - data->bounds_min = get_input("bounds_min")->get(); - data->bounds_max = get_input("bounds_max")->get(); - data->externForce = get_input("externForce")->get(); + data->bounds_min = get_input("bounds_min")->get(); + data->bounds_max = get_input("bounds_max")->get(); + data->externForce = get_input("externForce")->get(); data->rho0 = get_input("rho0")->get(); data->lambdaEpsilon = get_input("lambdaEpsilon")->get(); data->coeffDq = get_input("coeffDq")->get(); diff --git a/projects/PBD/PBF/PBFWorld_testCube.cpp b/projects/PBD/PBF/PBFWorld_testCube.cpp index 774e2f592d..ed2b3de533 100644 --- a/projects/PBD/PBF/PBFWorld_testCube.cpp +++ b/projects/PBD/PBF/PBFWorld_testCube.cpp @@ -14,7 +14,7 @@ struct PBFWorld_testCube : INode{ auto prim = std::make_shared(); auto cubeSize = get_input("cubeSize")->get(); auto spacing = get_input("spacing")->get(); - auto initPos = get_input("initPos")->get(); + auto initPos = get_input("initPos")->get(); auto numParticles = get_input("numParticles")->get(); auto &pos = prim->verts; pos.resize(numParticles); diff --git a/projects/PBD/PBF/PBF_BVH.h b/projects/PBD/PBF/PBF_BVH.h index d5436a5417..c06d9af4e3 100644 --- a/projects/PBD/PBF/PBF_BVH.h +++ b/projects/PBD/PBF/PBF_BVH.h @@ -60,9 +60,9 @@ struct PBF_BVH : INode{ //用户自定义å‚æ•° dt = get_input("dt")->get(); pRadius = get_input("particle_radius")->get(); - bounds_min = get_input("bounds_min")->get(); - bounds_max = get_input("bounds_max")->get(); - externForce = get_input("externForce")->get(); + bounds_min = get_input("bounds_min")->get(); + bounds_max = 
get_input("bounds_max")->get(); + externForce = get_input("externForce")->get(); rho0 = get_input("rho0")->get(); h = get_input("h")->get(); lambdaEpsilon = get_input("lambdaEpsilon")->get(); diff --git a/projects/PBD/PBF/testPBFCube.cpp b/projects/PBD/PBF/testPBFCube.cpp index ea7f994abd..832dc11226 100644 --- a/projects/PBD/PBF/testPBFCube.cpp +++ b/projects/PBD/PBF/testPBFCube.cpp @@ -17,7 +17,7 @@ struct testPBFCube : INode{ // int cubeSize = 20; // float spacing = 1; auto numParticles = get_input("numParticles")->get(); - auto initPos = get_input("initPos")->get(); + auto initPos = get_input("initPos")->get(); auto cubeSize = get_input("cubeSize")->get(); auto spacing = get_input("spacing")->get(); diff --git a/projects/PluginPOC/CMakeLists.txt b/projects/PluginPOC/CMakeLists.txt index 30e1d9aad0..4a538c75e4 100644 --- a/projects/PluginPOC/CMakeLists.txt +++ b/projects/PluginPOC/CMakeLists.txt @@ -14,9 +14,9 @@ get_target_property(ZENO_INC_DIR zeno INCLUDE_DIRECTORIES) message("demo plugin includes [${ZENO_INC_DIR}]") target_include_directories(zeno_plugin_helper INTERFACE ${ZENO_INC_DIR}) -get_target_property(ZENO_LINK_OPTIONS zeno LINK_OPTIONS) -message("demo plugin link options [${ZENO_LINK_OPTIONS}]") -target_link_options(zeno_plugin_helper INTERFACE ${ZENO_LINK_OPTIONS}) +# get_target_property(ZENO_LINK_OPTIONS zeno LINK_OPTIONS) +# message("demo plugin link options [${ZENO_LINK_OPTIONS}]") +# target_link_options(zeno_plugin_helper INTERFACE ${ZENO_LINK_OPTIONS}) get_target_property(ZENO_LINK_DIRS zeno LINK_DIRECTORIES) message("demo plugin link dirs [${ZENO_LINK_DIRS}]") diff --git a/projects/Rigid/RigidFracture.cpp b/projects/Rigid/RigidFracture.cpp index 871b916c8e..6266f2e0ae 100644 --- a/projects/Rigid/RigidFracture.cpp +++ b/projects/Rigid/RigidFracture.cpp @@ -627,11 +627,11 @@ struct BulletObjectSetVel : zeno::INode { auto body = obj->body.get(); if (has_input("linearVel")) { - auto v = get_input2("linearVel"); + auto v = get_input2("linearVel"); 
body->setLinearVelocity(vec_to_other(v)); } if (has_input("angularVel")) { - auto v = get_input2("angularVel"); + auto v = get_input2("angularVel"); body->setAngularVelocity(vec_to_other(v)); } diff --git a/projects/Rigid/RigidTest.cpp b/projects/Rigid/RigidTest.cpp index cce754650a..81f389e9bf 100644 --- a/projects/Rigid/RigidTest.cpp +++ b/projects/Rigid/RigidTest.cpp @@ -2539,14 +2539,14 @@ struct BulletCalcInverseKinematics : zeno::INode { auto numericObjs = get_input( "targetPositions")->get>(); for (auto &&no: numericObjs) - targetPositions.push_back(no->get()); + targetPositions.push_back(no->get()); } std::vector targetOrientations; { auto numericObjs = get_input("targetOrientations")->get>(); for (auto &&no : numericObjs) - targetOrientations.push_back(no->get()); + targetOrientations.push_back(no->get()); } auto numIterations = get_input2("numIterations"); // 20 @@ -3156,7 +3156,7 @@ struct BulletMultiBodyCalculateJacobian : zeno::INode { virtual void apply() { auto object = get_input("object"); auto linkIndex = get_input2("linkIndex"); - auto localPosition = get_input2("localPos"); + auto localPosition = get_input2("localPos"); std::vector jointPositionsQ; { diff --git a/projects/ZenoFX/pnbvhw.cpp b/projects/ZenoFX/pnbvhw.cpp index f690c736ea..597626f4a8 100644 --- a/projects/ZenoFX/pnbvhw.cpp +++ b/projects/ZenoFX/pnbvhw.cpp @@ -355,7 +355,7 @@ struct QueryNearestPrimitive : zeno::INode { dist, bvhId, lbvh->getNumLeaves(), pid, prim->size()); #endif } else if (has_input("prim")) { - auto p = get_input("prim")->get(); + auto p = get_input("prim")->get(); w = lbvh->find_nearest(p, bvhId, dist); line->verts.push_back(p); } else @@ -496,7 +496,7 @@ struct QueryNearestPrimitiveWithUV : zeno::INode { dist, bvhId, lbvh->getNumLeaves(), pid, prim->size()); #endif } else if (has_input("prim")) { - auto p = get_input("prim")->get(); + auto p = get_input("prim")->get(); w = lbvh->find_nearest(p, bvhId, dist); line->verts.push_back(p); } else @@ -700,7 +700,7 @@ 
struct QueryNearestPrimitiveWithinGroup : zeno::INode { dist, bvhId, lbvh->getNumLeaves(), pid, prim->size()); #endif } else if (has_input("prim")) { - auto p = get_input("prim")->get(); + auto p = get_input("prim")->get(); w = lbvh->find_nearest(p, bvhId, dist); line->verts.push_back(p); } else diff --git a/projects/cgmesh/PrimitiveBooleanOp.cpp b/projects/cgmesh/PrimitiveBooleanOp.cpp index d0434bd9e0..cb6db6e9fb 100644 --- a/projects/cgmesh/PrimitiveBooleanOp.cpp +++ b/projects/cgmesh/PrimitiveBooleanOp.cpp @@ -43,7 +43,7 @@ struct PrimitiveBooleanOp : INode { auto attrValB = get_input("faceAttrB")->value; std::visit([&] (auto const &valA) { using T = std::decay_t; - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same_v || std::is_same_v) { auto valB = std::get(attrValB); auto &arrC = primC->tris.add_attr(attrName); for (int i = 0; i < primC->tris.size(); i++) { diff --git a/projects/cgmesh/VoronoiFracture.cpp b/projects/cgmesh/VoronoiFracture.cpp index 801613db4c..730e8bece3 100644 --- a/projects/cgmesh/VoronoiFracture.cpp +++ b/projects/cgmesh/VoronoiFracture.cpp @@ -27,9 +27,9 @@ struct AABBVoronoi : INode { auto triangulate = get_param("triangulate"); auto bmin = has_input("bboxMin") ? - get_input("bboxMin")->get() : vec3f(-1); + get_input("bboxMin")->get() : zeno::vec3f(-1); auto bmax = has_input("bboxMax") ? 
- get_input("bboxMax")->get() : vec3f(1); + get_input("bboxMax")->get() : zeno::vec3f(1); auto minx = bmin[0]; auto miny = bmin[1]; auto minz = bmin[2]; @@ -45,7 +45,7 @@ struct AABBVoronoi : INode { if (has_input("particlesPrim")) { auto particlesPrim = get_input("particlesPrim"); - auto &parspos = particlesPrim->attr("pos"); + auto &parspos = particlesPrim->attr("pos"); for (int i = 0; i < parspos.size(); i++) { auto p = parspos[i]; pcon.put(i + 1, p[0], p[1], p[2]); @@ -54,7 +54,7 @@ struct AABBVoronoi : INode { auto numParticles = get_param("numRandPoints"); wangsrng rng(numParticles); for (int i = 0; i < numParticles; i++) { - vec3f p(rng.next_float(),rng.next_float(),rng.next_float()); + zeno::vec3f p(rng.next_float(),rng.next_float(),rng.next_float()); p = p * (bmax - bmin) + bmin; pcon.put(i + 1, p[0], p[1], p[2]); } @@ -96,7 +96,7 @@ struct AABBVoronoi : INode { auto prim = std::make_shared(); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); for (int i = 0; i < (int)v.size(); i += 3) { pos.emplace_back(v[i], v[i+1], v[i+2]); } @@ -108,7 +108,7 @@ struct AABBVoronoi : INode { isBoundary = true; } else { if (auto ncid = neigh[i] - 1; ncid > cid) { - neighs->arr.push_back(objectFromLiterial(vec2i(cid, ncid))); + neighs->arr.push_back(objectFromLiterial(zeno::vec2i(cid, ncid))); } } int len = f_vert[j]; @@ -166,7 +166,7 @@ struct VoronoiFracture : AABBVoronoi { auto primA = get_input("meshPrim"); auto VFA = get_param("doMeshFix") ? prim_to_eigen_with_fix(primA.get()) : prim_to_eigen(primA.get()); - auto bmin = primA->verts.size() ? primA->verts[0] : vec3f(0); + auto bmin = primA->verts.size() ? 
primA->verts[0] : zeno::vec3f(0); auto bmax = bmin; for (int i = 1; i < primA->verts.size(); i++) { bmin = zeno::min(primA->verts[i], bmin); @@ -323,7 +323,7 @@ struct SimplifyVoroNeighborList : INode { auto newNeighList = std::make_shared(); std::map> lut; - for (auto const &ind: neighList->getLiterial()) { + for (auto const &ind: neighList->getLiterial()) { auto x = ind[0], y = ind[1]; lut[x].push_back(y); lut[y].push_back(x); @@ -345,7 +345,7 @@ struct SimplifyVoroNeighborList : INode { } for (auto const &[x, y]: edges) { - newNeighList->arr.push_back(objectFromLiterial(vec2i(x, y))); + newNeighList->arr.push_back(objectFromLiterial(zeno::vec2i(x, y))); } set_output("newNeighList", std::move(newNeighList)); } diff --git a/projects/zenvdb/MakeVDBGrid.cpp b/projects/zenvdb/MakeVDBGrid.cpp index 5d746bac60..4eb8df291e 100644 --- a/projects/zenvdb/MakeVDBGrid.cpp +++ b/projects/zenvdb/MakeVDBGrid.cpp @@ -32,7 +32,7 @@ struct MakeVDBGrid : zeno::INode { auto tmp = !has_input("background") ? 
zeno::IObject::make() : std::make_shared(openvdb::Vec3fGrid::create( zeno::vec_to_other(get_input("background") - ->as()->get()))); + ->as()->get()))); tmp->m_grid->setTransform(openvdb::math::Transform::createLinearTransform(dx)); tmp->m_grid->setName(name); if (structure == "Staggered") { diff --git a/projects/zenvdb/SampleVDBToPrimitive.cpp b/projects/zenvdb/SampleVDBToPrimitive.cpp index 6ede76084f..9752d6daf5 100644 --- a/projects/zenvdb/SampleVDBToPrimitive.cpp +++ b/projects/zenvdb/SampleVDBToPrimitive.cpp @@ -203,7 +203,7 @@ struct PrimSample : zeno::INode { auto remapMin = get_input2("remapMin"); auto remapMax = get_input2("remapMax"); auto wrap = get_input2("wrap"); - auto borderColor = get_input2("borderColor"); + auto borderColor = get_input2("borderColor"); if (has_input("sampledObject") && get_input("sampledObject")->userData().has("isImage")) { auto image = get_input("sampledObject"); primSampleTexture(prim, srcChannel, "vertex", dstChannel, image, wrap, borderColor, remapMin, remapMax); diff --git a/projects/zenvdb/VDBAddPerlinNoise.cpp b/projects/zenvdb/VDBAddPerlinNoise.cpp index 4f67d5b521..bf60ab337b 100644 --- a/projects/zenvdb/VDBAddPerlinNoise.cpp +++ b/projects/zenvdb/VDBAddPerlinNoise.cpp @@ -23,18 +23,18 @@ struct fuck_openvdb_vec { template <> struct fuck_openvdb_vec { - using type = vec3f; + using type = zeno::vec3f; }; struct VDBPerlinNoise : INode { virtual void apply() override { auto inoutSDF = get_input("inoutSDF"); auto scale = get_input2("scale"); - auto scale3d = get_input2("scale3d"); + auto scale3d = get_input2("scale3d"); auto detail = get_input2("detail"); auto roughness = get_input2("roughness"); auto disortion = get_input2("disortion"); - auto offset = get_input2("offset"); + auto offset = get_input2("offset"); auto average = get_input2("average"); auto strength = get_input2("strength"); @@ -50,18 +50,18 @@ struct VDBPerlinNoise : INode { typename std::decay_t::ValueType>>::type; OutT noise; { - vec3f p(coord[0], coord[1], 
coord[2]); + zeno::vec3f p(coord[0], coord[1], coord[2]); p = scale3d * (p - offset); OutT o; if constexpr (std::is_same_v) { o = PerlinNoise::perlin(p, roughness, detail); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { o = OutT( - PerlinNoise::perlin(vec3f(p[0], p[1], p[2]), roughness, detail), - PerlinNoise::perlin(vec3f(p[1], p[2], p[0]), roughness, detail), - PerlinNoise::perlin(vec3f(p[2], p[0], p[1]), roughness, detail)); + PerlinNoise::perlin(zeno::vec3f(p[0], p[1], p[2]), roughness, detail), + PerlinNoise::perlin(zeno::vec3f(p[1], p[2], p[0]), roughness, detail), + PerlinNoise::perlin(zeno::vec3f(p[2], p[0], p[1]), roughness, detail)); } else { - throw makeError(typeid(vec3f), typeid(OutT), "outType"); + throw makeError(typeid(zeno::vec3f), typeid(OutT), "outType"); } noise = average + o * strength; } diff --git a/projects/zenvdb/VDBAddTurbulentNoise.cpp b/projects/zenvdb/VDBAddTurbulentNoise.cpp index 9cc3f17bc0..13d90439f7 100644 --- a/projects/zenvdb/VDBAddTurbulentNoise.cpp +++ b/projects/zenvdb/VDBAddTurbulentNoise.cpp @@ -113,11 +113,11 @@ struct VDBAddPerlinNoise : INode { auto strength = get_input("strength")->get(); auto scale = get_input("scale")->get(); auto scaling = has_input("scaling") ? - get_input("scaling")->get() - : vec3f(1); + get_input("scaling")->get() + : zeno::vec3f(1); auto translation = has_input("translation") ? 
- get_input("translation")->get() - : vec3f(0); + get_input("translation")->get() + : zeno::vec3f(0); auto inv_scale = 1.f / (scale * scaling); auto grid = inoutSDF->m_grid; @@ -127,7 +127,7 @@ struct VDBAddPerlinNoise : INode { auto wrangler = [&](auto &leaf, openvdb::Index leafpos) { for (auto iter = leaf.beginValueOn(); iter != leaf.endValueOn(); ++iter) { auto coord = iter.getCoord(); - auto pos = (vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; + auto pos = (zeno::vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; auto noise = strength * perlin(pos[0], pos[1], pos[2]); iter.modifyValue([&] (auto &v) { v += noise; @@ -169,7 +169,7 @@ auto smoothstep(T0 edge0, T1 edge1, T2 x) { struct Turbulent { -using vec3 = vec3f; +using vec3 = zeno::vec3f; // Noise settings: //float Power = 5.059; @@ -250,11 +250,11 @@ struct VDBAddTurbulentNoise : INode { auto strength = get_input("strength")->get(); auto scale = get_input("scale")->get(); auto scaling = has_input("scaling") ? - get_input("scaling")->get() - : vec3f(1); + get_input("scaling")->get() + : zeno::vec3f(1); auto translation = has_input("translation") ? 
- get_input("translation")->get() - : vec3f(0); + get_input("translation")->get() + : zeno::vec3f(0); auto inv_scale = 1.f / (scale * scaling); auto grid = inoutSDF->m_grid; @@ -266,7 +266,7 @@ struct VDBAddTurbulentNoise : INode { auto wrangler = [&](auto &leaf, openvdb::Index leafpos) { for (auto iter = leaf.beginValueOn(); iter != leaf.endValueOn(); ++iter) { auto coord = iter.getCoord(); - auto pos = (vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; + auto pos = (zeno::vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; auto noise = strength * turbulent(pos[0], pos[1], pos[2]); iter.modifyValue([&] (auto &v) { v += noise; diff --git a/projects/zenvdb/VDBCreateLevelsetSphere.cpp b/projects/zenvdb/VDBCreateLevelsetSphere.cpp index 2143e7d48b..82443e6a21 100644 --- a/projects/zenvdb/VDBCreateLevelsetSphere.cpp +++ b/projects/zenvdb/VDBCreateLevelsetSphere.cpp @@ -23,7 +23,7 @@ struct VDBCreateLevelsetSphere : zeno::INode { vec3f center(0); if(has_input("center")) { - center = get_input("center")->as()->get(); + center = get_input("center")->as()->get(); } float half_width=(float)openvdb::LEVEL_SET_HALF_WIDTH; if(has_input("half_width")) diff --git a/projects/zenvdb/VDBExplosiveTurbulentNoise.cpp b/projects/zenvdb/VDBExplosiveTurbulentNoise.cpp index ee6a145902..0540404e8d 100644 --- a/projects/zenvdb/VDBExplosiveTurbulentNoise.cpp +++ b/projects/zenvdb/VDBExplosiveTurbulentNoise.cpp @@ -36,60 +36,60 @@ int FBM_Octaves= 5; //fork from Dave Hoskins //https://www.shadertoy.com/view/4djSRW -vec4f hash43(vec3f p) +zeno::vec4f hash43(zeno::vec3f p) { - vec4f p4 = fract(vec4f(p[0], p[1], p[2], p[0]) * vec4f(1031.f, .1030f, .0973f, .1099f)); - p4 += dot(p4, vec4f(p4[3],p4[2],p4[0],p4[1])+19.19f); - return -1.0f + 2.0f * fract(vec4f( + zeno::vec4f p4 = fract(zeno::vec4f(p[0], p[1], p[2], p[0]) * zeno::vec4f(1031.f, .1030f, .0973f, .1099f)); + p4 += dot(p4, zeno::vec4f(p4[3],p4[2],p4[0],p4[1])+19.19f); + return -1.0f + 2.0f * fract(zeno::vec4f( 
(p4[0] + p4[1])*p4[2], (p4[0] + p4[2])*p4[1], (p4[1] + p4[2])*p4[3], (p4[2] + p4[3])*p4[0]) ); } //offsets for noise -inline static const vec3f nbs[] = { - vec3f(0.0, 0.0, 0.0),vec3f(0.0, 1.0, 0.0),vec3f(1.0, 0.0, 0.0),vec3f(1.0, 1.0, 0.0), - vec3f(0.0, 0.0, 1.0),vec3f(0.0, 1.0, 1.0),vec3f(1.0, 0.0, 1.0),vec3f(1.0, 1.0, 1.0) +inline static const zeno::vec3f nbs[] = { + zeno::vec3f(0.0, 0.0, 0.0),zeno::vec3f(0.0, 1.0, 0.0),zeno::vec3f(1.0, 0.0, 0.0),zeno::vec3f(1.0, 1.0, 0.0), + zeno::vec3f(0.0, 0.0, 1.0),zeno::vec3f(0.0, 1.0, 1.0),zeno::vec3f(1.0, 0.0, 1.0),zeno::vec3f(1.0, 1.0, 1.0) }; //'Simplex out of value noise', forked from: https://www.shadertoy.com/view/XltXRH //not sure about performance, is this faster than classic simplex noise? -vec4f AchNoise3D(vec3f x) +zeno::vec4f AchNoise3D(zeno::vec3f x) { - vec3f p = floor(x); - vec3f fr = smoothstep(0.0f, 1.0f, fract(x)); - - vec4f L1C1 = mix(hash43(p+nbs[0]), hash43(p+nbs[2]), fr[0]); - vec4f L1C2 = mix(hash43(p+nbs[1]), hash43(p+nbs[3]), fr[0]); - vec4f L1C3 = mix(hash43(p+nbs[4]), hash43(p+nbs[6]), fr[0]); - vec4f L1C4 = mix(hash43(p+nbs[5]), hash43(p+nbs[7]), fr[0]); - vec4f L2C1 = mix(L1C1, L1C2, fr[1]); - vec4f L2C2 = mix(L1C3, L1C4, fr[1]); + zeno::vec3f p = floor(x); + zeno::vec3f fr = smoothstep(0.0f, 1.0f, fract(x)); + + zeno::vec4f L1C1 = mix(hash43(p+nbs[0]), hash43(p+nbs[2]), fr[0]); + zeno::vec4f L1C2 = mix(hash43(p+nbs[1]), hash43(p+nbs[3]), fr[0]); + zeno::vec4f L1C3 = mix(hash43(p+nbs[4]), hash43(p+nbs[6]), fr[0]); + zeno::vec4f L1C4 = mix(hash43(p+nbs[5]), hash43(p+nbs[7]), fr[0]); + zeno::vec4f L2C1 = mix(L1C1, L1C2, fr[1]); + zeno::vec4f L2C2 = mix(L1C3, L1C4, fr[1]); return mix(L2C1, L2C2, fr[2]); } -vec4f ValueSimplex3D(vec3f p) +zeno::vec4f ValueSimplex3D(zeno::vec3f p) { - vec4f a = AchNoise3D(p); - vec4f b = AchNoise3D(p + 120.5f); + zeno::vec4f a = AchNoise3D(p); + zeno::vec4f b = AchNoise3D(p + 120.5f); return (a + b) * 0.5; } //my FBM -vec4f FBM(vec3f p) +zeno::vec4f FBM(zeno::vec3f p) 
{ - vec4f f(0), s(0), n(0); + zeno::vec4f f(0), s(0), n(0); float a = 1.0f, w = 0.0f; for (int i=0; i("strength")->get(); auto scale = get_input("scale")->get(); auto scaling = has_input("scaling") ? - get_input("scaling")->get() - : vec3f(1); + get_input("scaling")->get() + : zeno::vec3f(1); auto translation = has_input("translation") ? - get_input("translation")->get() - : vec3f(0); + get_input("translation")->get() + : zeno::vec3f(0); auto inv_scale = 1.f / (scale * scaling); auto grid = inoutSDF->m_grid; @@ -136,7 +136,7 @@ struct VDBExplosiveTurbulentNoise : INode { auto wrangler = [&](auto &leaf, openvdb::Index leafpos) { for (auto iter = leaf.beginValueOn(); iter != leaf.endValueOn(); ++iter) { auto coord = iter.getCoord(); - auto pos = (vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; + auto pos = (zeno::vec3i(coord[0], coord[1], coord[2]) + translation) * inv_scale; auto noise = strength * turbulent(pos[0], pos[1], pos[2]); iter.modifyValue([&] (auto &v) { v += noise; diff --git a/projects/zenvdb/VDBFillClearOps.cpp b/projects/zenvdb/VDBFillClearOps.cpp index 0fafe5b832..ee20bc7ba5 100644 --- a/projects/zenvdb/VDBFillClearOps.cpp +++ b/projects/zenvdb/VDBFillClearOps.cpp @@ -36,7 +36,7 @@ struct VDBFillActiveVoxels : INode { } else if (auto p = std::dynamic_pointer_cast(grid); p) { auto velman = openvdb::tree::LeafManager m_grid->tree())>>(p->m_grid->tree()); - velman.foreach(fill_voxels_op(vec_to_other(std::get(value)))); + velman.foreach(fill_voxels_op(vec_to_other(std::get(value)))); } set_output("grid", get_input("grid")); @@ -79,7 +79,7 @@ struct VDBMultiplyOperation : INode { } else if (auto p = std::dynamic_pointer_cast(grid); p) { auto velman = openvdb::tree::LeafManager m_grid->tree())>>(p->m_grid->tree()); - velman.foreach(fill_voxels_op(vec_to_other(std::get(value)))); + velman.foreach(fill_voxels_op(vec_to_other(std::get(value)))); } set_output("grid", get_input("grid")); @@ -100,7 +100,7 @@ ZENO_DEFNODE(VDBMultiplyOperation)( 
template -void touch_aabb_region(GridPtr const &grid, vec3f const &bmin, vec3f const &bmax) { +void touch_aabb_region(GridPtr const &grid, zeno::vec3f const &bmin, zeno::vec3f const &bmax) { auto cmin = grid->transform().worldToIndex(openvdb::Vec3R(bmin[0], bmin[1], bmin[2])); auto cmax = grid->transform().worldToIndex(openvdb::Vec3R(bmax[0], bmax[1], bmax[2])); using size_type = std::decay_t()[0])>; @@ -124,8 +124,8 @@ void touch_aabb_region(GridPtr const &grid, vec3f const &bmin, vec3f const &bmax struct VDBTouchAABBRegion : INode { virtual void apply() override { auto grid = get_input("grid"); - auto bmin = get_input("bmin")->get(); - auto bmax = get_input("bmax")->get(); + auto bmin = get_input("bmin")->get(); + auto bmax = get_input("bmax")->get(); if (auto p = std::dynamic_pointer_cast(grid); p) { touch_aabb_region(p->m_grid, bmin, bmax); } else if (auto p = std::dynamic_pointer_cast(grid); p) { diff --git a/projects/zenvdb/VDBInvertSDF.cpp b/projects/zenvdb/VDBInvertSDF.cpp index f9beacd7f5..f256ca6e19 100644 --- a/projects/zenvdb/VDBInvertSDF.cpp +++ b/projects/zenvdb/VDBInvertSDF.cpp @@ -15,7 +15,7 @@ struct VDBChangeBackground : INode{ if (auto p = std::dynamic_pointer_cast(grid); p) { openvdb::tools::changeBackground(p->m_grid->tree(), get_input2("background")); } else if (auto p = std::dynamic_pointer_cast(grid); p) { - openvdb::tools::changeBackground(p->m_grid->tree(), vec_to_other(get_input2("background"))); + openvdb::tools::changeBackground(p->m_grid->tree(), vec_to_other(get_input2("background"))); } set_output("grid", get_input("grid")); diff --git a/zeno/include/zeno/funcs/PrimitiveIO.h b/zeno/include/zeno/funcs/PrimitiveIO.h index 7104d0f678..fabcb6c690 100644 --- a/zeno/include/zeno/funcs/PrimitiveIO.h +++ b/zeno/include/zeno/funcs/PrimitiveIO.h @@ -112,7 +112,7 @@ static void deserialize(const std::vector &str, AttrVector &arr) if (attributeHeader->type == AttributeType::Vec3f) { - auto &attr = arr.template add_attr(key); + auto &attr = 
arr.template add_attr(key); attr.clear(); attr.reserve(attributeHeader->size); std::copy_n((vec3f *)attributeHeader->buff, attributeHeader->size, std::back_inserter(attr)); diff --git a/zeno/include/zeno/funcs/PrimitiveTools.h b/zeno/include/zeno/funcs/PrimitiveTools.h index 2da5acbd5c..b70610d1a7 100644 --- a/zeno/include/zeno/funcs/PrimitiveTools.h +++ b/zeno/include/zeno/funcs/PrimitiveTools.h @@ -63,7 +63,7 @@ static void addIndividualPrimitive(PrimitiveObject* dst, const PrimitiveObject* }, src->attr(key)); // dst->attr(key).emplace_back(src->attr(key)[index]); } else { - dst->attr(key).emplace_back(src->attr(key)[index]); + dst->attr(key).emplace_back(src->attr(key)[index]); } } dst->resize(dst->attr("pos").size()); diff --git a/zeno/include/zeno/types/AttrVector.h b/zeno/include/zeno/types/AttrVector.h index 6da0cd9658..bf20d7c6ec 100644 --- a/zeno/include/zeno/types/AttrVector.h +++ b/zeno/include/zeno/types/AttrVector.h @@ -437,8 +437,8 @@ struct AttrVector { //means like attr.emplace_back(val) //suppose "pos" = {} // "clr" = {} - //attr("clr").emplace_back(val) - //attr("pos").emplace_back(val)<---this will resize "clr" to zero first and then push_back to "pos" + //attr("clr").emplace_back(val) + //attr("pos").emplace_back(val)<---this will resize "clr" to zero first and then push_back to "pos" //_ensure_update(); auto it = attrs.find(name); if (it == attrs.end()) diff --git a/zeno/src/nodes/AxisNodes.cpp b/zeno/src/nodes/AxisNodes.cpp index 8536da8f66..d9dbf0f0c5 100644 --- a/zeno/src/nodes/AxisNodes.cpp +++ b/zeno/src/nodes/AxisNodes.cpp @@ -30,10 +30,10 @@ ZENDEFNODE(ExtractAxis, { struct MakeAxis : zeno::INode { virtual void apply() override { - auto origin = get_input2("origin"); - auto axisX = get_input2("axisX"); - auto axisY = get_input2("axisY"); - auto axisZ = get_input2("axisZ"); + auto origin = get_input2("origin"); + auto axisX = get_input2("axisX"); + auto axisY = get_input2("axisY"); + auto axisZ = get_input2("axisZ"); auto p = 
std::make_shared(origin, axisX, axisY, axisZ); auto by = get_param("normalize"); if (by == "X") diff --git a/zeno/src/nodes/CameraNodes.cpp b/zeno/src/nodes/CameraNodes.cpp index 97563d4871..e55c445ce4 100644 --- a/zeno/src/nodes/CameraNodes.cpp +++ b/zeno/src/nodes/CameraNodes.cpp @@ -65,9 +65,9 @@ struct MakeCamera : INode { virtual void apply() override { auto camera = std::make_unique(); - camera->pos = get_input2("pos"); - camera->up = get_input2("up"); - camera->view = get_input2("view"); + camera->pos = get_input2("pos"); + camera->up = get_input2("up"); + camera->view = get_input2("view"); camera->ffar = get_input2("far"); camera->fnear = get_input2("near"); camera->fov = get_input2("fov"); @@ -132,9 +132,9 @@ struct TargetCamera : INode { virtual void apply() override { auto camera = std::make_unique(); - auto refUp = zeno::normalize(get_input2("refUp")); - auto pos = get_input2("pos"); - auto target = get_input2("target"); + auto refUp = zeno::normalize(get_input2("refUp")); + auto pos = get_input2("pos"); + auto target = get_input2("target"); auto AF = get_input2("AutoFocus"); vec3f view = zeno::normalize(target - pos); vec3f right = zeno::cross(view, refUp); @@ -180,12 +180,12 @@ ZENO_DEFNODE(TargetCamera)({ struct MakeLight : INode { virtual void apply() override { auto light = std::make_unique(); - light->lightDir = normalize(get_input2("lightDir")); + light->lightDir = normalize(get_input2("lightDir")); light->intensity = get_input2("intensity"); - light->shadowTint = get_input2("shadowTint"); + light->shadowTint = get_input2("shadowTint"); light->lightHight = get_input2("lightHight"); light->shadowSoftness = get_input2("shadowSoftness"); - light->lightColor = get_input2("lightColor"); + light->lightColor = get_input2("lightColor"); light->lightScale = get_input2("lightScale"); light->isEnabled = get_input2("isEnabled"); set_output("light", std::move(light)); diff --git a/zeno/src/nodes/CurveNodes.cpp b/zeno/src/nodes/CurveNodes.cpp index 
f38fa97135..fb206f7d3b 100644 --- a/zeno/src/nodes/CurveNodes.cpp +++ b/zeno/src/nodes/CurveNodes.cpp @@ -137,9 +137,9 @@ struct UpdateCurveControlPoint : zeno::INode { if (has_input("point_y")) data.cpoints[i].v = get_input2("point_y"); if (has_input("left_handler")) - data.cpoints[i].left_handler = get_input2("left_handler"); + data.cpoints[i].left_handler = get_input2("left_handler"); if (has_input("right_handler")) - data.cpoints[i].right_handler = get_input2("right_handler"); + data.cpoints[i].right_handler = get_input2("right_handler"); set_output("curve", std::move(curve)); } diff --git a/zeno/src/nodes/InputParams.cpp b/zeno/src/nodes/InputParams.cpp index 74df5a01bd..3be243aa5f 100644 --- a/zeno/src/nodes/InputParams.cpp +++ b/zeno/src/nodes/InputParams.cpp @@ -209,17 +209,17 @@ struct ParamFileParser : zeno::INode { fprintf(fp, "%s,%s,%d\n", p->name.c_str(), p->_type.c_str(), v); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%d,%d\n", p->name.c_str(), p->_type.c_str(), v[0], v[1]); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%d,%d,%d\n", p->name.c_str(), p->_type.c_str(), v[0], v[1], v[2]); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%d,%d,%d,%d\n", p->name.c_str(), p->_type.c_str(), v[0], v[1], v[2], v[3]); } @@ -229,17 +229,17 @@ struct ParamFileParser : zeno::INode { fprintf(fp, "%s,%s,%f\n", p->name.c_str(), p->_type.c_str(), v); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%f,%f\n", p->name.c_str(), p->_type.c_str(), v[0], 
v[1]); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%f,%f,%f\n", p->name.c_str(), p->_type.c_str(), v[0], v[1], v[2]); } else if (std::holds_alternative(p->defaultValue)) { - auto v = std::get(p->defaultValue); + auto v = std::get(p->defaultValue); value = std::make_shared(v); fprintf(fp, "%s,%s,%f,%f,%f,%f\n", p->name.c_str(), p->_type.c_str(), v[0], v[1], v[2], v[3]); } diff --git a/zeno/src/nodes/ProcedrualSkyNode.cpp b/zeno/src/nodes/ProcedrualSkyNode.cpp index 8c7a20161a..265796de58 100644 --- a/zeno/src/nodes/ProcedrualSkyNode.cpp +++ b/zeno/src/nodes/ProcedrualSkyNode.cpp @@ -16,9 +16,9 @@ struct ProceduralSky : INode { prim->userData().set2("isRealTimeObject", std::move(1)); prim->userData().set2("ProceduralSky", std::move(1)); - prim->userData().set2("sunLightDir", std::move(get_input2("sunLightDir"))); + prim->userData().set2("sunLightDir", std::move(get_input2("sunLightDir"))); prim->userData().set2("sunLightSoftness", std::move(get_input2("sunLightSoftness"))); - prim->userData().set2("windDir", std::move(get_input2("windDir"))); + prim->userData().set2("windDir", std::move(get_input2("windDir"))); prim->userData().set2("timeStart", std::move(get_input2("timeStart"))); prim->userData().set2("timeSpeed", std::move(get_input2("timeSpeed"))); prim->userData().set2("sunLightIntensity", std::move(get_input2("sunLightIntensity"))); @@ -61,7 +61,7 @@ struct HDRSky : INode { prim->userData().set2("isRealTimeObject", std::move(1)); prim->userData().set2("HDRSky", std::move(path)); prim->userData().set2("evnTexRotation", std::move(get_input2("rotation"))); - prim->userData().set2("evnTex3DRotation", std::move(get_input2("rotation3d"))); + prim->userData().set2("evnTex3DRotation", std::move(get_input2("rotation3d"))); prim->userData().set2("evnTexStrength", std::move(get_input2("strength"))); prim->userData().set2("enable", 
std::move(get_input2("enable"))); set_output("HDRSky", std::move(prim)); diff --git a/zeno/src/nodes/color/MakeColor.cpp b/zeno/src/nodes/color/MakeColor.cpp index 63c03a3399..ed81e2d281 100644 --- a/zeno/src/nodes/color/MakeColor.cpp +++ b/zeno/src/nodes/color/MakeColor.cpp @@ -5,7 +5,7 @@ namespace zeno { struct MakeColor : zeno::INode { virtual void apply() override { - auto color = get_input2("color"); + auto color = get_input2("color"); set_output2("color", std::move(color)); } }; diff --git a/zeno/src/nodes/mtl/ShaderAttrs.cpp b/zeno/src/nodes/mtl/ShaderAttrs.cpp index 495341a64e..8b9caefdf3 100644 --- a/zeno/src/nodes/mtl/ShaderAttrs.cpp +++ b/zeno/src/nodes/mtl/ShaderAttrs.cpp @@ -53,8 +53,8 @@ struct MakeShaderUniform : zeno::INode { for (const auto& [key, value] : uniformDict->lut) { auto index = std::stoi(key); if (auto num = dynamic_cast(value.get())) { - auto value = num->get(); - std::vector& attr_arr = prim->add_attr("pos"); + auto value = num->get(); + std::vector& attr_arr = prim->add_attr("pos"); if (index < attr_arr.size()) { attr_arr[index] = value; } diff --git a/zeno/src/nodes/mtl/ShaderFinalize.cpp b/zeno/src/nodes/mtl/ShaderFinalize.cpp index d38d21bbda..58e9b02d95 100644 --- a/zeno/src/nodes/mtl/ShaderFinalize.cpp +++ b/zeno/src/nodes/mtl/ShaderFinalize.cpp @@ -127,7 +127,7 @@ struct ShaderFinalize : INode { code += "bool sssFxiedRadius = false;\n"; } - vec3f mask_value = (vec3f)get_input2("mask_value") / 255.0f; + vec3f mask_value = (vec3f)get_input2("mask_value") / 255.0f; code += zeno::format("vec3 mask_value = vec3({}, {}, {});\n", mask_value[0], mask_value[1], mask_value[2]); auto mtl = std::make_shared(); diff --git a/zeno/src/nodes/mtl/ShaderTexture.cpp b/zeno/src/nodes/mtl/ShaderTexture.cpp index 4ec10b750a..63b21532d5 100644 --- a/zeno/src/nodes/mtl/ShaderTexture.cpp +++ b/zeno/src/nodes/mtl/ShaderTexture.cpp @@ -198,22 +198,22 @@ struct SmartTexture2D : ShaderNodeClone { number[0] = get_input2("value"); } - if(has_input2("value")) 
+ if(has_input2("value")) { - auto in = get_input2("value"); + auto in = get_input2("value"); number[0] = in[0]; number[1] = in[1]; } - if(has_input2("value")) + if(has_input2("value")) { - auto in = get_input2("value"); + auto in = get_input2("value"); number[0] = in[0]; number[1] = in[1]; number[2] = in[2]; } - if(has_input2("value")) + if(has_input2("value")) { - auto in = get_input2("value"); + auto in = get_input2("value"); number[0] = in[0]; number[1] = in[1]; number[2] = in[2]; diff --git a/zeno/src/nodes/mtl/ShaderUtils.cpp b/zeno/src/nodes/mtl/ShaderUtils.cpp index 144c3c2938..cbd0e0f0f1 100644 --- a/zeno/src/nodes/mtl/ShaderUtils.cpp +++ b/zeno/src/nodes/mtl/ShaderUtils.cpp @@ -171,9 +171,9 @@ ZENDEFNODE(ShaderNormalMap, { struct CalcCameraUp : INode { virtual void apply() override { - auto refUp = zeno::normalize(get_input2("refUp")); - auto pos = get_input2("pos"); - auto target = get_input2("target"); + auto refUp = zeno::normalize(get_input2("refUp")); + auto pos = get_input2("pos"); + auto target = get_input2("target"); vec3f view = zeno::normalize(target - pos); vec3f right = zeno::cross(view, refUp); vec3f up = zeno::cross(right, view); diff --git a/zeno/src/nodes/neo/NumRandom.cpp b/zeno/src/nodes/neo/NumRandom.cpp index 5ee862c293..c53b5e2cb3 100644 --- a/zeno/src/nodes/neo/NumRandom.cpp +++ b/zeno/src/nodes/neo/NumRandom.cpp @@ -184,7 +184,7 @@ namespace { struct NumRandom : INode { virtual void apply() override { - auto dir = get_input2("dir"); + auto dir = get_input2("dir"); auto base = get_input2("base"); auto scale = get_input2("scale"); auto seed = get_input2("seed"); diff --git a/zeno/src/nodes/neo/PrimBend.cpp b/zeno/src/nodes/neo/PrimBend.cpp index d8dbae7744..86d0fcde43 100644 --- a/zeno/src/nodes/neo/PrimBend.cpp +++ b/zeno/src/nodes/neo/PrimBend.cpp @@ -29,9 +29,9 @@ struct PrimBend : zeno::INode { midPoint = std::min(limitMax, std::max(limitMin, midPoint)); biasDir = std::min(1.f, std::max(0.f, biasDir)); - auto origin = 
has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); - auto tangent = has_input("tangent") ? get_input("tangent")->get() : vec3f(0, 1, 0); - auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(1, 0, 0); + auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); + auto tangent = has_input("tangent") ? get_input("tangent")->get() : vec3f(0, 1, 0); + auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(1, 0, 0); orthonormal orb(direction, tangent); direction = orb.normal; diff --git a/zeno/src/nodes/neo/PrimCodecUVs.cpp b/zeno/src/nodes/neo/PrimCodecUVs.cpp index ca62976819..195d20dc74 100644 --- a/zeno/src/nodes/neo/PrimCodecUVs.cpp +++ b/zeno/src/nodes/neo/PrimCodecUVs.cpp @@ -17,7 +17,7 @@ ZENO_API void primDecodeUVs(PrimitiveObject *prim) { ZENO_API void primLoopUVsToVerts(PrimitiveObject *prim) { if (prim->loops.size() && prim->has_attr("uvs")) { auto &loop_uvs = prim->loops.attr("uvs"); - auto &vert_uv = prim->verts.add_attr("uv"); // todo: support vec2f in attr... + auto &vert_uv = prim->verts.add_attr("uv"); // todo: support vec2f in attr... 
/*attr_uv.resize(prim->loop_uvs.size());*/ for (size_t i = 0; i < loop_uvs.size(); i++) { auto uv = prim->uvs[loop_uvs[i]]; @@ -73,7 +73,7 @@ ZENO_DEFNODE(PrimLoopUVsToVerts)({ struct PrimUVVertsToLoopsuv : INode { virtual void apply() override { auto prim = get_input("prim"); - auto &vuv = prim->verts.attr("uv"); + auto &vuv = prim->verts.attr("uv"); if (prim->loops.size()) { auto &uvs = prim->loops.add_attr("uvs"); for (auto i = 0; i < prim->loops.size(); i++) { @@ -86,9 +86,9 @@ struct PrimUVVertsToLoopsuv : INode { } } else if (prim->tris.size()) { - auto &uv0 = prim->tris.add_attr("uv0"); - auto &uv1 = prim->tris.add_attr("uv1"); - auto &uv2 = prim->tris.add_attr("uv2"); + auto &uv0 = prim->tris.add_attr("uv0"); + auto &uv1 = prim->tris.add_attr("uv1"); + auto &uv2 = prim->tris.add_attr("uv2"); for (auto i = 0; i < prim->tris.size(); i++) { uv0[i] = vuv[prim->tris[i][0]]; uv1[i] = vuv[prim->tris[i][1]]; @@ -151,7 +151,7 @@ struct PrimUVEdgeDuplicate : INode { }); std::swap(prim->verts, new_verts); if (writeUVToVertex) { - auto &vert_uv = prim->verts.add_attr("uv"); + auto &vert_uv = prim->verts.add_attr("uv"); auto &loopsuv = prim->loops.attr("uvs"); for (auto i = 0; i < prim->loops.size(); i++) { auto uv = prim->uvs[loopsuv[i]]; @@ -186,7 +186,7 @@ struct PrimSplitVertexForSharedNormal : INode { indexs.reserve(prim->loops.size()); std::map, int> mapping; { - auto &nrm = prim->loops.attr("nrm"); + auto &nrm = prim->loops.attr("nrm"); for (auto i = 0; i < prim->loops.size(); i++) { std::tuple n = {nrm[i][0], nrm[i][1], nrm[i][2]}; if (mapping.count(n) == 0) { @@ -216,7 +216,7 @@ struct PrimSplitVertexForSharedNormal : INode { revert_new_mapping[v] = k; } AttrVector verts(new_mapping.size()); - auto &nrm = verts.add_attr("nrm"); + auto &nrm = verts.add_attr("nrm"); for (auto i = 0; i < verts.size(); i++) { verts[i] = prim->verts[revert_new_mapping[i].first]; nrm[i] = revert_mapping[revert_new_mapping[i].second]; diff --git a/zeno/src/nodes/neo/PrimDuplicate.cpp 
b/zeno/src/nodes/neo/PrimDuplicate.cpp index 1484e4e588..f0cdfb51a7 100644 --- a/zeno/src/nodes/neo/PrimDuplicate.cpp +++ b/zeno/src/nodes/neo/PrimDuplicate.cpp @@ -81,9 +81,9 @@ ZENO_API std::shared_ptr primDuplicate(PrimitiveObject *parsPri }); }; if constexpr (hasDirAttr.value) { - auto const &accDir = parsPrim->attr(dirAttr); + auto const &accDir = parsPrim->attr(dirAttr); if (!tanAttr.empty()) - func(accDir, std::true_type{}, parsPrim->attr(tanAttr)); + func(accDir, std::true_type{}, parsPrim->attr(tanAttr)); else func(accDir, std::false_type{}, std::array{}); } else { diff --git a/zeno/src/nodes/neo/PrimExtrude.cpp b/zeno/src/nodes/neo/PrimExtrude.cpp index 62a6f7820e..24cffe7326 100644 --- a/zeno/src/nodes/neo/PrimExtrude.cpp +++ b/zeno/src/nodes/neo/PrimExtrude.cpp @@ -20,7 +20,7 @@ struct PrimExtrude : INode { auto maskAttr = get_input2("maskAttr"); auto extrude = get_input2("extrude"); auto inset = get_input2("inset"); - auto offset = get_input2("offset"); + auto offset = get_input2("offset"); //auto bridgeMaskAttrO = get_input2("bridgeMaskAttrO"); auto sourceMaskAttrO = get_input2("sourceMaskAttrO"); auto delOldFaces = get_input2("delOldFaces"); @@ -66,7 +66,7 @@ struct PrimExtrude : INode { if (extrude != 0 || inset != 0) { std::string tmpNormAttr = "%%extrude2"; primCalcNormal(prim2.get(), 1.0f, tmpNormAttr); - p2norms = std::move(prim2->verts.attr(tmpNormAttr)); + p2norms = std::move(prim2->verts.attr(tmpNormAttr)); prim2->verts.erase_attr(tmpNormAttr); } @@ -122,7 +122,7 @@ struct PrimExtrude : INode { insd -= dot(insd, norm) * norm; out[i] = normalizeSafe(insd); } - //p2inset = std::move(prim2->verts.attr(tmpInsetAttr)); + //p2inset = std::move(prim2->verts.attr(tmpInsetAttr)); //prim2->verts.erase_attr(tmpInsetAttr); if (!(extrude != 0)) diff --git a/zeno/src/nodes/neo/PrimFlipFaces.cpp b/zeno/src/nodes/neo/PrimFlipFaces.cpp index 3b682af5ae..44bbd6a711 100644 --- a/zeno/src/nodes/neo/PrimFlipFaces.cpp +++ b/zeno/src/nodes/neo/PrimFlipFaces.cpp @@ 
-20,8 +20,8 @@ ZENO_API void primFlipFaces(PrimitiveObject *prim) { std::swap(tri[2], tri[0]); }); if (prim->tris.attr_is("uv0")) { - auto &uv0 = prim->tris.add_attr("uv0"); - auto &uv2 = prim->tris.add_attr("uv2"); + auto &uv0 = prim->tris.add_attr("uv0"); + auto &uv2 = prim->tris.add_attr("uv2"); for (auto i = 0; i < prim->tris.size(); i++) { std::swap(uv0[i], uv2[i]); } diff --git a/zeno/src/nodes/neo/PrimForceTrail.cpp b/zeno/src/nodes/neo/PrimForceTrail.cpp index 10e9c85b49..58b9f0f227 100644 --- a/zeno/src/nodes/neo/PrimForceTrail.cpp +++ b/zeno/src/nodes/neo/PrimForceTrail.cpp @@ -71,7 +71,7 @@ struct PrimForceTrail : INode { }); std::visit([&] (auto const &attractUDFCurve, auto const &driftCoordCurve) { - auto &forceArr = prim->verts.add_attr(forceAttr); + auto &forceArr = prim->verts.add_attr(forceAttr); parallel_for(prim->verts.size(), [&] (size_t i) { auto pos = prim->verts[i]; diff --git a/zeno/src/nodes/neo/PrimGenerateONB.cpp b/zeno/src/nodes/neo/PrimGenerateONB.cpp index 54d74a9fbe..bc4ca82b43 100644 --- a/zeno/src/nodes/neo/PrimGenerateONB.cpp +++ b/zeno/src/nodes/neo/PrimGenerateONB.cpp @@ -17,9 +17,9 @@ struct PrimGenerateONB : INode { auto bitanAttrOut = get_input2("bitanAttrOut"); auto writebackDir = get_input2("doNormalize"); - auto &dir = prim->verts.attr(dirAttr); - auto &tan = prim->verts.add_attr(tanAttrOut); - auto &bitan = prim->verts.add_attr(bitanAttrOut); + auto &dir = prim->verts.attr(dirAttr); + auto &tan = prim->verts.add_attr(tanAttrOut); + auto &bitan = prim->verts.add_attr(bitanAttrOut); parallel_for(prim->verts.size(), [&] (size_t i) { auto d = normalizeSafe(dir[i]); @@ -58,9 +58,9 @@ struct PrimLineGenerateONB : zeno::INode { size_t n = prim->verts.size(); if (lineSort) primLineSort(prim.get()); - auto &dirs = prim->verts.add_attr(dirAttrOut); - auto &tans = prim->verts.add_attr(tanAttrOut); - auto &bitans = prim->verts.add_attr(bitanAttrOut); + auto &dirs = prim->verts.add_attr(dirAttrOut); + auto &tans = 
prim->verts.add_attr(tanAttrOut); + auto &bitans = prim->verts.add_attr(bitanAttrOut); if (n >= 2) { parallel_for((size_t)1, n - 1, [&] (size_t i) { diff --git a/zeno/src/nodes/neo/PrimPerlinNoise.cpp b/zeno/src/nodes/neo/PrimPerlinNoise.cpp index 6debf72b8d..c8feb608fe 100644 --- a/zeno/src/nodes/neo/PrimPerlinNoise.cpp +++ b/zeno/src/nodes/neo/PrimPerlinNoise.cpp @@ -68,7 +68,7 @@ struct PrimPerlinNoise : INode { auto detail = get_input2("detail"); auto roughness = get_input2("roughness"); auto disortion = get_input2("disortion"); - auto offset = get_input2("offset"); + auto offset = get_input2("offset"); auto average = get_input2("average"); auto strength = get_input2("strength"); auto outAttr = get_input2("outAttr"); diff --git a/zeno/src/nodes/neo/PrimProject.cpp b/zeno/src/nodes/neo/PrimProject.cpp index e19dd70638..afc01079c3 100644 --- a/zeno/src/nodes/neo/PrimProject.cpp +++ b/zeno/src/nodes/neo/PrimProject.cpp @@ -444,7 +444,7 @@ struct PrimProject : INode { } }; - auto const &nrm = prim->verts.attr(nrmAttr); + auto const &nrm = prim->verts.attr(nrmAttr); auto cond = enum_variant>( array_index({"front", "back", "both"}, allowDir)); @@ -484,10 +484,10 @@ ZENDEFNODE(PrimProject, { struct TestRayBox : INode { void apply() override { - auto origin = get_input2("ray_origin"); - auto dir = get_input2("ray_dir"); - auto bmin = get_input2("box_min"); - auto bmax = get_input2("box_max"); + auto origin = get_input2("ray_origin"); + auto dir = get_input2("ray_dir"); + auto bmin = get_input2("box_min"); + auto bmax = get_input2("box_max"); set_output("predicate", std::make_shared((int)ray_box_intersect(origin, dir, std::make_pair(bmin, bmax)))); } diff --git a/zeno/src/nodes/neo/PrimRandomize.cpp b/zeno/src/nodes/neo/PrimRandomize.cpp index f2736f2b2c..03d3fd600e 100644 --- a/zeno/src/nodes/neo/PrimRandomize.cpp +++ b/zeno/src/nodes/neo/PrimRandomize.cpp @@ -181,7 +181,7 @@ ZENO_API void primRandomize(PrimitiveObject *prim, std::string attr, std::string 
std::visit([&] (auto const &randty, auto const &seedSel, auto hasDirArr) { using T = std::invoke_result_t, wangsrng &>; auto &arr = prim->verts.add_attr(attr); - auto const &dirArr = hasDirArr ? prim->attr(dirAttr) : std::vector(); + auto const &dirArr = hasDirArr ? prim->attr(dirAttr) : std::vector(); parallel_for((size_t)0, arr.size(), [&] (size_t i) { wangsrng rng(seed, seedSel(i)); T offs = base + randty(rng) * scale; diff --git a/zeno/src/nodes/neo/PrimSepTris.cpp b/zeno/src/nodes/neo/PrimSepTris.cpp index 1dd3225e13..d3f4030d21 100644 --- a/zeno/src/nodes/neo/PrimSepTris.cpp +++ b/zeno/src/nodes/neo/PrimSepTris.cpp @@ -8,7 +8,7 @@ namespace zeno { #if 0 ZENO_API void primSmoothNormal(PrimitiveObject *prim) { - auto &nrm = prim->verts.add_attr("nrm"); + auto &nrm = prim->verts.add_attr("nrm"); std::fill(ZENO_PAR_UNSEQ nrm.begin(), nrm.end(), vec3f()); for (size_t i = 0; i < prim->polys.size(); i++) { auto [base, len] = prim->polys[i]; @@ -36,7 +36,7 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke if (!prim->tris.size() && !prim->quads.size() && !prim->polys.size()) { //if ((prim->points.size() || prim->lines.size()) && !prim->verts.has_attr("clr")) { //throw; - //prim->verts.add_attr("clr").assign(prim->verts.size(), vec3f(1, 1, 0)); + //prim->verts.add_attr("clr").assign(prim->verts.size(), vec3f(1, 1, 0)); //} return; // TODO: cihou pars and lines } @@ -103,10 +103,10 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke if (prim->tris.has_attr("uv0") && prim->tris.has_attr("uv1") && prim->tris.has_attr("uv2")) { - auto &uv0 = prim->tris.attr("uv0"); - auto &uv1 = prim->tris.attr("uv1"); - auto &uv2 = prim->tris.attr("uv2"); - auto &new_uv = new_verts.add_attr("uv"); + auto &uv0 = prim->tris.attr("uv0"); + auto &uv1 = prim->tris.attr("uv1"); + auto &uv2 = prim->tris.attr("uv2"); + auto &new_uv = new_verts.add_attr("uv"); for (int i = 0; i < prim->tris.size(); i++) { auto uv = uv0[i]; new_uv[i 
* 3 + 0] = {uv[0], uv[1], 0}; @@ -120,11 +120,11 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke prim->quads.has_attr("uv1") && prim->quads.has_attr("uv2") && prim->quads.has_attr("uv3")) { - auto &uv0 = prim->quads.attr("uv0"); - auto &uv1 = prim->quads.attr("uv1"); - auto &uv2 = prim->quads.attr("uv2"); - auto &uv3 = prim->quads.attr("uv3"); - auto &new_uv = new_verts.add_attr("uv"); + auto &uv0 = prim->quads.attr("uv0"); + auto &uv1 = prim->quads.attr("uv1"); + auto &uv2 = prim->quads.attr("uv2"); + auto &uv3 = prim->quads.attr("uv3"); + auto &new_uv = new_verts.add_attr("uv"); size_t b = prim->tris.size() * 3; for (int i = 0; i < prim->quads.size(); i++) { new_uv[b + i * 6 + 0] = uv0[i]; @@ -153,7 +153,7 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke b += (len - 2) * 3; } b = prim->tris.size() * 3 + prim->quads.size() * 6; - auto &new_uv = new_verts.add_attr("uv"); + auto &new_uv = new_verts.add_attr("uv"); for (int i = 0; i < v.size(); i++) { auto uv = prim->uvs[v[i]]; new_uv[b + i] = {uv[0], uv[1], 0}; @@ -179,7 +179,7 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke shn[v[i * 3 + 1]] += n; shn[v[i * 3 + 2]] += n; } - auto &new_nrm = new_verts.add_attr("nrm"); + auto &new_nrm = new_verts.add_attr("nrm"); for (size_t i = 0; i < v.size(); i++) { auto n = shn[v[i]]; n = normalizeSafe(n); @@ -190,7 +190,7 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke std::swap(new_verts, prim->verts); if (!smoothNormal && needCompNormal) { - auto &nrm = prim->verts.add_attr("nrm"); + auto &nrm = prim->verts.add_attr("nrm"); for (size_t i = 0; i < v.size() / 3; i++) { auto a = prim->verts[i * 3 + 0]; auto b = prim->verts[i * 3 + 1]; @@ -204,10 +204,10 @@ ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal, bool ke } if (keepTriFaces) { - auto &uv0 = prim->tris.add_attr("uv0"); - auto &uv1 = prim->tris.add_attr("uv1"); 
- auto &uv2 = prim->tris.add_attr("uv2"); - auto &uv = prim->attr("uv"); + auto &uv0 = prim->tris.add_attr("uv0"); + auto &uv1 = prim->tris.add_attr("uv1"); + auto &uv2 = prim->tris.add_attr("uv2"); + auto &uv = prim->attr("uv"); prim->tris.resize(v.size() / 3); for (int i = 0; i < prim->tris.size(); i++) { prim->tris[i] = {i * 3, i * 3 + 1, i * 3 + 2}; diff --git a/zeno/src/nodes/neo/PrimSimplifyTag.cpp b/zeno/src/nodes/neo/PrimSimplifyTag.cpp index f363f1412d..c929efe9e8 100644 --- a/zeno/src/nodes/neo/PrimSimplifyTag.cpp +++ b/zeno/src/nodes/neo/PrimSimplifyTag.cpp @@ -27,7 +27,7 @@ ZENO_API void primSimplifyTag(PrimitiveObject *prim, std::string tagAttr) { ZENO_API void primColorByTag(PrimitiveObject *prim, std::string tagAttr, std::string clrAttr, int seed) { auto const &tag = prim->verts.attr(tagAttr); - auto &clr = prim->verts.add_attr(clrAttr); + auto &clr = prim->verts.add_attr(clrAttr); std::unordered_map lut; std::mt19937 gen{seed == -1 ? std::random_device{}() : seed}; std::uniform_real_distribution unif(0.2f, 1.0f); diff --git a/zeno/src/nodes/neo/PrimTranslate.cpp b/zeno/src/nodes/neo/PrimTranslate.cpp index fb155fdbc2..4116cf7734 100644 --- a/zeno/src/nodes/neo/PrimTranslate.cpp +++ b/zeno/src/nodes/neo/PrimTranslate.cpp @@ -27,7 +27,7 @@ namespace { struct PrimTranslate : INode { virtual void apply() override { auto prim = get_input("prim"); - auto offset = get_input2("offset"); + auto offset = get_input2("offset"); primTranslate(prim.get(), offset); set_output("prim", get_input("prim")); } @@ -48,7 +48,7 @@ ZENDEFNODE(PrimTranslate, { struct PrimScale : INode { virtual void apply() override { auto prim = get_input("prim"); - auto scale = get_input2("scale"); + auto scale = get_input2("scale"); primScale(prim.get(), scale); set_output("prim", get_input("prim")); } diff --git a/zeno/src/nodes/neo/PrimTwist.cpp b/zeno/src/nodes/neo/PrimTwist.cpp index b7003984b8..d4c9c1fea2 100644 --- a/zeno/src/nodes/neo/PrimTwist.cpp +++ 
b/zeno/src/nodes/neo/PrimTwist.cpp @@ -25,11 +25,11 @@ struct PrimTwist : zeno::INode { // todo: also add PrimitiveStretch and Primitiv limitMin -= 0.5f; limitMax -= 0.5f; - auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); - auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(0, 1, 0); + auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); + auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(0, 1, 0); auto orb = has_input("tangent") - ? orthonormal(direction, get_input("tangent")->get()) + ? orthonormal(direction, get_input("tangent")->get()) : orthonormal(direction); direction = orb.normal; auto tangent = orb.tangent; diff --git a/zeno/src/nodes/num/NumericMath.cpp b/zeno/src/nodes/num/NumericMath.cpp index 20311cd21d..980436f15d 100644 --- a/zeno/src/nodes/num/NumericMath.cpp +++ b/zeno/src/nodes/num/NumericMath.cpp @@ -11,11 +11,11 @@ namespace { struct MakeOrthonormalBase : INode { virtual void apply() override { - auto normal = get_input("normal")->get(); + auto normal = get_input("normal")->get(); normal = normalize(normal); vec3f tangent, bitangent; if (has_input("tangent")) { - tangent = get_input("tangent")->get(); + tangent = get_input("tangent")->get(); bitangent = cross(normal, tangent); } else { tangent = vec3f(0, 0, 1); @@ -46,9 +46,9 @@ struct OrthonormalBase : INode { virtual void apply() override { std::unique_ptr orb; - auto normal = get_input("normal")->get(); + auto normal = get_input("normal")->get(); if (has_input("tangent")) { - auto tangent = get_input("tangent")->get(); + auto tangent = get_input("tangent")->get(); orb = std::make_unique(normal, tangent); } else { orb = std::make_unique(normal); @@ -70,10 +70,10 @@ ZENDEFNODE(OrthonormalBase, { struct PixarOrthonormalBase : INode { virtual void apply() override { - auto normal = get_input("normal")->get(); + auto normal = get_input("normal")->get(); vec3f tangent{}, 
bitangent{}; if (has_input("tangent")) { - tangent = get_input("tangent")->get(); + tangent = get_input("tangent")->get(); guidedPixarONB(normal, tangent, bitangent); } else { pixarONB(normal, tangent, bitangent); @@ -95,10 +95,10 @@ ZENDEFNODE(PixarOrthonormalBase, { struct AABBCollideDetect : INode { virtual void apply() override { - auto bminA = get_input("bminA")->get(); - auto bmaxA = get_input("bmaxA")->get(); - auto bminB = get_input("bminB")->get(); - auto bmaxB = get_input("bmaxB")->get(); + auto bminA = get_input("bminA")->get(); + auto bmaxA = get_input("bmaxA")->get(); + auto bminB = get_input("bminB")->get(); + auto bmaxB = get_input("bmaxB")->get(); // https://www.cnblogs.com/liez/p/11965027.html bool overlap = alltrue(abs(bminA + bmaxA - bminB - bmaxB) <= (bmaxA - bminA + bmaxB - bminB)); @@ -119,7 +119,7 @@ ZENDEFNODE(AABBCollideDetect, { struct ProjectAndNormalize : INode { virtual void apply() override { - auto vec = get_input("vec")->get(); + auto vec = get_input("vec")->get(); auto plane = get_input2("plane"); std::array orb; diff --git a/zeno/src/nodes/prim/MakeGridPrimitive.cpp b/zeno/src/nodes/prim/MakeGridPrimitive.cpp index 6a5bc436f4..948366af98 100644 --- a/zeno/src/nodes/prim/MakeGridPrimitive.cpp +++ b/zeno/src/nodes/prim/MakeGridPrimitive.cpp @@ -10,10 +10,10 @@ namespace zeno { struct MakePointPrimitive :INode{ virtual void apply() override { - auto p = get_input("vec3")->get(); + auto p = get_input("vec3")->get(); auto prim = std::make_shared(); prim->resize(1); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); pos[0] = p; set_output("prim", prim); } @@ -40,7 +40,7 @@ struct Make1DLinePrimitive : INode { float dx = 1.f / std::max(nx - 1, (size_t)1); vec3f o = has_input("origin") ? 
- get_input("origin")->get() : vec3f(0); + get_input("origin")->get() : vec3f(0); vec3f ax = vec3f(1,0,0); auto dir = get_param("Direction"); @@ -53,7 +53,7 @@ struct Make1DLinePrimitive : INode { ax = zeno::vec3f(0,0,ax[0]); } ax = has_input("direction") ? - get_input("direction")->get() + get_input("direction")->get() : ax; if (has_input("scale")) { auto scale = get_input("scale")->get(); @@ -65,7 +65,7 @@ struct Make1DLinePrimitive : INode { auto prim = std::make_shared(); prim->resize(nx); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); #pragma omp parallel for for (intptr_t x = 0; x < nx; x++) { vec3f p = o + x * ax; @@ -110,13 +110,13 @@ struct Make2DGridPrimitive : INode { float dx = 1.f / std::max(nx - 1, (size_t)1); float dy = 1.f / std::max(ny - 1, (size_t)1); vec3f ax = has_input("sizeX") ? - get_input("sizeX")->get() + get_input("sizeX")->get() : vec3f(1, 0, 0); vec3f ay = has_input("sizeY") ? - get_input("sizeY")->get() + get_input("sizeY")->get() : vec3f(0, 1, 0); vec3f o = has_input("origin") ? - get_input("origin")->get() : vec3f(0); + get_input("origin")->get() : vec3f(0); if (has_input("scale")) { auto obj = get_input("scale"); auto scale = obj->is() ? 
obj->get() : obj->get(); @@ -140,7 +140,7 @@ struct Make2DGridPrimitive : INode { auto prim = std::make_shared(); prim->resize(nx * ny); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); auto layout = get_param("Layout"); if (layout == "Column-major") { @@ -153,7 +153,7 @@ struct Make2DGridPrimitive : INode { pos[i] = p; } if (get_param("hasUV")) { - auto &uv = prim->verts.add_attr("uv"); + auto &uv = prim->verts.add_attr("uv"); for (intptr_t y = 0; y < ny; y++) for (intptr_t x = 0; x < nx; x++) { size_t i = x + y * nx; @@ -185,7 +185,7 @@ struct Make2DGridPrimitive : INode { } if (get_param("hasUV")) { - auto &uv = prim->verts.add_attr("uv"); + auto &uv = prim->verts.add_attr("uv"); for (intptr_t x = 0; x < nx; x++) for (intptr_t y = 0; y < ny; y++) { size_t i = x * ny + y; @@ -249,16 +249,16 @@ struct Make3DGridPrimitive : INode { float dy = 1.f / std::max(ny - 1, (size_t)1); float dz = 1.f / std::max(nz - 1, (size_t)1); vec3f ax = has_input("sizeX") ? - get_input("sizeX")->get() + get_input("sizeX")->get() : vec3f(1, 0, 0); vec3f ay = has_input("sizeY") ? - get_input("sizeY")->get() + get_input("sizeY")->get() : vec3f(0, 1, 0); vec3f az = has_input("sizeZ") ? - get_input("sizeZ")->get() + get_input("sizeZ")->get() : vec3f(0, 0, 1); vec3f o = has_input("origin") ? - get_input("origin")->get() : vec3f(0); + get_input("origin")->get() : vec3f(0); if (has_input("scale")) { auto scale = get_input("scale")->get(); ax *= scale; @@ -273,7 +273,7 @@ struct Make3DGridPrimitive : INode { auto prim = std::make_shared(); prim->resize(nx * ny * nz); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); // for (size_t y = 0; y < ny; y++) { // for (size_t x = 0; x < nx; x++) { /* @@ -327,9 +327,9 @@ struct Make3DGridPointsInAABB : INode {//xubenhappy float dz = 1.f / std::max(nz - 1, (size_t)1); vec3f bmin = has_input("bmin") ? 
- get_input("bmin")->get() : vec3f(-1); + get_input("bmin")->get() : vec3f(-1); vec3f bmax = has_input("bmax") ? - get_input("bmax")->get() : vec3f(1); + get_input("bmax")->get() : vec3f(1); auto delta = (bmax - bmin) * vec3f(dx, dy, dz); if (get_param("isStaggered")) { @@ -339,7 +339,7 @@ struct Make3DGridPointsInAABB : INode {//xubenhappy auto prim = std::make_shared(); prim->resize(nx * ny * nz); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); #pragma omp parallel for for (int index = 0; index < nx * ny * nz; index++) { int x = index % nx; @@ -380,11 +380,11 @@ struct MakeCubePrimitive : INode { get_input("nz")->get() : nx; vec3f o = has_input("origin") ? - get_input("origin")->get() : vec3f(0); + get_input("origin")->get() : vec3f(0); auto prim = std::make_shared(); prim->resize(nx * ny * nz); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); #pragma omp parallel for // for (size_t y = 0; y < ny; y++) { // for (size_t x = 0; x < nx; x++) { @@ -416,10 +416,10 @@ struct MakeBoxPrimitive : INode { float size_x = get_input("size_x")->get(); float size_y = get_input("size_y")->get(); float size_z = get_input("size_z")->get(); - vec3f o = get_input("origin")->get(); + vec3f o = get_input("origin")->get(); auto prim = std::make_shared(); prim->resize(8); - auto& pos = prim->add_attr("pos"); + auto& pos = prim->add_attr("pos"); for (int index = 0; index < 8; index++) { int x = index / 2 / 2; diff --git a/zeno/src/nodes/prim/MakeVisualPrimitive.cpp b/zeno/src/nodes/prim/MakeVisualPrimitive.cpp index f99c768dc0..80552a5625 100644 --- a/zeno/src/nodes/prim/MakeVisualPrimitive.cpp +++ b/zeno/src/nodes/prim/MakeVisualPrimitive.cpp @@ -16,16 +16,16 @@ struct MakeVisualAABBPrimitive : INode { auto dx = has_input("dx") ? //zhxxhappy get_input("dx")->get() : 1; auto a = has_input("boundMin") - ? get_input("boundMin")->get() + ? get_input("boundMin")->get() : vec3f(-0.5, -0.5, -0.5) * dx; auto b = has_input("boundMax") - ? 
get_input("boundMax")->get() + ? get_input("boundMax")->get() : vec3f(+0.5, +0.5, +0.5) * dx; auto connType = get_param("type"); auto prim = std::make_shared(); - auto &pos = prim->add_attr("pos"); + auto &pos = prim->add_attr("pos"); prim->resize(8); pos[0] = vec3f(a[0], a[1], a[2]); pos[1] = vec3f(b[0], a[1], a[2]); diff --git a/zeno/src/nodes/prim/PrimitiveAttrOp.cpp b/zeno/src/nodes/prim/PrimitiveAttrOp.cpp index fb5f705df9..dec8d81950 100644 --- a/zeno/src/nodes/prim/PrimitiveAttrOp.cpp +++ b/zeno/src/nodes/prim/PrimitiveAttrOp.cpp @@ -18,7 +18,7 @@ struct PrimitiveFillAttr : INode { attrType = "float3"; } if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } auto &arr = prim->attr(attrName); @@ -104,7 +104,7 @@ struct PrimitiveRandomizeAttr : INode { auto attrName = get_param(("attrName")); auto attrType = get_param(("attrType")); if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } prim->attr_visit(attrName, [min, minY, minZ, max, maxY, maxZ](auto &arr) { @@ -154,15 +154,15 @@ struct PrimitiveRandomAttr : INode { auto attrName = get_param(("attrName")); auto attrType = get_param(("attrType")); if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } prim->attr_visit(attrName, [&](auto &arr) { for (int i = 0; i < arr.size(); i++) { if constexpr (is_decay_same_v) { vec3f f(frand(), frand(), frand()); - auto a = min->is() ? (vec3f)min->get() : min->get(); - auto b = max->is() ? (vec3f)max->get() : max->get(); + auto a = min->is() ? (vec3f)min->get() : min->get(); + auto b = max->is() ? 
(vec3f)max->get() : max->get(); arr[i] = mix(a, b, f); } else { float f(frand()); diff --git a/zeno/src/nodes/prim/PrimitiveAttribute.cpp b/zeno/src/nodes/prim/PrimitiveAttribute.cpp index 5c56986ed7..54d1f5448d 100644 --- a/zeno/src/nodes/prim/PrimitiveAttribute.cpp +++ b/zeno/src/nodes/prim/PrimitiveAttribute.cpp @@ -21,11 +21,11 @@ struct PrimitiveAddAttr : zeno::INode { } else if (type == "float3") { if (has_input("fillValue")) { - auto fillvalue = get_input("fillValue")->get(); - prim->add_attr(name, fillvalue); + auto fillvalue = get_input("fillValue")->get(); + prim->add_attr(name, fillvalue); } else { - prim->add_attr(name); + prim->add_attr(name); } } else { @@ -94,7 +94,7 @@ struct PrimitiveGetAttrValue : zeno::INode { } else if (type == "float3") { value->set(vec3f(0, 0, 0)); - std::vector& attr_arr = prim->attr(name); + std::vector& attr_arr = prim->attr(name); if (index < attr_arr.size()) { value->set(attr_arr[index]); } @@ -133,8 +133,8 @@ struct PrimitiveSetAttrValue : zeno::INode { } } else if (type == "float3") { - auto value = get_input("value")->get(); - std::vector& attr_arr = prim->add_attr(name); + auto value = get_input("value")->get(); + std::vector& attr_arr = prim->add_attr(name); if (index < attr_arr.size()) { attr_arr[index] = value; } diff --git a/zeno/src/nodes/prim/PrimitiveBent.cpp b/zeno/src/nodes/prim/PrimitiveBent.cpp index 0ef6641b1c..d3652b9aac 100644 --- a/zeno/src/nodes/prim/PrimitiveBent.cpp +++ b/zeno/src/nodes/prim/PrimitiveBent.cpp @@ -28,9 +28,9 @@ struct PrimitiveBent : zeno::INode { midPoint = std::min(limitMax, std::max(limitMin, midPoint)); biasDir = std::min(1.f, std::max(0.f, biasDir)); - auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); - auto tangent = has_input("tangent") ? get_input("tangent")->get() : vec3f(0, 1, 0); - auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(1, 0, 0); + auto origin = has_input("origin") ? 
get_input("origin")->get() : vec3f(0, 0, 0); + auto tangent = has_input("tangent") ? get_input("tangent")->get() : vec3f(0, 1, 0); + auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(1, 0, 0); orthonormal orb(direction, tangent); direction = orb.normal; diff --git a/zeno/src/nodes/prim/PrimitiveCalcCentroid.cpp b/zeno/src/nodes/prim/PrimitiveCalcCentroid.cpp index af7488805e..3c8f85a2f4 100644 --- a/zeno/src/nodes/prim/PrimitiveCalcCentroid.cpp +++ b/zeno/src/nodes/prim/PrimitiveCalcCentroid.cpp @@ -13,7 +13,7 @@ struct PrimitiveCalcCentroid : zeno::INode { virtual void apply() override { auto method = get_param("method"); auto prim = get_input("prim"); - auto &pos = prim->attr("pos"); + auto &pos = prim->attr("pos"); vec4f acc; if (method == "Vertex" || !prim->tris.size()) { diff --git a/zeno/src/nodes/prim/PrimitiveClip.cpp b/zeno/src/nodes/prim/PrimitiveClip.cpp index d8cae3e6f6..f83acf5316 100644 --- a/zeno/src/nodes/prim/PrimitiveClip.cpp +++ b/zeno/src/nodes/prim/PrimitiveClip.cpp @@ -58,11 +58,11 @@ namespace zeno { }); }); } else { - const auto &srcs = src->attr(key); + const auto &srcs = src->attr(key); auto val1 = srcs[i]; auto val2 = srcs[j]; auto val = (1.0-c)*val1 + c * val2; - dst->attr(key).emplace_back(val); + dst->attr(key).emplace_back(val); } } dst->resize(dst->attr("pos").size()); diff --git a/zeno/src/nodes/prim/PrimitiveDuplicate.cpp b/zeno/src/nodes/prim/PrimitiveDuplicate.cpp index b5a3b2e794..d96aaaca76 100644 --- a/zeno/src/nodes/prim/PrimitiveDuplicate.cpp +++ b/zeno/src/nodes/prim/PrimitiveDuplicate.cpp @@ -18,9 +18,9 @@ struct PrimitiveDuplicate : zeno::INode { auto scaleByAttr = get_param("scaleByAttr"); - auto const &parspos = pars->attr("pos"); - auto const &meshpos = mesh->attr("pos"); - auto &outmpos = outm->add_attr("pos"); + auto const &parspos = pars->attr("pos"); + auto const &meshpos = mesh->attr("pos"); + auto &outmpos = outm->add_attr("pos"); if (scaleByAttr.size()) { auto const &scaleAttr = 
pars->attr(scaleByAttr); diff --git a/zeno/src/nodes/prim/PrimitiveHeatmap.cpp b/zeno/src/nodes/prim/PrimitiveHeatmap.cpp index f1aa411315..141a977b26 100644 --- a/zeno/src/nodes/prim/PrimitiveHeatmap.cpp +++ b/zeno/src/nodes/prim/PrimitiveHeatmap.cpp @@ -150,7 +150,7 @@ struct HeatmapFromPrimAttr : zeno::INode { bool reverse = get_input2("reverse Result"); std::vector temp; for (auto i = 0; i < attrNum; i++) { - temp.push_back(prim->attr(attrName)[i]); + temp.push_back(prim->attr(attrName)[i]); } auto resample = get_input2("resample"); if (0 < resample && resample < attrNum) { diff --git a/zeno/src/nodes/prim/PrimitiveMath.cpp b/zeno/src/nodes/prim/PrimitiveMath.cpp index 9b92563cd3..3b350638e8 100644 --- a/zeno/src/nodes/prim/PrimitiveMath.cpp +++ b/zeno/src/nodes/prim/PrimitiveMath.cpp @@ -7,14 +7,14 @@ using namespace zeno; struct PlaneProjectPrimitive2DAABB : INode { virtual void apply() override { - auto origin = get_input("origin")->get(); - auto normal = get_input("normal")->get(); - auto tangent = get_input("tangent")->get(); - auto bitangent = get_input("bitangent")->get(); + auto origin = get_input("origin")->get(); + auto normal = get_input("normal")->get(); + auto tangent = get_input("tangent")->get(); + auto bitangent = get_input("bitangent")->get(); auto prim = get_input("prim"); - vec2f bmin(+1e6), bmax(-1e6); - auto &pos = prim->attr("pos"); + zeno::vec2f bmin(+1e6), bmax(-1e6); + auto &pos = prim->attr("pos"); for (int i = 0; i < prim->lines.size(); i++) { auto line = prim->lines[i]; auto p = pos[line[0]], q = pos[line[1]]; @@ -25,7 +25,7 @@ struct PlaneProjectPrimitive2DAABB : INode { auto d = u / v; if (0 <= d && d <= 1) { auto dist = p + (q - p) * (u / v) - origin; - vec2f coor{dot(dist, tangent), dot(dist, bitangent)}; + zeno::vec2f coor{dot(dist, tangent), dot(dist, bitangent)}; bmin = zeno::min(bmin, coor); bmax = zeno::max(bmax, coor); } diff --git a/zeno/src/nodes/prim/PrimitiveNoiseAttr.cpp b/zeno/src/nodes/prim/PrimitiveNoiseAttr.cpp 
index db3825fe4d..56c6160ff1 100644 --- a/zeno/src/nodes/prim/PrimitiveNoiseAttr.cpp +++ b/zeno/src/nodes/prim/PrimitiveNoiseAttr.cpp @@ -115,7 +115,7 @@ struct PrimitivePerlinNoiseAttr : INode { //auto min = get_input("min"); //auto max = get_input("max"); - auto offset = vec3f(frand(), frand(), frand()); + auto offset = zeno::vec3f(frand(), frand(), frand()); if(has_input("seed")) offset = get_input("seed")->get(); auto res = std::make_shared(); @@ -124,20 +124,20 @@ struct PrimitivePerlinNoiseAttr : INode { auto attrType = get_param(("attrType")); auto &pos = prim->verts; if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } prim->attr_visit(attrName, [&](auto &arr) { #pragma omp parallel for for (int i = 0; i < arr.size(); i++) { - if constexpr (is_decay_same_v) { - vec3f p = pos[i] * f + offset; + if constexpr (is_decay_same_v) { + zeno::vec3f p = pos[i] * f + offset; float x = perlin(p[0], p[1], p[2]); float y = perlin(p[1], p[2], p[0]); float z = perlin(p[2], p[0], p[1]); - arr[i] = vec3f(x,y,z); + arr[i] = zeno::vec3f(x,y,z); } else { - vec3f p = pos[i] * f + offset; + zeno::vec3f p = pos[i] * f + offset; arr[i] = perlin(p[0], p[1],p[2]); } } @@ -163,17 +163,17 @@ ZENDEFNODE(PrimitivePerlinNoiseAttr, struct GetPerlinNoise : INode{ virtual void apply() override { auto vec = get_input("vec3")->get(); - auto offset = vec3f(frand(), frand(), frand()); + auto offset = zeno::vec3f(frand(), frand(), frand()); if(has_input("seed")) offset = get_input("seed")->get(); auto res = std::make_shared(); float f = has_input("freq")? 
get_input("freq")->get() : 1.0f; - vec3f p = vec*f + offset; + zeno::vec3f p = vec*f + offset; p = p; float x = perlin(p[0], p[1],p[2]); float y = perlin(p[1], p[2], p[0]); float z = perlin(p[2], p[0], p[1]); - res->value = vec3f(x,y,z); + res->value = zeno::vec3f(x,y,z); set_output("noise", res); } }; diff --git a/zeno/src/nodes/prim/PrimitiveNormal.cpp b/zeno/src/nodes/prim/PrimitiveNormal.cpp index ab66a65ce8..767c699f24 100644 --- a/zeno/src/nodes/prim/PrimitiveNormal.cpp +++ b/zeno/src/nodes/prim/PrimitiveNormal.cpp @@ -202,7 +202,7 @@ ZENDEFNODE(PrimitiveOrderVertexByNormal, { }); //ZENO_API void primCalcInsetDir(zeno::PrimitiveObject* prim, float flip, std::string insetAttr) //{ - //auto &out = prim->verts.add_attr(insetAttr); + //auto &out = prim->verts.add_attr(insetAttr); //for (size_t i = 0; i < prim->tris.size(); i++) { //auto ind = prim->tris[i]; //auto a = prim->verts[ind[0]]; diff --git a/zeno/src/nodes/prim/PrimitivePolygonate.cpp b/zeno/src/nodes/prim/PrimitivePolygonate.cpp index 9d62f94161..765795c8d4 100644 --- a/zeno/src/nodes/prim/PrimitivePolygonate.cpp +++ b/zeno/src/nodes/prim/PrimitivePolygonate.cpp @@ -109,9 +109,9 @@ ZENO_API void primPolygonate(PrimitiveObject *prim, bool with_uv) { !prim->tris.has_attr("uv2") || !with_uv)) { auto old_uvs_base = prim->uvs.size(); prim->loops.add_attr("uvs"); - auto &uv0 = prim->tris.attr("uv0"); - auto &uv1 = prim->tris.attr("uv1"); - auto &uv2 = prim->tris.attr("uv2"); + auto &uv0 = prim->tris.attr("uv0"); + auto &uv1 = prim->tris.attr("uv1"); + auto &uv2 = prim->tris.attr("uv2"); for (int i = 0; i < prim->tris.size(); i++) { prim->loops.attr("uvs")[old_loop_base + i * 3 + 0] = old_uvs_base + i * 3 + 0; prim->loops.attr("uvs")[old_loop_base + i * 3 + 1] = old_uvs_base + i * 3 + 1; diff --git a/zeno/src/nodes/prim/PrimitiveReduction.cpp b/zeno/src/nodes/prim/PrimitiveReduction.cpp index e077c08034..5069afadb0 100644 --- a/zeno/src/nodes/prim/PrimitiveReduction.cpp +++ 
b/zeno/src/nodes/prim/PrimitiveReduction.cpp @@ -98,7 +98,7 @@ ZENDEFNODE(PrimReduction,{ struct PrimitiveBoundingBox : zeno::INode { virtual void apply() override{ auto prim = get_input("prim"); - auto &pos = prim->attr("pos"); + auto &pos = prim->attr("pos"); auto bmin = pos.size() ? pos[0] : vec3f(0); auto bmax = bmin; diff --git a/zeno/src/nodes/prim/PrimitiveTrace.cpp b/zeno/src/nodes/prim/PrimitiveTrace.cpp index 85bac19dd7..06579bc07e 100644 --- a/zeno/src/nodes/prim/PrimitiveTrace.cpp +++ b/zeno/src/nodes/prim/PrimitiveTrace.cpp @@ -56,12 +56,12 @@ struct PrimitiveCalcVelocity : zeno::INode { virtual void apply() override { auto prim = get_input("prim"); auto dt = has_input("dt") ? get_input("dt")->get() : 0.04f; - auto const &pos = prim->attr("pos"); + auto const &pos = prim->attr("pos"); if (no_last_pos) { last_pos = pos; no_last_pos = false; } - auto &vel = prim->add_attr("vel"); + auto &vel = prim->add_attr("vel"); #pragma omp parallel for for (int i = 0; i < std::min(last_pos.size(), pos.size()); i++) { @@ -95,10 +95,10 @@ struct PrimitiveInterpSubframe : zeno::INode { if (portion == 0) { base_pos = std::move(curr_pos); - curr_pos = prim->attr("pos"); + curr_pos = prim->attr("pos"); } - auto &pos = prim->attr("pos"); + auto &pos = prim->attr("pos"); #pragma omp parallel for for (int i = 0; i < std::min(pos.size(), diff --git a/zeno/src/nodes/prim/PrimitiveTriangulate.cpp b/zeno/src/nodes/prim/PrimitiveTriangulate.cpp index c7e666fba5..aab97035ac 100644 --- a/zeno/src/nodes/prim/PrimitiveTriangulate.cpp +++ b/zeno/src/nodes/prim/PrimitiveTriangulate.cpp @@ -108,9 +108,9 @@ ZENO_API void primTriangulate(PrimitiveObject *prim, bool with_uv, bool has_line } else { auto &loop_uv = prim->loops.attr("uvs"); auto &uvs = prim->uvs; - auto &uv0 = prim->tris.add_attr("uv0"); - auto &uv1 = prim->tris.add_attr("uv1"); - auto &uv2 = prim->tris.add_attr("uv2"); + auto &uv0 = prim->tris.add_attr("uv0"); + auto &uv1 = prim->tris.add_attr("uv1"); + auto &uv2 = 
prim->tris.add_attr("uv2"); parallel_for(prim->polys.size(), [&] (size_t i) { auto [start, len] = prim->polys[i]; diff --git a/zeno/src/nodes/prim/PrimitiveTwist.cpp b/zeno/src/nodes/prim/PrimitiveTwist.cpp index 949479f882..33fe650f23 100644 --- a/zeno/src/nodes/prim/PrimitiveTwist.cpp +++ b/zeno/src/nodes/prim/PrimitiveTwist.cpp @@ -24,11 +24,11 @@ struct PrimitiveTwist : zeno::INode { // todo: also add PrimitiveStretch and Pri limitMin -= 0.5f; limitMax -= 0.5f; - auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); - auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(0, 1, 0); + auto origin = has_input("origin") ? get_input("origin")->get() : vec3f(0, 0, 0); + auto direction = has_input("direction") ? get_input("direction")->get() : vec3f(0, 1, 0); auto orb = has_input("tangent") - ? orthonormal(direction, get_input("tangent")->get()) + ? orthonormal(direction, get_input("tangent")->get()) : orthonormal(direction); direction = orb.normal; auto tangent = orb.tangent; diff --git a/zeno/src/nodes/prim/SimpleGeometry.cpp b/zeno/src/nodes/prim/SimpleGeometry.cpp index 9a2a440159..52c09cde8f 100644 --- a/zeno/src/nodes/prim/SimpleGeometry.cpp +++ b/zeno/src/nodes/prim/SimpleGeometry.cpp @@ -88,9 +88,9 @@ struct CreateCube : zeno::INode { auto &loops = prim->loops; std::vector dummy; - auto &uv1 = !quad ? prim->tris.add_attr("uv0") : dummy; - auto &uv2 = !quad ? prim->tris.add_attr("uv1") : dummy; - auto &uv3 = !quad ? prim->tris.add_attr("uv2") : dummy; + auto &uv1 = !quad ? prim->tris.add_attr("uv0") : dummy; + auto &uv2 = !quad ? prim->tris.add_attr("uv1") : dummy; + auto &uv3 = !quad ? 
prim->tris.add_attr("uv2") : dummy; if(div_w <= 2) div_w = 2; @@ -963,7 +963,7 @@ struct CreateTorus : zeno::INode { auto prim = std::make_shared(); prim->verts.resize(majorSegment * minorSegment); - auto &nrm = prim->verts.add_attr("nrm"); + auto &nrm = prim->verts.add_attr("nrm"); for (auto j = 0; j < minorSegment; j++) { float theta = M_PI * 2.0 * j / minorSegment - M_PI; float y = sin(theta) * minorRadius; diff --git a/zeno/src/nodes/prim/TransformPrimitive.cpp b/zeno/src/nodes/prim/TransformPrimitive.cpp index fc561031f0..937a5140a9 100644 --- a/zeno/src/nodes/prim/TransformPrimitive.cpp +++ b/zeno/src/nodes/prim/TransformPrimitive.cpp @@ -364,7 +364,7 @@ struct PrimitiveTransform : zeno::INode { auto path = get_input2("path"); std::string pivotType = get_input2("pivot"); - auto pivotPos = get_input2("pivotPos"); + auto pivotPos = get_input2("pivotPos"); if (std::dynamic_pointer_cast(iObject)) { iObject = iObject->clone(); diff --git a/zeno/src/nodes/prim/UVProjectFromPlane.cpp b/zeno/src/nodes/prim/UVProjectFromPlane.cpp index 4515ee1299..d0f7b9a99a 100644 --- a/zeno/src/nodes/prim/UVProjectFromPlane.cpp +++ b/zeno/src/nodes/prim/UVProjectFromPlane.cpp @@ -28,7 +28,7 @@ namespace zeno { struct UVProjectFromPlane : zeno::INode { virtual void apply() override { auto prim = get_input("prim"); - auto &uv = prim->verts.add_attr("uv"); + auto &uv = prim->verts.add_attr("uv"); auto refPlane = get_input("refPlane"); if (refPlane->verts.size() != 4) { zeno::log_error("refPlane must be 1 * 1 plane!"); @@ -51,9 +51,9 @@ struct UVProjectFromPlane : zeno::INode { auto v = zeno::clamp(zeno::dot(proj, vDir) / vLength, 0, 1); uv[i] = zeno::vec3f(u, v, 0); } - auto &uv0 = prim->tris.add_attr("uv0"); - auto &uv1 = prim->tris.add_attr("uv1"); - auto &uv2 = prim->tris.add_attr("uv2"); + auto &uv0 = prim->tris.add_attr("uv0"); + auto &uv1 = prim->tris.add_attr("uv1"); + auto &uv2 = prim->tris.add_attr("uv2"); for (auto i = 0; i < prim->tris.size(); i++) { auto tri = 
prim->tris[i]; uv0[i] = uv[tri[0]]; @@ -163,13 +163,13 @@ struct PrimSample2D : zeno::INode { auto image = get_input2("image"); auto wrap = get_input2("wrap"); auto filter = get_input2("filter"); - auto borderColor = get_input2("borderColor"); + auto borderColor = get_input2("borderColor"); auto invertU = get_input2("invert U"); auto invertV = get_input2("invert V"); auto scale = get_input2("scale"); auto rotate = get_input2("rotate"); - auto translate = get_input2("translate"); + auto translate = get_input2("translate"); glm::vec3 pre_scale = glm::vec3(scale, scale, 0 ); if(invertU) pre_scale.x *= -1; @@ -257,9 +257,9 @@ struct PrimSample2D : zeno::INode { } } else if (uvSource == "tris") { - auto uv0 = prim->tris.attr("uv0"); - auto uv1 = prim->tris.attr("uv1"); - auto uv2 = prim->tris.attr("uv2"); + auto uv0 = prim->tris.attr("uv0"); + auto uv1 = prim->tris.attr("uv1"); + auto uv2 = prim->tris.attr("uv2"); #pragma omp parallel for for (auto i = 0; i < prim->tris.size(); i++) { @@ -512,7 +512,7 @@ struct ReadImageFile_v2 : INode { } } int w = image->userData().get2("w"); - auto &ij = image->verts.add_attr("ij"); + auto &ij = image->verts.add_attr("ij"); for (auto i = 0; i < image->verts.size(); i++) { ij[i] = vec3f(i % w, i / w, 0); } @@ -880,7 +880,7 @@ struct EnvMapRot : INode { float rot_theta = y / float(h - 1) * 180; - auto dir = get_input2("dir"); + auto dir = get_input2("dir"); dir = zeno::normalize(dir); auto to_rot_theta = glm::degrees(acos(dir[1])); auto diff_rot_theta = to_rot_theta - rot_theta; @@ -913,7 +913,7 @@ struct PrimLoadExrToChannel : INode { if (w * h != prim->size()) { throw zeno::makeError("PrimLoadExrToChannel image prim w and h not match!"); } - auto &channel = prim->add_attr(get_input2("channel")); + auto &channel = prim->add_attr(get_input2("channel")); for (auto i = 0; i < w * h; i++) { channel[i] = image->verts[i]; } diff --git a/zeno/src/nodes/prim/WBErode.cpp b/zeno/src/nodes/prim/WBErode.cpp index b9be6420ba..c8f9de0323 100644 --- 
a/zeno/src/nodes/prim/WBErode.cpp +++ b/zeno/src/nodes/prim/WBErode.cpp @@ -915,7 +915,7 @@ struct erode_tumble_material_v1 : INode { } auto &height = terrain->verts.attr("height"); auto &_material = terrain->verts.attr("_material"); - auto &flowdir = terrain->verts.attr("flowdir"); + auto &flowdir = terrain->verts.attr("flowdir"); //////////////////////////////////////////////////////////////////////////////////////// @@ -1406,7 +1406,7 @@ struct erode_tumble_material_v3 : INode { auto &height = terrain->verts.attr("height"); auto &_material = terrain->verts.attr("_material"); auto &_temp_material = terrain->verts.attr("_temp_material"); - auto &flowdir = terrain->verts.attr("flowdir"); + auto &flowdir = terrain->verts.attr("flowdir"); //////////////////////////////////////////////////////////////////////////////////////// @@ -2384,7 +2384,7 @@ struct HF_maskByFeature : INode { auto &height = terrain->verts.attr(heightLayer); auto &mask = terrain->verts.attr(maskLayer); - auto &_grad = terrain->verts.add_attr("_grad"); + auto &_grad = terrain->verts.add_attr("_grad"); std::fill(_grad.begin(), _grad.end(), vec3f(0,0,0)); //////////////////////////////////////////////////////////////////////////////////////// @@ -2537,8 +2537,8 @@ struct HF_rotate_displacement_2d : INode { void apply() override { auto terrain = get_input("prim_2DGrid"); - auto& var = terrain->verts.attr("var"); // hardcode - auto& pos = terrain->verts.attr("tempPos"); // hardcode + auto& var = terrain->verts.attr("var"); // hardcode + auto& pos = terrain->verts.attr("tempPos"); // hardcode auto angle = get_input("Rotate Displacement")->get(); float gl_angle = glm::radians(angle); @@ -2628,7 +2628,7 @@ struct HF_remap : INode { var[i] = fit(var[i], 0, 1, outMin, outMax); } if (remapLayer == "height"){ - terrain->verts.attr("pos")[i][1] = var[i]; + terrain->verts.attr("pos")[i][1] = var[i]; } } diff --git a/zeno/src/nodes/prim/WBNoise.cpp b/zeno/src/nodes/prim/WBNoise.cpp index e84a93791b..64de845b03 
100644 --- a/zeno/src/nodes/prim/WBNoise.cpp +++ b/zeno/src/nodes/prim/WBNoise.cpp @@ -136,7 +136,7 @@ struct erode_noise_perlin : INode { auto attrName = get_param("attrName"); auto attrType = get_param("attrType"); if (!terrain->has_attr(attrName)) { - if (attrType == "float3") terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -145,7 +145,7 @@ struct erode_noise_perlin : INode { { zeno::log_error("no such data named '{}'.", vec3fAttrName); } - auto& vec3fAttr = terrain->verts.attr(vec3fAttrName); + auto& vec3fAttr = terrain->verts.attr(vec3fAttrName); terrain->attr_visit(attrName, [&](auto& arr) { @@ -536,7 +536,7 @@ struct erode_noise_simplex : INode { auto attrName = get_param("attrName"); auto attrType = get_param("attrType"); if (!terrain->has_attr(attrName)) { - if (attrType == "float3") terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -545,7 +545,7 @@ struct erode_noise_simplex : INode { { zeno::log_error("no such data named '{}'.", posLikeAttrName); } - auto& pos = terrain->verts.attr(posLikeAttrName); + auto& pos = terrain->verts.attr(posLikeAttrName); terrain->attr_visit(attrName, [&](auto& arr) { #pragma omp parallel for @@ -740,16 +740,16 @@ struct erode_noise_analytic_simplex_2d : INode { auto attrName = get_param("attrName"); if (!terrain->has_attr(attrName)) { - terrain->add_attr(attrName); + terrain->add_attr(attrName); } - auto& noise = terrain->verts.attr(attrName); + auto& noise = terrain->verts.attr(attrName); auto posLikeAttrName = get_input("posLikeAttrName")->get(); if (!terrain->verts.has_attr(posLikeAttrName)) { zeno::log_error("no such data named '{}'.", posLikeAttrName); } - auto& pos = terrain->verts.attr(posLikeAttrName); + auto& pos = terrain->verts.attr(posLikeAttrName); #pragma omp parallel for for (int i = 0; i < 
terrain->verts.size(); i++) @@ -872,13 +872,13 @@ struct NoiseImageGen2 : INode {//todo::image shape should same when pixel aspect virtual void apply() override { auto perC = get_input2("noise per component"); - auto image_size = get_input2("image size"); + auto image_size = get_input2("image size"); auto seed = get_input2("seed"); auto turbulence = get_input2("turbulence")+1; // tofix: think the case that turbulence = 0 auto roughness = get_input2("roughness"); auto exponent = get_input2("exponent"); - auto frequency = get_input2("spatial frequency") * 0.001f; // tofix: mysterious scale? - auto amplitude = get_input2("amplitude"); + auto frequency = get_input2("spatial frequency") * 0.001f; // tofix: mysterious scale? + auto amplitude = get_input2("amplitude"); auto pulsenum = get_input2("pulsenum"); auto image = std::make_shared(); @@ -1037,13 +1037,13 @@ struct NoiseImageGen : INode { virtual void apply() override { auto perC = get_input2("noise per component"); - auto image_size = get_input2("image size"); + auto image_size = get_input2("image size"); auto seed = get_input2("seed"); auto turbulence = get_input2("turbulence")+1; // tofix: think the case that turbulence = 0 auto roughness = get_input2("roughness"); auto exponent = get_input2("exponent"); - auto frequency = get_input2("spatial frequency") * 0.001f; // tofix: mysterious scale? - auto amplitude = get_input2("amplitude"); + auto frequency = get_input2("spatial frequency") * 0.001f; // tofix: mysterious scale? 
+ auto amplitude = get_input2("amplitude"); auto image = std::make_shared(); image->verts.resize(image_size[0] * image_size[1]); @@ -1139,7 +1139,7 @@ struct erode_noise_sparse_convolution : INode { if (!terrain->has_attr(attrName)) { if (attrType == "float3") - terrain->add_attr(attrName); + terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -1149,7 +1149,7 @@ struct erode_noise_sparse_convolution : INode { zeno::log_error("no such data named '{}'.", posLikeAttrName); } - auto &pos = terrain->verts.attr(posLikeAttrName); + auto &pos = terrain->verts.attr(posLikeAttrName); terrain->attr_visit(attrName, [&](auto &arr) { #pragma omp parallel for @@ -1327,7 +1327,7 @@ struct Noise_gabor_2d : INode { if (!terrain->verts.has_attr(posLikeAttrName)) { zeno::log_error("no such data named '{}'.", posLikeAttrName); } - auto &pos = terrain->verts.attr(posLikeAttrName); + auto &pos = terrain->verts.attr(posLikeAttrName); glm::vec3 ret{}; auto K_ = 2.5f; // act on spectrum @@ -1444,7 +1444,7 @@ struct erode_noise_worley : INode { { zeno::log_error("no such data named '{}'.", posLikeAttrName); } - auto& pos = terrain->verts.attr(posLikeAttrName); + auto& pos = terrain->verts.attr(posLikeAttrName); auto jitter = get_input2("celljitter"); vec3f offset; if (!has_input("seed")) { @@ -1453,7 +1453,7 @@ struct erode_noise_worley : INode { offset = vec3f(unif(gen), unif(gen), unif(gen)); } else { - offset = get_input("seed")->get(); + offset = get_input("seed")->get(); } int fType = 0; @@ -1471,7 +1471,7 @@ struct erode_noise_worley : INode { auto attrType = get_param("attrType"); if (!terrain->has_attr(attrName)) { - if (attrType == "float3") terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -1565,7 +1565,7 @@ struct erode_hybridMultifractal_v1 : INode { auto& pos = terrain->verts; if (!terrain->has_attr(attrName)) { - if (attrType == "float3") 
terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -1650,7 +1650,7 @@ struct erode_hybridMultifractal_v2 : INode { auto& pos = terrain->verts; if (!terrain->has_attr(attrName)) { - if (attrType == "float3") terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -1733,7 +1733,7 @@ struct erode_hybridMultifractal_v3 : INode { auto& pos = terrain->verts; if (!terrain->has_attr(attrName)) { - if (attrType == "float3") terrain->add_attr(attrName); + if (attrType == "float3") terrain->add_attr(attrName); else if (attrType == "float") terrain->add_attr(attrName); } @@ -1811,7 +1811,7 @@ struct erode_domainWarping_v1 : INode { auto attrType = get_param("attrType"); auto& pos = prim->verts; if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } @@ -1877,7 +1877,7 @@ struct erode_domainWarping_v2 : INode { auto attrType = get_param("attrType"); auto& pos = prim->verts; if (!prim->has_attr(attrName)) { - if (attrType == "float3") prim->add_attr(attrName); + if (attrType == "float3") prim->add_attr(attrName); else if (attrType == "float") prim->add_attr(attrName); } @@ -1939,10 +1939,10 @@ struct erode_voronoi : INode { auto attrName = get_param("attrName"); if (!prim->has_attr(attrName)) { prim->add_attr(attrName); } - if (!prim->has_attr("minFeaturePointPos")) { prim->add_attr("minFeaturePointPos"); } + if (!prim->has_attr("minFeaturePointPos")) { prim->add_attr("minFeaturePointPos"); } auto& attr_voro = prim->attr(attrName); - auto& attr_mFPP = prim->attr("minFeaturePointPos"); + auto& attr_mFPP = prim->attr("minFeaturePointPos"); auto& samplePoints = prim->verts; auto& featurePoints = featurePrim->verts; @@ -1983,7 +1983,7 @@ void 
assign_clusters(std::vector& cpoints, const std::vectorverts.attr(attrName)[i]); + float dist = zeno::distance(c.center, prim->verts.attr(attrName)[i]); if (dist < smallest_dist) { smallest_dist = dist; cpoints[i].clusterid = c.id; @@ -2004,7 +2004,7 @@ struct Primcluster : INode {//todo:: just for color ramp now std::default_random_engine generator(seed); std::uniform_real_distribution distribution(0.0, 1.0); - auto &attr = prim->verts.attr(attrName);//only test with vec3f now + auto &attr = prim->verts.attr(attrName);//only test with vec3f now std::vector old_clusters; @@ -2051,7 +2051,7 @@ struct Primcluster : INode {//todo:: just for color ramp now _iter++; } for(int i = 0; i < old_clusters.size(); i++){ - prim->verts.add_attr(outputattr)[i] = old_clusters[i].center; + prim->verts.add_attr(outputattr)[i] = old_clusters[i].center; } set_output("prim", get_input("prim")); diff --git a/zeno/src/nodes/prim/WBPrimBend.cpp b/zeno/src/nodes/prim/WBPrimBend.cpp index add57b5a59..67453fd385 100644 --- a/zeno/src/nodes/prim/WBPrimBend.cpp +++ b/zeno/src/nodes/prim/WBPrimBend.cpp @@ -30,9 +30,9 @@ struct WBPrimBend : INode { auto limitDeformation = get_input("Limit Deformation")->get(); auto symmetricDeformation = get_input("Symmetric Deformation")->get(); auto angle = get_input("Bend Angle (degree)")->get(); - auto upVector = has_input("Up Vector") ? get_input("Up Vector")->get() : vec3f(0, 1, 0); - auto capOrigin = has_input("Capture Origin") ? get_input("Capture Origin")->get() : vec3f(0, 0, 0); - auto dirVector = has_input("Capture Direction") ? get_input("Capture Direction")->get() : vec3f(0, 0, 1); + auto upVector = has_input("Up Vector") ? get_input("Up Vector")->get() : vec3f(0, 1, 0); + auto capOrigin = has_input("Capture Origin") ? get_input("Capture Origin")->get() : vec3f(0, 0, 0); + auto dirVector = has_input("Capture Direction") ? get_input("Capture Direction")->get() : vec3f(0, 0, 1); double capLen = has_input("Capture Length") ? 
get_input("Capture Length")->get() : 1.0; glm::vec3 up = normalize(glm::vec3(upVector[0], upVector[1], upVector[2])); @@ -412,7 +412,7 @@ struct LineCarve : INode { auto p = a + (b - a) * r1; prim->verts.reserve(prim->size() + 1); - prim->attr("pos").insert(prim->verts.begin() + int(index) + 1, { p[0], p[1], p[2] }); + prim->attr("pos").insert(prim->verts.begin() + int(index) + 1, { p[0], p[1], p[2] }); linesLen.reserve(linesLen.size() + 1); linesLen.insert(linesLen.begin() + int(index), insertU); @@ -479,13 +479,13 @@ ZENDEFNODE(LineCarve, struct VisVec3Attribute : INode { void apply() override { - auto color = get_input("color")->get(); + auto color = get_input("color")->get(); auto useNormalize = get_input("normalize")->get(); auto lengthScale = get_input("lengthScale")->get(); auto name = get_input2("name"); auto prim = get_input("prim"); - auto& attr = prim->verts.attr(name); + auto& attr = prim->verts.attr(name); auto& pos = prim->verts; auto primVis = std::make_shared(); @@ -500,7 +500,7 @@ struct VisVec3Attribute : INode { }, prim->attr(key)); } - auto& visColor = primVis->verts.add_attr("clr"); + auto& visColor = primVis->verts.add_attr("clr"); auto& visPos = primVis->verts; #pragma omp parallel for @@ -697,8 +697,8 @@ struct BVHNearestPos : INode { auto primNei = get_input("primNei"); auto bvh_id = prim->attr(get_input2("bvhIdTag")); - auto bvh_ws = prim->attr(get_input2("bvhWeightTag")); - auto &bvh_pos = prim->add_attr(get_input2("bvhPosTag")); + auto bvh_ws = prim->attr(get_input2("bvhWeightTag")); + auto &bvh_pos = prim->add_attr(get_input2("bvhPosTag")); #pragma omp parallel for for (int i = 0; i < prim->size(); i++) { @@ -753,7 +753,7 @@ struct BVHNearestAttr : INode { if(targetType == "tris"){ auto bvhWeightTag = get_input2("bvhWeightTag"); - auto& bvh_ws = prim->verts.attr(bvhWeightTag); + auto& bvh_ws = prim->verts.attr(bvhWeightTag); auto& bvh_id = prim->verts.attr(bvhIdTag); #pragma omp parallel for for (int i = 0; i < prim->size(); i++){ @@ 
-816,7 +816,7 @@ struct HeightStarPattern : zeno::INode { #pragma omp parallel for for (int i = 0; i < prim->verts.size(); i++) { - auto coord = prim->verts.attr("res")[i]; + auto coord = prim->verts.attr("res")[i]; vec2f coord2d = vec2f(coord[0], coord[1]); vec2f cellcenter = vec2f(floor(coord2d[0]), floor(coord2d[1])); float result = 0; @@ -1096,8 +1096,8 @@ ZENDEFNODE(PrimitiveDelAttrs, struct QuatRotBetweenVectors : INode { void apply() override { - auto start = normalize(get_input("start")->get()); - auto dest = normalize(get_input("dest")->get()); + auto start = normalize(get_input("start")->get()); + auto dest = normalize(get_input("dest")->get()); glm::vec3 gl_start(start[0], start[1], start[2]); glm::vec3 gl_dest(dest[0], dest[1], dest[2]); @@ -1123,8 +1123,8 @@ ZENDEFNODE(QuatRotBetweenVectors, // çŸ¢é‡ * 四元数 => çŸ¢é‡ struct QuatRotate : INode { void apply() override { - auto quat = get_input("quat")->get(); - auto vec3 = get_input("vec3")->get(); + auto quat = get_input("quat")->get(); + auto vec3 = get_input("vec3")->get(); glm::vec3 gl_vec3(vec3[0], vec3[1], vec3[2]); glm::quat gl_quat(quat[3], quat[0], quat[1], quat[2]); @@ -1152,7 +1152,7 @@ struct QuatAngleAxis : INode { void apply() override { auto angle = get_input("angle(D)")->get(); - auto axis = normalize(get_input("axis")->get()); + auto axis = normalize(get_input("axis")->get()); float gl_angle = glm::radians(angle); glm::vec3 gl_axis(axis[0], axis[1], axis[2]); @@ -1179,7 +1179,7 @@ ZENDEFNODE(QuatAngleAxis, // 四元数 -> 旋转角度 struct QuatGetAngle : INode { void apply() override { - auto quat = get_input("quat")->get(); + auto quat = get_input("quat")->get(); glm::quat gl_quat(quat[3], quat[0], quat[1], quat[2]); float gl_angle = glm::degrees(glm::angle(gl_quat)); @@ -1203,7 +1203,7 @@ ZENDEFNODE(QuatGetAngle, // 四元数 -> 旋转轴 struct QuatGetAxis : INode { void apply() override { - auto quat = get_input("quat")->get(); + auto quat = get_input("quat")->get(); glm::quat gl_quat(quat[3], quat[0], 
quat[1], quat[2]); glm::vec3 gl_axis = glm::axis(gl_quat); From daac92fe3b2beab856c5d12acc3babe7bb0ab3f6 Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 8 Jul 2024 19:18:03 +0800 Subject: [PATCH 094/244] disable readexr --- projects/Alembic/png16.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/Alembic/png16.cpp b/projects/Alembic/png16.cpp index 0800c35a23..7ef41b81f8 100644 --- a/projects/Alembic/png16.cpp +++ b/projects/Alembic/png16.cpp @@ -262,6 +262,7 @@ ZENDEFNODE(WriteExr, { {}, {"comp"}, }); +#if 0 struct ReadExr : INode { std::pair get_output_name(std::string name) { std::string output_name; @@ -377,4 +378,5 @@ ZENDEFNODE(ReadExr, { {}, {"comp"}, }); +#endif } From 2f021d091acf56a8f8606a629e9c5db34ed978bd Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 8 Jul 2024 19:18:11 +0800 Subject: [PATCH 095/244] upd zpc --- projects/CUDA/zpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 7958113f75..ce9829dbb2 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 7958113f7546f80647f74f1fd368aef76d5508ff +Subproject commit ce9829dbb299daae15be18767c7323c15c262d6a From c43abc1d1e5e34fe831684bcaaab55ac8ec83c0a Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 8 Jul 2024 21:11:44 +0800 Subject: [PATCH 096/244] pip-free zpcjit setup --- projects/CUDA/CMakeLists.txt | 59 +++++++++++++++++++++++++++++++++++ projects/CUDA/zpc | 2 +- projects/PyZpc/CMakeLists.txt | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 1ec0614bd6..38f73671b1 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -48,6 +48,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) message(STATUS "python3 libraries: ${ZS_OVERWRITE_PYTHON_LIBRARIES}") message(STATUS "python3 include directories: ${ZS_OVERWRITE_PYTHON_INCLUDE_DIR}") set(ENV{ZENO_BIN_DIR} "${ZENO_BIN_DIR}") + #[=[ 
add_custom_command( TARGET copy_py POST_BUILD @@ -56,6 +57,53 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit --user COMMENT "installing pyzpc into the specified conda environment: ${ZS_OVERWRITE_PYTHON_EXECUTABLE}" ) + ]=] + set(PY_LIBS_DIR ${RESOURCE_BASE_DIR}/resource/py_libs) + set(PREFIX "lib") + if (WIN32) + set(SUFFIX ".dll") + set(PREFIX "") + elseif (APPLE) + set(SUFFIX ".dylib") + elseif (UNIX) + set(SUFFIX ".so") + else() + message(FATAL_ERROR "unknown system to determine shared library suffix") + endif() + + set(ZPC_BINARIES "clang" + "cudart" + "cuda" + "nvrtc" + "omp" + "zpccore" + "zpccuda" + "zpc_jit_clang" + "zpc_jit_nvrtc" + "zpcomp" + "zpc_py_interop" + "zpc" + "zpctool") # "zspartio", "zswhereami" + + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CAPIS="${RESOURCE_BASE_DIR}/${PREFIX}zpc_py_interop${SUFFIX}") + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_NVRTC="${RESOURCE_BASE_DIR}/${PREFIX}zpc_jit_nvrtc${SUFFIX}") + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CLANG="${RESOURCE_BASE_DIR}/${PREFIX}zpc_jit_clang${SUFFIX}") + add_custom_command( + TARGET copy_py + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit + COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/lib + COMMENT "creating pyzpc jit module directory at ${PY_LIBS_DIR}/zpcjit" + ) + file(GLOB ZPC_JIT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit/pyzpc/*.py) + message("ZPC_JIT_SRC : ${ZPC_JIT_SRC}") + + foreach(ZPCJIT_FILE IN LISTS ZPC_JIT_SRC) + add_custom_command( + TARGET copy_py POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ZPCJIT_FILE} ${PY_LIBS_DIR}/zpcjit + COMMENT "Copying zpcjit src file: ${ZPCJIT_FILE}") + endforeach() if (WIN32) cmake_path(GET ZS_OVERWRITE_PYTHON_INCLUDE_DIR PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") @@ -107,6 +155,17 @@ if 
(ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) COMMENT "copying python lib directory ${PYTHON_ENV_PATH}/lib to ${RESOURCE_BASE_DIR}/lib" ) endif() + + foreach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) + if(EXISTS ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX}) + add_custom_command( + TARGET copy_py POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX} ${PY_LIBS_DIR}/zpcjit/zpc_jit/lib + COMMENT "Copying zpc binary file: ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX}") + else() + message(STATUS "${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX} not found.") + endif() + endforeach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) endif(ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index ce9829dbb2..9939af9654 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit ce9829dbb299daae15be18767c7323c15c262d6a +Subproject commit 9939af9654dfa0cbf19d27c8be7dcaf8e0011b9a diff --git a/projects/PyZpc/CMakeLists.txt b/projects/PyZpc/CMakeLists.txt index 9650ba5d77..fec762861e 100644 --- a/projects/PyZpc/CMakeLists.txt +++ b/projects/PyZpc/CMakeLists.txt @@ -45,7 +45,7 @@ message("ZPY_SRC : ${ZPY_SRC}") foreach(ZPY_FILE IN LISTS ZPY_SRC) add_custom_command( TARGET copy_zpy POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ZPY_FILE} ${RESOURCE_BASE_DIR}/resource/py_libs/zpy + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ZPY_FILE} ${PY_LIBS_DIR}/zpy COMMENT "Copying zpy src file: ${ZPY_FILE}") endforeach() From 3b3cd93500e77c78ab3a355e0233ab63b4ddc579 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 02:17:42 +0800 Subject: [PATCH 097/244] copy necessary py folders --- projects/CUDA/CMakeLists.txt | 34 +++++++++++++++++++--------------- projects/PyZpc/CMakeLists.txt | 3 +++ projects/PyZpc/pyzfx.cpp | 13 ++++++++++++- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt 
b/projects/CUDA/CMakeLists.txt index 38f73671b1..9f1a44aeb4 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -93,6 +93,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/lib + COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include COMMENT "creating pyzpc jit module directory at ${PY_LIBS_DIR}/zpcjit" ) file(GLOB ZPC_JIT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit/pyzpc/*.py) @@ -104,8 +105,8 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ZPCJIT_FILE} ${PY_LIBS_DIR}/zpcjit COMMENT "Copying zpcjit src file: ${ZPCJIT_FILE}") endforeach() + cmake_path(GET ZS_OVERWRITE_PYTHON_EXECUTABLE PARENT_PATH PYTHON_ENV_PATH) if (WIN32) - cmake_path(GET ZS_OVERWRITE_PYTHON_INCLUDE_DIR PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") add_custom_command( TARGET copy_py @@ -113,25 +114,25 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) # COMMAND ${CMAKE_COMMAND} -E copy_directory ${PYTHON_ENV_PATH}/lib ${RESOURCE_BASE_DIR}/lib # COMMAND ${CMAKE_COMMAND} -E copy_directory ${PYTHON_ENV_PATH}/dlls ${RESOURCE_BASE_DIR}/dlls COMMAND ${CMAKE_COMMAND} - -DOBJECTS=${PYTHON_ENV_PATH}/lib + -DOBJECTS=${PYTHON_ENV_PATH}/Lib -DOUTPUT=${RESOURCE_BASE_DIR} -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dir.cmake COMMAND ${CMAKE_COMMAND} - -DOBJECTS=${PYTHON_ENV_PATH}/dlls + -DOBJECTS=${PYTHON_ENV_PATH}/DLLs -DOUTPUT=${RESOURCE_BASE_DIR} -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dir.cmake - COMMAND ${CMAKE_COMMAND} - -DOBJECTS=${PYTHON_ENV_PATH}/library - -DOUTPUT=${RESOURCE_BASE_DIR} - -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dir.cmake + # COMMAND ${CMAKE_COMMAND} + # -DOBJECTS=${PYTHON_ENV_PATH}/library + # -DOUTPUT=${RESOURCE_BASE_DIR} + # -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dir.cmake - COMMAND ${CMAKE_COMMAND} - 
-DLIB="${PYTHON_ENV_PATH}/zlib.dll" - -DDLL_DIR=${PYTHON_ENV_PATH} - -DOUTPUT=$ - -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dll.cmake + # COMMAND ${CMAKE_COMMAND} + # -DLIB="${PYTHON_ENV_PATH}/zlib.dll" + # -DDLL_DIR=${PYTHON_ENV_PATH} + # -DOUTPUT=$ + # -P ${CMAKE_CURRENT_SOURCE_DIR}/copy_dll.cmake COMMENT "copying python lib directories at ${PYTHON_ENV_PATH} to ${RESOURCE_BASE_DIR}" ) @@ -143,8 +144,6 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) ) endforeach() else() - cmake_path(GET ZS_OVERWRITE_PYTHON_INCLUDE_DIR PARENT_PATH PYTHON_ENV_PATH) - cmake_path(GET PYTHON_ENV_PATH PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") add_custom_command( TARGET copy_py @@ -156,6 +155,12 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) ) endif() + # prepare zpc headers for pyzpc + add_custom_command( + TARGET copy_py POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include + COMMENT "Copying zpc source files: ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim") + # prepare zpc libs for pyzpc foreach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) if(EXISTS ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX}) add_custom_command( @@ -168,7 +173,6 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) endforeach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) endif(ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) - add_library(zshelper INTERFACE) target_include_directories(zshelper INTERFACE $ diff --git a/projects/PyZpc/CMakeLists.txt b/projects/PyZpc/CMakeLists.txt index fec762861e..eb0f34fe8e 100644 --- a/projects/PyZpc/CMakeLists.txt +++ b/projects/PyZpc/CMakeLists.txt @@ -10,6 +10,9 @@ target_link_libraries(zeno PRIVATE zpc_jit_py) target_link_libraries(zeno PRIVATE zshelper) # +# find_package(PkgConfig) +# pkg_check_modules(LIBFFI REQUIRED IMPORTED_TARGET libffi) +# target_link_libraries(zeno PRIVATE PkgConfig::LIBFFI) target_sources(zeno PRIVATE pyzfx.cpp ) diff --git a/projects/PyZpc/pyzfx.cpp 
b/projects/PyZpc/pyzfx.cpp index 64299650c3..e6fdc9e84e 100644 --- a/projects/PyZpc/pyzfx.cpp +++ b/projects/PyZpc/pyzfx.cpp @@ -50,7 +50,18 @@ static callback_t zpc_init_callback = [] (auto _) { #endif auto zeno_lib_path = exe_dir + "/" + ZENO_PYZPC_DLL_FILE; auto py_libs_dir = exe_dir + "/resource/py_libs"; - if (PyRun_SimpleString(("__import__('sys').path.insert(0, '" + + if (PyRun_SimpleString("import sys; sys.path.append('C\:/Develop/vcpkg/installed/x64-windows/tools/python3/DLLs'); ") < 0) { + log_warn("Failed to initialize Python module"); + return; + } +#if 0 + if (PyRun_SimpleString(("import sys; import os; sys.path.append(os.path.join('" + + exe_dir + "', 'DLLs')); ").c_str()) < 0) { + log_warn("Failed to initialize Python module"); + return; + } +#endif + if (PyRun_SimpleString(("sys.path.append('" + py_libs_dir + "'); import zpy; zpy.init_zeno_lib('" + zeno_lib_path + "'); zpy.zeno_lib_path = '" + zeno_lib_path + "'").c_str()) < 0) { log_warn("Failed to initialize Python module"); From c5101c408e09d7088c3de2ae1f4fd29814324b22 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 13:43:13 +0800 Subject: [PATCH 098/244] setup PYTHONPATH and HOME in pyenv --- projects/PyZpc/pyzfx.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/projects/PyZpc/pyzfx.cpp b/projects/PyZpc/pyzfx.cpp index e6fdc9e84e..80a14e4282 100644 --- a/projects/PyZpc/pyzfx.cpp +++ b/projects/PyZpc/pyzfx.cpp @@ -50,7 +50,11 @@ static callback_t zpc_init_callback = [] (auto _) { #endif auto zeno_lib_path = exe_dir + "/" + ZENO_PYZPC_DLL_FILE; auto py_libs_dir = exe_dir + "/resource/py_libs"; - if (PyRun_SimpleString("import sys; sys.path.append('C\:/Develop/vcpkg/installed/x64-windows/tools/python3/DLLs'); ") < 0) { + if (PyRun_SimpleString(("import os; os.environ['PYTHONPATH'] = '" + exe_dir + "/DLLs';").c_str()) < 0) { + log_warn("Failed to initialize Python module"); + return; + } + if (PyRun_SimpleString(("os.environ['PYTHONHOME'] = '" + exe_dir + 
"';").c_str()) < 0) { log_warn("Failed to initialize Python module"); return; } @@ -61,7 +65,7 @@ static callback_t zpc_init_callback = [] (auto _) { return; } #endif - if (PyRun_SimpleString(("sys.path.append('" + + if (PyRun_SimpleString(("import sys; sys.path.append('" + py_libs_dir + "'); import zpy; zpy.init_zeno_lib('" + zeno_lib_path + "'); zpy.zeno_lib_path = '" + zeno_lib_path + "'").c_str()) < 0) { log_warn("Failed to initialize Python module"); From 8888cf6a03426aceb8cfe6038b5098d16cfa1719 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 14:38:34 +0800 Subject: [PATCH 099/244] fix pypath on linux --- projects/CUDA/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 9f1a44aeb4..222b49d8c1 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -105,8 +105,8 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) COMMAND ${CMAKE_COMMAND} -E copy_if_different ${ZPCJIT_FILE} ${PY_LIBS_DIR}/zpcjit COMMENT "Copying zpcjit src file: ${ZPCJIT_FILE}") endforeach() - cmake_path(GET ZS_OVERWRITE_PYTHON_EXECUTABLE PARENT_PATH PYTHON_ENV_PATH) if (WIN32) + cmake_path(GET ZS_OVERWRITE_PYTHON_EXECUTABLE PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") add_custom_command( TARGET copy_py @@ -144,6 +144,8 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) ) endforeach() else() + cmake_path(GET ZS_OVERWRITE_PYTHON_EXECUTABLE PARENT_PATH PYTHON_ENV_PATH) + cmake_path(GET PYTHON_ENV_PATH PARENT_PATH PYTHON_ENV_PATH) message(STATUS "python3 parent: ${PYTHON_ENV_PATH}") add_custom_command( TARGET copy_py From 671afe6c24e392a182f4d1fe8a6ec71457652cf0 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 15:15:56 +0800 Subject: [PATCH 100/244] fix typo --- projects/CUDA/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 
222b49d8c1..29f8d9f5da 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -93,7 +93,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/lib - COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include + COMMAND ${CMAKE_COMMAND} -E make_directory ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include/zensim COMMENT "creating pyzpc jit module directory at ${PY_LIBS_DIR}/zpcjit" ) file(GLOB ZPC_JIT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit/pyzpc/*.py) @@ -160,7 +160,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) # prepare zpc headers for pyzpc add_custom_command( TARGET copy_py POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include + COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include/zensim COMMENT "Copying zpc source files: ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim") # prepare zpc libs for pyzpc foreach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) From bf032320f4e23e20731ed817b9a113db891b1cc4 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 15:54:07 +0800 Subject: [PATCH 101/244] setup numpy during build --- projects/CUDA/CMakeLists.txt | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 29f8d9f5da..3acd821246 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -48,16 +48,21 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) message(STATUS "python3 libraries: ${ZS_OVERWRITE_PYTHON_LIBRARIES}") message(STATUS "python3 include directories: ${ZS_OVERWRITE_PYTHON_INCLUDE_DIR}") set(ENV{ZENO_BIN_DIR} "${ZENO_BIN_DIR}") - #[=[ add_custom_command( TARGET copy_py POST_BUILD COMMAND ${CMAKE_COMMAND} -E env 
ZENO_BIN_DIR="${ZENO_BIN_DIR}" - ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m pip install ${CMAKE_CURRENT_SOURCE_DIR}/zpc_jit --user - COMMENT "installing pyzpc into the specified conda environment: ${ZS_OVERWRITE_PYTHON_EXECUTABLE}" + ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m ensurepip + COMMENT "preparing pip module" + ) + add_custom_command( + TARGET copy_py + POST_BUILD + COMMAND + ${ZS_OVERWRITE_PYTHON_EXECUTABLE} -m pip install numpy --user + COMMENT "installing numpy" ) - ]=] set(PY_LIBS_DIR ${RESOURCE_BASE_DIR}/resource/py_libs) set(PREFIX "lib") if (WIN32) From 79fbbdcf2fdee8bb9a71a0a4042c4e4119df0f9e Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 9 Jul 2024 18:39:54 +0800 Subject: [PATCH 102/244] finalize zpc jit setup --- projects/CUDA/CMakeLists.txt | 6 +++--- projects/CUDA/zpc | 2 +- projects/CUDA/zpc_jit | 2 +- projects/PyZpc/pyzfx.cpp | 16 ++++++++++++++-- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 3acd821246..66f93f15a7 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -90,9 +90,9 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) "zpc" "zpctool") # "zspartio", "zswhereami" - target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CAPIS="${RESOURCE_BASE_DIR}/${PREFIX}zpc_py_interop${SUFFIX}") - target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_NVRTC="${RESOURCE_BASE_DIR}/${PREFIX}zpc_jit_nvrtc${SUFFIX}") - target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CLANG="${RESOURCE_BASE_DIR}/${PREFIX}zpc_jit_clang${SUFFIX}") + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CAPIS="${PREFIX}zpc_py_interop${SUFFIX}") + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_NVRTC="${PREFIX}zpc_jit_nvrtc${SUFFIX}") + target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CLANG="${PREFIX}zpc_jit_clang${SUFFIX}") add_custom_command( TARGET copy_py POST_BUILD diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc 
index 9939af9654..66d6875413 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 9939af9654dfa0cbf19d27c8be7dcaf8e0011b9a +Subproject commit 66d687541315ebf31e67c1212aac5213dd4fc6b9 diff --git a/projects/CUDA/zpc_jit b/projects/CUDA/zpc_jit index c7d1194e61..2321ffaeb0 160000 --- a/projects/CUDA/zpc_jit +++ b/projects/CUDA/zpc_jit @@ -1 +1 @@ -Subproject commit c7d1194e6112cdc38308f473f6ce14ca2b861e57 +Subproject commit 2321ffaeb0c05e26ed4e3ed82fb2d46de6bc634d diff --git a/projects/PyZpc/pyzfx.cpp b/projects/PyZpc/pyzfx.cpp index 80a14e4282..8ab00636b6 100644 --- a/projects/PyZpc/pyzfx.cpp +++ b/projects/PyZpc/pyzfx.cpp @@ -65,12 +65,24 @@ static callback_t zpc_init_callback = [] (auto _) { return; } #endif - if (PyRun_SimpleString(("import sys; sys.path.append('" + - py_libs_dir + "'); import zpy; zpy.init_zeno_lib('" + zeno_lib_path + + if (PyRun_SimpleString(("import sys; sys.path.append('" + + py_libs_dir + "'); import zpy; zpy.init_zeno_lib('" + zeno_lib_path + "'); zpy.zeno_lib_path = '" + zeno_lib_path + "'").c_str()) < 0) { log_warn("Failed to initialize Python module"); return; } + if (PyRun_SimpleString(("import zpcjit; zpcjit.init_zpc_lib('" + exe_dir + "/" + ZENO_OUTPUT_BINARY_CAPIS + "');").c_str()) < 0) { + log_warn("Failed to initialize Python module"); + return; + } + if (PyRun_SimpleString(("zpcjit.init_zpc_nvrtc_lib('" + exe_dir + "/" + ZENO_OUTPUT_BINARY_NVRTC + "');").c_str()) < 0) { + log_warn("Failed to initialize Python module"); + return; + } + if (PyRun_SimpleString(("zpcjit.init_zpc_clang_lib('" + exe_dir + "/" + ZENO_OUTPUT_BINARY_CLANG + "');").c_str()) < 0) { + log_warn("Failed to initialize Python module"); + return; + } log_debug("Initialized Python successfully!"); getSession().userData().set("subprogram_python", std::make_shared>(subprogram_python_main)); }; From cf2916db7d476ce74d13ac447c5884cfff40c58a Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 9 Jul 2024 23:17:48 +0800 
Subject: [PATCH 103/244] fix depth texture --- zenovis/include/zenovis/bate/FrameBufferRender.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenovis/include/zenovis/bate/FrameBufferRender.h b/zenovis/include/zenovis/bate/FrameBufferRender.h index 3a8ce31e8c..7764e17db7 100644 --- a/zenovis/include/zenovis/bate/FrameBufferRender.h +++ b/zenovis/include/zenovis/bate/FrameBufferRender.h @@ -128,7 +128,7 @@ struct FrameBufferRender { depth_texture = make_unique(); depth_texture->target = GL_TEXTURE_2D_MULTISAMPLE; CHECK_GL(glBindTexture(depth_texture->target, depth_texture->tex)); - CHECK_GL(glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, GL_DEPTH_COMPONENT, w, h, GL_TRUE)); + CHECK_GL(glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, GL_DEPTH_COMPONENT32F, w, h, GL_TRUE)); CHECK_GL(glBindTexture(depth_texture->target, 0)); CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D_MULTISAMPLE, depth_texture->tex, 0)); @@ -150,7 +150,7 @@ struct FrameBufferRender { CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, screen_tex->tex, 0)); CHECK_GL(glBindTexture(GL_TEXTURE_2D, screen_depth_tex->tex)); - CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT, w, h, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL)); + CHECK_GL(glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT32F, w, h, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL)); CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)); CHECK_GL(glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)); CHECK_GL(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, screen_depth_tex->tex, 0)); From 1cc98c864e4cb3211f8eeead68ff54d8fbf2bb5e Mon Sep 17 00:00:00 2001 From: littlemine Date: Wed, 10 Jul 2024 01:14:28 +0800 Subject: [PATCH 104/244] conformant zsgrid2dsample --- projects/CUDA/CMakeLists.txt | 25 +--- projects/CuEulerian/hybrid/G2P.cu | 2 +- projects/CuEulerian/hybrid/P2G.cu 
| 2 +- projects/CuEulerian/swe/FDGather.cu | 127 ++++++++++++++++++- projects/CuEulerian/volume/Transfer.cu | 4 +- projects/CuEulerian/volume/VolumeOperator.cu | 10 +- projects/CuEulerian/volume/VolumeTopology.cu | 10 +- 7 files changed, 135 insertions(+), 45 deletions(-) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 66f93f15a7..3cec667147 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -76,20 +76,6 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) message(FATAL_ERROR "unknown system to determine shared library suffix") endif() - set(ZPC_BINARIES "clang" - "cudart" - "cuda" - "nvrtc" - "omp" - "zpccore" - "zpccuda" - "zpc_jit_clang" - "zpc_jit_nvrtc" - "zpcomp" - "zpc_py_interop" - "zpc" - "zpctool") # "zspartio", "zswhereami" - target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CAPIS="${PREFIX}zpc_py_interop${SUFFIX}") target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_NVRTC="${PREFIX}zpc_jit_nvrtc${SUFFIX}") target_compile_definitions(zeno PRIVATE -DZENO_OUTPUT_BINARY_CLANG="${PREFIX}zpc_jit_clang${SUFFIX}") @@ -168,16 +154,7 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) COMMAND ${CMAKE_COMMAND} -E copy_directory ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim ${PY_LIBS_DIR}/zpcjit/zpc_jit/zpc/include/zensim COMMENT "Copying zpc source files: ${CMAKE_CURRENT_SOURCE_DIR}/zpc/include/zensim") # prepare zpc libs for pyzpc - foreach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) - if(EXISTS ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX}) - add_custom_command( - TARGET copy_py POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX} ${PY_LIBS_DIR}/zpcjit/zpc_jit/lib - COMMENT "Copying zpc binary file: ${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX}") - else() - message(STATUS "${RESOURCE_BASE_DIR}/${PREFIX}${ZPC_BIN_FILE}${SUFFIX} not found.") - endif() - endforeach(ZPC_BIN_FILE IN LISTS ZPC_BINARIES) + endif(ZS_PYTHON_FOUND AND 
ZENO_WITH_PyZpc) add_library(zshelper INTERFACE) diff --git a/projects/CuEulerian/hybrid/G2P.cu b/projects/CuEulerian/hybrid/G2P.cu index 7f676047cf..470e10ca53 100644 --- a/projects/CuEulerian/hybrid/G2P.cu +++ b/projects/CuEulerian/hybrid/G2P.cu @@ -110,7 +110,7 @@ struct ZSSparseGridToPrimitive : INode { auto tag = src_tag(zsSPG, attrTag); using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); using kt_t = std::variant, wrapv, wrapv, wrapv, wrapv, wrapv>; diff --git a/projects/CuEulerian/hybrid/P2G.cu b/projects/CuEulerian/hybrid/P2G.cu index dc3a7fff0d..d706071b0a 100644 --- a/projects/CuEulerian/hybrid/P2G.cu +++ b/projects/CuEulerian/hybrid/P2G.cu @@ -227,7 +227,7 @@ struct ZSPrimitiveToSparseGrid : INode { using namespace zs; constexpr auto space = execspace_e::cuda; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); using kt_t = std::variant, wrapv, wrapv, wrapv, wrapv, wrapv>; diff --git a/projects/CuEulerian/swe/FDGather.cu b/projects/CuEulerian/swe/FDGather.cu index 3a4603204f..ee991d67ab 100644 --- a/projects/CuEulerian/swe/FDGather.cu +++ b/projects/CuEulerian/swe/FDGather.cu @@ -13,7 +13,7 @@ template void edgeLoop(typename ZenoParticles::particles_t &prim, int nx, int ny, const std::string &channel) { using namespace zs; constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec().device(0); + auto pol = cuda_exec(); const SmallString lTag = std::string("l") + channel; const SmallString rTag = std::string("r") + channel; const SmallString tTag = std::string("t") + channel; @@ -128,7 +128,7 @@ void edgeLoopSum(typename ZenoParticles::particles_t &prim, int nx, int ny, cons const std::string &addChannel) { using namespace zs; constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec().device(0); + auto pol = cuda_exec(); const SmallString lTag = std::string("l") + channel; const SmallString rTag = std::string("r") + channel; const SmallString tTag = std::string("t") + channel; @@ -154,7 
+154,7 @@ template void cornerLoop(typename ZenoParticles::particles_t &prim, int nx, int ny, const std::string &channel) { using namespace zs; constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec().device(0); + auto pol = cuda_exec(); const SmallString ltTag = std::string("lt") + channel; const SmallString rtTag = std::string("rt") + channel; const SmallString lbTag = std::string("lb") + channel; @@ -183,7 +183,7 @@ void cornerLoopSum(typename ZenoParticles::particles_t &prim, int nx, int ny, co const std::string &addChannel) { using namespace zs; constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec().device(0); + auto pol = cuda_exec(); const SmallString ltTag = std::string("lt") + channel; const SmallString rtTag = std::string("rt") + channel; const SmallString lbTag = std::string("lb") + channel; @@ -377,7 +377,7 @@ struct ZSMomentumTransfer2DFiniteDifference : zeno::INode { auto addChannel = get_input2("add_channel"); auto &verts = grid->getParticles(); - auto pol = zs::cuda_exec().device(0); + auto pol = zs::cuda_exec(); if (attrT == "float") { verts.append_channels(pol, {{addChannel, 1}}); } @@ -421,7 +421,7 @@ ZENDEFNODE(ZSMomentumTransfer2DFiniteDifference, { {"zenofx"}, }); -template static constexpr T lerp(T a, T b, float c) { +template static constexpr auto lerp(T a, T b, float c) { return (1 - c) * a + c * b; } template @@ -430,7 +430,7 @@ void sample2D(typename ZenoParticles::particles_t &prim, const zs::SmallString & using vec3f = zs::vec; using namespace zs; constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec().device(0); + auto pol = cuda_exec(); using T = conditional_t, vec3f>; Vector temp(prim.get_allocator(), prim.size()); @@ -495,4 +495,117 @@ ZENDEFNODE(ZSGrid2DSample, { {"zenofx"}, }); +template +void sample2D(CoordsT coord, FieldT field, PrimAttrT primAttr, int nx, int ny, float h, zs::vec bmin, bool isPeriodic) { + using vec3f = zs::vec; + using namespace zs; + constexpr auto space = execspace_e::cuda; + 
auto pol = cuda_exec(); + using T = RM_CVREF_T(*field); + static_assert(is_same_v, "???"); + + auto allocator = get_temporary_memory_source(pol); + Vector temp(allocator, range_size(coord)); + + pol(range(range_size(coord)), [temp = temp.begin(), coord = coord.begin(), field, primAttr, bmin, nx, ny, + h, isPeriodic] ZS_LAMBDA(auto tidx) mutable { + auto uv = coord[tidx]; + zs::vec uv2; + if (isPeriodic) { + auto Lx = (nx - 1) * h; + auto Ly = (ny - 1) * h; + int gid_x = zs::floor((uv[0] - bmin[0]) / Lx); + int gid_y = zs::floor((uv[2] - bmin[2]) / Ly); + uv2 = (uv - (bmin + zs::vec{gid_x * Lx, 0, gid_y * Ly})) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.0f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.0f), ny - 1.01f); + } else { + uv2 = (uv - bmin) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.01f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.01f), ny - 1.01f); + } + // uv2 = zeno::min(zeno::max(uv2, vec3f{0.01, 0.0, 0.01}), vec3f{nx - 1.01, 0.0, ny - 1.01}); + int i = uv2[0]; + int j = uv2[2]; + float cx = uv2[0] - i, cy = uv2[2] - j; + size_t idx00 = j * nx + i, idx01 = j * nx + i + 1, idx10 = (j + 1) * nx + i, idx11 = (j + 1) * nx + i + 1; + T f00 = field[idx00]; + T f01 = field[idx01]; + T f10 = field[idx10]; + T f11 = field[idx11]; + temp[tidx] = lerp(lerp(f00, f01, cx), lerp(f10, f11, cx), cy); + }); + + pol(range(range_size(coord)), [temp = temp.begin(), primAttr] ZS_LAMBDA( + auto tidx) mutable { primAttr[tidx] = temp[tidx]; }); +} +struct ZSGrid2DSample2 : zeno::INode { + virtual void apply() override { + using vec3f = zs::vec; + auto nx = get_input2("nx"); + auto ny = get_input2("ny"); + auto bmin = get_input2("bmin"); + auto prim = get_input("prim"); + auto grid = get_input("sampleGrid"); + auto channelList = get_input2("channel"); + auto sampleby = get_input2("sampleBy"); + auto isPeriodic = get_input2("sampleType") == "Periodic"; + auto h = get_input2("h"); + + std::vector channels; + std::istringstream 
iss(channelList); + std::string word; + while (iss >> word) { + if (word == "*") { + channels = grid->getParticles().getPropertyTags(); + break; + } + channels.push_back(zs::PropertyTag{word.c_str(), grid->getParticles().getPropertySize(word)}); + } + for (auto &ch : channels) + if (ch.name == "pos") + ch.name = "x"; + + if (sampleby == "pos") sampleby = "x"; + + auto &pars = prim->getParticles(); + auto &gridVerts = grid->getParticles(); + pars.append_channels(zs::cuda_exec(), channels); + + if (pars.hasProperty(sampleby)) { + if (!(sampleby == "pos" || pars.getPropertySize(sampleby) == 3)) + throw std::runtime_error("[sampleBy] has to be a vec3f attribute!"); + + for (const auto &ch : channels) { + if (ch.name != "x") { + if (ch.numChannels == 1) + sample2D(zs::range(pars, sampleby, zs::dim_c<3>), gridVerts.begin(ch.name, zs::dim_c<1>), pars.begin(ch.name, zs::dim_c<1>), nx, ny, + h, vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + else if (ch.numChannels == 3) + sample2D(zs::range(pars, sampleby, zs::dim_c<3>), gridVerts.begin(ch.name, zs::dim_c<3>), pars.begin(ch.name, zs::dim_c<3>), nx, ny, + h, vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + } + } + } + + set_output("prim", std::move(grid)); + } +}; +ZENDEFNODE(ZSGrid2DSample2, { + {{"ZenoParticles", "prim"}, + {"ZenoParticles", "sampleGrid"}, + {"int", "nx", "1"}, + {"int", "ny", "1"}, + {"float", "h", "1"}, + {"vec3f", "bmin", "0,0,0"}, + {"string", "channel", "*"}, + {"string", "sampleBy", "pos"}, + {"enum Clamp Periodic", "sampleType", "Clamp"}}, + {{"ZenoParticles", "prim"}}, + {}, + {"zenofx"}, + }); + } // namespace zeno \ No newline at end of file diff --git a/projects/CuEulerian/volume/Transfer.cu b/projects/CuEulerian/volume/Transfer.cu index 53f9f4e74e..fb60e15696 100644 --- a/projects/CuEulerian/volume/Transfer.cu +++ b/projects/CuEulerian/volume/Transfer.cu @@ -61,7 +61,7 @@ struct ZSParticleToZSLevelSet : INode { auto &field = zslevelset->getBasicLevelSet()._ls; using namespace zs; - auto 
cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); for (auto &&parObjPtr : parObjPtrs) { auto &pars = parObjPtr->getParticles(); @@ -160,7 +160,7 @@ struct PrimitiveToZSLevelSet : INode { const auto dx = get_input2("dx"); Vector xs{pos.size(), memsrc_e::device}, elePos{numEles, memsrc_e::device}; copy(zs::mem_device, (void *)xs.data(), (void *)pos.data(), sizeof(zeno::vec3f) * pos.size()); - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); { if (quads.size()) { Vector> inds{numEles, memsrc_e::device}; diff --git a/projects/CuEulerian/volume/VolumeOperator.cu b/projects/CuEulerian/volume/VolumeOperator.cu index f9feb968ca..ad25eeec04 100644 --- a/projects/CuEulerian/volume/VolumeOperator.cu +++ b/projects/CuEulerian/volume/VolumeOperator.cu @@ -19,7 +19,7 @@ struct ZSLevelSetBinaryOperator : INode { template void binaryOp(SplsT &lsa, const SplsT &lsb, T a, T b, Op op) { using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); const auto numInBlocks = lsb.numBlocks(); const auto numPrevBlocks = lsa.numBlocks(); @@ -99,7 +99,7 @@ struct ZSLevelSetBinaryOperator : INode { is_same_v> { auto numBlocks = lsPtr->numBlocks(); auto numBlocksB = lsPtrB->numBlocks(); - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); /// reserve enough memory lsPtr->resize(cudaPol, numBlocks + numBlocksB); /// assume sharing the same transformation @@ -137,7 +137,7 @@ struct ResampleZSLevelSet : INode { void resample(SplsT &ls, const RefSplsT &refLs, const zs::PropertyTag &tag) { using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); ls.append_channels(cudaPol, {tag}); // would also check channel dimension fmt::print("tag: [{}, {}] at {} (out of {}) in dst, [{}] (out of {}) in ref.\n", tag.name, tag.numChannels, @@ -239,7 +239,7 @@ struct AdvectZSLevelSet : INode { int advect(SplsT &lsOut, const VelSplsT &velLs, const float dt) { using namespace zs; - auto cudaPol = 
cuda_exec().device(0); + auto cudaPol = cuda_exec(); auto vm = get_level_set_max_speed(cudaPol, velLs); int nvoxels = (int)std::ceil(vm * dt / lsOut._grid.dx); @@ -319,7 +319,7 @@ struct ClampZSLevelSet : INode { void clamp(SplsT &ls, const RefSplsT &refLs, const VelSplsT &velLs, const float dt) { using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); /// ls & refLs better be of same category cudaPol(Collapse{ls.numBlocks(), ls.block_size}, diff --git a/projects/CuEulerian/volume/VolumeTopology.cu b/projects/CuEulerian/volume/VolumeTopology.cu index e93824d553..9753ba7e90 100644 --- a/projects/CuEulerian/volume/VolumeTopology.cu +++ b/projects/CuEulerian/volume/VolumeTopology.cu @@ -30,7 +30,7 @@ struct MarkZSLevelSet : INode { match( [this, threshold](auto &lsPtr) -> std::enable_if_t> { // using SplsT = typename RM_CVREF_T(lsPtr)::element_type; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); mark_level_set(cudaPol, *lsPtr, threshold); // refit_level_set_domain(cudaPol, *lsPtr, threshold); }, @@ -53,7 +53,7 @@ struct ZSLevelSetTopologyUnion : INode { template void topologyUnion(SplsT &ls, const TableT &refTable) { using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); const auto numInBlocks = refTable.size(); const auto numPrevBlocks = ls.numBlocks(); @@ -96,7 +96,7 @@ struct ZSLevelSetTopologyUnion : INode { const auto &refTable = refLsPtr->_table; auto numBlocks = lsPtr->numBlocks(); auto numRefBlocks = refLsPtr->numBlocks(); - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); /// reserve enough memory lsPtr->resize(cudaPol, numBlocks + numRefBlocks); /// assume sharing the same transformation @@ -145,7 +145,7 @@ struct ExtendZSLevelSet : INode { [this, dt](auto &lsPtr, const auto &velLsPtr) -> std::enable_if_t && is_spls_v> { - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); auto vm = get_level_set_max_speed(cudaPol, *velLsPtr); int 
nvoxels = (int)std::ceil(vm * dt / lsPtr->_grid.dx); auto nlayers = std::max((nvoxels + lsPtr->side_length - 1) / lsPtr->side_length, 2); @@ -179,7 +179,7 @@ struct ZSLevelSetFloodFill : INode { using namespace zs; - auto cudaPol = cuda_exec().device(0); + auto cudaPol = cuda_exec(); using basic_ls_t = typename ZenoLevelSet::basic_ls_t; using const_sdf_vel_ls_t = typename ZenoLevelSet::const_sdf_vel_ls_t; From 9407041a8c51a9d8222aba04673477553badfed2 Mon Sep 17 00:00:00 2001 From: littlemine Date: Fri, 12 Jul 2024 02:53:47 +0800 Subject: [PATCH 105/244] done zsgrid2dsample2 --- projects/CUDA/zpc | 2 +- projects/CuEulerian/swe/FDGather.cu | 303 ++++++++++++++++++---------- 2 files changed, 198 insertions(+), 107 deletions(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 66d6875413..6fbcca5861 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 66d687541315ebf31e67c1212aac5213dd4fc6b9 +Subproject commit 6fbcca5861ee33c59730647f30b916ef2f8122da diff --git a/projects/CuEulerian/swe/FDGather.cu b/projects/CuEulerian/swe/FDGather.cu index ee991d67ab..3fd0d237c6 100644 --- a/projects/CuEulerian/swe/FDGather.cu +++ b/projects/CuEulerian/swe/FDGather.cu @@ -496,116 +496,207 @@ ZENDEFNODE(ZSGrid2DSample, { }); template -void sample2D(CoordsT coord, FieldT field, PrimAttrT primAttr, int nx, int ny, float h, zs::vec bmin, bool isPeriodic) { - using vec3f = zs::vec; - using namespace zs; - constexpr auto space = execspace_e::cuda; - auto pol = cuda_exec(); - using T = RM_CVREF_T(*field); - static_assert(is_same_v, "???"); - - auto allocator = get_temporary_memory_source(pol); - Vector temp(allocator, range_size(coord)); - - pol(range(range_size(coord)), [temp = temp.begin(), coord = coord.begin(), field, primAttr, bmin, nx, ny, - h, isPeriodic] ZS_LAMBDA(auto tidx) mutable { - auto uv = coord[tidx]; - zs::vec uv2; - if (isPeriodic) { - auto Lx = (nx - 1) * h; - auto Ly = (ny - 1) * h; - int gid_x = zs::floor((uv[0] - bmin[0]) / Lx); 
- int gid_y = zs::floor((uv[2] - bmin[2]) / Ly); - uv2 = (uv - (bmin + zs::vec{gid_x * Lx, 0, gid_y * Ly})) / h; - uv2[1] = 0; - uv2[0] = zs::min(zs::max(uv2[0], 0.0f), nx - 1.01f); - uv2[2] = zs::min(zs::max(uv2[2], 0.0f), ny - 1.01f); - } else { - uv2 = (uv - bmin) / h; - uv2[1] = 0; - uv2[0] = zs::min(zs::max(uv2[0], 0.01f), nx - 1.01f); - uv2[2] = zs::min(zs::max(uv2[2], 0.01f), ny - 1.01f); - } - // uv2 = zeno::min(zeno::max(uv2, vec3f{0.01, 0.0, 0.01}), vec3f{nx - 1.01, 0.0, ny - 1.01}); - int i = uv2[0]; - int j = uv2[2]; - float cx = uv2[0] - i, cy = uv2[2] - j; - size_t idx00 = j * nx + i, idx01 = j * nx + i + 1, idx10 = (j + 1) * nx + i, idx11 = (j + 1) * nx + i + 1; - T f00 = field[idx00]; - T f01 = field[idx01]; - T f10 = field[idx10]; - T f11 = field[idx11]; - temp[tidx] = lerp(lerp(f00, f01, cx), lerp(f10, f11, cx), cy); - }); - - pol(range(range_size(coord)), [temp = temp.begin(), primAttr] ZS_LAMBDA( - auto tidx) mutable { primAttr[tidx] = temp[tidx]; }); +void sample2D_(CoordsT coord, FieldT field, PrimAttrT primAttr, int nx, int ny, + float h, zs::vec bmin, bool isPeriodic) { + using vec3f = zs::vec; + using namespace zs; + constexpr auto space = execspace_e::cuda; + auto pol = cuda_exec(); + using T = RM_CVREF_T(*field); + static_assert(is_same_v, "???"); + + auto allocator = get_temporary_memory_source(pol); + constexpr int dim = sizeof(T) == sizeof(float) ? 
1 : 3; + using TT = conditional_t>; + Vector temp(allocator, range_size(coord)); + + pol(range(range_size(coord)), [temp = temp.begin(), coord = coord.begin(), + field, primAttr, bmin, nx, ny, h, + isPeriodic] ZS_LAMBDA(auto tidx) mutable { + auto uv = coord[tidx]; + zs::vec uv2; + if (isPeriodic) { + auto Lx = (nx - 1) * h; + auto Ly = (ny - 1) * h; + int gid_x = zs::floor((uv[0] - bmin[0]) / Lx); + int gid_y = zs::floor((uv[2] - bmin[2]) / Ly); + uv2 = (uv - (bmin + zs::vec{gid_x * Lx, 0, gid_y * Ly})) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.0f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.0f), ny - 1.01f); + } else { + uv2 = (uv - bmin) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.01f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.01f), ny - 1.01f); + } + // uv2 = zeno::min(zeno::max(uv2, vec3f{0.01, 0.0, 0.01}), vec3f{nx - 1.01, + // 0.0, ny - 1.01}); + int i = uv2[0]; + int j = uv2[2]; + float cx = uv2[0] - i, cy = uv2[2] - j; + size_t idx00 = j * nx + i, idx01 = j * nx + i + 1, idx10 = (j + 1) * nx + i, + idx11 = (j + 1) * nx + i + 1; + auto f00 = field[idx00]; + auto f01 = field[idx01]; + auto f10 = field[idx10]; + auto f11 = field[idx11]; + // auto t = get_type_str(); + // auto tt = get_type_str(); + // auto ttt = get_type_str(); + // printf("%s ,%s , %s\n", t.asChars(), tt.asChars(), ttt.asChars()); + temp[tidx] = lerp(lerp(f00, f01, cx), lerp(f10, f11, cx), cy); + }); + + pol(range(range_size(temp)), + [temp = temp.begin(), primAttr] ZS_LAMBDA(auto tidx) mutable { + primAttr[tidx] = temp[tidx]; + }); +} +template +void assignToField2D(CoordsT coord, FieldT field, PrimAttrT primAttr, int nx, int ny, + float h, zs::vec bmin, bool isPeriodic) { + using vec3f = zs::vec; + using namespace zs; + constexpr auto space = execspace_e::cuda; + auto pol = cuda_exec(); + using T = RM_CVREF_T(*field); + static_assert(is_same_v, "???"); + + auto allocator = get_temporary_memory_source(pol); + constexpr int dim = sizeof(T) == 
sizeof(float) ? 1 : 3; + using TT = conditional_t>; + Vector temp(allocator, range_size(coord)); + + pol(range(range_size(coord)), [temp = temp.begin(), coord = coord.begin(), + field, primAttr, bmin, nx, ny, h, + isPeriodic] ZS_LAMBDA(auto tidx) mutable { + auto uv = coord[tidx]; + zs::vec uv2; + if (isPeriodic) { + auto Lx = (nx - 1) * h; + auto Ly = (ny - 1) * h; + int gid_x = zs::floor((uv[0] - bmin[0]) / Lx); + int gid_y = zs::floor((uv[2] - bmin[2]) / Ly); + uv2 = (uv - (bmin + zs::vec{gid_x * Lx, 0, gid_y * Ly})) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.0f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.0f), ny - 1.01f); + } else { + uv2 = (uv - bmin) / h; + uv2[1] = 0; + uv2[0] = zs::min(zs::max(uv2[0], 0.01f), nx - 1.01f); + uv2[2] = zs::min(zs::max(uv2[2], 0.01f), ny - 1.01f); + } + // uv2 = zeno::min(zeno::max(uv2, vec3f{0.01, 0.0, 0.01}), vec3f{nx - 1.01, + // 0.0, ny - 1.01}); + int i = uv2[0]; + int j = uv2[2]; + field[j * nx + i] = temp[tidx]; + }); } -struct ZSGrid2DSample2 : zeno::INode { - virtual void apply() override { - using vec3f = zs::vec; - auto nx = get_input2("nx"); - auto ny = get_input2("ny"); - auto bmin = get_input2("bmin"); - auto prim = get_input("prim"); - auto grid = get_input("sampleGrid"); - auto channelList = get_input2("channel"); - auto sampleby = get_input2("sampleBy"); - auto isPeriodic = get_input2("sampleType") == "Periodic"; - auto h = get_input2("h"); - std::vector channels; - std::istringstream iss(channelList); - std::string word; - while (iss >> word) { - if (word == "*") { - channels = grid->getParticles().getPropertyTags(); - break; - } - channels.push_back(zs::PropertyTag{word.c_str(), grid->getParticles().getPropertySize(word)}); - } - for (auto &ch : channels) - if (ch.name == "pos") - ch.name = "x"; - - if (sampleby == "pos") sampleby = "x"; - - auto &pars = prim->getParticles(); - auto &gridVerts = grid->getParticles(); - pars.append_channels(zs::cuda_exec(), channels); - - if 
(pars.hasProperty(sampleby)) { - if (!(sampleby == "pos" || pars.getPropertySize(sampleby) == 3)) - throw std::runtime_error("[sampleBy] has to be a vec3f attribute!"); - - for (const auto &ch : channels) { - if (ch.name != "x") { - if (ch.numChannels == 1) - sample2D(zs::range(pars, sampleby, zs::dim_c<3>), gridVerts.begin(ch.name, zs::dim_c<1>), pars.begin(ch.name, zs::dim_c<1>), nx, ny, - h, vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); - else if (ch.numChannels == 3) - sample2D(zs::range(pars, sampleby, zs::dim_c<3>), gridVerts.begin(ch.name, zs::dim_c<3>), pars.begin(ch.name, zs::dim_c<3>), nx, ny, - h, vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); - } +struct ZSGrid2DSample2 : zeno::INode { + virtual void apply() override { + using vec3f = zs::vec; + auto nx = get_input2("nx"); + auto ny = get_input2("ny"); + auto bmin = get_input2("bmin"); + auto prim = get_input("prim"); + auto grid = get_input("sampleGrid"); + auto channelList = get_input2("channel"); + auto sampleby = get_input2("sampleBy"); + auto isPeriodic = get_input2("sampleType") == "Periodic"; + auto isSampleFromGrid = get_input2("sampleDirection") == "SampleFromGrid"; + auto h = get_input2("h"); + + std::vector channels; + std::istringstream iss(channelList); + std::string word; + while (iss >> word) { + if (word == "*") { + channels = grid->getParticles().getPropertyTags(); + break; + } + channels.push_back(zs::PropertyTag{ + word.c_str(), grid->getParticles().getPropertySize(word)}); + } + for (auto &ch : channels) + if (ch.name == "pos") + ch.name = "x"; + + if (sampleby == "pos") + sampleby = "x"; + + auto &pars = prim->getParticles(); + auto &gridVerts = grid->getParticles(); + pars.append_channels(zs::cuda_exec(), channels); + + if (pars.hasProperty(sampleby)) { + if (!(sampleby == "pos" || pars.getPropertySize(sampleby) == 3)) + throw std::runtime_error("[sampleBy] has to be a vec3f attribute!"); + + for (const auto &ch : channels) { + // fmt::print("comp [{}] with x: eq {}, ne {}\n", 
ch.name.asChars(), ch.name == "x", ch.name != "x"); + if (ch.name != "x") { + if (!isSampleFromGrid) { + if (ch.numChannels == 1) + assignToField2D(zs::range(pars, sampleby, zs::dim_c<3>), + gridVerts.begin(ch.name, zs::dim_c<1>), + pars.begin(ch.name, zs::dim_c<1>), nx, ny, h, + vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + else if (ch.numChannels == 3) + assignToField2D(zs::range(pars, sampleby, zs::dim_c<3>), + gridVerts.begin(ch.name, zs::dim_c<3>), + pars.begin(ch.name, zs::dim_c<3>), nx, ny, h, + vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + else + fmt::print("skipping attr [{}]\n", ch.name); + continue; } + /* + fmt::print("\tsampling attrib [{}] sampleby chn offset [{}], attr " + "offset [{}], size [{}] (grid [{}, {}])!\n", + ch.name, pars.getPropertyOffset(sampleby), + pars.getPropertyOffset(ch.name), + pars.getPropertySize(ch.name), + gridVerts.getPropertyOffset(ch.name), + gridVerts.getPropertySize(ch.name)); + */ + if (ch.numChannels == 1) + sample2D_(zs::range(pars, sampleby, zs::dim_c<3>), + gridVerts.begin(ch.name, zs::dim_c<1>), + pars.begin(ch.name, zs::dim_c<1>), nx, ny, h, + vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + else if (ch.numChannels == 3) + sample2D_(zs::range(pars, sampleby, zs::dim_c<3>), + gridVerts.begin(ch.name, zs::dim_c<3>), + pars.begin(ch.name, zs::dim_c<3>), nx, ny, h, + vec3f{bmin[0], bmin[1], bmin[2]}, isPeriodic); + else + fmt::print("skipping attr [{}]\n", ch.name); } - - set_output("prim", std::move(grid)); + } } -}; -ZENDEFNODE(ZSGrid2DSample2, { - {{"ZenoParticles", "prim"}, - {"ZenoParticles", "sampleGrid"}, - {"int", "nx", "1"}, - {"int", "ny", "1"}, - {"float", "h", "1"}, - {"vec3f", "bmin", "0,0,0"}, - {"string", "channel", "*"}, - {"string", "sampleBy", "pos"}, - {"enum Clamp Periodic", "sampleType", "Clamp"}}, - {{"ZenoParticles", "prim"}}, - {}, - {"zenofx"}, - }); + set_output("prim", std::move(prim)); + } +}; +ZENDEFNODE(ZSGrid2DSample2, + { + {{"ZenoParticles", "prim"}, + {"ZenoParticles", 
"sampleGrid"}, + {"int", "nx", "1"}, + {"int", "ny", "1"}, + {"float", "h", "1"}, + {"vec3f", "bmin", "0,0,0"}, + {"string", "channel", "*"}, + {"string", "sampleBy", "pos"}, + {"enum SampleFromGrid AssignToGrid", "sampleDirection", "SampleFromGrid"}, + {"enum Clamp Periodic", "sampleType", "Clamp"}}, + {{"ZenoParticles", "prim"}}, + {}, + {"zenofx"}, + }); } // namespace zeno \ No newline at end of file From 2f9dc8ed4b8101d3890bb3e0a89c62528c48033b Mon Sep 17 00:00:00 2001 From: littlemine Date: Fri, 12 Jul 2024 14:18:36 +0800 Subject: [PATCH 106/244] fix typo... --- projects/CuEulerian/swe/FDGather.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/CuEulerian/swe/FDGather.cu b/projects/CuEulerian/swe/FDGather.cu index 3fd0d237c6..18b8827c0a 100644 --- a/projects/CuEulerian/swe/FDGather.cu +++ b/projects/CuEulerian/swe/FDGather.cu @@ -593,7 +593,7 @@ void assignToField2D(CoordsT coord, FieldT field, PrimAttrT primAttr, int nx, in // 0.0, ny - 1.01}); int i = uv2[0]; int j = uv2[2]; - field[j * nx + i] = temp[tidx]; + field[j * nx + i] = primAttr[tidx]; }); } From 00d04cd0f42c493e4c46ae116e074fc2e3c3de2b Mon Sep 17 00:00:00 2001 From: littlemine Date: Sun, 14 Jul 2024 00:55:42 +0800 Subject: [PATCH 107/244] fix typos and for icx --- CMakeLists.txt | 36 ++++++++++++++++++++++-------------- projects/CUDA/zpc | 2 +- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 935f97f122..9180e45695 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,6 +61,14 @@ endif() message(STATUS "Build directory: ${PROJECT_BINARY_DIR}") message(STATUS "Build type: ${CMAKE_BUILD_TYPE}") +cmake_path(GET CMAKE_CXX_COMPILER STEM LAST_ONLY ZENO_CXX_COMPILER_BINARY) +message(STATUS "Current C++ compiler is ${ZENO_CXX_COMPILER_BINARY} (${CMAKE_CXX_COMPILER})") + +# intel sycl compiler +if(${ZENO_CXX_COMPILER_BINARY} STREQUAL "icx") + add_compile_options(-Wno-error=register -Wno-enum-constexpr-conversion 
-Wno-implicit-function-declaration) +endif() + if (WIN32) # get rid of 'min', 'max' macros; get back 'M_PI', 'M_E' add_definitions(-DNOMINMAX -D_USE_MATH_DEFINES) @@ -137,7 +145,7 @@ endif() ## --- begin cihou wxl cuda if (ZENO_WITH_CUDA) # must move enable_language ahead to here - message(STATUS "CUDA_PATH: "$ENV{CUDA_PATH}) + message(STATUS "CUDA_PATH: $ENV{CUDA_PATH}") if (NOT DEFINED CUDAToolkit_ROOT AND NOT DEFINED ENV{CUDAToolkit_ROOT} AND DEFINED ENV{CUDA_PATH}) set(CUDAToolkit_ROOT $ENV{CUDA_PATH} CACHE FILEPATH "ZENOCIHOUCUDA" FORCE) if (NOT DEFINED CUDAToolkit_NVCC_EXECUTABLE) @@ -147,21 +155,21 @@ if (ZENO_WITH_CUDA) # must move enable_language ahead to here set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE} CACHE FILEPATH "ZENOCIHOUCUDA" FORCE) endif() endif() - message(STATUS "CUDAToolkit_ROOT: "${CUDAToolkit_ROOT}) - message(STATUS "CUDAToolkit_ROOT_DIR: "${CUDAToolkit_ROOT_DIR}) - message(STATUS "CUDAToolkit_NVCC_EXECUTABLE: "${CUDAToolkit_NVCC_EXECUTABLE}) - message(STATUS "CUDAToolkit_BIN_DIR: "${CUDAToolkit_BIN_DIR}) + message(STATUS "CUDAToolkit_ROOT: ${CUDAToolkit_ROOT}") + message(STATUS "CUDAToolkit_ROOT_DIR: ${CUDAToolkit_ROOT_DIR}") + message(STATUS "CUDAToolkit_NVCC_EXECUTABLE: ${CUDAToolkit_NVCC_EXECUTABLE}") + message(STATUS "CUDAToolkit_BIN_DIR: ${CUDAToolkit_BIN_DIR}") message(STATUS "Now invoking FindCUDAToolkit.cmake") find_package(CUDAToolkit REQUIRED COMPONENTS cufft REQUIRED) - message(STATUS "CUDAToolkit_ROOT: "${CUDAToolkit_ROOT}) - message(STATUS "CUDAToolkit_ROOT_DIR: "${CUDAToolkit_ROOT_DIR}) - message(STATUS "CUDAToolkit_NVCC_EXECUTABLE: "${CUDAToolkit_NVCC_EXECUTABLE}) - message(STATUS "CUDAToolkit_BIN_DIR: "${CUDAToolkit_BIN_DIR}) - message(STATUS "CUDAToolkit_INCLUDE_DIRS: "${CUDAToolkit_INCLUDE_DIRS}) - message(STATUS "CUDAToolkit_LIBRARY_DIR: "${CUDAToolkit_LIBRARY_DIR}) - message(STATUS "CUDAToolkit_VERSION: "${CUDAToolkit_VERSION}) - message(STATUS "CMAKE_CUDA_COMPILER: "${CMAKE_CUDA_COMPILER}) - message(STATUS 
"CMAKE_CUDA_COMPILER_VERSION: "${CMAKE_CUDA_COMPILER_VERSION}) + message(STATUS "CUDAToolkit_ROOT: ${CUDAToolkit_ROOT}") + message(STATUS "CUDAToolkit_ROOT_DIR: ${CUDAToolkit_ROOT_DIR}") + message(STATUS "CUDAToolkit_NVCC_EXECUTABLE: ${CUDAToolkit_NVCC_EXECUTABLE}") + message(STATUS "CUDAToolkit_BIN_DIR: ${CUDAToolkit_BIN_DIR}") + message(STATUS "CUDAToolkit_INCLUDE_DIRS: ${CUDAToolkit_INCLUDE_DIRS}") + message(STATUS "CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") + message(STATUS "CUDAToolkit_VERSION: ${CUDAToolkit_VERSION}") + message(STATUS "CMAKE_CUDA_COMPILER: ${CMAKE_CUDA_COMPILER}") + message(STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") enable_language(CUDA) # otherwise projects/CUDA will fail to compile endif() ## --- end cihou wxl cuda diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 6fbcca5861..7186053732 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 6fbcca5861ee33c59730647f30b916ef2f8122da +Subproject commit 7186053732a939fe02f6183106740026686976ba From 6dbb6f52ae7c488c4be28effa3e206f892e75a72 Mon Sep 17 00:00:00 2001 From: littlemine Date: Thu, 18 Jul 2024 23:15:56 +0800 Subject: [PATCH 108/244] upd zpc --- projects/CUDA/SpatialAccel.cuh | 2 +- projects/CUDA/zpc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/SpatialAccel.cuh b/projects/CUDA/SpatialAccel.cuh index c02b4068f1..71c0a095eb 100644 --- a/projects/CUDA/SpatialAccel.cuh +++ b/projects/CUDA/SpatialAccel.cuh @@ -172,7 +172,7 @@ template struct LBvhView -constexpr decltype(auto) proxy(const ZenoLBvh &lbvh) { +decltype(auto) proxy(const ZenoLBvh &lbvh) { return LBvhView>{lbvh}; } diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 7186053732..8bc0a02cb4 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 7186053732a939fe02f6183106740026686976ba +Subproject commit 8bc0a02cb4c5bc39fe96a38d403c03923f593955 From 
6e135dfe7900b301eff3aa8db82d2b426a8809be Mon Sep 17 00:00:00 2001 From: littlemine Date: Sat, 20 Jul 2024 20:43:55 +0800 Subject: [PATCH 109/244] for anaconda py, zlib and library need copy as well --- projects/CUDA/CMakeLists.txt | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/projects/CUDA/CMakeLists.txt b/projects/CUDA/CMakeLists.txt index 3cec667147..b753132791 100644 --- a/projects/CUDA/CMakeLists.txt +++ b/projects/CUDA/CMakeLists.txt @@ -127,6 +127,22 @@ if (ZS_PYTHON_FOUND AND ZENO_WITH_PyZpc) COMMENT "copying python lib directories at ${PYTHON_ENV_PATH} to ${RESOURCE_BASE_DIR}" ) + if (EXISTS ${PYTHON_ENV_PATH}/library) + add_custom_command( + TARGET copy_py + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory "${PYTHON_ENV_PATH}/library" ${RESOURCE_BASE_DIR}/library + COMMENT "copying python library directory at ${PYTHON_ENV_PATH} to ${RESOURCE_BASE_DIR}" + ) + endif() + if (EXISTS ${PYTHON_ENV_PATH}/zlib.dll) + add_custom_command( + TARGET copy_py + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy "${PYTHON_ENV_PATH}/zlib.dll" ${RESOURCE_BASE_DIR} + COMMENT "copying python zlib.dll ${PYTHON_ENV_PATH} to ${RESOURCE_BASE_DIR}" + ) + endif() foreach(lib ${ZS_PYTHON_LIBS}) add_custom_command( TARGET copy_py From 52496bba4d7053a8544abe7ba3b2b67e55c44688 Mon Sep 17 00:00:00 2001 From: littlemine Date: Sat, 20 Jul 2024 21:21:21 +0800 Subject: [PATCH 110/244] upd zpc --- projects/CUDA/zpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 8bc0a02cb4..9c3da758d8 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 8bc0a02cb4c5bc39fe96a38d403c03923f593955 +Subproject commit 9c3da758d8cda7906b3d045987812f3fc55aeb38 From 13dcf1b6e575cf01acc58168580b6f7cbedcf1bc Mon Sep 17 00:00:00 2001 From: littlemine Date: Mon, 22 Jul 2024 17:14:41 +0800 Subject: [PATCH 111/244] upd zpc --- projects/CUDA/zpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/projects/CUDA/zpc b/projects/CUDA/zpc index 9c3da758d8..e684734a8c 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 9c3da758d8cda7906b3d045987812f3fc55aeb38 +Subproject commit e684734a8c401370cc9cbfa03934674b0ea85233 From 88d8d9231bc157a538c1bd496d4f4ed3c9c16eb1 Mon Sep 17 00:00:00 2001 From: teachmain Date: Tue, 23 Jul 2024 16:42:56 +0800 Subject: [PATCH 112/244] normal fix --- zenovis/xinxinoptix/DeflMatShader.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 0beb82ba96..bf1cbb1807 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -884,7 +884,7 @@ extern "C" __global__ void __closesthit__radiance() float3 frontPos, backPos; SelfIntersectionAvoidance::offsetSpawnPoint( frontPos, backPos, wldPos, prd->geometryNormal, wldOffset ); - shadowPRD.origin = dot(wi, vec3(wldNorm)) > 0 ? frontPos : backPos; + shadowPRD.origin = dot(wi, vec3(prd->geometryNormal)) > 0 ? frontPos : backPos; //auto shadingP = rtgems::offset_ray(shadowPRD.origin + params.cam.eye, prd->geometryNormal); // world space //shadowPRD.origin = frontPos; @@ -892,7 +892,7 @@ extern "C" __global__ void __closesthit__radiance() //shadowPRD.origin = backPos; //rtgems::offset_ray(P, -prd->geometryNormal); //} - auto shadingP = rtgems::offset_ray(P + params.cam.eye, dot(wi, vec3(wldNorm)) > 0 ? wldNorm:-wldNorm); // world space + auto shadingP = rtgems::offset_ray(P + params.cam.eye, dot(wi, vec3(prd->geometryNormal)) > 0 ? 
prd->geometryNormal:-prd->geometryNormal); // world space //if(mats.subsurface>0 && (mats.thin>0.5 || mats.doubleSide>0.5) && istransmission){ //shadingP = rtgems::offset_ray(P + params.cam.eye, -prd->geometryNormal); //} From 072f286c2258e2bcde1a0b37126d4b49e47955e8 Mon Sep 17 00:00:00 2001 From: littlemine Date: Thu, 25 Jul 2024 03:37:58 +0800 Subject: [PATCH 113/244] upd zpc --- projects/CUDA/zpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index e684734a8c..dc0bd936ea 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit e684734a8c401370cc9cbfa03934674b0ea85233 +Subproject commit dc0bd936ea71af4296dfce7f5fa7d013d3c158dc From 9757d6c967e10a1a2ca435e2ae7e676594ab115d Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 30 Jul 2024 16:52:24 +0800 Subject: [PATCH 114/244] fix depth texture 2 --- zenovis/src/Scene.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zenovis/src/Scene.cpp b/zenovis/src/Scene.cpp index a14ecb918a..f464b82989 100644 --- a/zenovis/src/Scene.cpp +++ b/zenovis/src/Scene.cpp @@ -175,7 +175,7 @@ std::vector Scene::record_frame_offline(int hdrSize, int rgbComps) { CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, rbo1)); CHECK_GL(glRenderbufferStorageMultisample(GL_RENDERBUFFER, drawOptions->msaa_samples, GL_RGBA, camera->m_nx, camera->m_ny)); CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, rbo2)); - CHECK_GL(glRenderbufferStorageMultisample(GL_RENDERBUFFER, drawOptions->msaa_samples, GL_DEPTH_COMPONENT32, camera->m_nx, camera->m_ny)); + CHECK_GL(glRenderbufferStorageMultisample(GL_RENDERBUFFER, drawOptions->msaa_samples, GL_DEPTH_COMPONENT32F, camera->m_nx, camera->m_ny)); CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, 0)); CHECK_GL(glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, rbo1)); @@ -199,7 +199,7 @@ std::vector Scene::record_frame_offline(int hdrSize, int rgbComps) { 
CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, srbo1)); CHECK_GL(glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA, nx, ny)); CHECK_GL(glBindRenderbuffer(GL_RENDERBUFFER, srbo2)); - CHECK_GL(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32, nx, ny)); + CHECK_GL(glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT32F, nx, ny)); auto bindReadSFbo = opengl::scopeGLBindFramebuffer(GL_DRAW_FRAMEBUFFER, sfbo); CHECK_GL(glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, From 18fbddda41c2704e628813866cc0eff755de8979 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 1 Aug 2024 01:03:12 +0800 Subject: [PATCH 115/244] improve-fbx-prim-merge --- projects/FBX/FBXSDK.cpp | 193 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 183 insertions(+), 10 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 1cee6f6b68..1674fc0738 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -13,6 +13,7 @@ #include #include "zeno/types/PrimitiveObject.h" #include "zeno/utils/scope_exit.h" +#include "zeno/funcs/PrimitiveUtils.h" #include #include #include @@ -520,6 +521,130 @@ bool GetMesh(FbxNode* pNode, std::shared_ptr prim, std::string return true; } +std::shared_ptr GetMesh(FbxNode* pNode) { + FbxMesh* pMesh = pNode->GetMesh(); + if (!pMesh) return nullptr; + std::string nodeName = pNode->GetName(); + auto prim = std::make_shared(); + prim->userData().set2("RootName", nodeName); + + FbxAMatrix bindMatrix = pNode->EvaluateGlobalTransform(); + auto s = bindMatrix.GetS(); + auto t = bindMatrix.GetT(); +// zeno::log_info("s {} {} {}", s[0], s[1], s[2]); +// zeno::log_info("t {} {} {}", t[0], t[1], t[2]); + + int numVertices = pMesh->GetControlPointsCount(); + FbxVector4* vertices = pMesh->GetControlPoints(); + prim->verts.resize(numVertices); + + for (int i = 0; i < numVertices; ++i) { + auto pos = bindMatrix.MultT( FbxVector4(vertices[i][0], vertices[i][1], vertices[i][2], 1.0)); + prim->verts[i] = vec3f(pos[0], 
pos[1], pos[2]); + } + int numPolygons = pMesh->GetPolygonCount(); + prim->polys.resize(numPolygons); + std::vector loops; + loops.reserve(numPolygons * 4); + int count = 0; + for (int i = 0; i < numPolygons; ++i) { + int numVertices = pMesh->GetPolygonSize(i); + for (int j = 0; j < numVertices; ++j) { + int vertexIndex = pMesh->GetPolygonVertex(i, j); + loops.push_back(vertexIndex); + } + prim->polys[i] = {count, numVertices}; + count += numVertices; + } + loops.shrink_to_fit(); + prim->loops.values = loops; +// zeno::log_info("pMesh->GetDeformerCount(FbxDeformer::eSkin) {}", pMesh->GetDeformerCount(FbxDeformer::eSkin)); + auto &ud = prim->userData(); + if (pMesh->GetDeformerCount(FbxDeformer::eSkin)) { + auto &bi = prim->verts.add_attr("boneName"); + std::fill(bi.begin(), bi.end(), vec4i(-1, -1, -1, -1)); + auto &bw = prim->verts.add_attr("boneWeight"); + std::fill(bw.begin(), bw.end(), vec4f(-1.0, -1.0, -1.0, -1.0)); + + FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); + std::vector bone_names; + + // Iterate over each cluster (bone) + for (int j = 0; j < pSkin->GetClusterCount(); ++j) { + FbxCluster* pCluster = pSkin->GetCluster(j); + + // Get the link node (bone) + FbxNode* pBoneNode = pCluster->GetLink(); + if (!pBoneNode) continue; + + // Get the bone weights + int numIndices = pCluster->GetControlPointIndicesCount(); + int* indices = pCluster->GetControlPointIndices(); + double* weights = pCluster->GetControlPointWeights(); + + bone_names.emplace_back(pBoneNode->GetName()); + for (int k = 0; k < numIndices; ++k) { + for (auto l = 0; l < 4; l++) { + if (bi[indices[k]][l] == -1) { + bi[indices[k]][l] = j; + bw[indices[k]][l] = weights[k]; + break; + } + } + } + } + ud.set2("boneName_count", int(bone_names.size())); + for (auto i = 0; i < bone_names.size(); i++) { + ud.set2(zeno::format("boneName_{}", i), bone_names[i]); + } + } + if (pMesh->GetElementUVCount() > 0) { + auto* arr = pMesh->GetElementUV(0); + std::string name = "uv"; + if 
(arr->GetMappingMode() == FbxLayerElement::EMappingMode::eByControlPoint) { + zeno::log_info("{}, eByControlPoint", name); + auto &attr = prim->verts.add_attr(name); + for (auto i = 0; i < prim->verts.size(); i++) { + int pIndex = i; + if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eIndexToDirect) { + pIndex = arr->GetIndexArray().GetAt(i); + } + auto x = arr->GetDirectArray().GetAt(pIndex)[0]; + auto y = arr->GetDirectArray().GetAt(pIndex)[1]; + attr[i] = vec3f(x, y, 0); + } + } + else if (arr->GetMappingMode() == FbxLayerElement::EMappingMode::eByPolygonVertex) { + zeno::log_info("{}, eByPolygonVertex", name); + if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eDirect) { + auto &uvs = prim->loops.add_attr("uvs"); + std::iota(uvs.begin(), uvs.end(), 0); + prim->uvs.resize(prim->loops.size()); + } + else if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eIndexToDirect) { + auto &uvs = prim->loops.add_attr("uvs"); + for (auto i = 0; i < prim->loops.size(); i++) { + uvs[i] = arr->GetIndexArray().GetAt(i); + } + int count = arr->GetDirectArray().GetCount(); + prim->uvs.resize(count); + } + for (auto i = 0; i < prim->uvs.size(); i++) { + auto x = arr->GetDirectArray().GetAt(i)[0]; + auto y = arr->GetDirectArray().GetAt(i)[1]; + prim->uvs[i] = vec2f(x, y); + } + } + } + if (pMesh->GetElementNormalCount() > 0) { + getAttr(pMesh->GetElementNormal(0), "nrm", prim); + } + if (pMesh->GetElementTangentCount() > 0) { + getAttr(pMesh->GetElementTangent(0), "tang", prim); + } + return prim; +} + struct NewFBXImportSkin : INode { virtual void apply() override { // Change the following filename to a suitable filename value. @@ -557,8 +682,6 @@ struct NewFBXImportSkin : INode { // Note that we are not printing the root node because it should // not contain any attributes. 
auto prim = std::make_shared(); - auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); FbxNode* lRootNode = lScene->GetRootNode(); std::vector availableRootNames; if(lRootNode) { @@ -570,17 +693,59 @@ struct NewFBXImportSkin : INode { availableRootNames.emplace_back(meshName); } } - ud.set2("AvailableRootName_count", int(availableRootNames.size())); - for (int i = 0; i < availableRootNames.size(); i++) { - ud.set2(format("AvailableRootName_{}", i), availableRootNames[i]); - } } auto rootName = get_input2("rootName"); if(lRootNode) { - for(int i = 0; i < lRootNode->GetChildCount(); i++) { - auto pNode = lRootNode->GetChild(i); - if (GetMesh(pNode, prim, rootName)) { - break; + if (rootName.empty()) { + std::vector> prims; + for(int i = 0; i < lRootNode->GetChildCount(); i++) { + auto pNode = lRootNode->GetChild(i); + auto sub_prim = GetMesh(pNode); + if (sub_prim) { + prims.push_back(sub_prim); + } + } + + std::map nameMappingGlobal; + + std::vector prims_ptr; + for (auto prim: prims) { + prims_ptr.push_back(prim.get()); + std::vector nameMapping; + auto boneName_count = prim->userData().get2("boneName_count"); + for (auto i = 0; i < boneName_count; i++) { + auto boneName = prim->userData().get2(zeno::format("boneName_{}", i)); + if (nameMappingGlobal.count(boneName) == 0) { + auto index = nameMappingGlobal.size(); + nameMappingGlobal[boneName] = index; + } + nameMapping.push_back(nameMappingGlobal[boneName]); + } + prim->userData().del("boneName_count"); + for (auto i = 0; i < boneName_count; i++) { + prim->userData().del(zeno::format("boneName_{}", i)); + } + auto &bis = prim->verts.add_attr("boneName"); + for (auto &bi: bis) { + for (auto i = 0; i < 4; i++) { + if (bi[i] != -1) { + bi[i] = nameMapping[bi[i]]; + } + } + } + } + prim = primMerge(prims_ptr); + prim->userData().set2("boneName_count", int(nameMappingGlobal.size())); + for (auto [key, value]: nameMappingGlobal) { + prim->userData().set2(zeno::format("boneName_{}", value), 
key); + } + } + else { + for(int i = 0; i < lRootNode->GetChildCount(); i++) { + auto pNode = lRootNode->GetChild(i); + if (GetMesh(pNode, prim, rootName)) { + break; + } } } } @@ -589,6 +754,14 @@ struct NewFBXImportSkin : INode { v = v * 0.01; } } + { + auto &ud = prim->userData(); + ud.set2("version", vec3i(major, minor, revision)); + ud.set2("AvailableRootName_count", int(availableRootNames.size())); + for (int i = 0; i < availableRootNames.size(); i++) { + ud.set2(format("AvailableRootName_{}", i), availableRootNames[i]); + } + } set_output("prim", prim); // Destroy the SDK manager and all the other objects it was handling. lSdkManager->Destroy(); From ccaa97273c7d4d6631bbf4e56821beaadf833b16 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 1 Aug 2024 13:20:25 +0800 Subject: [PATCH 116/244] PrimAttrFlat --- projects/FBX/FBXSDK.cpp | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 1cee6f6b68..2739fe623e 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -13,6 +13,7 @@ #include #include "zeno/types/PrimitiveObject.h" #include "zeno/utils/scope_exit.h" +#include "zeno/utils/string.h" #include #include #include @@ -1306,4 +1307,43 @@ ZENDEFNODE(BoneTransformView, { {}, {"debug"}, }); + +struct PrimAttrFlat : INode { + virtual void apply() override { + auto prim = get_input2("prim"); + auto params = get_input2("params"); + std::vector params_ = zeno::split_str(params, ','); + std::vector values; + for (auto i = 0; i < prim->size(); i++) { + for (const auto& param: params_) { + auto value = prim->attr(param); + values.push_back(value[i][0]); + values.push_back(value[i][1]); + values.push_back(value[i][2]); + } + } + + auto output = std::make_shared(); + output->resize(values.size()); + auto &value = output->add_attr("value"); + for (auto i = 0; i < values.size(); i++) { + value[i] = values[i]; + } + + set_output("output", 
output); + } +}; + +ZENDEFNODE(PrimAttrFlat, { + { + "prim", + {"string", "params", "transform_r0,transform_r1,transform_r2"}, + }, + { + "output", + }, + {}, + {"debug"}, +}); + } \ No newline at end of file From 76578d58f0e291ea3b302ef0572998d8fe4c769f Mon Sep 17 00:00:00 2001 From: littlemine Date: Thu, 1 Aug 2024 16:34:44 +0800 Subject: [PATCH 117/244] allow attr list handling --- projects/CUDA/zpc | 2 +- projects/PyZpc/interop/TileVector_nodes.cu | 38 ++++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index dc0bd936ea..fbdde50422 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit dc0bd936ea71af4296dfce7f5fa7d013d3c158dc +Subproject commit fbdde50422a210e0094de52cf4e4ec19ffa78357 diff --git a/projects/PyZpc/interop/TileVector_nodes.cu b/projects/PyZpc/interop/TileVector_nodes.cu index d3eac82804..6c119c0dfe 100644 --- a/projects/PyZpc/interop/TileVector_nodes.cu +++ b/projects/PyZpc/interop/TileVector_nodes.cu @@ -117,6 +117,21 @@ namespace zeno { }); struct CopyZsTileVectorTo : INode { + std::set separate_string_by(const std::string &tags, const std::string &sep) { + std::set res; + using Ti = RM_CVREF_T(std::string::npos); + Ti st = tags.find_first_not_of(sep, 0); + for (auto ed = tags.find_first_of(sep, st + 1); ed != std::string::npos; ed = tags.find_first_of(sep, st + 1)) { + res.insert(tags.substr(st, ed - st)); + st = tags.find_first_not_of(sep, ed); + if (st == std::string::npos) + break; + } + if (st != std::string::npos && st < tags.size()) { + res.insert(tags.substr(st)); + } + return res; + } template static void rearrange_device_data(SrcRange&& srcRange, DstRange&& dstRange, zs::wrapv, zs::wrapv) { @@ -139,11 +154,12 @@ namespace zeno { void apply() override { auto tvObj = get_input("ZsTileVector"); auto prim = get_input("prim"); - auto attr = get_input2("attr"); + auto attrs = get_input2("attr(s)"); + std::set attribCandidates = 
separate_string_by(attrs, " :;,."); auto& tv = tvObj->value; std::visit( - [&prim, &attr](auto& tv) { + [&prim, &attrs = attribCandidates](auto& tv) { using tv_t = RM_CVREF_T(tv); using val_t = typename tv_t::value_type; using namespace zs; @@ -153,7 +169,7 @@ namespace zeno { prim->resize(tv.size()); } - auto process = [&tv, &attr](auto& primAttrib) { + auto process = [&tv](auto& primAttrib, const auto &attr) { using T = typename RM_CVREF_T(primAttrib)::value_type; if constexpr (zs::is_arithmetic_v) { using AllocatorT = RM_CVREF_T(tv.get_allocator()); @@ -195,12 +211,14 @@ namespace zeno { sizeof(ZsT) * tv.size()); } } - }; - if (attr == "pos") - // if constexpr (zs::is_same_v, RM_CVREF_T(prim->attr(attr))>) - process(prim->attr("pos")); - else - match(process)(prim->attr(attr)); + }; + for (const auto &attr : attrs) { + if (attr == "pos") + // if constexpr (zs::is_same_v, RM_CVREF_T(prim->attr(attr))>) + process(prim->attr("pos"), attr); + else + match([&attr, &process](auto&primAttrib) {process(primAttrib, attr);})(prim->attr(attr)); + } } else @@ -215,7 +233,7 @@ namespace zeno { ZENDEFNODE(CopyZsTileVectorTo, { {"ZsTileVector", {"PrimitiveObject", "prim"}, - {"string", "attr", "clr"}, + {"string", "attr(s)", "clr"}, {"enum convert enforce_bit_cast", "option", "convert"}}, {"prim"}, {}, From b6d8b70c9638069a7729cf55e7ac516e92009783 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 1 Aug 2024 18:45:52 +0800 Subject: [PATCH 118/244] improve-fbx --- projects/FBX/FBXSDK.cpp | 192 ++++++++++++++-------------------------- 1 file changed, 65 insertions(+), 127 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 93d390dd61..360a25f041 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -396,132 +396,6 @@ void getAttr(T* arr, std::string name, std::shared_ptr prim) { } } -bool GetMesh(FbxNode* pNode, std::shared_ptr prim, std::string name) { - FbxMesh* pMesh = pNode->GetMesh(); - if (!pMesh) return 
false; - std::string nodeName = pNode->GetName(); - if (name.size() > 0 && nodeName != name) { - return false; - } - prim->userData().set2("RootName", nodeName); - - FbxAMatrix bindMatrix = pNode->EvaluateGlobalTransform(); - auto s = bindMatrix.GetS(); - auto t = bindMatrix.GetT(); -// zeno::log_info("s {} {} {}", s[0], s[1], s[2]); -// zeno::log_info("t {} {} {}", t[0], t[1], t[2]); - - int numVertices = pMesh->GetControlPointsCount(); - FbxVector4* vertices = pMesh->GetControlPoints(); - prim->verts.resize(numVertices); - - for (int i = 0; i < numVertices; ++i) { - auto pos = bindMatrix.MultT( FbxVector4(vertices[i][0], vertices[i][1], vertices[i][2], 1.0)); - prim->verts[i] = vec3f(pos[0], pos[1], pos[2]); - } - int numPolygons = pMesh->GetPolygonCount(); - prim->polys.resize(numPolygons); - std::vector loops; - loops.reserve(numPolygons * 4); - int count = 0; - for (int i = 0; i < numPolygons; ++i) { - int numVertices = pMesh->GetPolygonSize(i); - for (int j = 0; j < numVertices; ++j) { - int vertexIndex = pMesh->GetPolygonVertex(i, j); - loops.push_back(vertexIndex); - } - prim->polys[i] = {count, numVertices}; - count += numVertices; - } - loops.shrink_to_fit(); - prim->loops.values = loops; -// zeno::log_info("pMesh->GetDeformerCount(FbxDeformer::eSkin) {}", pMesh->GetDeformerCount(FbxDeformer::eSkin)); - auto &ud = prim->userData(); - if (pMesh->GetDeformerCount(FbxDeformer::eSkin)) { - auto &bi = prim->verts.add_attr("boneName"); - std::fill(bi.begin(), bi.end(), vec4i(-1, -1, -1, -1)); - auto &bw = prim->verts.add_attr("boneWeight"); - std::fill(bw.begin(), bw.end(), vec4f(-1.0, -1.0, -1.0, -1.0)); - - FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); - std::vector bone_names; - - // Iterate over each cluster (bone) - for (int j = 0; j < pSkin->GetClusterCount(); ++j) { - FbxCluster* pCluster = pSkin->GetCluster(j); - - // Get the link node (bone) - FbxNode* pBoneNode = pCluster->GetLink(); - if (!pBoneNode) continue; - - // Get the 
bone weights - int numIndices = pCluster->GetControlPointIndicesCount(); - int* indices = pCluster->GetControlPointIndices(); - double* weights = pCluster->GetControlPointWeights(); - - bone_names.emplace_back(pBoneNode->GetName()); - for (int k = 0; k < numIndices; ++k) { - for (auto l = 0; l < 4; l++) { - if (bi[indices[k]][l] == -1) { - bi[indices[k]][l] = j; - bw[indices[k]][l] = weights[k]; - break; - } - } - } - } - ud.set2("boneName_count", int(bone_names.size())); - for (auto i = 0; i < bone_names.size(); i++) { - ud.set2(zeno::format("boneName_{}", i), bone_names[i]); - } - } - if (pMesh->GetElementUVCount() > 0) { - auto* arr = pMesh->GetElementUV(0); - std::string name = "uv"; - if (arr->GetMappingMode() == FbxLayerElement::EMappingMode::eByControlPoint) { - zeno::log_info("{}, eByControlPoint", name); - auto &attr = prim->verts.add_attr(name); - for (auto i = 0; i < prim->verts.size(); i++) { - int pIndex = i; - if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eIndexToDirect) { - pIndex = arr->GetIndexArray().GetAt(i); - } - auto x = arr->GetDirectArray().GetAt(pIndex)[0]; - auto y = arr->GetDirectArray().GetAt(pIndex)[1]; - attr[i] = vec3f(x, y, 0); - } - } - else if (arr->GetMappingMode() == FbxLayerElement::EMappingMode::eByPolygonVertex) { - zeno::log_info("{}, eByPolygonVertex", name); - if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eDirect) { - auto &uvs = prim->loops.add_attr("uvs"); - std::iota(uvs.begin(), uvs.end(), 0); - prim->uvs.resize(prim->loops.size()); - } - else if (arr->GetReferenceMode() == FbxLayerElement::EReferenceMode::eIndexToDirect) { - auto &uvs = prim->loops.add_attr("uvs"); - for (auto i = 0; i < prim->loops.size(); i++) { - uvs[i] = arr->GetIndexArray().GetAt(i); - } - int count = arr->GetDirectArray().GetCount(); - prim->uvs.resize(count); - } - for (auto i = 0; i < prim->uvs.size(); i++) { - auto x = arr->GetDirectArray().GetAt(i)[0]; - auto y = arr->GetDirectArray().GetAt(i)[1]; - 
prim->uvs[i] = vec2f(x, y); - } - } - } - if (pMesh->GetElementNormalCount() > 0) { - getAttr(pMesh->GetElementNormal(0), "nrm", prim); - } - if (pMesh->GetElementTangentCount() > 0) { - getAttr(pMesh->GetElementTangent(0), "tang", prim); - } - return true; -} - std::shared_ptr GetMesh(FbxNode* pNode) { FbxMesh* pMesh = pNode->GetMesh(); if (!pMesh) return nullptr; @@ -744,7 +618,12 @@ struct NewFBXImportSkin : INode { else { for(int i = 0; i < lRootNode->GetChildCount(); i++) { auto pNode = lRootNode->GetChild(i); - if (GetMesh(pNode, prim, rootName)) { + std::string nodeName = pNode->GetName(); + if (nodeName == rootName) { + auto sub_prim = GetMesh(pNode); + if (sub_prim) { + prim = sub_prim; + } break; } } @@ -755,6 +634,20 @@ struct NewFBXImportSkin : INode { v = v * 0.01; } } + if (get_input2("CopyVectorsFromLoopsToVert")) { + auto vectors_str = get_input2("vectors"); + std::vector vectors = zeno::split_str(vectors_str, ','); + for (auto vector: vectors) { + vector = zeno::trim_string(vector); + if (vector.size() && prim->loops.attr_is(vector)) { + auto &nrm = prim->loops.attr(vector); + auto &vnrm = prim->verts.add_attr(vector); + for (auto i = 0; i < prim->loops.size(); i++) { + vnrm[prim->loops[i]] = nrm[i]; + } + } + } + } { auto &ud = prim->userData(); ud.set2("version", vec3i(major, minor, revision)); @@ -774,6 +667,8 @@ ZENDEFNODE(NewFBXImportSkin, { {"readpath", "path"}, {"string", "rootName", ""}, {"bool", "ConvertUnits", "1"}, + {"string", "vectors", "nrm,"}, + {"bool", "CopyVectorsFromLoopsToVert", "1"}, }, { "prim", @@ -1383,6 +1278,48 @@ struct NewFBXBoneDeform : INode { nrms[i] = zeno::normalize(nrm ); } } + auto vectors_str = get_input2("vectors"); + std::vector vectors = zeno::split_str(vectors_str, ','); + for (auto vector: vectors) { + vector = zeno::trim_string(vector); + if (vector.size()) { + if (prim->verts.attr_is(vector)) { + auto &nrms = prim->verts.attr(vector); + for (auto i = 0; i < vert_count; i++) { + glm::mat4 matrix(0); + float 
w = 0; + for (auto j = 0; j < 4; j++) { + if (bi[i][j] < 0) { + continue; + } + matrix += matrixs[bi[i][j]] * bw[i][j]; + w += bw[i][j]; + } + matrix = matrix / w; + auto nrm = transform_nrm(matrix, nrms[i]); + nrms[i] = zeno::normalize(nrm); + } + } + if (prim->loops.attr_is(vector)) { + auto &nrms = prim->loops.attr(vector); + for (auto i = 0; i < prim->loops.size(); i++) { + auto vi = prim->loops[i]; + glm::mat4 matrix(0); + float w = 0; + for (auto j = 0; j < 4; j++) { + if (bi[vi][j] < 0) { + continue; + } + matrix += matrixs[bi[vi][j]] * bw[vi][j]; + w += bw[vi][j]; + } + matrix = matrix / w; + auto nrm = transform_nrm(matrix, nrms[i]); + nrms[i] = zeno::normalize(nrm); + } + } + } + } set_output("prim", prim); } @@ -1393,6 +1330,7 @@ ZENDEFNODE(NewFBXBoneDeform, { "GeometryToDeform", "RestPointTransforms", "DeformPointTransforms", + {"string", "vectors", "nrm,"}, }, { "prim", From b0f3585cf7d3ce64ed932bee0c5060d17ebbac70 Mon Sep 17 00:00:00 2001 From: littlemine Date: Fri, 2 Aug 2024 16:47:46 +0800 Subject: [PATCH 119/244] update zpcjit module --- projects/CUDA/zpc | 2 +- projects/CUDA/zpc_jit | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index fbdde50422..450a8f82be 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit fbdde50422a210e0094de52cf4e4ec19ffa78357 +Subproject commit 450a8f82be433cfcf2235f539e2a6d7851589f08 diff --git a/projects/CUDA/zpc_jit b/projects/CUDA/zpc_jit index 2321ffaeb0..b361e6824e 160000 --- a/projects/CUDA/zpc_jit +++ b/projects/CUDA/zpc_jit @@ -1 +1 @@ -Subproject commit 2321ffaeb0c05e26ed4e3ed82fb2d46de6bc634d +Subproject commit b361e6824ed194b0e782dc9426a08cb1713d9e59 From 13aa09cd4b4b84ba796cf0163972e078d49dbcd6 Mon Sep 17 00:00:00 2001 From: zhxx1987 Date: Fri, 2 Aug 2024 17:17:28 +0800 Subject: [PATCH 120/244] upd typo zpcjit typo fix --- projects/CUDA/zpc_jit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/projects/CUDA/zpc_jit b/projects/CUDA/zpc_jit index b361e6824e..447d42c048 160000 --- a/projects/CUDA/zpc_jit +++ b/projects/CUDA/zpc_jit @@ -1 +1 @@ -Subproject commit b361e6824ed194b0e782dc9426a08cb1713d9e59 +Subproject commit 447d42c04848622bfa0403ae1a457c2cfbbe6e3c From 6ab1be174026c08609e5151c12b527069e85655f Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 2 Aug 2024 21:56:38 +0800 Subject: [PATCH 121/244] NewFBXImportSkin --- projects/FBX/FBXSDK.cpp | 115 ++++++++++++++++++++++++++-------------- 1 file changed, 74 insertions(+), 41 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 360a25f041..409bbd546e 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -520,6 +520,56 @@ std::shared_ptr GetMesh(FbxNode* pNode) { return prim; } +void TraverseNodesToGetNames(FbxNode* pNode, std::vector &names) { + if (!pNode) return; + + FbxMesh* mesh = pNode->GetMesh(); + if (mesh) { + auto name = pNode->GetName(); + names.emplace_back(name); + } + + for (int i = 0; i < pNode->GetChildCount(); i++) { + TraverseNodesToGetNames(pNode->GetChild(i), names); + } +} + +void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared_ptr &prim) { + if (!pNode) return; + + FbxMesh* mesh = pNode->GetMesh(); + if (mesh) { + auto name = mesh->GetName(); + if (target_name == name) { + auto sub_prim = GetMesh(pNode); + if (sub_prim) { + prim = sub_prim; + } + return; + } + } + + for (int i = 0; i < pNode->GetChildCount(); i++) { + TraverseNodesToGetPrim(pNode->GetChild(i), target_name, prim); + } +} +void TraverseNodesToGetPrims(FbxNode* pNode, std::vector> &prims) { + if (!pNode) return; + + FbxMesh* mesh = pNode->GetMesh(); + if (mesh) { + auto name = mesh->GetName(); + auto sub_prim = GetMesh(pNode); + if (sub_prim) { + prims.push_back(sub_prim); + } + } + + for (int i = 0; i < pNode->GetChildCount(); i++) { + TraverseNodesToGetPrims(pNode->GetChild(i), prims); + } +} + struct NewFBXImportSkin : 
INode { virtual void apply() override { // Change the following filename to a suitable filename value. @@ -560,26 +610,11 @@ struct NewFBXImportSkin : INode { FbxNode* lRootNode = lScene->GetRootNode(); std::vector availableRootNames; if(lRootNode) { - for(int i = 0; i < lRootNode->GetChildCount(); i++) { - auto pNode = lRootNode->GetChild(i); - FbxMesh* pMesh = pNode->GetMesh(); - if (pMesh) { - std::string meshName = pNode->GetName(); - availableRootNames.emplace_back(meshName); - } - } - } - auto rootName = get_input2("rootName"); - if(lRootNode) { + TraverseNodesToGetNames(lRootNode, availableRootNames); + auto rootName = get_input2("rootName"); if (rootName.empty()) { std::vector> prims; - for(int i = 0; i < lRootNode->GetChildCount(); i++) { - auto pNode = lRootNode->GetChild(i); - auto sub_prim = GetMesh(pNode); - if (sub_prim) { - prims.push_back(sub_prim); - } - } + TraverseNodesToGetPrims(lRootNode, prims); std::map nameMappingGlobal; @@ -616,17 +651,7 @@ struct NewFBXImportSkin : INode { } } else { - for(int i = 0; i < lRootNode->GetChildCount(); i++) { - auto pNode = lRootNode->GetChild(i); - std::string nodeName = pNode->GetName(); - if (nodeName == rootName) { - auto sub_prim = GetMesh(pNode); - if (sub_prim) { - prim = sub_prim; - } - break; - } - } + TraverseNodesToGetPrim(lRootNode, rootName, prim); } } if (get_input2("ConvertUnits")) { @@ -666,7 +691,7 @@ ZENDEFNODE(NewFBXImportSkin, { { {"readpath", "path"}, {"string", "rootName", ""}, - {"bool", "ConvertUnits", "1"}, + {"bool", "ConvertUnits", "0"}, {"string", "vectors", "nrm,"}, {"bool", "CopyVectorsFromLoopsToVert", "1"}, }, @@ -714,8 +739,6 @@ struct NewFBXImportSkeleton : INode { // Note that we are not printing the root node because it should // not contain any attributes. 
auto prim = std::make_shared(); - auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); auto pose_count = lScene->GetPoseCount(); bool found_bind_pose = false; @@ -759,11 +782,14 @@ struct NewFBXImportSkeleton : INode { } std::vector bone_connects; for (int j = 1; j < pose->GetCount(); ++j) { - auto parent_name = pose->GetNode(j)->GetParent()->GetName(); - auto index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); - if (index < bone_names.size()) { - bone_connects.push_back(index); - bone_connects.push_back(j - 1); + auto parent = pose->GetNode(j)->GetParent(); + if (parent) { + auto parent_name = parent->GetName(); + auto index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); + if (index < bone_names.size()) { + bone_connects.push_back(index); + bone_connects.push_back(j - 1); + } } } { @@ -773,16 +799,22 @@ struct NewFBXImportSkeleton : INode { prim->polys[j] = {j * 2, 2}; } } - ud.set2("boneName_count", int(bone_names.size())); + prim->userData().set2("boneName_count", int(bone_names.size())); for (auto i = 0; i < bone_names.size(); i++) { - ud.set2(zeno::format("boneName_{}", i), bone_names[i]); + prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); } + break; } if (get_input2("ConvertUnits")) { for (auto & v: prim->verts) { v = v * 0.01; } + // todo : on matrix + } + { + auto &ud = prim->userData(); + ud.set2("version", vec3i(major, minor, revision)); } set_output("prim", prim); // Destroy the SDK manager and all the other objects it was handling. 
@@ -793,7 +825,7 @@ struct NewFBXImportSkeleton : INode { ZENDEFNODE(NewFBXImportSkeleton, { { {"readpath", "path"}, - {"bool", "ConvertUnits", "1"}, + {"bool", "ConvertUnits", "0"}, }, { "prim", @@ -964,6 +996,7 @@ struct NewFBXImportAnimation : INode { for (auto & v: prim->verts) { v = v * 0.01; } + // todo: on matrix } set_output("prim", prim); } @@ -975,7 +1008,7 @@ ZENDEFNODE(NewFBXImportAnimation, { {"string", "clipName", ""}, {"frameid"}, {"float", "fps", "25"}, - {"bool", "ConvertUnits", "1"}, + {"bool", "ConvertUnits", "0"}, }, { "prim", From 7c2955bf45fe620be54659f4343097f6fba49fc6 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 2 Aug 2024 22:23:38 +0800 Subject: [PATCH 122/244] NewFBXImportSkeleton --- projects/FBX/FBXSDK.cpp | 78 ++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 409bbd546e..68028b4d40 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -753,64 +753,92 @@ struct NewFBXImportSkeleton : INode { lSdkManager->CreateMissingBindPoses(lScene); } pose_count = lScene->GetPoseCount(); + + std::vector bone_names; + std::map parent_mapping; + std::vector poss; + std::vector transform_r0; + std::vector transform_r1; + std::vector transform_r2; for (auto i = 0; i < pose_count; i++) { auto pose = lScene->GetPose(i); if (pose == nullptr || !pose->IsBindPose()) { continue; } - std::string name = pose->GetName(); - prim->verts.resize(pose->GetCount() - 1); - std::vector bone_names; - auto &boneNames = prim->verts.add_attr("boneName"); - auto &transform_r0 = prim->verts.add_attr("transform_r0"); - auto &transform_r1 = prim->verts.add_attr("transform_r1"); - auto &transform_r2 = prim->verts.add_attr("transform_r2"); for (int j = 1; j < pose->GetCount(); ++j) { + std::string bone_name = pose->GetNode(j)->GetName(); + if (std::count(bone_names.begin(), bone_names.end(), bone_name)) { + continue; + } + 
FbxMatrix transformMatrix = pose->GetMatrix(j); auto t = transformMatrix.GetRow(3); - prim->verts[j - 1] = vec3f(t[0], t[1], t[2]); + poss.emplace_back(t[0], t[1], t[2]); auto r0 = transformMatrix.GetRow(0); auto r1 = transformMatrix.GetRow(1); auto r2 = transformMatrix.GetRow(2); - transform_r0[j - 1] = vec3f(r0[0], r0[1], r0[2]); - transform_r1[j - 1] = vec3f(r1[0], r1[1], r1[2]); - transform_r2[j - 1] = vec3f(r2[0], r2[1], r2[2]); + transform_r0.emplace_back(r0[0], r0[1], r0[2]); + transform_r1.emplace_back(r1[0], r1[1], r1[2]); + transform_r2.emplace_back(r2[0], r2[1], r2[2]); bone_names.emplace_back(pose->GetNode(j)->GetName()); - boneNames[j - 1] = j - 1; } - std::vector bone_connects; for (int j = 1; j < pose->GetCount(); ++j) { + auto self_name = pose->GetNode(j)->GetName(); auto parent = pose->GetNode(j)->GetParent(); if (parent) { auto parent_name = parent->GetName(); - auto index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); - if (index < bone_names.size()) { - bone_connects.push_back(index); - bone_connects.push_back(j - 1); - } + parent_mapping[self_name] = parent_name; } } - { - prim->loops.values = bone_connects; - prim->polys.resize(bone_connects.size() / 2); - for (auto j = 0; j < bone_connects.size() / 2; j++) { - prim->polys[j] = {j * 2, 2}; + } + { + prim->verts.resize(bone_names.size()); + prim->verts.values = poss; + prim->verts.add_attr("transform_r0") = transform_r0; + prim->verts.add_attr("transform_r1") = transform_r1; + prim->verts.add_attr("transform_r2") = transform_r2; + auto &boneNames = prim->verts.add_attr("boneName"); + std::iota(boneNames.begin(), boneNames.end(), 0); + + std::vector bone_connects; + for (auto bone_name: bone_names) { + if (parent_mapping.count(bone_name)) { + auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); + auto parent_name = parent_mapping[bone_name]; + auto parent_index = std::find(bone_names.begin(), bone_names.end(), 
parent_name) - bone_names.begin(); + if (self_index >= 0 && parent_index >= 0) { + bone_connects.push_back(parent_index); + bone_connects.push_back(self_index); + } } } + prim->loops.values = bone_connects; + prim->polys.resize(bone_connects.size() / 2); + for (auto j = 0; j < bone_connects.size() / 2; j++) { + prim->polys[j] = {j * 2, 2}; + } + prim->userData().set2("boneName_count", int(bone_names.size())); for (auto i = 0; i < bone_names.size(); i++) { prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); } - break; } if (get_input2("ConvertUnits")) { for (auto & v: prim->verts) { v = v * 0.01; } - // todo : on matrix + auto &transform_r0 = prim->verts.add_attr("transform_r0"); + auto &transform_r1 = prim->verts.add_attr("transform_r1"); + auto &transform_r2 = prim->verts.add_attr("transform_r2"); + for (auto i = 0; i < prim->verts.size(); i++) { + transform_r0[i][0] *= 0.01; + transform_r1[i][1] *= 0.01; + transform_r2[i][2] *= 0.01; + + } } { auto &ud = prim->userData(); From be1179dedfcb89d3ab91397b2a4b65e78a194510 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 2 Aug 2024 22:56:50 +0800 Subject: [PATCH 123/244] NewFBXImportAnimation --- projects/FBX/FBXSDK.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 68028b4d40..2189d9b0b8 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -445,6 +445,7 @@ std::shared_ptr GetMesh(FbxNode* pNode) { std::vector bone_names; // Iterate over each cluster (bone) + // TODO: pick 4 max weight for (int j = 0; j < pSkin->GetClusterCount(); ++j) { FbxCluster* pCluster = pSkin->GetCluster(j); @@ -837,7 +838,6 @@ struct NewFBXImportSkeleton : INode { transform_r0[i][0] *= 0.01; transform_r1[i][1] *= 0.01; transform_r2[i][2] *= 0.01; - } } { @@ -933,7 +933,8 @@ struct NewFBXImportAnimation : INode { int stack_index = int(std::find(clip_names.begin(), clip_names.end(), clip_name) - 
clip_names.begin()); if (stack_index == clip_names.size()) { - zeno::log_error("FBX: Can not find clip name"); + zeno::log_info("FBX: Can not find default clip name, use first"); + stack_index = 0; } // zeno::log_info("stack_index: {}", stack_index); @@ -1024,7 +1025,14 @@ struct NewFBXImportAnimation : INode { for (auto & v: prim->verts) { v = v * 0.01; } - // todo: on matrix + auto &transform_r0 = prim->verts.add_attr("transform_r0"); + auto &transform_r1 = prim->verts.add_attr("transform_r1"); + auto &transform_r2 = prim->verts.add_attr("transform_r2"); + for (auto i = 0; i < prim->verts.size(); i++) { + transform_r0[i][0] *= 0.01; + transform_r1[i][1] *= 0.01; + transform_r2[i][2] *= 0.01; + } } set_output("prim", prim); } From 22a83e81cae207c35983058975cba73bca63bacd Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 2 Aug 2024 23:05:01 +0800 Subject: [PATCH 124/244] NewFBXBoneDeform --- projects/FBX/FBXSDK.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 2189d9b0b8..2bfdc03e28 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1249,7 +1249,7 @@ struct NewFBXBoneDeform : INode { auto index = std::find(_new.begin(), _new.end(), old[i]) - _new.begin(); if (index == _new.size()) { index = -1; - zeno::log_error("connot find bone: {}, {}", i, old[i]); + zeno::log_info("connot find bone: {}, {}", i, old[i]); } mapping.push_back(index); } @@ -1304,8 +1304,12 @@ struct NewFBXBoneDeform : INode { std::vector matrixs; matrixs.reserve(geometryToDeformBoneNames.size()); for (auto i = 0; i < geometryToDeformBoneNames.size(); i++) { - auto res_inv_matrix = restPointTransformsInv[restPointTransformsBoneMapping[i]]; - auto deform_matrix = deformPointTransforms[deformPointTransformsBoneMapping[i]]; + glm::mat4 res_inv_matrix = glm::mat4(1); + glm::mat4 deform_matrix = glm::mat4(1); + if (restPointTransformsBoneMapping[i] >= 0 && 
deformPointTransformsBoneMapping[i] >= 0) { + res_inv_matrix = restPointTransformsInv[restPointTransformsBoneMapping[i]]; + deform_matrix = deformPointTransforms[deformPointTransformsBoneMapping[i]]; + } auto matrix = deform_matrix * res_inv_matrix; matrixs.push_back(matrix); } From 0ea6d11fee34b6329401b5904aca058e73a9470b Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 3 Aug 2024 01:15:08 +0800 Subject: [PATCH 125/244] pick max 4 boneweight --- projects/FBX/FBXSDK.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 2bfdc03e28..139c3ea986 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -443,9 +443,8 @@ std::shared_ptr GetMesh(FbxNode* pNode) { FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); std::vector bone_names; - // Iterate over each cluster (bone) - // TODO: pick 4 max weight + std::vector>> bone_weight(numVertices); for (int j = 0; j < pSkin->GetClusterCount(); ++j) { FbxCluster* pCluster = pSkin->GetCluster(j); @@ -460,13 +459,22 @@ std::shared_ptr GetMesh(FbxNode* pNode) { bone_names.emplace_back(pBoneNode->GetName()); for (int k = 0; k < numIndices; ++k) { - for (auto l = 0; l < 4; l++) { - if (bi[indices[k]][l] == -1) { - bi[indices[k]][l] = j; - bw[indices[k]][l] = weights[k]; - break; - } - } + bone_weight[indices[k]].emplace_back(j, weights[k]); + } + } + for (auto i = 0; i < prim->verts.size(); i++) { + if (bone_weight[i].size() > 4) { + std::sort(bone_weight[i].begin(), bone_weight[i].end(), [](const std::pair& a, const std::pair& b) { + return a.second > b.second; + }); + bone_weight[i].resize(4); + } + for (auto j = 0; j < bone_weight[i].size(); j++) { + bi[i][j] = bone_weight[i][j].first; + bw[i][j] = bone_weight[i][j].second; + } + if (bone_weight[i].size() == 4) { + bw[i] = zeno::normalize(bw[i]); } } ud.set2("boneName_count", int(bone_names.size())); From 
37f1098d0ff263a328c1e47c8760719cf97795b2 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 3 Aug 2024 02:32:04 +0800 Subject: [PATCH 126/244] all boneweight --- projects/FBX/FBXSDK.cpp | 103 ++++++++++++++++++++-------------------- 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 139c3ea986..034d73a55f 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -436,10 +436,6 @@ std::shared_ptr GetMesh(FbxNode* pNode) { // zeno::log_info("pMesh->GetDeformerCount(FbxDeformer::eSkin) {}", pMesh->GetDeformerCount(FbxDeformer::eSkin)); auto &ud = prim->userData(); if (pMesh->GetDeformerCount(FbxDeformer::eSkin)) { - auto &bi = prim->verts.add_attr("boneName"); - std::fill(bi.begin(), bi.end(), vec4i(-1, -1, -1, -1)); - auto &bw = prim->verts.add_attr("boneWeight"); - std::fill(bw.begin(), bw.end(), vec4f(-1.0, -1.0, -1.0, -1.0)); FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); std::vector bone_names; @@ -462,21 +458,23 @@ std::shared_ptr GetMesh(FbxNode* pNode) { bone_weight[indices[k]].emplace_back(j, weights[k]); } } + int maxnum_boneWeight = 0; + for (auto i = 0; i < prim->verts.size(); i++) { + maxnum_boneWeight = zeno::max(maxnum_boneWeight, bone_weight[i].size()); + } + for (auto i = 0; i < maxnum_boneWeight; i++) { + auto &bi = prim->verts.add_attr(zeno::format("boneName_{}", i)); + std::fill(bi.begin(), bi.end(), -1); + auto &bw = prim->verts.add_attr(zeno::format("boneWeight_{}", i)); + std::fill(bw.begin(), bw.end(), -1.0); + } for (auto i = 0; i < prim->verts.size(); i++) { - if (bone_weight[i].size() > 4) { - std::sort(bone_weight[i].begin(), bone_weight[i].end(), [](const std::pair& a, const std::pair& b) { - return a.second > b.second; - }); - bone_weight[i].resize(4); - } for (auto j = 0; j < bone_weight[i].size(); j++) { - bi[i][j] = bone_weight[i][j].first; - bw[i][j] = bone_weight[i][j].second; - } - if (bone_weight[i].size() == 
4) { - bw[i] = zeno::normalize(bw[i]); + prim->verts.attr(format("boneName_{}", j))[i] = bone_weight[i][j].first; + prim->verts.attr(format("boneWeight_{}", j))[i] = bone_weight[i][j].second; } } + ud.set2("maxnum_boneWeight", int(maxnum_boneWeight)); ud.set2("boneName_count", int(bone_names.size())); for (auto i = 0; i < bone_names.size(); i++) { ud.set2(zeno::format("boneName_{}", i), bone_names[i]); @@ -628,6 +626,10 @@ struct NewFBXImportSkin : INode { std::map nameMappingGlobal; std::vector prims_ptr; + int maxnum_boneWeight = 0; + for (auto prim: prims) { + maxnum_boneWeight = zeno::max(maxnum_boneWeight, prim->userData().get2("maxnum_boneWeight")); + } for (auto prim: prims) { prims_ptr.push_back(prim.get()); std::vector nameMapping; @@ -644,17 +646,26 @@ struct NewFBXImportSkin : INode { for (auto i = 0; i < boneName_count; i++) { prim->userData().del(zeno::format("boneName_{}", i)); } - auto &bis = prim->verts.add_attr("boneName"); - for (auto &bi: bis) { - for (auto i = 0; i < 4; i++) { - if (bi[i] != -1) { - bi[i] = nameMapping[bi[i]]; + for (auto j = 0; j < maxnum_boneWeight; j++) { + if (!prim->verts.attr_is(format("boneName_{}", j))) { + auto &bi = prim->verts.add_attr(zeno::format("boneName_{}", j)); + std::fill(bi.begin(), bi.end(), -1); + auto &bw = prim->verts.add_attr(zeno::format("boneWeight_{}", j)); + std::fill(bw.begin(), bw.end(), -1.0); + } + else { + auto &bi = prim->verts.attr(zeno::format("boneName_{}", j)); + for (auto &_bi: bi) { + if (_bi != -1) { + _bi = nameMapping[_bi]; + } } } } } prim = primMerge(prims_ptr); prim->userData().set2("boneName_count", int(nameMappingGlobal.size())); + prim->userData().set2("maxnum_boneWeight", maxnum_boneWeight); for (auto [key, value]: nameMappingGlobal) { prim->userData().set2(zeno::format("boneName_{}", value), key); } @@ -1324,41 +1335,29 @@ struct NewFBXBoneDeform : INode { auto prim = std::dynamic_pointer_cast(geometryToDeform->clone()); - auto &bi = prim->verts.add_attr("boneName"); - auto &bw 
= prim->verts.add_attr("boneWeight"); + int maxnum_boneWeight = prim->userData().get2("maxnum_boneWeight"); + std::vector*> bi; + std::vector*> bw; + for (auto i = 0; i < maxnum_boneWeight; i++) { + bi.push_back(&prim->verts.add_attr(format("boneName_{}", i))); + bw.push_back(&prim->verts.add_attr(format("boneWeight_{}", i))); + } size_t vert_count = prim->verts.size(); #pragma omp parallel for for (auto i = 0; i < vert_count; i++) { auto opos = prim->verts[i]; vec3f pos = {}; float w = 0; - for (auto j = 0; j < 4; j++) { - if (bi[i][j] < 0) { + for (auto j = 0; j < maxnum_boneWeight; j++) { + if (bi[j]->operator[](i) < 0) { continue; } - auto matrix = matrixs[bi[i][j]]; - pos += transform_pos(matrix, opos) * bw[i][j]; - w += bw[i][j]; + auto matrix = matrixs[bi[j]->operator[](i)]; + pos += transform_pos(matrix, opos) * bw[j]->operator[](i); + w += bw[j]->operator[](i); } prim->verts[i] = pos / w; } - if (prim->verts.attr_is("nrm")) { - auto &nrms = prim->verts.attr("nrm"); - for (auto i = 0; i < vert_count; i++) { - glm::mat4 matrix(0); - float w = 0; - for (auto j = 0; j < 4; j++) { - if (bi[i][j] < 0) { - continue; - } - matrix += matrixs[bi[i][j]] * bw[i][j]; - w += bw[i][j]; - } - matrix = matrix / w; - auto nrm = transform_nrm(matrix, nrms[i]); - nrms[i] = zeno::normalize(nrm ); - } - } auto vectors_str = get_input2("vectors"); std::vector vectors = zeno::split_str(vectors_str, ','); for (auto vector: vectors) { @@ -1369,12 +1368,12 @@ struct NewFBXBoneDeform : INode { for (auto i = 0; i < vert_count; i++) { glm::mat4 matrix(0); float w = 0; - for (auto j = 0; j < 4; j++) { - if (bi[i][j] < 0) { + for (auto j = 0; j < maxnum_boneWeight; j++) { + if (bi[j]->operator[](i) < 0) { continue; } - matrix += matrixs[bi[i][j]] * bw[i][j]; - w += bw[i][j]; + matrix += matrixs[bi[j]->operator[](i)] * bw[j]->operator[](i); + w += bw[j]->operator[](i); } matrix = matrix / w; auto nrm = transform_nrm(matrix, nrms[i]); @@ -1387,12 +1386,12 @@ struct NewFBXBoneDeform : INode 
{ auto vi = prim->loops[i]; glm::mat4 matrix(0); float w = 0; - for (auto j = 0; j < 4; j++) { - if (bi[vi][j] < 0) { + for (auto j = 0; j < maxnum_boneWeight; j++) { + if (bi[j]->operator[](vi) < 0) { continue; } - matrix += matrixs[bi[vi][j]] * bw[vi][j]; - w += bw[vi][j]; + matrix += matrixs[bi[j]->operator[](vi)] * bw[j]->operator[](vi); + w += bw[j]->operator[](vi); } matrix = matrix / w; auto nrm = transform_nrm(matrix, nrms[i]); From 9ab719e981f88524817ab99dd8a68c8e2bc0eef4 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 3 Aug 2024 02:40:12 +0800 Subject: [PATCH 127/244] fix --- projects/FBX/FBXSDK.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 034d73a55f..df08962dc3 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -854,9 +854,9 @@ struct NewFBXImportSkeleton : INode { auto &transform_r1 = prim->verts.add_attr("transform_r1"); auto &transform_r2 = prim->verts.add_attr("transform_r2"); for (auto i = 0; i < prim->verts.size(); i++) { - transform_r0[i][0] *= 0.01; - transform_r1[i][1] *= 0.01; - transform_r2[i][2] *= 0.01; + transform_r0[i] *= 0.01; + transform_r1[i] *= 0.01; + transform_r2[i] *= 0.01; } } { @@ -1048,9 +1048,9 @@ struct NewFBXImportAnimation : INode { auto &transform_r1 = prim->verts.add_attr("transform_r1"); auto &transform_r2 = prim->verts.add_attr("transform_r2"); for (auto i = 0; i < prim->verts.size(); i++) { - transform_r0[i][0] *= 0.01; - transform_r1[i][1] *= 0.01; - transform_r2[i][2] *= 0.01; + transform_r0[i] *= 0.01; + transform_r1[i] *= 0.01; + transform_r2[i] *= 0.01; } } set_output("prim", prim); From 25fea6522437fcb147075818e34a90da35e9fc97 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Sat, 3 Aug 2024 15:54:31 +0800 Subject: [PATCH 128/244] abc skipInvisibleObject --- projects/Alembic/ABCCommon.h | 1 + projects/Alembic/AlembicVAT.cpp | 4 +- 
projects/Alembic/GetAlembicPrim.cpp | 2 +- projects/Alembic/ReadAlembic.cpp | 127 +++++++++++++++------------- 4 files changed, 70 insertions(+), 64 deletions(-) diff --git a/projects/Alembic/ABCCommon.h b/projects/Alembic/ABCCommon.h index f71fcfa8b3..0db4219060 100644 --- a/projects/Alembic/ABCCommon.h +++ b/projects/Alembic/ABCCommon.h @@ -35,6 +35,7 @@ extern void traverseABC( std::string path, const TimeAndSamplesMap & iTimeMap, ObjectVisibility parent_visible, + bool skipInvisibleObject, bool outOfRangeAsEmpty ); diff --git a/projects/Alembic/AlembicVAT.cpp b/projects/Alembic/AlembicVAT.cpp index 80da750420..b502b11b33 100644 --- a/projects/Alembic/AlembicVAT.cpp +++ b/projects/Alembic/AlembicVAT.cpp @@ -203,7 +203,7 @@ struct AlembicToSoftBodyVAT: public INode { const int32_t frameIndex = frameEnd - idx - 1; auto abctree = std::make_shared(); auto prims = std::make_shared(); - traverseABC(obj, *abctree, idx, read_done, false, "", timeMap, ObjectVisibility::kVisibilityDeferred, false); + traverseABC(obj, *abctree, idx, read_done, false, "", timeMap, ObjectVisibility::kVisibilityDeferred, false, false); if (use_xform) { prims = get_xformed_prims(abctree); } else { @@ -403,7 +403,7 @@ struct AlembicToDynamicRemeshVAT : public INode { const int32_t frameIndex = frameEnd - idx - 1; auto abctree = std::make_shared(); auto prims = std::make_shared(); - traverseABC(obj, *abctree, idx, read_done, false, "", timeMap, ObjectVisibility::kVisibilityDeferred, false); + traverseABC(obj, *abctree, idx, read_done, false, "", timeMap, ObjectVisibility::kVisibilityDeferred, false, false); if (use_xform) { prims = get_xformed_prims(abctree); } else { diff --git a/projects/Alembic/GetAlembicPrim.cpp b/projects/Alembic/GetAlembicPrim.cpp index 636e88411a..a8304e82a8 100644 --- a/projects/Alembic/GetAlembicPrim.cpp +++ b/projects/Alembic/GetAlembicPrim.cpp @@ -433,7 +433,7 @@ struct ImportAlembicPrim : INode { auto obj = archive.getTop(); bool read_face_set = 
get_input2("read_face_set"); bool outOfRangeAsEmpty = get_input2("outOfRangeAsEmpty"); - traverseABC(obj, *abctree, frameid, read_done, read_face_set, "", timeMap, ObjectVisibility::kVisibilityDeferred, outOfRangeAsEmpty); + traverseABC(obj, *abctree, frameid, read_done, read_face_set, "", timeMap, ObjectVisibility::kVisibilityDeferred, false, outOfRangeAsEmpty); } bool use_xform = get_input2("use_xform"); auto index = get_input2("index"); diff --git a/projects/Alembic/ReadAlembic.cpp b/projects/Alembic/ReadAlembic.cpp index 9f2a75c8cf..3cba387f0b 100644 --- a/projects/Alembic/ReadAlembic.cpp +++ b/projects/Alembic/ReadAlembic.cpp @@ -1006,6 +1006,7 @@ void traverseABC( std::string path, const TimeAndSamplesMap & iTimeMap, ObjectVisibility parent_visible, + bool skipInvisibleObject, bool outOfRangeAsEmpty ) { { @@ -1037,66 +1038,67 @@ void traverseABC( else { tree.visible = parent_visible; } + if (!(tree.visible == ObjectVisibility::kVisibilityHidden && skipInvisibleObject)) { + if (Alembic::AbcGeom::IPolyMesh::matches(md)) { + if (!read_done) { + log_debug("[alembic] found a mesh [{}]", obj.getName()); + } - if (Alembic::AbcGeom::IPolyMesh::matches(md)) { - if (!read_done) { - log_debug("[alembic] found a mesh [{}]", obj.getName()); - } - - Alembic::AbcGeom::IPolyMesh meshy(obj); - auto &mesh = meshy.getSchema(); - tree.prim = foundABCMesh(mesh, frameid, read_done, read_face_set, outOfRangeAsEmpty, obj.getName()); - tree.prim->userData().set2("_abc_name", obj.getName()); - prim_set_abcpath(tree.prim.get(), path); - } else if (Alembic::AbcGeom::IXformSchema::matches(md)) { - if (!read_done) { - log_debug("[alembic] found a Xform [{}]", obj.getName()); - } - Alembic::AbcGeom::IXform xfm(obj); - auto &cam_sch = xfm.getSchema(); - tree.xform = foundABCXform(cam_sch, frameid); - } else if (Alembic::AbcGeom::ICameraSchema::matches(md)) { - if (!read_done) { - log_debug("[alembic] found a Camera [{}]", obj.getName()); - } - Alembic::AbcGeom::ICamera cam(obj); - auto 
&cam_sch = cam.getSchema(); - tree.camera_info = foundABCCamera(cam_sch, frameid); - } else if(Alembic::AbcGeom::IPointsSchema::matches(md)) { - if (!read_done) { - log_debug("[alembic] found points [{}]", obj.getName()); - } - Alembic::AbcGeom::IPoints points(obj); - auto &points_sch = points.getSchema(); - tree.prim = foundABCPoints(points_sch, frameid, read_done, outOfRangeAsEmpty); - tree.prim->userData().set2("_abc_name", obj.getName()); - prim_set_abcpath(tree.prim.get(), path); - tree.prim->userData().set2("faceset_count", 0); - } else if(Alembic::AbcGeom::ICurvesSchema::matches(md)) { - if (!read_done) { - log_debug("[alembic] found curves [{}]", obj.getName()); - } - Alembic::AbcGeom::ICurves curves(obj); - auto &curves_sch = curves.getSchema(); - tree.prim = foundABCCurves(curves_sch, frameid, read_done, outOfRangeAsEmpty); - tree.prim->userData().set2("_abc_name", obj.getName()); - prim_set_abcpath(tree.prim.get(), path); - tree.prim->userData().set2("faceset_count", 0); - } else if (Alembic::AbcGeom::ISubDSchema::matches(md)) { - if (!read_done) { - log_debug("[alembic] found SubD [{}]", obj.getName()); - } - Alembic::AbcGeom::ISubD subd(obj); - auto &subd_sch = subd.getSchema(); - tree.prim = foundABCSubd(subd_sch, frameid, read_done, read_face_set, outOfRangeAsEmpty); - tree.prim->userData().set2("_abc_name", obj.getName()); - prim_set_abcpath(tree.prim.get(), path); - } - if (tree.prim) { - tree.prim->userData().set2("vis", tree.visible); - if (tree.visible == 0) { - for (auto i = 0; i < tree.prim->verts.size(); i++) { - tree.prim->verts[i] = {}; + Alembic::AbcGeom::IPolyMesh meshy(obj); + auto &mesh = meshy.getSchema(); + tree.prim = foundABCMesh(mesh, frameid, read_done, read_face_set, outOfRangeAsEmpty, obj.getName()); + tree.prim->userData().set2("_abc_name", obj.getName()); + prim_set_abcpath(tree.prim.get(), path); + } else if (Alembic::AbcGeom::IXformSchema::matches(md)) { + if (!read_done) { + log_debug("[alembic] found a Xform [{}]", 
obj.getName()); + } + Alembic::AbcGeom::IXform xfm(obj); + auto &cam_sch = xfm.getSchema(); + tree.xform = foundABCXform(cam_sch, frameid); + } else if (Alembic::AbcGeom::ICameraSchema::matches(md)) { + if (!read_done) { + log_debug("[alembic] found a Camera [{}]", obj.getName()); + } + Alembic::AbcGeom::ICamera cam(obj); + auto &cam_sch = cam.getSchema(); + tree.camera_info = foundABCCamera(cam_sch, frameid); + } else if(Alembic::AbcGeom::IPointsSchema::matches(md)) { + if (!read_done) { + log_debug("[alembic] found points [{}]", obj.getName()); + } + Alembic::AbcGeom::IPoints points(obj); + auto &points_sch = points.getSchema(); + tree.prim = foundABCPoints(points_sch, frameid, read_done, outOfRangeAsEmpty); + tree.prim->userData().set2("_abc_name", obj.getName()); + prim_set_abcpath(tree.prim.get(), path); + tree.prim->userData().set2("faceset_count", 0); + } else if(Alembic::AbcGeom::ICurvesSchema::matches(md)) { + if (!read_done) { + log_debug("[alembic] found curves [{}]", obj.getName()); + } + Alembic::AbcGeom::ICurves curves(obj); + auto &curves_sch = curves.getSchema(); + tree.prim = foundABCCurves(curves_sch, frameid, read_done, outOfRangeAsEmpty); + tree.prim->userData().set2("_abc_name", obj.getName()); + prim_set_abcpath(tree.prim.get(), path); + tree.prim->userData().set2("faceset_count", 0); + } else if (Alembic::AbcGeom::ISubDSchema::matches(md)) { + if (!read_done) { + log_debug("[alembic] found SubD [{}]", obj.getName()); + } + Alembic::AbcGeom::ISubD subd(obj); + auto &subd_sch = subd.getSchema(); + tree.prim = foundABCSubd(subd_sch, frameid, read_done, read_face_set, outOfRangeAsEmpty); + tree.prim->userData().set2("_abc_name", obj.getName()); + prim_set_abcpath(tree.prim.get(), path); + } + if (tree.prim) { + tree.prim->userData().set2("vis", tree.visible); + if (tree.visible == 0) { + for (auto i = 0; i < tree.prim->verts.size(); i++) { + tree.prim->verts[i] = {}; + } } } } @@ -1116,7 +1118,7 @@ void traverseABC( Alembic::AbcGeom::IObject 
child(obj, name); auto childTree = std::make_shared(); - traverseABC(child, *childTree, frameid, read_done, read_face_set, path, iTimeMap, tree.visible, outOfRangeAsEmpty); + traverseABC(child, *childTree, frameid, read_done, read_face_set, path, iTimeMap, tree.visible, skipInvisibleObject, outOfRangeAsEmpty); tree.children.push_back(std::move(childTree)); } } @@ -1172,6 +1174,7 @@ struct ReadAlembic : INode { auto obj = archive.getTop(); bool read_face_set = get_input2("read_face_set"); bool outOfRangeAsEmpty = get_input2("outOfRangeAsEmpty"); + bool skipInvisibleObject = get_input2("skipInvisibleObject"); Alembic::Util::uint32_t numSamplings = archive.getNumTimeSamplings(); TimeAndSamplesMap timeMap; for (Alembic::Util::uint32_t s = 0; s < numSamplings; ++s) { @@ -1179,7 +1182,8 @@ struct ReadAlembic : INode { archive.getMaxNumSamplesForTimeSamplingIndex(s)); } - traverseABC(obj, *abctree, frameid, read_done, read_face_set, "", timeMap, ObjectVisibility::kVisibilityDeferred, outOfRangeAsEmpty); + traverseABC(obj, *abctree, frameid, read_done, read_face_set, "", timeMap, ObjectVisibility::kVisibilityDeferred, + skipInvisibleObject, outOfRangeAsEmpty); read_done = true; usedPath = path; } @@ -1207,6 +1211,7 @@ ZENDEFNODE(ReadAlembic, { {"readpath", "path"}, {"bool", "read_face_set", "1"}, {"bool", "outOfRangeAsEmpty", "0"}, + {"bool", "skipInvisibleObject", "1"}, {"frameid"}, }, { From 2facf47e67d086244a84182f77e8e584eb75fc69 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 5 Aug 2024 15:38:59 +0800 Subject: [PATCH 129/244] not multi read fbx --- projects/FBX/FBXSDK.cpp | 210 ++++++++++++++-------------------------- 1 file changed, 70 insertions(+), 140 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index df08962dc3..262ece24ba 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -298,6 +298,61 @@ ZENDEFNODE(FBXSDKVisibility, }); namespace zeno { +struct FBXObject : PrimitiveObject { + FbxManager* 
lSdkManager = nullptr; + FbxScene* lScene = nullptr; +}; + +struct ReadFBXFile: INode { + virtual void apply() override { + // Change the following filename to a suitable filename value. + auto lFilename = get_input2("path"); + + // Initialize the SDK manager. This object handles all our memory management. + FbxManager* lSdkManager = FbxManager::Create(); + + // Create the IO settings object. + FbxIOSettings *ios = FbxIOSettings::Create(lSdkManager, IOSROOT); + lSdkManager->SetIOSettings(ios); + + // Create an importer using the SDK manager. + FbxImporter* lImporter = FbxImporter::Create(lSdkManager,""); + + // Use the first argument as the filename for the importer. + if(!lImporter->Initialize(lFilename.c_str(), -1, lSdkManager->GetIOSettings())) { + printf("Call to FbxImporter::Initialize() failed.\n"); + printf("Error returned: %s\n\n", lImporter->GetStatus().GetErrorString()); + exit(-1); + } + int major, minor, revision; + lImporter->GetFileVersion(major, minor, revision); + auto fbx_object = std::make_shared(); + fbx_object->lSdkManager = lSdkManager; + // Create a new scene so that it can be populated by the imported file. + fbx_object->lScene = FbxScene::Create(lSdkManager,"myScene"); + + // Import the contents of the file into the scene. + lImporter->Import(fbx_object->lScene); + + // The file is imported; so get rid of the importer. + lImporter->Destroy(); + fbx_object->userData().set2("version", vec3i(major, minor, revision)); + + set_output("fbx_object", std::move(fbx_object)); + } +}; + +ZENDEFNODE(ReadFBXFile, { + { + {"readpath", "path"}, + }, + { + "fbx_object", + }, + {}, + {"FBX"}, +}); + /** * Return a string-based representation based on the attribute type. */ @@ -579,36 +634,8 @@ void TraverseNodesToGetPrims(FbxNode* pNode, std::vector("path"); - - // Initialize the SDK manager. This object handles all our memory management. - FbxManager* lSdkManager = FbxManager::Create(); - - // Create the IO settings object. 
- FbxIOSettings *ios = FbxIOSettings::Create(lSdkManager, IOSROOT); - lSdkManager->SetIOSettings(ios); - - // Create an importer using the SDK manager. - FbxImporter* lImporter = FbxImporter::Create(lSdkManager,""); - - // Use the first argument as the filename for the importer. - if(!lImporter->Initialize(lFilename.c_str(), -1, lSdkManager->GetIOSettings())) { - printf("Call to FbxImporter::Initialize() failed.\n"); - printf("Error returned: %s\n\n", lImporter->GetStatus().GetErrorString()); - exit(-1); - } - int major, minor, revision; - lImporter->GetFileVersion(major, minor, revision); - - // Create a new scene so that it can be populated by the imported file. - FbxScene* lScene = FbxScene::Create(lSdkManager,"myScene"); - - // Import the contents of the file into the scene. - lImporter->Import(lScene); - - // The file is imported; so get rid of the importer. - lImporter->Destroy(); + auto fbx_object = get_input2("fbx_object"); + auto lScene = fbx_object->lScene; // Print the nodes of the scene and their attributes recursively. // Note that we are not printing the root node because it should @@ -695,21 +722,18 @@ struct NewFBXImportSkin : INode { } { auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); ud.set2("AvailableRootName_count", int(availableRootNames.size())); for (int i = 0; i < availableRootNames.size(); i++) { ud.set2(format("AvailableRootName_{}", i), availableRootNames[i]); } } set_output("prim", prim); - // Destroy the SDK manager and all the other objects it was handling. - lSdkManager->Destroy(); } }; ZENDEFNODE(NewFBXImportSkin, { { - {"readpath", "path"}, + "fbx_object", {"string", "rootName", ""}, {"bool", "ConvertUnits", "0"}, {"string", "vectors", "nrm,"}, @@ -724,36 +748,9 @@ ZENDEFNODE(NewFBXImportSkin, { struct NewFBXImportSkeleton : INode { virtual void apply() override { - // Change the following filename to a suitable filename value. 
- auto lFilename = get_input2("path"); - - // Initialize the SDK manager. This object handles all our memory management. - FbxManager* lSdkManager = FbxManager::Create(); - - // Create the IO settings object. - FbxIOSettings *ios = FbxIOSettings::Create(lSdkManager, IOSROOT); - lSdkManager->SetIOSettings(ios); - - // Create an importer using the SDK manager. - FbxImporter* lImporter = FbxImporter::Create(lSdkManager,""); - - // Use the first argument as the filename for the importer. - if(!lImporter->Initialize(lFilename.c_str(), -1, lSdkManager->GetIOSettings())) { - printf("Call to FbxImporter::Initialize() failed.\n"); - printf("Error returned: %s\n\n", lImporter->GetStatus().GetErrorString()); - exit(-1); - } - int major, minor, revision; - lImporter->GetFileVersion(major, minor, revision); - - // Create a new scene so that it can be populated by the imported file. - FbxScene* lScene = FbxScene::Create(lSdkManager,"myScene"); - - // Import the contents of the file into the scene. - lImporter->Import(lScene); - - // The file is imported; so get rid of the importer. - lImporter->Destroy(); + auto fbx_object = get_input2("fbx_object"); + auto lSdkManager = fbx_object->lSdkManager; + auto lScene = fbx_object->lScene; // Print the nodes of the scene and their attributes recursively. // Note that we are not printing the root node because it should @@ -859,19 +856,13 @@ struct NewFBXImportSkeleton : INode { transform_r2[i] *= 0.01; } } - { - auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); - } set_output("prim", prim); - // Destroy the SDK manager and all the other objects it was handling. 
- lSdkManager->Destroy(); } }; ZENDEFNODE(NewFBXImportSkeleton, { { - {"readpath", "path"}, + "fbx_object", {"bool", "ConvertUnits", "0"}, }, { @@ -894,45 +885,15 @@ struct NewFBXImportAnimation : INode { FbxTime curTime; // The time for each key in the animation curve(s) curTime.SetSecondDouble(t); // Starting time - // Change the following filename to a suitable filename value. - auto lFilename = get_input2("path"); - - // Initialize the SDK manager. This object handles all our memory management. - FbxManager* lSdkManager = FbxManager::Create(); - - // Create the IO settings object. - FbxIOSettings *ios = FbxIOSettings::Create(lSdkManager, IOSROOT); - lSdkManager->SetIOSettings(ios); - // Destroy the SDK manager and all the other objects it was handling. - zeno::scope_exit sp([=]() { lSdkManager->Destroy(); }); - - // Create an importer using the SDK manager. - FbxImporter* lImporter = FbxImporter::Create(lSdkManager,""); - - // Use the first argument as the filename for the importer. - if(!lImporter->Initialize(lFilename.c_str(), -1, lSdkManager->GetIOSettings())) { - printf("Call to FbxImporter::Initialize() failed.\n"); - printf("Error returned: %s\n\n", lImporter->GetStatus().GetErrorString()); - exit(-1); - } - int major, minor, revision; - lImporter->GetFileVersion(major, minor, revision); - - // Create a new scene so that it can be populated by the imported file. - FbxScene* lScene = FbxScene::Create(lSdkManager,"myScene"); - - // Import the contents of the file into the scene. - lImporter->Import(lScene); - - // The file is imported; so get rid of the importer. - lImporter->Destroy(); + auto fbx_object = get_input2("fbx_object"); + auto lSdkManager = fbx_object->lSdkManager; + auto lScene = fbx_object->lScene; // Print the nodes of the scene and their attributes recursively. // Note that we are not printing the root node because it should // not contain any attributes. 
auto prim = std::make_shared(); auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); FbxArray animationStackNames; std::vector clip_names; @@ -1059,7 +1020,7 @@ struct NewFBXImportAnimation : INode { ZENDEFNODE(NewFBXImportAnimation, { { - {"readpath", "path"}, + "fbx_object", {"string", "clipName", ""}, {"frameid"}, {"float", "fps", "25"}, @@ -1084,46 +1045,15 @@ struct NewFBXImportCamera : INode { float t = float(frameid) / fps; FbxTime curTime; // The time for each key in the animation curve(s) curTime.SetSecondDouble(t); // Starting time - - // Change the following filename to a suitable filename value. - auto lFilename = get_input2("path"); - - // Initialize the SDK manager. This object handles all our memory management. - FbxManager* lSdkManager = FbxManager::Create(); - - // Create the IO settings object. - FbxIOSettings *ios = FbxIOSettings::Create(lSdkManager, IOSROOT); - lSdkManager->SetIOSettings(ios); - // Destroy the SDK manager and all the other objects it was handling. - zeno::scope_exit sp([=]() { lSdkManager->Destroy(); }); - - // Create an importer using the SDK manager. - FbxImporter* lImporter = FbxImporter::Create(lSdkManager,""); - - // Use the first argument as the filename for the importer. - if(!lImporter->Initialize(lFilename.c_str(), -1, lSdkManager->GetIOSettings())) { - printf("Call to FbxImporter::Initialize() failed.\n"); - printf("Error returned: %s\n\n", lImporter->GetStatus().GetErrorString()); - exit(-1); - } - int major, minor, revision; - lImporter->GetFileVersion(major, minor, revision); - - // Create a new scene so that it can be populated by the imported file. - FbxScene* lScene = FbxScene::Create(lSdkManager,"myScene"); - - // Import the contents of the file into the scene. - lImporter->Import(lScene); - - // The file is imported; so get rid of the importer. 
- lImporter->Destroy(); + auto fbx_object = get_input2("fbx_object"); + auto lSdkManager = fbx_object->lSdkManager; + auto lScene = fbx_object->lScene; // Print the nodes of the scene and their attributes recursively. // Note that we are not printing the root node because it should // not contain any attributes. auto prim = std::make_shared(); auto &ud = prim->userData(); - ud.set2("version", vec3i(major, minor, revision)); FbxArray animationStackNames; std::vector clip_names; @@ -1224,7 +1154,7 @@ struct NewFBXImportCamera : INode { ZENDEFNODE(NewFBXImportCamera, { { - {"readpath", "path"}, + "fbx_object", {"string", "clipName", ""}, {"frameid"}, {"float", "fps", "25"}, From 4c5cc9f6374367224cf66c0df80d49b88e351d65 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 5 Aug 2024 17:10:57 +0800 Subject: [PATCH 130/244] openmp --- projects/FBX/FBXSDK.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 262ece24ba..7111bd921c 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1273,7 +1273,7 @@ struct NewFBXBoneDeform : INode { bw.push_back(&prim->verts.add_attr(format("boneWeight_{}", i))); } size_t vert_count = prim->verts.size(); -#pragma omp parallel for + #pragma omp parallel for for (auto i = 0; i < vert_count; i++) { auto opos = prim->verts[i]; vec3f pos = {}; @@ -1295,6 +1295,7 @@ struct NewFBXBoneDeform : INode { if (vector.size()) { if (prim->verts.attr_is(vector)) { auto &nrms = prim->verts.attr(vector); + #pragma omp parallel for for (auto i = 0; i < vert_count; i++) { glm::mat4 matrix(0); float w = 0; @@ -1312,6 +1313,7 @@ struct NewFBXBoneDeform : INode { } if (prim->loops.attr_is(vector)) { auto &nrms = prim->loops.attr(vector); + #pragma omp parallel for for (auto i = 0; i < prim->loops.size(); i++) { auto vi = prim->loops[i]; glm::mat4 matrix(0); From 7d4c40b9fcc228bf63845d6065f97dc6546aadd1 Mon Sep 17 00:00:00 2001 From: zhouhang95 
<765229842@qq.com> Date: Mon, 5 Aug 2024 17:28:48 +0800 Subject: [PATCH 131/244] change ReadFBXFile like ReadAlembic --- projects/FBX/FBXSDK.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 7111bd921c..695d2ca19e 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -304,9 +304,15 @@ struct FBXObject : PrimitiveObject { }; struct ReadFBXFile: INode { + std::shared_ptr _inner_fbx_object; + std::string usedPath; virtual void apply() override { // Change the following filename to a suitable filename value. auto lFilename = get_input2("path"); + if (lFilename == usedPath && _inner_fbx_object != nullptr) { + set_output("fbx_object", _inner_fbx_object); + return; + } // Initialize the SDK manager. This object handles all our memory management. FbxManager* lSdkManager = FbxManager::Create(); @@ -337,6 +343,8 @@ struct ReadFBXFile: INode { // The file is imported; so get rid of the importer. lImporter->Destroy(); fbx_object->userData().set2("version", vec3i(major, minor, revision)); + usedPath = lFilename; + _inner_fbx_object = fbx_object; set_output("fbx_object", std::move(fbx_object)); } From 246cfc15674cea2cda09fb06882dd8a9445ac738 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 5 Aug 2024 18:03:31 +0800 Subject: [PATCH 132/244] fix --- projects/FBX/FBXSDK.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 695d2ca19e..1ae8532587 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -609,7 +609,7 @@ void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared FbxMesh* mesh = pNode->GetMesh(); if (mesh) { - auto name = mesh->GetName(); + auto name = pNode->GetName(); if (target_name == name) { auto sub_prim = GetMesh(pNode); if (sub_prim) { @@ -628,7 +628,6 @@ void TraverseNodesToGetPrims(FbxNode* pNode, std::vectorGetMesh(); if (mesh) { - auto name = 
mesh->GetName(); auto sub_prim = GetMesh(pNode); if (sub_prim) { prims.push_back(sub_prim); From d6cd460076388e39cec614dd1a77548472710281 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 5 Aug 2024 20:13:43 +0800 Subject: [PATCH 133/244] add abcpath --- projects/FBX/FBXSDK.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 1ae8532587..5ee56132df 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -587,6 +587,7 @@ std::shared_ptr GetMesh(FbxNode* pNode) { if (pMesh->GetElementTangentCount() > 0) { getAttr(pMesh->GetElementTangent(0), "tang", prim); } + prim_set_abcpath(prim.get(), format("/ABC/{}", nodeName)); return prim; } @@ -697,7 +698,7 @@ struct NewFBXImportSkin : INode { } } } - prim = primMerge(prims_ptr); + prim = primMergeWithFacesetMatid(prims_ptr); prim->userData().set2("boneName_count", int(nameMappingGlobal.size())); prim->userData().set2("maxnum_boneWeight", maxnum_boneWeight); for (auto [key, value]: nameMappingGlobal) { From bcb67bbe9f2af19041da5f6d177f222ab2434b26 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 6 Aug 2024 05:12:24 +0800 Subject: [PATCH 134/244] enhance zsvector copy handling --- projects/PyZpc/interop/Vector_nodes.cu | 376 +++++++++++++++++-------- 1 file changed, 266 insertions(+), 110 deletions(-) diff --git a/projects/PyZpc/interop/Vector_nodes.cu b/projects/PyZpc/interop/Vector_nodes.cu index 3bec332e78..194924a039 100644 --- a/projects/PyZpc/interop/Vector_nodes.cu +++ b/projects/PyZpc/interop/Vector_nodes.cu @@ -1,6 +1,7 @@ #include "Vector.hpp" #include "zensim/ZpcFunctional.hpp" #include "zensim/cuda/execution/ExecutionPolicy.cuh" +#include "zensim/omp/execution/ExecutionPolicy.hpp" #include #include #include @@ -9,36 +10,38 @@ namespace zeno { struct MakeZsVector : INode { - void apply() override { - // TODO - auto input_size = get_input2("size"); - auto input_memsrc = get_input2("memsrc"); - auto 
intput_devid = get_input2("dev_id"); - // auto input_virtual = get_input2("virtual"); - auto intput_elem_type = get_input2("elem_type"); - - zs::memsrc_e memsrc; - if (input_memsrc == "host") - memsrc = zs::memsrc_e::host; - else if (input_memsrc == "device") - memsrc = zs::memsrc_e::device; - else - memsrc = zs::memsrc_e::um; - -#define MAKE_VECTOR_OBJ_T(T) \ - if (intput_elem_type == #T) { \ - auto allocator = zs::get_memory_source(memsrc, static_cast(intput_devid)); \ - vectorObj->set(zs::Vector>{allocator, 0}); \ - } - - auto vectorObj = std::make_shared(); - MAKE_VECTOR_OBJ_T(int) - MAKE_VECTOR_OBJ_T(float) - MAKE_VECTOR_OBJ_T(double) - std::visit([input_size](auto &vec) { vec.resize(input_size); }, vectorObj->value); - - set_output("ZsVector", std::move(vectorObj)); - } + void apply() override { + // TODO + auto input_size = get_input2("size"); + auto input_memsrc = get_input2("memsrc"); + auto intput_devid = get_input2("dev_id"); + // auto input_virtual = get_input2("virtual"); + auto intput_elem_type = get_input2("elem_type"); + + zs::memsrc_e memsrc; + if (input_memsrc == "host") + memsrc = zs::memsrc_e::host; + else if (input_memsrc == "device") + memsrc = zs::memsrc_e::device; + else + memsrc = zs::memsrc_e::um; + +#define MAKE_VECTOR_OBJ_T(T) \ + if (intput_elem_type == #T) { \ + auto allocator = \ + zs::get_memory_source(memsrc, static_cast(intput_devid)); \ + vectorObj->set(zs::Vector>{allocator, 0}); \ + } + + auto vectorObj = std::make_shared(); + MAKE_VECTOR_OBJ_T(int) + MAKE_VECTOR_OBJ_T(float) + MAKE_VECTOR_OBJ_T(double) + std::visit([input_size](auto &vec) { vec.resize(input_size); }, + vectorObj->value); + + set_output("ZsVector", std::move(vectorObj)); + } }; // memsrc, size, elem_type, dev_id, virtual @@ -54,32 +57,35 @@ ZENDEFNODE(MakeZsVector, { }); struct ReduceZsVector : INode { - void apply() override { - auto vectorObj = get_input("ZsVector"); - auto opStr = get_input2("op"); - auto &vector = vectorObj->value; - - float result; - 
std::visit( - [&result, &opStr](auto &vector) { - auto pol = zs::cuda_exec(); - using vector_t = RM_CVREF_T(vector); - using val_t = typename vector_t::value_type; - zs::Vector res{1, zs::memsrc_e::device}; - if (opStr == "add") - zs::reduce(pol, std::begin(vector), std::end(vector), std::begin(res), static_cast(0), - zs::plus{}); - else if (opStr == "max") - zs::reduce(pol, std::begin(vector), std::end(vector), std::begin(res), zs::limits::min(), - zs::getmax{}); - else - zs::reduce(pol, std::begin(vector), std::end(vector), std::begin(res), zs::limits::max(), - zs::getmin{}); - result = static_cast(res.getVal()); - }, - vector); - set_output2("result", result); - } + void apply() override { + auto vectorObj = get_input("ZsVector"); + auto opStr = get_input2("op"); + auto &vector = vectorObj->value; + + float result; + std::visit( + [&result, &opStr](auto &vector) { + auto pol = zs::cuda_exec(); + using vector_t = RM_CVREF_T(vector); + using val_t = typename vector_t::value_type; + zs::Vector res{1, zs::memsrc_e::device}; + if (opStr == "add") + zs::reduce(pol, std::begin(vector), std::end(vector), + std::begin(res), static_cast(0), + zs::plus{}); + else if (opStr == "max") + zs::reduce(pol, std::begin(vector), std::end(vector), + std::begin(res), zs::limits::min(), + zs::getmax{}); + else + zs::reduce(pol, std::begin(vector), std::end(vector), + std::begin(res), zs::limits::max(), + zs::getmin{}); + result = static_cast(res.getVal()); + }, + vector); + set_output2("result", result); + } }; ZENDEFNODE(ReduceZsVector, { @@ -89,33 +95,119 @@ ZENDEFNODE(ReduceZsVector, { {"PyZFX"}, }); +template +struct _is_float : std::bool_constant> {}; +template +struct _is_float> + : std::bool_constant> {}; + struct CopyZsVectorTo : INode { - void apply() override { - auto vectorObj = get_input("ZsVector"); - auto prim = get_input("prim"); - auto attr = get_input2("attr"); - auto &vector = vectorObj->value; - - float result; - std::visit( - [&prim, &attr](auto &vector) { - using 
vector_t = RM_CVREF_T(vector); - using val_t = typename vector_t::value_type; - if constexpr (zs::is_same_v || zs::is_same_v) { - if (prim->size() != vector.size()) { - fmt::print("BEWARE! copy sizes mismatch! resize to match.\n"); - prim->resize(vector.size()); - } - - auto &dst = prim->attr(attr); - - std::memcpy(dst.data(), vector.data(), sizeof(val_t) * vector.size()); + void apply() override { + auto vectorObj = get_input("ZsVector"); + auto prim = get_input("prim"); + auto attr = get_input2("attr"); + auto &vector = vectorObj->value; + + std::visit( + [&prim, &attr](auto &vector) { + using vector_t = RM_CVREF_T(vector); + using val_t = typename vector_t::value_type; + if constexpr (zs::is_same_v || + zs::is_same_v) { + + auto process = [&prim = prim, &vector = vector, + &attr](auto &primAttr) { + using T = RM_CVREF_T(primAttr[0]); + constexpr bool sameType = + _is_float::value == zs::is_same_v; + + constexpr auto nbytes = sizeof(T); + if (prim->size() * (nbytes / sizeof(float)) < vector.size()) { + fmt::print("BEWARE! copy sizes mismatch! 
resize to match.\n"); + if (vector.size() % (nbytes / sizeof(float)) != 0) { + throw std::runtime_error(fmt::format( + "vector of type {} copied to primattr [{}] " + "containing {} " + "elements of type {}, yet vector size is {}\n", + zs::get_type_str().asChars(), attr, prim->size(), + zs::get_type_str().asChars(), vector.size())); } - }, - vector); + /// @note this does not invalidate primAttr + prim->resize(vector.size() / (nbytes / sizeof(float))); + } + if constexpr (sameType) + zs::Resource::copy( + zs::MemoryEntity{zs::MemoryLocation{zs::memsrc_e::host, -1}, + (void *)primAttr.data()}, + zs::MemoryEntity{vector.memoryLocation(), + (void *)vector.data()}, + sizeof(val_t) * vector.size()); + else { + if constexpr (zs::is_same_v) { + // float -> int + zs::omp_exec()( + zs::range(vector.size()), + [&vector, primAttrAddr = (int *)primAttr.data()]( + size_t i) { primAttrAddr[i] = (int)vector[i]; }); + } else { + // int -> float + zs::omp_exec()( + zs::range(vector.size()), + [&vector, primAttrAddr = (float *)primAttr.data()]( + size_t i) { primAttrAddr[i] = (float)vector[i]; }); + } + } + }; + if (attr == "pos") + process(prim->verts.values); + else + std::visit(process, prim->attr(attr)); - set_output2("prim", prim); - } + } else if constexpr (zs::is_same_v) { + auto process = [&prim = prim, &vector = vector, + &attr](auto &primAttr) { + using T = RM_CVREF_T(primAttr[0]); + constexpr auto nbytes = sizeof(T); + if (prim->size() * (nbytes / sizeof(float)) < vector.size()) { + fmt::print("BEWARE! copy sizes mismatch! 
resize to match.\n"); + if (vector.size() % (nbytes / sizeof(float)) != 0) { + throw std::runtime_error(fmt::format( + "vector of type {} copied to primattr [{}] " + "containing {} " + "elements of type {}, yet vector size is {}\n", + zs::get_type_str().asChars(), attr, prim->size(), + zs::get_type_str().asChars(), vector.size())); + } + /// @note this does not invalidate primAttr + prim->resize(vector.size() / (nbytes / sizeof(float))); + } + + if constexpr (!_is_float::value) { + // double -> int + zs::omp_exec()( + zs::range(vector.size()), + [&vector, primAttrAddr = (int *)primAttr.data()](size_t i) { + primAttrAddr[i] = (int)vector[i]; + }); + } else { + // double -> float + zs::omp_exec()( + zs::range(vector.size()), + [&vector, primAttrAddr = (float *)primAttr.data()]( + size_t i) { primAttrAddr[i] = (float)vector[i]; }); + } + }; + + if (attr == "pos") + process(prim->verts.values); + else + std::visit(process, prim->attr(attr)); + } + }, + vector); + + set_output2("prim", prim); + } }; ZENDEFNODE(CopyZsVectorTo, { @@ -126,38 +218,102 @@ ZENDEFNODE(CopyZsVectorTo, { }); struct CopyZsVectorFrom : INode { - void apply() override { - auto vectorObj = get_input("ZsVector"); - auto prim = get_input("prim"); - auto attr = get_input2("attr"); - auto &vector = vectorObj->value; - - float result; - std::visit( - [&prim, &attr](auto &vector) { - using vector_t = RM_CVREF_T(vector); - using val_t = typename vector_t::value_type; - if constexpr (zs::is_same_v || zs::is_same_v) { - if (prim->size() != vector.size()) { - fmt::print("BEWARE! copy sizes mismatch! 
resize to match.\n"); - vector.resize(prim->size()); - } - - const auto &src = prim->attr(attr); - - std::memcpy(vector.data(), src.data(), sizeof(val_t) * vector.size()); + void apply() override { + auto vectorObj = get_input("ZsVector"); + auto prim = get_input("prim"); + auto attr = get_input2("attr"); + auto &vector = vectorObj->value; + + std::visit( + [&prim, &attr](auto &vector) { + using vector_t = RM_CVREF_T(vector); + using val_t = typename vector_t::value_type; + if constexpr (zs::is_same_v || + zs::is_same_v) { + + auto process = [&prim = prim, &vector = vector, + &attr](auto &primAttr) { + using T = RM_CVREF_T(primAttr[0]); + constexpr bool sameType = + _is_float::value == zs::is_same_v; + + constexpr auto nbytes = sizeof(T); + if (prim->size() * (nbytes / sizeof(float)) > vector.size()) { + fmt::print("BEWARE! copy sizes mismatch! resize to match.\n"); + vector.resize(prim->size() * (nbytes / sizeof(float))); + } + if constexpr (sameType) + zs::Resource::copy( + zs::MemoryEntity{vector.memoryLocation(), + (void *)vector.data()}, + zs::MemoryEntity{zs::MemoryLocation{zs::memsrc_e::host, -1}, + (void *)primAttr.data()}, + nbytes * prim->size()); + else { + if constexpr (zs::is_same_v) { + // float <- int + zs::omp_exec()( + zs::range(prim->size() * (nbytes / sizeof(float))), + [&vector, primAttrAddr = (int *)primAttr.data()]( + size_t i) { vector[i] = (float)primAttrAddr[i]; }); + } else { + // int <- float + zs::omp_exec()( + zs::range(prim->size() * (nbytes / sizeof(float))), + [&vector, primAttrAddr = (float *)primAttr.data()]( + size_t i) { vector[i] = (int)primAttrAddr[i]; }); } - }, - vector); + } + }; + + if (attr == "pos") + process(prim->verts.values); + else + std::visit(process, prim->attr(attr)); + + } else if constexpr (zs::is_same_v) { + auto process = [&prim = prim, &vector = vector, + &attr](auto &primAttr) { + using T = RM_CVREF_T(primAttr[0]); + constexpr auto nbytes = sizeof(T); + if (prim->size() * (nbytes / sizeof(float)) > 
vector.size()) { + fmt::print("BEWARE! copy sizes mismatch! resize to match.\n"); + vector.resize(prim->size() * (nbytes / sizeof(float))); + } + + if constexpr (!_is_float::value) { + // double <- int + zs::omp_exec()( + zs::range(prim->size() * (nbytes / sizeof(float))), + [&vector, primAttrAddr = (int *)primAttr.data()](size_t i) { + vector[i] = (double)primAttrAddr[i]; + }); + } else { + // double <- float + zs::omp_exec()( + zs::range(prim->size() * (nbytes / sizeof(float))), + [&vector, primAttrAddr = (float *)primAttr.data()]( + size_t i) { vector[i] = (double)primAttrAddr[i]; }); + } + }; + + if (attr == "pos") + process(prim->verts.values); + else + std::visit(process, prim->attr(attr)); + } + }, + vector); - set_output2("ZsVector", vectorObj); - } + set_output2("ZsVector", vectorObj); + } }; -ZENDEFNODE(CopyZsVectorFrom, { - {"ZsVector", "prim", {"string", "attr", "clr"}}, - {"ZsVector"}, - {}, - {"PyZFX"}, - }); +ZENDEFNODE(CopyZsVectorFrom, + { + {"ZsVector", "prim", {"string", "attr", "clr"}}, + {"ZsVector"}, + {}, + {"PyZFX"}, + }); } // namespace zeno \ No newline at end of file From ab9f6885a136fe9beefc1adf710298a7f6fe05aa Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 6 Aug 2024 16:43:35 +0800 Subject: [PATCH 135/244] update a zpc jit translator fix --- projects/CUDA/zpc_jit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/CUDA/zpc_jit b/projects/CUDA/zpc_jit index 447d42c048..93b8fb2efb 160000 --- a/projects/CUDA/zpc_jit +++ b/projects/CUDA/zpc_jit @@ -1 +1 @@ -Subproject commit 447d42c04848622bfa0403ae1a457c2cfbbe6e3c +Subproject commit 93b8fb2efb476dc08aa12587e6197893bd7a9743 From 2fe337b9529cda2fac1f4094094f232b4e4405a9 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 6 Aug 2024 21:03:09 +0800 Subject: [PATCH 136/244] NewFBXRigPose --- projects/FBX/FBXSDK.cpp | 208 +++++++++++++++++++++++++++++++++------- 1 file changed, 174 insertions(+), 34 deletions(-) diff --git 
a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 5ee56132df..0bfba5da01 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -18,6 +18,9 @@ #include #include #include +#include +#include +#include #ifdef ZENO_FBXSDK #include @@ -1188,6 +1191,177 @@ ZENDEFNODE(NewFBXImportCamera, { } #endif namespace zeno { +struct RigPoseItemObject : PrimitiveObject { + std::string boneName; + vec3f translate = {0, 0, 0}; + vec3f rotate = {0, 0, 0}; +}; +struct NewFBXRigPoseItem : INode { + virtual void apply() override { + auto item = std::make_shared(); + item->boneName = get_input2("boneName"); + item->translate = get_input2("translate"); + item->rotate = get_input2("rotate"); + set_output2("poseItem", std::move(item)); + } +}; + +ZENDEFNODE(NewFBXRigPoseItem, { + { + {"string", "boneName", ""}, + {"vec3f", "translate", "0, 0, 0"}, + {"vec3f", "rotate", "0, 0, 0"}, + }, + { + "poseItem", + }, + {}, + {"FBXSDK"}, +}); +static std::vector getBoneMatrix(PrimitiveObject *prim) { + std::vector matrixs; + auto &verts = prim->verts; + auto &transform_r0 = prim->verts.add_attr("transform_r0"); + auto &transform_r1 = prim->verts.add_attr("transform_r1"); + auto &transform_r2 = prim->verts.add_attr("transform_r2"); + for (auto i = 0; i < prim->verts.size(); i++) { + glm::mat4 matrix; + matrix[0] = {transform_r0[i][0], transform_r0[i][1], transform_r0[i][2], 0}; + matrix[1] = {transform_r1[i][0], transform_r1[i][1], transform_r1[i][2], 0}; + matrix[2] = {transform_r2[i][0], transform_r2[i][1], transform_r2[i][2], 0}; + matrix[3] = {verts[i][0], verts[i][1], verts[i][2], 1}; + matrixs.push_back(matrix); + } + return matrixs; +} +static std::vector getInvertedBoneMatrix(PrimitiveObject *prim) { + std::vector inv_matrixs; + auto matrixs = getBoneMatrix(prim); + for (auto i = 0; i < matrixs.size(); i++) { + auto m = matrixs[i]; + auto inv_m = glm::inverse(m); + inv_matrixs.push_back(inv_m); + } + return inv_matrixs; +} +static vec3f transform_pos(glm::mat4 &transform, 
vec3f pos) { + auto p = transform * glm::vec4(pos[0], pos[1], pos[2], 1); + return {p.x, p.y, p.z}; +} +static vec3f transform_nrm(glm::mat4 &transform, vec3f pos) { + auto p = glm::transpose(glm::inverse(transform)) * glm::vec4(pos[0], pos[1], pos[2], 0); + return {p.x, p.y, p.z}; +} + +struct NewFBXRigPose : INode { + std::map getBoneNameMapping(PrimitiveObject *prim) { + auto boneName_count = prim->userData().get2("boneName_count"); + std::map boneNames; + for (auto i = 0; i < boneName_count; i++) { + auto boneName = prim->userData().get2(format("boneName_{}", i)); + boneNames[boneName] = i; + } + return boneNames; + } + std::vector TopologicalSorting(std::map bone_connects, std::shared_ptr skeleton) { + std::vector ordering; + std::set ordering_set; + while (bone_connects.size()) { + std::set need_to_remove; + for (auto [s, p]: bone_connects) { + if (bone_connects.count(p) == 0) { + if (ordering_set.count(p) == 0) { + ordering.emplace_back(p); + ordering_set.insert(p); + } + need_to_remove.insert(s); + } + } + for (auto index: need_to_remove) { + bone_connects.erase(index); + } + } + for (auto i = 0; i < skeleton->verts.size(); i++) { + if (ordering_set.count(i) == 0) { + ordering.push_back(i); + } + } + if (false) { // debug + for (auto i = 0; i < ordering.size(); i++) { + auto bi = ordering[i]; + auto bone_name = skeleton->userData().get2(format("boneName_{}", bi)); + zeno::log_info("{}: {}: {}", i, bi, bone_name); + } + } + return ordering; + } + virtual void apply() override { + auto skeleton = std::dynamic_pointer_cast(get_input("skeleton")->clone()); + auto nodelist = get_input("Transformations")->getRaw(); + std::map Transformations; + { + auto boneNameMapping = getBoneNameMapping(skeleton.get()); + for (auto n: nodelist) { + if (boneNameMapping.count(n->boneName)) { + Transformations[boneNameMapping[n->boneName]] = n; + } + else { + zeno::log_warn("{} missing", n->boneName); + } + } + } + + auto WorldSpace = get_input2("WorldSpace"); + std::map 
bone_connects; + for (auto i = 0; i < skeleton->polys.size(); i++) { + bone_connects[skeleton->loops[i * 2 + 1]] = skeleton->loops[i * 2]; + } + + auto ordering = TopologicalSorting(bone_connects, skeleton); + auto &verts = skeleton->verts; + auto &transform_r0 = skeleton->verts.add_attr("transform_r0"); + auto &transform_r1 = skeleton->verts.add_attr("transform_r1"); + auto &transform_r2 = skeleton->verts.add_attr("transform_r2"); + auto transforms = getBoneMatrix(skeleton.get()); + auto transformsInv = getInvertedBoneMatrix(skeleton.get()); + std::map cache; + for (auto bi: ordering) { + glm::mat4 transform = glm::mat4(1.0f); + if (Transformations.count(bi)) { + auto trans = Transformations[bi]; + glm::mat4 matTrans = glm::translate(vec_to_other(trans->translate)); + glm::mat4 matRotx = glm::rotate( (float)(trans->rotate[0] * M_PI / 180), glm::vec3(1,0,0) ); + glm::mat4 matRoty = glm::rotate( (float)(trans->rotate[1] * M_PI / 180), glm::vec3(0,1,0) ); + glm::mat4 matRotz = glm::rotate( (float)(trans->rotate[2] * M_PI / 180), glm::vec3(0,0,1) ); + transform = matTrans*matRoty*matRotx*matRotz; + transform = transforms[bi] * transform * transformsInv[bi]; + } + if (bone_connects.count(bi)) { + transform = cache[bone_connects[bi]] * transform; + } + cache[bi] = transform; + verts[bi] = transform_pos(transform, verts[bi]); + transform_r0[bi] = transform_nrm(transform, transform_r0[bi]); + transform_r1[bi] = transform_nrm(transform, transform_r1[bi]); + transform_r2[bi] = transform_nrm(transform, transform_r2[bi]); + } + + set_output2("skeleton", std::move(skeleton)); + } +}; + +ZENDEFNODE(NewFBXRigPose, { + { + "skeleton", + {"bool", "WorldSpace", "0"}, + {"list", "Transformations"}, + }, + { + "skeleton", + }, + {}, + {"FBXSDK"}, +}); struct NewFBXBoneDeform : INode { std::vector getBoneNames(PrimitiveObject *prim) { auto boneName_count = prim->userData().get2("boneName_count"); @@ -1212,40 +1386,6 @@ struct NewFBXBoneDeform : INode { } return mapping; } - 
std::vector getBoneMatrix(PrimitiveObject *prim) { - std::vector matrixs; - auto &verts = prim->verts; - auto &transform_r0 = prim->verts.add_attr("transform_r0"); - auto &transform_r1 = prim->verts.add_attr("transform_r1"); - auto &transform_r2 = prim->verts.add_attr("transform_r2"); - for (auto i = 0; i < prim->verts.size(); i++) { - glm::mat4 matrix; - matrix[0] = {transform_r0[i][0], transform_r0[i][1], transform_r0[i][2], 0}; - matrix[1] = {transform_r1[i][0], transform_r1[i][1], transform_r1[i][2], 0}; - matrix[2] = {transform_r2[i][0], transform_r2[i][1], transform_r2[i][2], 0}; - matrix[3] = {verts[i][0], verts[i][1], verts[i][2], 1}; - matrixs.push_back(matrix); - } - return matrixs; - } - std::vector getInvertedBoneMatrix(PrimitiveObject *prim) { - std::vector inv_matrixs; - auto matrixs = getBoneMatrix(prim); - for (auto i = 0; i < matrixs.size(); i++) { - auto m = matrixs[i]; - auto inv_m = glm::inverse(m); - inv_matrixs.push_back(inv_m); - } - return inv_matrixs; - } - vec3f transform_pos(glm::mat4 &transform, vec3f pos) { - auto p = transform * glm::vec4(pos[0], pos[1], pos[2], 1); - return {p.x, p.y, p.z}; - } - vec3f transform_nrm(glm::mat4 &transform, vec3f pos) { - auto p = glm::transpose(glm::inverse(transform)) * glm::vec4(pos[0], pos[1], pos[2], 0); - return {p.x, p.y, p.z}; - } virtual void apply() override { auto geometryToDeform = get_input2("GeometryToDeform"); auto geometryToDeformBoneNames = getBoneNames(geometryToDeform.get()); From 38054a57fa5ee65d111eea02ff4443103a050efd Mon Sep 17 00:00:00 2001 From: littlemine Date: Wed, 7 Aug 2024 01:34:17 +0800 Subject: [PATCH 137/244] upd zpc jit for zsvector copy --- projects/CUDA/zpc | 2 +- projects/CUDA/zpc_jit | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 450a8f82be..764099cd57 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 450a8f82be433cfcf2235f539e2a6d7851589f08 +Subproject commit 
764099cd5778d08e0d6bd375c79ec4702e7dcd68 diff --git a/projects/CUDA/zpc_jit b/projects/CUDA/zpc_jit index 93b8fb2efb..261de4b3c0 160000 --- a/projects/CUDA/zpc_jit +++ b/projects/CUDA/zpc_jit @@ -1 +1 @@ -Subproject commit 93b8fb2efb476dc08aa12587e6197893bd7a9743 +Subproject commit 261de4b3c0d13285d51b61e103d762c4a8b9f942 From f596bde58d00d60526e6f091d6dcb694ca279296 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 7 Aug 2024 19:07:13 +0800 Subject: [PATCH 138/244] WorldSpace --- projects/FBX/FBXSDK.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 0bfba5da01..19f9b96faf 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1336,7 +1336,7 @@ struct NewFBXRigPose : INode { transform = matTrans*matRoty*matRotx*matRotz; transform = transforms[bi] * transform * transformsInv[bi]; } - if (bone_connects.count(bi)) { + if (bone_connects.count(bi) && WorldSpace == false) { transform = cache[bone_connects[bi]] * transform; } cache[bi] = transform; From 5a51b0458ede98171783a42e4b2606e7105259a9 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 8 Aug 2024 17:42:18 +0800 Subject: [PATCH 139/244] improve-abc-curve --- projects/Alembic/ReadAlembic.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/projects/Alembic/ReadAlembic.cpp b/projects/Alembic/ReadAlembic.cpp index 3cba387f0b..9dbfe444ec 100644 --- a/projects/Alembic/ReadAlembic.cpp +++ b/projects/Alembic/ReadAlembic.cpp @@ -990,6 +990,20 @@ static std::shared_ptr foundABCCurves(Alembic::AbcGeom::ICurves offset += count; } } + if (auto width = mesh.getWidthsParam()) { + auto widthsamp = + width.getIndexedValue(Alembic::Abc::v12::ISampleSelector((Alembic::AbcCoreAbstract::index_t)sample_index)); + int index_size = (int)widthsamp.getIndices()->size(); + prim->userData().set2("index_size", index_size); + if (prim->verts.size() == index_size) { + auto &width_attr = 
prim->add_attr("width"); + for (auto i = 0; i < prim->verts.size(); i++) { + auto index = widthsamp.getIndices()->operator[](i); + auto value = widthsamp.getVals()->operator[](index); + width_attr[i] = value; + } + } + } ICompoundProperty arbattrs = mesh.getArbGeomParams(); read_attributes2(prim, arbattrs, iSS, read_done); ICompoundProperty usrData = mesh.getUserProperties(); From 0d821513b8a90f7b8b7c848b9001aea0aa5dd634 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 8 Aug 2024 17:46:35 +0800 Subject: [PATCH 140/244] fix --- projects/Alembic/ReadAlembic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/Alembic/ReadAlembic.cpp b/projects/Alembic/ReadAlembic.cpp index 9dbfe444ec..d6af7d05ce 100644 --- a/projects/Alembic/ReadAlembic.cpp +++ b/projects/Alembic/ReadAlembic.cpp @@ -994,7 +994,6 @@ static std::shared_ptr foundABCCurves(Alembic::AbcGeom::ICurves auto widthsamp = width.getIndexedValue(Alembic::Abc::v12::ISampleSelector((Alembic::AbcCoreAbstract::index_t)sample_index)); int index_size = (int)widthsamp.getIndices()->size(); - prim->userData().set2("index_size", index_size); if (prim->verts.size() == index_size) { auto &width_attr = prim->add_attr("width"); for (auto i = 0; i < prim->verts.size(); i++) { From 3a30ee2c15d913baa77d19dd3b3e85ea2f84467a Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 8 Aug 2024 20:25:53 +0800 Subject: [PATCH 141/244] fix --- projects/FBX/FBXSDK.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 19f9b96faf..27518f3beb 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -833,10 +833,10 @@ struct NewFBXImportSkeleton : INode { std::vector bone_connects; for (auto bone_name: bone_names) { if (parent_mapping.count(bone_name)) { - auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); auto parent_name = parent_mapping[bone_name]; - auto 
parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); - if (self_index >= 0 && parent_index >= 0) { + if (std::count(bone_names.begin(), bone_names.end(), parent_name)) { + auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); + auto parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); bone_connects.push_back(parent_index); bone_connects.push_back(self_index); } From bb928c969e97454d272d9a5d686b461fcd140fc4 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 8 Aug 2024 20:55:43 +0800 Subject: [PATCH 142/244] NewFBXExtractKeyframe --- projects/FBX/FBXSDK.cpp | 73 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 27518f3beb..6d476f9bf1 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1253,16 +1253,16 @@ static vec3f transform_nrm(glm::mat4 &transform, vec3f pos) { return {p.x, p.y, p.z}; } -struct NewFBXRigPose : INode { - std::map getBoneNameMapping(PrimitiveObject *prim) { - auto boneName_count = prim->userData().get2("boneName_count"); - std::map boneNames; - for (auto i = 0; i < boneName_count; i++) { - auto boneName = prim->userData().get2(format("boneName_{}", i)); - boneNames[boneName] = i; - } - return boneNames; +static std::map getBoneNameMapping(PrimitiveObject *prim) { + auto boneName_count = prim->userData().get2("boneName_count"); + std::map boneNames; + for (auto i = 0; i < boneName_count; i++) { + auto boneName = prim->userData().get2(format("boneName_{}", i)); + boneNames[boneName] = i; } + return boneNames; +} +struct NewFBXRigPose : INode { std::vector TopologicalSorting(std::map bone_connects, std::shared_ptr skeleton) { std::vector ordering; std::set ordering_set; @@ -1499,6 +1499,61 @@ ZENDEFNODE(NewFBXBoneDeform, { {"primitive"}, }); +struct NewFBXExtractKeyframe 
: INode { + virtual void apply() override { + auto restPointTransformsPrim = get_input2("RestPointTransforms"); + auto restBoneNameMapping = getBoneNameMapping(restPointTransformsPrim.get()); + auto restPointTransformsInv = getInvertedBoneMatrix(restPointTransformsPrim.get()); + auto deformPointTransformsPrim = get_input2("DeformPointTransforms"); + auto deformPointTransforms = getBoneMatrix(deformPointTransformsPrim.get()); + std::vector keyframe_boneName; + std::vector keyframe_bone_matrix; + auto boneName_count = deformPointTransformsPrim->userData().get2("boneName_count"); + for (auto i = 0; i < boneName_count; i++) { + auto boneName = deformPointTransformsPrim->userData().get2(format("boneName_{}", i)); + if (restBoneNameMapping.count(boneName) == 0) { + continue; + } + keyframe_boneName.emplace_back(boneName); + glm::mat4 restPointTransformInv = restPointTransformsInv[restBoneNameMapping[boneName]]; + glm::mat4 deformPointTransform = deformPointTransforms[i]; + auto keyframeTransform = restPointTransformInv * deformPointTransform; + keyframe_bone_matrix.emplace_back(keyframeTransform); + } + + auto keyframe = std::make_shared(); + keyframe->userData().set2("boneName_count", int(keyframe_boneName.size())); + for (auto i = 0; i < keyframe_boneName.size(); i++) { + keyframe->userData().set2(format("boneName_{}", i), keyframe_boneName[i]); + } + keyframe->verts.resize(keyframe_boneName.size()); + auto &verts = keyframe->verts; + auto &transform_r0 = keyframe->verts.add_attr("transform_r0"); + auto &transform_r1 = keyframe->verts.add_attr("transform_r1"); + auto &transform_r2 = keyframe->verts.add_attr("transform_r2"); + for (auto i = 0; i < keyframe->verts.size(); i++) { + auto matrix = keyframe_bone_matrix[i]; + transform_r0[i] = {matrix[0][0], matrix[0][1], matrix[0][2]}; + transform_r1[i] = {matrix[1][0], matrix[1][1], matrix[1][2]}; + transform_r2[i] = {matrix[2][0], matrix[2][1], matrix[2][2]}; + verts[i] = {matrix[3][0], matrix[3][1], matrix[3][2]}; + } + 
set_output2("keyframe", keyframe); + } +}; + +ZENDEFNODE(NewFBXExtractKeyframe, { + { + "RestPointTransforms", + "DeformPointTransforms", + }, + { + "keyframe", + }, + {}, + {"primitive"}, +}); + struct NormalView : INode { virtual void apply() override { From e61f6699edb6d14474b972292ff4241f9d2aca38 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 9 Aug 2024 17:54:35 +0800 Subject: [PATCH 143/244] maybe work --- projects/FBX/FBXSDK.cpp | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 6d476f9bf1..2d8a06cb72 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1262,6 +1262,16 @@ static std::map getBoneNameMapping(PrimitiveObject *prim) { } return boneNames; } +static std::vector getBoneNames(PrimitiveObject *prim) { + auto boneName_count = prim->userData().get2("boneName_count"); + std::vector boneNames; + boneNames.reserve(boneName_count); + for (auto i = 0; i < boneName_count; i++) { + auto boneName = prim->userData().get2(format("boneName_{}", i)); + boneNames.emplace_back(boneName); + } + return boneNames; +} struct NewFBXRigPose : INode { std::vector TopologicalSorting(std::map bone_connects, std::shared_ptr skeleton) { std::vector ordering; @@ -1363,16 +1373,6 @@ ZENDEFNODE(NewFBXRigPose, { {"FBXSDK"}, }); struct NewFBXBoneDeform : INode { - std::vector getBoneNames(PrimitiveObject *prim) { - auto boneName_count = prim->userData().get2("boneName_count"); - std::vector boneNames; - boneNames.reserve(boneName_count); - for (auto i = 0; i < boneName_count; i++) { - auto boneName = prim->userData().get2(format("boneName_{}", i)); - boneNames.emplace_back(boneName); - } - return boneNames; - } std::vector getBoneMapping(std::vector &old, std::vector &_new) { std::vector mapping; mapping.reserve(old.size()); @@ -1500,14 +1500,28 @@ ZENDEFNODE(NewFBXBoneDeform, { }); struct NewFBXExtractKeyframe : INode { + 
std::map get_parent_name(PrimitiveObject *prim) { + std::vector bone_names = getBoneNames(prim); + std::map res; + for (auto i = 0; i < prim->polys.size(); i++) { + auto p = prim->loops[i * 2 + 0]; + auto s = prim->loops[i * 2 + 1]; + res[bone_names[s]] = bone_names[p]; + } + return res; + } virtual void apply() override { auto restPointTransformsPrim = get_input2("RestPointTransforms"); auto restBoneNameMapping = getBoneNameMapping(restPointTransformsPrim.get()); + auto restPointTransforms = getBoneMatrix(restPointTransformsPrim.get()); auto restPointTransformsInv = getInvertedBoneMatrix(restPointTransformsPrim.get()); auto deformPointTransformsPrim = get_input2("DeformPointTransforms"); + auto deformBoneNameMapping = getBoneNameMapping(deformPointTransformsPrim.get()); auto deformPointTransforms = getBoneMatrix(deformPointTransformsPrim.get()); + auto deformPointTransformsInv = getInvertedBoneMatrix(deformPointTransformsPrim.get()); std::vector keyframe_boneName; std::vector keyframe_bone_matrix; + auto parent_names = get_parent_name(deformPointTransformsPrim.get()); auto boneName_count = deformPointTransformsPrim->userData().get2("boneName_count"); for (auto i = 0; i < boneName_count; i++) { auto boneName = deformPointTransformsPrim->userData().get2(format("boneName_{}", i)); @@ -1515,9 +1529,14 @@ struct NewFBXExtractKeyframe : INode { continue; } keyframe_boneName.emplace_back(boneName); + glm::mat4 parent_matrix = glm::mat4(1); + if (parent_names.count(boneName)) { + int pi = deformBoneNameMapping[parent_names[boneName]]; + parent_matrix = restPointTransforms[pi] * deformPointTransformsInv[pi]; + } glm::mat4 restPointTransformInv = restPointTransformsInv[restBoneNameMapping[boneName]]; glm::mat4 deformPointTransform = deformPointTransforms[i]; - auto keyframeTransform = restPointTransformInv * deformPointTransform; + auto keyframeTransform = restPointTransformInv * parent_matrix * deformPointTransform; keyframe_bone_matrix.emplace_back(keyframeTransform); } 
From 51b6c0b56ea60bbb8d58923b5acfd2dc3aecef7b Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 9 Aug 2024 18:42:50 +0800 Subject: [PATCH 144/244] NewFBXGenerateAnimation --- projects/FBX/FBXSDK.cpp | 117 ++++++++++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 28 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 2d8a06cb72..62f6643c3c 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1272,39 +1272,39 @@ static std::vector getBoneNames(PrimitiveObject *prim) { } return boneNames; } -struct NewFBXRigPose : INode { - std::vector TopologicalSorting(std::map bone_connects, std::shared_ptr skeleton) { - std::vector ordering; - std::set ordering_set; - while (bone_connects.size()) { - std::set need_to_remove; - for (auto [s, p]: bone_connects) { - if (bone_connects.count(p) == 0) { - if (ordering_set.count(p) == 0) { - ordering.emplace_back(p); - ordering_set.insert(p); - } - need_to_remove.insert(s); +static std::vector TopologicalSorting(std::map bone_connects, zeno::PrimitiveObject* skeleton) { + std::vector ordering; + std::set ordering_set; + while (bone_connects.size()) { + std::set need_to_remove; + for (auto [s, p]: bone_connects) { + if (bone_connects.count(p) == 0) { + if (ordering_set.count(p) == 0) { + ordering.emplace_back(p); + ordering_set.insert(p); } - } - for (auto index: need_to_remove) { - bone_connects.erase(index); + need_to_remove.insert(s); } } - for (auto i = 0; i < skeleton->verts.size(); i++) { - if (ordering_set.count(i) == 0) { - ordering.push_back(i); - } + for (auto index: need_to_remove) { + bone_connects.erase(index); } - if (false) { // debug - for (auto i = 0; i < ordering.size(); i++) { - auto bi = ordering[i]; - auto bone_name = skeleton->userData().get2(format("boneName_{}", bi)); - zeno::log_info("{}: {}: {}", i, bi, bone_name); - } + } + for (auto i = 0; i < skeleton->verts.size(); i++) { + if (ordering_set.count(i) == 0) { + 
ordering.push_back(i); + } + } + if (false) { // debug + for (auto i = 0; i < ordering.size(); i++) { + auto bi = ordering[i]; + auto bone_name = skeleton->userData().get2(format("boneName_{}", bi)); + zeno::log_info("{}: {}: {}", i, bi, bone_name); } - return ordering; } + return ordering; +} +struct NewFBXRigPose : INode { virtual void apply() override { auto skeleton = std::dynamic_pointer_cast(get_input("skeleton")->clone()); auto nodelist = get_input("Transformations")->getRaw(); @@ -1327,7 +1327,7 @@ struct NewFBXRigPose : INode { bone_connects[skeleton->loops[i * 2 + 1]] = skeleton->loops[i * 2]; } - auto ordering = TopologicalSorting(bone_connects, skeleton); + auto ordering = TopologicalSorting(bone_connects, skeleton.get()); auto &verts = skeleton->verts; auto &transform_r0 = skeleton->verts.add_attr("transform_r0"); auto &transform_r1 = skeleton->verts.add_attr("transform_r1"); @@ -1557,6 +1557,8 @@ struct NewFBXExtractKeyframe : INode { transform_r2[i] = {matrix[2][0], matrix[2][1], matrix[2][2]}; verts[i] = {matrix[3][0], matrix[3][1], matrix[3][2]}; } + auto &boneNames = keyframe->verts.add_attr("boneName"); + std::iota(boneNames.begin(), boneNames.end(), 0); set_output2("keyframe", keyframe); } }; @@ -1574,6 +1576,65 @@ ZENDEFNODE(NewFBXExtractKeyframe, { }); +struct NewFBXGenerateAnimation : INode { + virtual void apply() override { + auto keyframe = get_input2("keyframe"); + std::map Transformations; + { + auto keyframe_matrix = getBoneMatrix(keyframe.get()); + auto boneNames = getBoneNames(keyframe.get()); + for (auto i = 0; i < boneNames.size(); i++) { + Transformations[boneNames[i]] = keyframe_matrix[i]; + } + } + + auto skeleton = std::dynamic_pointer_cast(get_input("skeleton")->clone()); + std::map bone_connects; + for (auto i = 0; i < skeleton->polys.size(); i++) { + bone_connects[skeleton->loops[i * 2 + 1]] = skeleton->loops[i * 2]; + } + auto ordering = TopologicalSorting(bone_connects, skeleton.get()); + auto &verts = skeleton->verts; + 
auto &transform_r0 = skeleton->verts.add_attr("transform_r0"); + auto &transform_r1 = skeleton->verts.add_attr("transform_r1"); + auto &transform_r2 = skeleton->verts.add_attr("transform_r2"); + auto transforms = getBoneMatrix(skeleton.get()); + auto transformsInv = getInvertedBoneMatrix(skeleton.get()); + auto boneNames = getBoneNames(skeleton.get()); + std::map cache; + for (auto bi: ordering) { + glm::mat4 transform = glm::mat4(1.0f); + if (Transformations.count(boneNames[bi])) { + auto trans = Transformations[boneNames[bi]]; + transform = transforms[bi] * trans * transformsInv[bi]; + } + if (bone_connects.count(bi)) { + transform = cache[bone_connects[bi]] * transform; + } + cache[bi] = transform; + verts[bi] = transform_pos(transform, verts[bi]); + transform_r0[bi] = transform_nrm(transform, transform_r0[bi]); + transform_r1[bi] = transform_nrm(transform, transform_r1[bi]); + transform_r2[bi] = transform_nrm(transform, transform_r2[bi]); + } + + set_output2("DeformPointTransforms", skeleton); + } +}; + +ZENDEFNODE(NewFBXGenerateAnimation, { + { + "skeleton", + "keyframe", + }, + { + "DeformPointTransforms", + }, + {}, + {"primitive"}, +}); + + struct NormalView : INode { virtual void apply() override { auto prim = get_input2("prim"); From 5cdc270563e800c291aa87566856cd8f03815419 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 9 Aug 2024 19:02:09 +0800 Subject: [PATCH 145/244] fix --- projects/FBX/FBXSDK.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 62f6643c3c..66dc96655d 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1532,7 +1532,10 @@ struct NewFBXExtractKeyframe : INode { glm::mat4 parent_matrix = glm::mat4(1); if (parent_names.count(boneName)) { int pi = deformBoneNameMapping[parent_names[boneName]]; - parent_matrix = restPointTransforms[pi] * deformPointTransformsInv[pi]; + if 
(restBoneNameMapping.count(parent_names[boneName])) { + auto rpi = restBoneNameMapping[parent_names[boneName]]; + parent_matrix = restPointTransforms[rpi] * deformPointTransformsInv[pi]; + } } glm::mat4 restPointTransformInv = restPointTransformsInv[restBoneNameMapping[boneName]]; glm::mat4 deformPointTransform = deformPointTransforms[i]; From 87341020b0779df1097a58bc7b23b17189f29c0d Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Mon, 12 Aug 2024 19:44:42 +0800 Subject: [PATCH 146/244] fix no bones --- projects/FBX/FBXSDK.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 66dc96655d..3540fe2715 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -666,12 +666,12 @@ struct NewFBXImportSkin : INode { std::vector prims_ptr; int maxnum_boneWeight = 0; for (auto prim: prims) { - maxnum_boneWeight = zeno::max(maxnum_boneWeight, prim->userData().get2("maxnum_boneWeight")); + maxnum_boneWeight = zeno::max(maxnum_boneWeight, prim->userData().get2("maxnum_boneWeight", 0)); } for (auto prim: prims) { prims_ptr.push_back(prim.get()); std::vector nameMapping; - auto boneName_count = prim->userData().get2("boneName_count"); + auto boneName_count = prim->userData().get2("boneName_count", 0); for (auto i = 0; i < boneName_count; i++) { auto boneName = prim->userData().get2(zeno::format("boneName_{}", i)); if (nameMappingGlobal.count(boneName) == 0) { From b6153907ae8c4964306d12eb9b33b86b3e46fa64 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Tue, 13 Aug 2024 21:47:39 +0800 Subject: [PATCH 147/244] fix no bone weight --- projects/FBX/FBXSDK.cpp | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 3540fe2715..9c598b05b6 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1427,14 +1427,18 @@ struct NewFBXBoneDeform : INode { vec3f 
pos = {}; float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { - if (bi[j]->operator[](i) < 0) { + auto index = bi[j]->operator[](i); + if (index < 0) { continue; } - auto matrix = matrixs[bi[j]->operator[](i)]; - pos += transform_pos(matrix, opos) * bw[j]->operator[](i); - w += bw[j]->operator[](i); + auto matrix = matrixs[index]; + auto weight = bw[j]->operator[](i); + pos += transform_pos(matrix, opos) * weight; + w += weight; + } + if (w > 0) { + prim->verts[i] = pos / w; } - prim->verts[i] = pos / w; } auto vectors_str = get_input2("vectors"); std::vector vectors = zeno::split_str(vectors_str, ','); @@ -1448,11 +1452,13 @@ struct NewFBXBoneDeform : INode { glm::mat4 matrix(0); float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { - if (bi[j]->operator[](i) < 0) { + auto index = bi[j]->operator[](i); + if (index < 0) { continue; } - matrix += matrixs[bi[j]->operator[](i)] * bw[j]->operator[](i); - w += bw[j]->operator[](i); + auto weight = bw[j]->operator[](i); + matrix += matrixs[index] * weight; + w += weight; } matrix = matrix / w; auto nrm = transform_nrm(matrix, nrms[i]); @@ -1467,11 +1473,13 @@ struct NewFBXBoneDeform : INode { glm::mat4 matrix(0); float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { - if (bi[j]->operator[](vi) < 0) { + auto index = bi[j]->operator[](vi); + if (index < 0) { continue; } - matrix += matrixs[bi[j]->operator[](vi)] * bw[j]->operator[](vi); - w += bw[j]->operator[](vi); + auto weight = bw[j]->operator[](vi); + matrix += matrixs[index] * weight; + w += weight; } matrix = matrix / w; auto nrm = transform_nrm(matrix, nrms[i]); From 6410369caac1619c0d09a0ecd730e967545b48b6 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 14 Aug 2024 20:35:11 +0800 Subject: [PATCH 148/244] DualQuaternion --- projects/FBX/DualQuaternion.cpp | 131 ++++++++++++++++++++++++++++++++ projects/FBX/DualQuaternion.h | 40 ++++++++++ projects/FBX/FBXSDK.cpp | 68 ++++++++++++++--- 3 files changed, 228 insertions(+), 
11 deletions(-) create mode 100644 projects/FBX/DualQuaternion.cpp create mode 100644 projects/FBX/DualQuaternion.h diff --git a/projects/FBX/DualQuaternion.cpp b/projects/FBX/DualQuaternion.cpp new file mode 100644 index 0000000000..8f7c731f75 --- /dev/null +++ b/projects/FBX/DualQuaternion.cpp @@ -0,0 +1,131 @@ +#include "DualQuaternion.h" +#include +#include + +DualQuaternion operator+(const DualQuaternion& l, const DualQuaternion& r) { + return DualQuaternion(l.real + r.real, l.dual + r.dual); +} + +DualQuaternion operator*(const DualQuaternion& dq, float f) { + return DualQuaternion(dq.real * f, dq.dual * f); +} + +bool operator==(const DualQuaternion& l, const DualQuaternion& r) { + return l.real == r.real && l.dual == r.dual; +} + +bool operator!=(const DualQuaternion& l, const DualQuaternion& r) { + return l.real != r.real || l.dual != r.dual; +} + +// Remember, multiplication order is left to right. +// This is the opposite of matrix and quaternion multiplication order +DualQuaternion operator*(const DualQuaternion& l, const DualQuaternion& r) { + DualQuaternion lhs = normalized(l); + DualQuaternion rhs = normalized(r); +// DualQuaternion lhs = l; +// DualQuaternion rhs = r; + + return DualQuaternion(lhs.real * rhs.real, lhs.real * rhs.dual + lhs.dual * rhs.real); +} + +float dot(const DualQuaternion& l, const DualQuaternion& r) { + return dot(l.real, r.real); +} + +DualQuaternion conjugate(const DualQuaternion& dq) { + return DualQuaternion(conjugate(dq.real), conjugate(dq.dual)); +} + +DualQuaternion normalized(const DualQuaternion& dq) { + float magSq = dot(dq.real, dq.real); + if (magSq < 0.000001f) { + return DualQuaternion(); + } + float invMag = 1.0f / sqrtf(magSq); + + return DualQuaternion(dq.real * invMag, dq.dual * invMag); +} + +void normalize(DualQuaternion& dq) { + float magSq = dot(dq.real, dq.real); + if (magSq < 0.000001f) { + return; + } + float invMag = 1.0f / sqrtf(magSq); + + dq.real = dq.real * invMag; + dq.dual = dq.dual * invMag; +} 
+ +static void decomposeMtx(const glm::mat4& m, glm::vec3& pos, glm::quat& rot, glm::vec3& scale) +{ + pos = m[3]; + for(int i = 0; i < 3; i++) + scale[i] = glm::length(glm::vec3(m[i])); + const glm::mat3 rotMtx( + glm::vec3(m[0]) / scale[0], + glm::vec3(m[1]) / scale[1], + glm::vec3(m[2]) / scale[2]); + rot = glm::quat_cast(rotMtx); +} + +constexpr glm::quat dual_quat(const glm::quat& q,const glm::vec3& t) { + + auto qx = q.x; + auto qy = q.y; + auto qz = q.z; + auto qw = q.w; + auto tx = t[0]; + auto ty = t[1]; + auto tz = t[2]; + + glm::quat qd; + qd.w = -0.5*( tx*qx + ty*qy + tz*qz); // qd.w + qd.x = 0.5*( tx*qw + ty*qz - tz*qy); // qd.x + qd.y = 0.5*(-tx*qz + ty*qw + tz*qx); // qd.y + qd.z = 0.5*( tx*qy - ty*qx + tz*qw); // qd.z + + return qd; +} +DualQuaternion mat4ToDualQuat2(const glm::mat4& transformation) { + glm::vec3 scale; + glm::quat rotation; + glm::vec3 translation; + decomposeMtx(transformation, translation, rotation, scale); + glm::quat qr = rotation; + glm::quat qd = dual_quat(qr, translation); + return DualQuaternion(qr, qd); +} + +glm::mat4 dualQuatToMat4(const DualQuaternion& dq) { + glm::mat4 rotation = glm::toMat4(dq.real); + + glm::quat d = conjugate(dq.real) * (dq.dual * 2.0f); + glm::mat4 position = glm::translate(glm::vec3(d.x, d.y, d.z)); + + glm::mat4 result = position * rotation; + return result; +} + +glm::vec3 transformVector(const DualQuaternion& dq, const glm::vec3& v) { + return dq.real * v; +} + +glm::vec3 transformPoint2(const DualQuaternion& dq, const glm::vec3& v){ + auto d0 = glm::vec3(dq.real.x, dq.real.y, dq.real.z); + auto de = glm::vec3(dq.dual.x, dq.dual.y, dq.dual.z); + auto a0 = dq.real.w; + auto ae = dq.dual.w; + + return v + 2.0f * cross(d0,cross(d0,v) + a0*v) + 2.0f *(a0*de - ae*d0 + cross(d0,de)); +} + + +zeno::vec3f transformVector(const DualQuaternion& dq, const zeno::vec3f& v) { + return zeno::bit_cast(transformVector(dq, zeno::bit_cast(v))); +} + +zeno::vec3f transformPoint2(const DualQuaternion& dq, const 
zeno::vec3f& v) { + return zeno::bit_cast(transformPoint2(dq, zeno::bit_cast(v))); +} \ No newline at end of file diff --git a/projects/FBX/DualQuaternion.h b/projects/FBX/DualQuaternion.h new file mode 100644 index 0000000000..061a8f5c85 --- /dev/null +++ b/projects/FBX/DualQuaternion.h @@ -0,0 +1,40 @@ +#ifndef _H_DUALQUATERNION_ +#define _H_DUALQUATERNION_ + +#include +#include +#include +#include +#include +#include +#include + +struct DualQuaternion { + glm::quat real = {1, 0, 0, 0}; + glm::quat dual = {0, 0, 0, 0}; + inline DualQuaternion() { } + inline DualQuaternion(const glm::quat& r, const glm::quat& d) : + real(r), dual(d) { } +}; + +DualQuaternion operator+(const DualQuaternion& l, const DualQuaternion& r); +DualQuaternion operator*(const DualQuaternion& dq, float f); +// Multiplication order is left to right +// Left to right is the OPPOSITE of matrices and quaternions +DualQuaternion operator*(const DualQuaternion& l, const DualQuaternion& r); +bool operator==(const DualQuaternion& l, const DualQuaternion& r); +bool operator!=(const DualQuaternion& l, const DualQuaternion& r); + +float dot(const DualQuaternion& l, const DualQuaternion& r); +DualQuaternion conjugate(const DualQuaternion& dq); +DualQuaternion normalized(const DualQuaternion& dq); +void normalize(DualQuaternion& dq); + +DualQuaternion mat4ToDualQuat2(const glm::mat4& t); +glm::mat4 dualQuatToMat4(const DualQuaternion& dq); + +glm::vec3 transformVector(const DualQuaternion& dq, const glm::vec3& v); +zeno::vec3f transformVector(const DualQuaternion& dq, const zeno::vec3f& v); +zeno::vec3f transformPoint2(const DualQuaternion& dq, const zeno::vec3f& v); + +#endif \ No newline at end of file diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 9c598b05b6..013118ca88 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -21,6 +21,7 @@ #include #include #include +#include "DualQuaternion.h" #ifdef ZENO_FBXSDK #include @@ -1009,6 +1010,7 @@ struct 
NewFBXImportAnimation : INode { ud.set2("boneName_count", int(bone_names.size())); for (auto i = 0; i < bone_names.size(); i++) { ud.set2(zeno::format("boneName_{}", i), bone_names[i]); + zeno::log_info("boneName: {}", bone_names[i]); } } @@ -1387,6 +1389,7 @@ struct NewFBXBoneDeform : INode { return mapping; } virtual void apply() override { + auto usingDualQuaternion = get_input2("SkinningMethod") == "DualQuaternion"; auto geometryToDeform = get_input2("GeometryToDeform"); auto geometryToDeformBoneNames = getBoneNames(geometryToDeform.get()); auto restPointTransformsPrim = get_input2("RestPointTransforms"); @@ -1400,6 +1403,8 @@ struct NewFBXBoneDeform : INode { std::vector matrixs; matrixs.reserve(geometryToDeformBoneNames.size()); + std::vector dqs; + dqs.reserve(geometryToDeformBoneNames.size()); for (auto i = 0; i < geometryToDeformBoneNames.size(); i++) { glm::mat4 res_inv_matrix = glm::mat4(1); glm::mat4 deform_matrix = glm::mat4(1); @@ -1409,6 +1414,7 @@ struct NewFBXBoneDeform : INode { } auto matrix = deform_matrix * res_inv_matrix; matrixs.push_back(matrix); + dqs.push_back(mat4ToDualQuat2(matrix)); } auto prim = std::dynamic_pointer_cast(geometryToDeform->clone()); @@ -1425,19 +1431,30 @@ struct NewFBXBoneDeform : INode { for (auto i = 0; i < vert_count; i++) { auto opos = prim->verts[i]; vec3f pos = {}; + DualQuaternion dq_acc({0, 0, 0, 0}, {0, 0, 0, 0}); float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { auto index = bi[j]->operator[](i); if (index < 0) { continue; } - auto matrix = matrixs[index]; auto weight = bw[j]->operator[](i); - pos += transform_pos(matrix, opos) * weight; + if (usingDualQuaternion) { + dq_acc = dq_acc + dqs[index] * weight; + } + else { + pos += transform_pos(matrixs[index], opos) * weight; + } w += weight; } if (w > 0) { - prim->verts[i] = pos / w; + if (usingDualQuaternion) { + dq_acc = normalized(dq_acc); + prim->verts[i] = transformPoint2(dq_acc, opos); + } + else { + prim->verts[i] = pos / w; + } } } auto 
vectors_str = get_input2("vectors"); @@ -1450,6 +1467,7 @@ struct NewFBXBoneDeform : INode { #pragma omp parallel for for (auto i = 0; i < vert_count; i++) { glm::mat4 matrix(0); + DualQuaternion dq_acc({0, 0, 0, 0}, {0, 0, 0, 0}); float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { auto index = bi[j]->operator[](i); @@ -1457,12 +1475,25 @@ struct NewFBXBoneDeform : INode { continue; } auto weight = bw[j]->operator[](i); - matrix += matrixs[index] * weight; + if (usingDualQuaternion) { + dq_acc = dq_acc + dqs[index] * weight; + } + else { + matrix += matrixs[index] * weight; + } w += weight; } - matrix = matrix / w; - auto nrm = transform_nrm(matrix, nrms[i]); - nrms[i] = zeno::normalize(nrm); + if (w > 0) { + if (usingDualQuaternion) { + dq_acc = normalized(dq_acc); + nrms[i] = transformVector(dq_acc, nrms[i]); + } + else { + matrix = matrix / w; + nrms[i] = transform_nrm(matrix, nrms[i]); + } + nrms[i] = zeno::normalize(nrms[i]); + } } } if (prim->loops.attr_is(vector)) { @@ -1471,6 +1502,7 @@ struct NewFBXBoneDeform : INode { for (auto i = 0; i < prim->loops.size(); i++) { auto vi = prim->loops[i]; glm::mat4 matrix(0); + DualQuaternion dq_acc({0, 0, 0, 0}, {0, 0, 0, 0}); float w = 0; for (auto j = 0; j < maxnum_boneWeight; j++) { auto index = bi[j]->operator[](vi); @@ -1478,12 +1510,25 @@ struct NewFBXBoneDeform : INode { continue; } auto weight = bw[j]->operator[](vi); - matrix += matrixs[index] * weight; + if (usingDualQuaternion) { + dq_acc = dq_acc + dqs[index] * weight; + } + else { + matrix += matrixs[index] * weight; + } w += weight; } - matrix = matrix / w; - auto nrm = transform_nrm(matrix, nrms[i]); - nrms[i] = zeno::normalize(nrm); + if (w > 0) { + if (usingDualQuaternion) { + dq_acc = normalized(dq_acc); + nrms[i] = transformVector(dq_acc, nrms[i]); + } + else { + matrix = matrix / w; + nrms[i] = transform_nrm(matrix, nrms[i]); + } + nrms[i] = zeno::normalize(nrms[i]); + } } } } @@ -1498,6 +1543,7 @@ ZENDEFNODE(NewFBXBoneDeform, { 
"GeometryToDeform", "RestPointTransforms", "DeformPointTransforms", + {"enum Linear DualQuaternion", "SkinningMethod", "Linear"}, {"string", "vectors", "nrm,"}, }, { From b63dfb71388f8d710b55882af68c7bcc84353767 Mon Sep 17 00:00:00 2001 From: littlemine Date: Tue, 20 Aug 2024 15:51:27 +0800 Subject: [PATCH 149/244] fix fbx typo, upd zpc --- projects/CUDA/SpatialAccel.cuh | 10 ++-- projects/CUDA/Structures.hpp | 14 +++--- projects/CUDA/Utils.hpp | 12 ++--- projects/CUDA/iw_query/Query.cpp | 18 ++++---- projects/CUDA/remesh/simplification.cpp | 6 +-- projects/CUDA/test.cpp | 18 +++++++- projects/CUDA/test1.cu | 2 +- projects/CUDA/utils/Groom.cpp | 2 +- projects/CUDA/utils/Primitives.cpp | 46 +++++++++---------- projects/CUDA/utils/Primitives.cu | 4 +- projects/CUDA/utils/TopoUtils.cu | 4 +- projects/CUDA/zpc | 2 +- projects/CuEulerian/hybrid/P2G.cu | 4 +- projects/CuEulerian/levelset/Extrapolation.cu | 8 ++-- .../levelset/Grid_creator_adaptive.cu | 2 +- .../navierstokes/NS_linearsolver.cu | 6 +-- projects/CuEulerian/navierstokes/NS_topo.cu | 2 +- projects/CuEulerian/swe/Erode.cu | 4 +- projects/CuEulerian/swe/SWE_dense.cu | 4 +- projects/CuEulerian/volume/Transfer.cu | 6 +-- projects/FBX/DualQuaternion.cpp | 2 +- projects/PyZpc/interop/Vector_nodes.cu | 4 +- 22 files changed, 98 insertions(+), 82 deletions(-) diff --git a/projects/CUDA/SpatialAccel.cuh b/projects/CUDA/SpatialAccel.cuh index 71c0a095eb..9785a2e3be 100644 --- a/projects/CUDA/SpatialAccel.cuh +++ b/projects/CUDA/SpatialAccel.cuh @@ -89,7 +89,7 @@ struct ZenoLBvh { Vector box{orderedBvs.get_allocator(), 1}; if (numLeaves <= 2) { using TV = typename Box::TV; - box.setVal(Box{TV::uniform(limits::max()), TV::uniform(limits::lowest())}); + box.setVal(Box{TV::uniform(detail::deduce_numeric_max()), TV::uniform(detail::deduce_numeric_lowest())}); pol(Collapse{numLeaves}, [bvh = proxy(*this), box = proxy(box)] ZS_LAMBDA(int vi) mutable { auto bv = bvh.getNodeBV(vi); for (int d = 0; d != dim; ++d) { @@ -239,13 
+239,13 @@ void ZenoLBvh::build(zs::CudaExecution auto lOffsets = proxy(leafOffsets); // total bounding volume - const auto defaultBox = Box{TV::uniform(limits::max()), TV::uniform(limits::lowest())}; + const auto defaultBox = Box{TV::uniform(detail::deduce_numeric_max()), TV::uniform(detail::deduce_numeric_lowest())}; Vector wholeBox{primBvs.get_allocator(), 1}; wholeBox.setVal(defaultBox); policy(primBvs, [box = proxy(wholeBox), execTag] ZS_LAMBDA(const Box &bv) mutable { for (int d = 0; d != dim; ++d) { - atomic_min(execTag, &box(0)._min[d], bv._min[d] - 10 * limits::epsilon()); - atomic_max(execTag, &box(0)._max[d], bv._max[d] + 10 * limits::epsilon()); + atomic_min(execTag, &box(0)._min[d], bv._min[d] - 10 * detail::deduce_numeric_epsilon()); + atomic_max(execTag, &box(0)._max[d], bv._max[d] + 10 * detail::deduce_numeric_epsilon()); } }); @@ -624,7 +624,7 @@ void ZenoLBvh::refit(zs::CudaExecution // refit #if 0 policy(orderedBvs, [] ZS_LAMBDA(auto &bv) { - bv = Box{TV::uniform(limits::max()), TV::uniform(limits::lowest())}; + bv = Box{TV::uniform(detail::deduce_numeric_max()), TV::uniform(detail::deduce_numeric_lowest())}; }); #endif policy(Collapse{numLeaves}, [primBvs = proxy(primBvs), orderedBvs = proxy(orderedBvs), diff --git a/projects/CUDA/Structures.hpp b/projects/CUDA/Structures.hpp index aa85b519bc..d44c06ba17 100644 --- a/projects/CUDA/Structures.hpp +++ b/projects/CUDA/Structures.hpp @@ -339,7 +339,7 @@ struct ZenoParticles : IObjectClone { using namespace zs; constexpr execspace_e space = RM_CVREF_T(pol)::exec_tag::value; constexpr auto defaultBv = - bv_t{vec3f::constant(zs::limits::max()), vec3f::constant(zs::limits::lowest())}; + bv_t{vec3f::constant(zs::detail::deduce_numeric_max()), vec3f::constant(zs::detail::deduce_numeric_lowest())}; if (!particles) return defaultBv; @@ -357,14 +357,14 @@ struct ZenoParticles : IObjectClone { y = xn[1]; z = xn[2]; }); - zs::reduce(pol, std::begin(X), std::end(X), std::begin(res), zs::limits::max(), 
getmin{}); - zs::reduce(pol, std::begin(X), std::end(X), std::begin(res) + 3, zs::limits::lowest(), + zs::reduce(pol, std::begin(X), std::end(X), std::begin(res), zs::detail::deduce_numeric_max(), getmin{}); + zs::reduce(pol, std::begin(X), std::end(X), std::begin(res) + 3, zs::detail::deduce_numeric_lowest(), getmax{}); - zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 1, zs::limits::max(), getmin{}); - zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 4, zs::limits::lowest(), + zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 1, zs::detail::deduce_numeric_max(), getmin{}); + zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 4, zs::detail::deduce_numeric_lowest(), getmax{}); - zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 2, zs::limits::max(), getmin{}); - zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 5, zs::limits::lowest(), + zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 2, zs::detail::deduce_numeric_max(), getmin{}); + zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 5, zs::detail::deduce_numeric_lowest(), getmax{}); res = res.clone({memsrc_e::host, -1}); return bv_t{vec3f{res[0], res[1], res[2]}, vec3f{res[3], res[4], res[5]}}; diff --git a/projects/CUDA/Utils.hpp b/projects/CUDA/Utils.hpp index ed93bd4464..9914c1491b 100644 --- a/projects/CUDA/Utils.hpp +++ b/projects/CUDA/Utils.hpp @@ -282,7 +282,7 @@ constexpr bool pt_accd(VecT p, VecT t0, VecT t1, VecT t2, VecT dp, VecT dt0, dt2 -= mov; dp -= mov; T dispMag2Vec[3] = {dt0.l2NormSqr(), dt1.l2NormSqr(), dt2.l2NormSqr()}; - T tmp = zs::limits::lowest(); + T tmp = zs::detail::deduce_numeric_lowest(); for (int i = 0; i != 3; ++i) if (dispMag2Vec[i] > tmp) tmp = dispMag2Vec[i]; @@ -343,7 +343,7 @@ ee_accd(VecT ea0, VecT ea1, VecT eb0, VecT eb1, VecT dea0, VecT dea1, VecT deb0, T dists[] = {(ea0 - eb0).l2NormSqr(), (ea0 - eb1).l2NormSqr(), (ea1 - eb0).l2NormSqr(), (ea1 - eb1).l2NormSqr()}; { - dist2_cur 
= zs::limits::max(); + dist2_cur = zs::detail::deduce_numeric_max(); for (const auto &dist : dists) if (dist < dist2_cur) dist2_cur = dist; @@ -378,7 +378,7 @@ ee_accd(VecT ea0, VecT ea1, VecT eb0, VecT eb1, VecT dea0, VecT dea1, VecT deb0, T dists[] = {(ea0 - eb0).l2NormSqr(), (ea0 - eb1).l2NormSqr(), (ea1 - eb0).l2NormSqr(), (ea1 - eb1).l2NormSqr()}; { - dist2_cur = zs::limits::max(); + dist2_cur = zs::detail::deduce_numeric_max(); for (const auto &dist : dists) if (dist < dist2_cur) dist2_cur = dist; @@ -568,7 +568,7 @@ void find_intersection_free_stepsize(Pol &pol, ZenoParticles &zstets, atomic_min(exec_cuda, &finalAlpha[0], alpha); }); // zs::reduce(pol, std::begin(surfAlphas), std::end(surfAlphas), - // std::begin(finalAlpha), limits::max(), getmin{}); + // std::begin(finalAlpha), detail::deduce_numeric_max(), getmin{}); auto surfAlpha = finalAlpha.getVal(); fmt::print(fg(fmt::color::dark_cyan), "surface alpha: {}, default stepsize: {}\n", surfAlpha, stepSize); @@ -708,7 +708,7 @@ void find_boundary_intersection_free_stepsize(Pol &pol, ZenoParticles &zstets, }); }); // zs::reduce(pol, std::begin(surfAlphas), std::end(surfAlphas), - // std::begin(finalAlpha), limits::max(), getmin{}); + // std::begin(finalAlpha), detail::deduce_numeric_max(), getmin{}); auto surfAlpha = finalAlpha.getVal(); stepSize = surfAlpha; fmt::print(fg(fmt::color::dark_cyan), @@ -758,7 +758,7 @@ void find_boundary_intersection_free_stepsize(Pol &pol, ZenoParticles &zstets, }); #if 0 zs::reduce(pol, std::begin(surfEdgeAlphas), std::end(surfEdgeAlphas), - std::begin(finalAlpha), limits::max(), getmin{}); + std::begin(finalAlpha), detail::deduce_numeric_max(), getmin{}); stepSize = std::min(surfAlpha, finalAlpha.getVal()); #else stepSize = finalAlpha.getVal(); diff --git a/projects/CUDA/iw_query/Query.cpp b/projects/CUDA/iw_query/Query.cpp index fa92296dfd..a63f5da2ab 100644 --- a/projects/CUDA/iw_query/Query.cpp +++ b/projects/CUDA/iw_query/Query.cpp @@ -90,9 +90,9 @@ struct 
QueryNearestPoints : INode { gmin = vertices[i][d]; gmax = vertices[i][d]; }); - reduce(pol, std::begin(gmins), std::end(gmins), std::begin(ret), limits::max(), getmin{}); + reduce(pol, std::begin(gmins), std::end(gmins), std::begin(ret), detail::deduce_numeric_max(), getmin{}); gbv._min[d] = ret.getVal(); - reduce(pol, std::begin(gmaxs), std::end(gmaxs), std::begin(ret), limits::lowest(), getmax{}); + reduce(pol, std::begin(gmaxs), std::end(gmaxs), std::begin(ret), detail::deduce_numeric_lowest(), getmax{}); gbv._max[d] = ret.getVal(); } int axis = 0; // x-axis by default @@ -130,8 +130,8 @@ struct QueryNearestPoints : INode { { int cnt = 0; for (int i = 0; i < vertices.size() - 1; ++i) { - if ((keys[i] >= limits::epsilon() || keys[i] <= -limits::epsilon()) && - (keys[i + 1] >= limits::epsilon() || keys[i + 1] <= -limits::epsilon())) + if ((keys[i] >= detail::deduce_numeric_epsilon() || keys[i] <= -detail::deduce_numeric_epsilon()) && + (keys[i + 1] >= detail::deduce_numeric_epsilon() || keys[i + 1] <= -detail::deduce_numeric_epsilon())) if (keys[i] > keys[i + 1]) { printf("order is wrong at [%d] ... 
%e, %e...\n", i, keys[i], keys[i + 1]); cnt++; @@ -172,7 +172,7 @@ struct QueryNearestPoints : INode { pol(enumerate(pos, locs, dists, ids, cps), [&xs, &indices, axis](int i, const auto &xi, const int loc, float &dist, int &id, vec3f &cp) { int l = loc + 1; - float d2 = limits::max(); + float d2 = detail::deduce_numeric_max(); int j = -1; int cnt = 0; while (l < xs.size() && cnt++ < 128) { @@ -265,7 +265,7 @@ struct QueryNearestPoints : INode { pol(enumerate(pos, dists, ids, cps), [&pos, &vertices, &locs, &xs, &indices, bvh = proxy(bvh), axis]( int i, const zeno::vec3f &p, float &dist, int &id, zeno::vec3f &cp) { auto target = vertices[id]; - if (auto d = zeno::length(p - target); std::abs(d - dist) > limits::epsilon()) + if (auto d = zeno::length(p - target); std::abs(d - dist) > detail::deduce_numeric_epsilon()) fmt::print("actual dist {}, cp ({}, {}, {}); calced dist {}, cp ({}, {}, {}). \n", d, target[0], target[1], target[2], dist, cp[0], cp[1], cp[2]); const int loc = locs[i]; @@ -275,7 +275,7 @@ struct QueryNearestPoints : INode { auto key = xi[axis]; int l = loc + 1; while (l < xs.size() && zs::sqr(xs[l][axis] - key) < dist2) { - if (auto d2 = zeno::lengthSquared(xs[l] - xi); std::sqrt(d2) + limits::epsilon() < dist) { + if (auto d2 = zeno::lengthSquared(xs[l] - xi); std::sqrt(d2) + detail::deduce_numeric_epsilon() < dist) { fmt::print("[{}] found nearer pair! real id should be {} ({}), not {} ({})\n", i, indices[l], std::sqrt(d2), id, std::sqrt(dist2)); return; @@ -284,7 +284,7 @@ struct QueryNearestPoints : INode { } l = loc; while (l >= 0 && zs::sqr(xs[l][axis] - key) < dist2) { - if (auto d2 = zeno::lengthSquared(xs[l] - xi); std::sqrt(d2) + limits::epsilon() < dist) { + if (auto d2 = zeno::lengthSquared(xs[l] - xi); std::sqrt(d2) + detail::deduce_numeric_epsilon() < dist) { fmt::print("[{}] found nearer pair! 
real id should be {} ({}), not {} ({})\n", i, indices[l], std::sqrt(d2), id, dist); return; @@ -302,7 +302,7 @@ struct QueryNearestPoints : INode { timer.tick(); pol(zip(range(pos.size()), locs), [&locs, &xs, &vertices, &indices, &pos, &ids, &dists, &cps, axis](int i, const int loc) { - float dist2 = limits::max(); + float dist2 = detail::deduce_numeric_max(); int id = -1; auto xi = pos[i]; auto key = xi[axis]; diff --git a/projects/CUDA/remesh/simplification.cpp b/projects/CUDA/remesh/simplification.cpp index f4773d2db8..087da76c11 100644 --- a/projects/CUDA/remesh/simplification.cpp +++ b/projects/CUDA/remesh/simplification.cpp @@ -69,12 +69,12 @@ struct PolyReduceLite : INode { // zeno::log_warn(fmt::format("begin iter {}\n", i)); /// evaluate vert curvatures pol(range(pos.size()), [&](int i) { - vertEdgeCosts[i] = std::make_pair(limits::max(), std::make_pair(i, -1)); + vertEdgeCosts[i] = std::make_pair(detail::deduce_numeric_max(), std::make_pair(i, -1)); if (vertVerts[i].size() == 0 || vertDiscard[i]) { return; } - auto cost = limits::max(); + auto cost = detail::deduce_numeric_max(); for (auto j : vertVerts[i]) { if (vertDiscard[j]) continue; @@ -105,7 +105,7 @@ struct PolyReduceLite : INode { /// sort edges for collapse auto pair = std::reduce( std::begin(vertEdgeCosts), std::end(vertEdgeCosts), - std::make_pair(limits::max(), std::make_pair(-1, -1)), + std::make_pair(detail::deduce_numeric_max(), std::make_pair(-1, -1)), [](const std::pair> &a, const std::pair> &b) { if (a.first < b.first) return a; diff --git a/projects/CUDA/test.cpp b/projects/CUDA/test.cpp index b414e65329..d8a9845866 100644 --- a/projects/CUDA/test.cpp +++ b/projects/CUDA/test.cpp @@ -21,6 +21,12 @@ #include "zensim/execution/ConcurrencyPrimitive.hpp" #include "zensim/visitors/Print.hpp" +#if 0 +#include "glm/glm.hpp" +#include "glm/gtx/quaternion.hpp" +#include +#endif + namespace zeno { struct spinlock { @@ -442,6 +448,16 @@ struct ZSLinkTest : INode { void apply() override { using 
namespace zs; constexpr auto space = execspace_e::openmp; + +#if 0 + glm::vec3 v{1, 2, 3}; + glm::mat3 m; + glm::quat q= {1, 0, 0, 0}; + zeno::log_info("glm vec3: {}", v); + zeno::log_info("glm mat3: {}", m); + zeno::log_info("glm quat: {}", q); +#endif + #if 0 using namespace zs; zs::initialize_openvdb(); @@ -839,7 +855,7 @@ struct TestAdaptiveGrid : INode { auto cc = c.cast() / 3; auto vv = zsagv.iSample(0, cc); openvdb::FloatGrid::ValueType vref = sampler.isSample(openvdb::Vec3R(cc[0], cc[1], cc[2])); - if (zs::abs(vref - vv) >= limits::epsilon()) { + if (zs::abs(vref - vv) >= detail::deduce_numeric_epsilon()) { fmt::print(fg(fmt::color::green), "sampled value is {} ({}) at {}, {}, {}\n", v, vv, vref, cc[0], cc[1], cc[2]); } diff --git a/projects/CUDA/test1.cu b/projects/CUDA/test1.cu index 02b89b47f8..f459b540c6 100644 --- a/projects/CUDA/test1.cu +++ b/projects/CUDA/test1.cu @@ -76,7 +76,7 @@ struct ZSCUMathTest : INode { constexpr int n = 100; using TV = zs::vec; //TV m_X[4] = {TV{0, 0, 0}, TV{0, 1, 0}, TV{0, 0, -1}, TV{0, 0, 1}}; - TV m_X[4] = {TV{0, 0, 0}, TV{0, 1, 0}, TV{0, 0, -1}, TV{-limits::epsilon() * 5, 1, -1}}; + TV m_X[4] = {TV{0, 0, 0}, TV{0, 1, 0}, TV{0, 0, -1}, TV{-detail::deduce_numeric_epsilon() * 5, 1, -1}}; auto ra = zs::dihedral_angle(m_X[2], m_X[0], m_X[1], m_X[3], exec_seq); auto grad = zs::dihedral_angle_gradient(m_X[2], m_X[0], m_X[1], m_X[3], exec_seq); auto hess = zs::dihedral_angle_hessian(m_X[2], m_X[0], m_X[1], m_X[3], exec_seq); diff --git a/projects/CUDA/utils/Groom.cpp b/projects/CUDA/utils/Groom.cpp index 16f484e6b9..f57caca64c 100644 --- a/projects/CUDA/utils/Groom.cpp +++ b/projects/CUDA/utils/Groom.cpp @@ -625,7 +625,7 @@ struct GenerateHairs : INode { auto [id, _] = lbvhv.find_nearest( pi, [&](int j, float &dist, int &id) { - float d = zs::limits::max(); + float d = zs::detail::deduce_numeric_max(); d = zs::dist_pp(pi, vec3::from_array(verts[loops[polys[j][0]]])); if (d < dist) { diff --git 
a/projects/CUDA/utils/Primitives.cpp b/projects/CUDA/utils/Primitives.cpp index 57068512e9..879270a4cf 100644 --- a/projects/CUDA/utils/Primitives.cpp +++ b/projects/CUDA/utils/Primitives.cpp @@ -611,7 +611,7 @@ struct PrimitiveReorder : INode { /// @note bv constexpr auto defaultBv = - bv_t{zsvec3::constant(zs::limits::max()), zsvec3::constant(zs::limits::lowest())}; + bv_t{zsvec3::constant(zs::detail::deduce_numeric_max()), zsvec3::constant(zs::detail::deduce_numeric_lowest())}; bv_t gbv; if (orderVerts || orderTris) { @@ -626,21 +626,21 @@ struct PrimitiveReorder : INode { y = xn[1]; z = xn[2]; }); - zs::reduce(pol, std::begin(X), std::end(X), std::begin(res), zs::limits::max(), getmin{}); - zs::reduce(pol, std::begin(X), std::end(X), std::begin(res) + 3, zs::limits::lowest(), + zs::reduce(pol, std::begin(X), std::end(X), std::begin(res), zs::detail::deduce_numeric_max(), getmin{}); + zs::reduce(pol, std::begin(X), std::end(X), std::begin(res) + 3, zs::detail::deduce_numeric_lowest(), getmax{}); - zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 1, zs::limits::max(), + zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 1, zs::detail::deduce_numeric_max(), getmin{}); - zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 4, zs::limits::lowest(), + zs::reduce(pol, std::begin(Y), std::end(Y), std::begin(res) + 4, zs::detail::deduce_numeric_lowest(), getmax{}); - zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 2, zs::limits::max(), + zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 2, zs::detail::deduce_numeric_max(), getmin{}); - zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 5, zs::limits::lowest(), + zs::reduce(pol, std::begin(Z), std::end(Z), std::begin(res) + 5, zs::detail::deduce_numeric_lowest(), getmax{}); gbv = bv_t{zsvec3{res[0], res[1], res[2]}, zsvec3{res[3], res[4], res[5]}}; } - gbv._min -= limits::epsilon() * 16; - gbv._max += limits::epsilon() * 16; + gbv._min -= 
detail::deduce_numeric_epsilon() * 16; + gbv._max += detail::deduce_numeric_epsilon() * 16; /// @note reorder struct Mapping { @@ -2820,9 +2820,9 @@ struct ComputeAverageEdgeLength : INode { fmt::print("sum edge lengths: {}, num edges: {}\n", sum[0], els.size()); #endif zs::reduce(pol, std::begin(els), std::end(els), std::begin(sum), 0); - zs::reduce(pol, std::begin(els), std::end(els), std::begin(minEl), zs::limits::max(), + zs::reduce(pol, std::begin(els), std::end(els), std::begin(minEl), zs::detail::deduce_numeric_max(), zs::getmin{}); - zs::reduce(pol, std::begin(els), std::end(els), std::begin(maxEl), zs::limits::min(), + zs::reduce(pol, std::begin(els), std::end(els), std::begin(maxEl), zs::detail::deduce_numeric_min(), zs::getmax{}); set_output("prim", prim); @@ -2988,7 +2988,7 @@ struct ParticleCluster : zeno::INode { }); /// @brief uv - if (pars->has_attr("uv") && uvDist > zs::limits::epsilon() * 10) { + if (pars->has_attr("uv") && uvDist > zs::detail::deduce_numeric_epsilon() * 10) { const auto &uv = pars->attr("uv"); pol(range(pos.size()), [&neighbors, &uv, uvDist2 = uvDist * uvDist](int vi) mutable { int n = neighbors[vi].size(); @@ -3071,7 +3071,7 @@ struct ParticleSegmentation : zeno::INode { float uvDist2 = zs::sqr(get_input2("uv_dist")); const vec2f *uvPtr = nullptr; - if (pars->has_attr("uv") && std::sqrt(uvDist2) > zs::limits::epsilon() * 10) + if (pars->has_attr("uv") && std::sqrt(uvDist2) > zs::detail::deduce_numeric_epsilon() * 10) uvPtr = pars->attr("uv").data(); using namespace zs; @@ -3528,7 +3528,7 @@ struct PrimitiveColoring : INode { pol(range(spmat.outerSize()), [&colors, spmat = proxy(spmat), correct = proxy(correct)](int i) mutable { auto color = colors[i]; - if (color == limits::max()) { + if (color == detail::deduce_numeric_max()) { correct[0] = 0; printf("node [%d]: %f. 
not colored!\n", i, (float)color); return; @@ -3560,7 +3560,7 @@ struct PrimitiveColoring : INode { pol(range(spmat.outerSize()), [&colors, spmat = proxy(spmat), correct = proxy(correct)](int i) mutable { auto color = colors[i]; - if (color == limits::max()) { + if (color == detail::deduce_numeric_max()) { correct[0] = 0; printf("node [%d]: %f. not colored!\n", i, (float)color); return; @@ -3591,11 +3591,11 @@ struct PrimitiveColoring : INode { bool done = true; pol(zip(weights, minWeights, maskOut, colors), [&](u32 &w, u32 &mw, int &mask, float &color) { //if (w < mw && mask == 0) - if (w < mw && mw != limits::max()) { + if (w < mw && mw != detail::deduce_numeric_max()) { done = false; mask = 1; color = iter; - w = limits::max(); + w = detail::deduce_numeric_max(); } }); return done; @@ -3786,16 +3786,16 @@ struct QueryClosestPrimitive : zeno::INode { std::vector ids(prim->size(), -1); pol(zs::range(prim->size()), [&, lbvh = zs::proxy(lbvh), et = zsbvh->et](int i) { using vec3 = zs::vec; - kvs[i].dist = zs::limits::max(); + kvs[i].dist = zs::detail::deduce_numeric_max(); kvs[i].pid = i; auto pi = vec3::from_array(prim->verts[i]); - float radius = zs::limits::max(); + float radius = zs::detail::deduce_numeric_max(); if (prim->has_attr(radiusTag)) radius = prim->attr(radiusTag)[i]; lbvh.find_nearest( pi, [&ids, &kvs, &pi, &targetPrim, i, et](int j, float &dist, int &idx) { - float d = zs::limits::max(); + float d = zs::detail::deduce_numeric_max(); if (et == ZenoLinearBvh::point) { d = zs::dist_pp(pi, vec3::from_array(targetPrim->verts[j])); } else if (et == ZenoLinearBvh::curve) { @@ -3865,7 +3865,7 @@ struct QueryClosestPrimitive : zeno::INode { auto lbvhv = zs::proxy(lbvh); lbvhv.find_nearest(pi, [&, et = zsbvh->et](int j, float &dist_, int &idx) { using vec3 = zs::vec; - float d = zs::limits::max(); + float d = zs::detail::deduce_numeric_max(); if (et == ZenoLinearBvh::point) { d = zs::dist_pp(pi, vec3::from_array(targetPrim->verts[j])); } else if (et == 
ZenoLinearBvh::curve) { @@ -4099,9 +4099,9 @@ static zeno::vec3f compute_dimensions(const PrimitiveObject &primA, const Primit pol(zs::enumerate(posB), [&, offset = posA.size()](int col, const auto &p) { locs[col + offset] = p[d]; }); std::vector ret(2); - zs::reduce(pol, std::begin(locs), std::end(locs), std::begin(ret), zs::limits::max(), + zs::reduce(pol, std::begin(locs), std::end(locs), std::begin(ret), zs::detail::deduce_numeric_max(), zs::getmin()); - zs::reduce(pol, std::begin(locs), std::end(locs), std::begin(ret) + 1, zs::limits::lowest(), + zs::reduce(pol, std::begin(locs), std::end(locs), std::begin(ret) + 1, zs::detail::deduce_numeric_lowest(), zs::getmax()); dims[d] = ret[1] - ret[0]; }; diff --git a/projects/CUDA/utils/Primitives.cu b/projects/CUDA/utils/Primitives.cu index 30a5d0d8d7..8dd15b78c7 100644 --- a/projects/CUDA/utils/Primitives.cu +++ b/projects/CUDA/utils/Primitives.cu @@ -199,9 +199,9 @@ struct ZSPrimitiveReduction : zeno::INode { if (opStr == "avg") { result = prim_reduce(verts, 0, pass_on{}, std::plus{}, attrToReduce) / verts.size(); } else if (opStr == "max") { - result = prim_reduce(verts, limits::lowest(), pass_on{}, getmax{}, attrToReduce); + result = prim_reduce(verts, detail::deduce_numeric_lowest(), pass_on{}, getmax{}, attrToReduce); } else if (opStr == "min") { - result = prim_reduce(verts, limits::max(), pass_on{}, getmin{}, attrToReduce); + result = prim_reduce(verts, detail::deduce_numeric_max(), pass_on{}, getmin{}, attrToReduce); } else if (opStr == "absmax") { result = prim_reduce(verts, 0, getabs{}, getmax{}, attrToReduce); } diff --git a/projects/CUDA/utils/TopoUtils.cu b/projects/CUDA/utils/TopoUtils.cu index c70ce06c5c..d2a66d3eb6 100644 --- a/projects/CUDA/utils/TopoUtils.cu +++ b/projects/CUDA/utils/TopoUtils.cu @@ -152,7 +152,7 @@ void update_surface_cell_normals(zs::CudaExecutionPolicy &pol, ZenoParticles::pa using vec3 = RM_CVREF_T(t0); using T = typename vec3::value_type; auto nrm = (t1 - t0).cross(t2 - t0); - 
if (auto len = nrm.l2NormSqr(); len > limits::epsilon() * 10) + if (auto len = nrm.l2NormSqr(); len > detail::deduce_numeric_epsilon() * 10) nrm /= zs::sqrt(len); else nrm = vec3::zeros(); @@ -179,7 +179,7 @@ void update_surface_cell_normals(zs::CudaExecutionPolicy &pol, ZenoParticles::pa using vec3 = RM_CVREF_T(e0); using T = typename vec3::value_type; auto nrm = ne.cross(e10); - if (auto len = nrm.l2NormSqr(); len > limits::epsilon() * 10) + if (auto len = nrm.l2NormSqr(); len > detail::deduce_numeric_epsilon() * 10) nrm /= zs::sqrt(len); else nrm = vec3::zeros(); diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index 764099cd57..b7ac10424c 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit 764099cd5778d08e0d6bd375c79ec4702e7dcd68 +Subproject commit b7ac10424c58720992ea9795db328bfe68429c07 diff --git a/projects/CuEulerian/hybrid/P2G.cu b/projects/CuEulerian/hybrid/P2G.cu index d706071b0a..8e69a94d73 100644 --- a/projects/CuEulerian/hybrid/P2G.cu +++ b/projects/CuEulerian/hybrid/P2G.cu @@ -342,7 +342,7 @@ struct ZSPrimitiveToSparseGrid : INode { nchns] __device__(std::size_t cellno) mutable { for (int d = 0; d < nchns; ++d) { auto wd = spg(wOffset + d, cellno); - if (wd > limits::epsilon() * 10) { + if (wd > detail::deduce_numeric_epsilon() * 10) { spg(tagDstOffset + d, cellno) /= wd; } } @@ -353,7 +353,7 @@ struct ZSPrimitiveToSparseGrid : INode { wOffset = spg._grid.getPropertyOffset("weight"), nchns] __device__(std::size_t cellno) mutable { auto w = spg(wOffset, cellno); - if (w > limits::epsilon() * 10) { + if (w > detail::deduce_numeric_epsilon() * 10) { for (int d = 0; d < nchns; ++d) spg(tagDstOffset + d, cellno) /= w; } diff --git a/projects/CuEulerian/levelset/Extrapolation.cu b/projects/CuEulerian/levelset/Extrapolation.cu index 10529bf4d2..b2022fcd86 100644 --- a/projects/CuEulerian/levelset/Extrapolation.cu +++ b/projects/CuEulerian/levelset/Extrapolation.cu @@ -215,7 +215,7 @@ struct ZSGridExtrapolateAttr : INode { 
normal[1] = (sdf_y[1] - sdf_y[0]) / (2.f * dx); normal[2] = (sdf_z[1] - sdf_z[0]) / (2.f * dx); - normal /= zs::max(normal.length(), zs::limits::epsilon() * 10); + normal /= zs::max(normal.length(), zs::detail::deduce_numeric_epsilon() * 10); spgv._grid.tuple(zs::dim_c<3>, "adv", blockno * spgv.block_size + cellno) = normal; } @@ -404,8 +404,8 @@ struct ZSGridExtrapolateAttr : INode { } } - if ((extDir == "negative" && sdf < -zs::limits::epsilon() * 10) || - (extDir == "positive" && sdf > zs::limits::epsilon() * 10) || extDir == "both") { + if ((extDir == "negative" && sdf < -zs::detail::deduce_numeric_epsilon() * 10) || + (extDir == "positive" && sdf > zs::detail::deduce_numeric_epsilon() * 10) || extDir == "both") { zs::vec normal = spgv._grid.pack(zs::dim_c<3>, "adv", blockno * spgv.block_size + cellno); @@ -422,7 +422,7 @@ struct ZSGridExtrapolateAttr : INode { spgv._grid.pack(zs::dim_c<3>, "adv", blockno * spgv.block_size + n_cellno); } - normal /= zs::max(normal.length(), zs::limits::epsilon() * 10); + normal /= zs::max(normal.length(), zs::detail::deduce_numeric_epsilon() * 10); } auto sign = [](float val) { return val > 0 ? 
1 : -1; }; diff --git a/projects/CuEulerian/levelset/Grid_creator_adaptive.cu b/projects/CuEulerian/levelset/Grid_creator_adaptive.cu index 03020ac855..064ffe545c 100644 --- a/projects/CuEulerian/levelset/Grid_creator_adaptive.cu +++ b/projects/CuEulerian/levelset/Grid_creator_adaptive.cu @@ -297,7 +297,7 @@ struct ValidateAdaptiveGrid : INode { // zs::vec zsp{p[0], p[1], p[2]}; auto v = zsagv.wSample(zsacc, 0, zsp); - if (zs::abs(vref - v) >= limits::epsilon() && + if (zs::abs(vref - v) >= detail::deduce_numeric_epsilon() && vref != sdf->background()) fmt::print("\tref: {}, actual: {}\n", vref, v); else diff --git a/projects/CuEulerian/navierstokes/NS_linearsolver.cu b/projects/CuEulerian/navierstokes/NS_linearsolver.cu index f842bf2c57..8f42fc474c 100644 --- a/projects/CuEulerian/navierstokes/NS_linearsolver.cu +++ b/projects/CuEulerian/navierstokes/NS_linearsolver.cu @@ -198,7 +198,7 @@ struct ZSNSPressureProject : INode { auto stclSum = tile.shfl(stclVal, 1); auto cutSum = tile.shfl(cutVal, 1); - cutSum = zs::max(cutSum, zs::limits::epsilon() * 10); + cutSum = zs::max(cutSum, zs::detail::deduce_numeric_epsilon() * 10); if (lane_id == 0) { spgv(pOffset, blockno, 0) = stclVal + sor * (stclSum * dx - div * rho) / (cutSum * dx); } @@ -528,7 +528,7 @@ struct ZSNSPressureProject : INode { cut_z[1] = cut2shmem[idx + 1]; float cut_sum = cut_x[0] + cut_x[1] + cut_y[0] + cut_y[1] + cut_z[0] + cut_z[1]; - cut_sum = zs::max(cut_sum, zs::limits::epsilon() * 10); + cut_sum = zs::max(cut_sum, zs::detail::deduce_numeric_epsilon() * 10); p_self = (1.f - sor) * p_self + sor * @@ -1582,7 +1582,7 @@ void ZSNSPressureProject::coloredSOR<0>(zs::CudaExecutionPolicy &pol, ZenoSparse cut_z[1] = spgv.value(cutOffset + 2, icoord + vec3i(0, 0, 1), 1.f); float cut_sum = cut_x[0] + cut_x[1] + cut_y[0] + cut_y[1] + cut_z[0] + cut_z[1]; - cut_sum = zs::max(cut_sum, zs::limits::epsilon() * 10); + cut_sum = zs::max(cut_sum, zs::detail::deduce_numeric_epsilon() * 10); p_self = (1.f - sor) * 
p_self + sor * diff --git a/projects/CuEulerian/navierstokes/NS_topo.cu b/projects/CuEulerian/navierstokes/NS_topo.cu index 524f6dd9ca..03144ba48d 100644 --- a/projects/CuEulerian/navierstokes/NS_topo.cu +++ b/projects/CuEulerian/navierstokes/NS_topo.cu @@ -353,7 +353,7 @@ struct ZSMaintainSparseGrid : INode { else if (opt == 1) maintain( zsSPG.get(), src_tag(zsSPG, tag), - [] __device__(float v) -> bool { return v > zs::limits::epsilon() * 10; }, nlayers); + [] __device__(float v) -> bool { return v > zs::detail::deduce_numeric_epsilon() * 10; }, nlayers); else if (opt == 2) maintain( zsSPG.get(), src_tag(zsSPG, tag), diff --git a/projects/CuEulerian/swe/Erode.cu b/projects/CuEulerian/swe/Erode.cu index 55dd537ede..4fe6a2335b 100644 --- a/projects/CuEulerian/swe/Erode.cu +++ b/projects/CuEulerian/swe/Erode.cu @@ -29,7 +29,7 @@ namespace zs { } __forceinline__ __device__ zs::vec normalizeSafe(const zs::vec &a, - float b = zs::limits::epsilon()) { + float b = zs::detail::deduce_numeric_epsilon()) { return a * (1 / zs::max(b, a.length())); } @@ -1694,7 +1694,7 @@ namespace zeno { __forceinline__ __device__ float fit(const float data, const float ss, const float se, const float ds, const float de) { - float b = zs::limits::epsilon(); + float b = zs::detail::deduce_numeric_epsilon(); b = zs::max(zs::abs(se - ss), b); b = se - ss >= 0 ? 
b : -b; float alpha = (data - ss) / b; diff --git a/projects/CuEulerian/swe/SWE_dense.cu b/projects/CuEulerian/swe/SWE_dense.cu index 5e0f439cec..9bef4fe761 100644 --- a/projects/CuEulerian/swe/SWE_dense.cu +++ b/projects/CuEulerian/swe/SWE_dense.cu @@ -421,7 +421,7 @@ struct ZSSolveShallowWaterMomentum : INode { adv_term += w_adv * scheme::HJ_WENO3(u_old[idx(i, j - upwind)], u_old[idx(i, j)], u_old[idx(i, j + upwind)], u_old[idx(i, j + 2 * upwind)], w_adv, dx); h_f = 0.5f * (h[idx(i, j)] + h[idx(i - 1, j)]); - if (zs::abs(h_f) > zs::limits::epsilon() * 10) + if (zs::abs(h_f) > zs::detail::deduce_numeric_epsilon() * 10) grad_term = gravity * ((h[idx(i, j)] - h[idx(i - 1, j)]) / dx + (B[idx(i, j)] - B[idx(i - 1, j)]) / dx); else grad_term = 0; @@ -440,7 +440,7 @@ struct ZSSolveShallowWaterMomentum : INode { adv_term += w_adv * scheme::HJ_WENO3(w_old[idx(i, j - upwind)], w_old[idx(i, j)], w_old[idx(i, j + upwind)], w_old[idx(i, j + 2 * upwind)], w_adv, dx); h_f = 0.5f * (h[idx(i, j)] + h[idx(i, j - 1)]); - if (zs::abs(h_f) > zs::limits::epsilon() * 10) + if (zs::abs(h_f) > zs::detail::deduce_numeric_epsilon() * 10) grad_term = gravity * ((h[idx(i, j)] - h[idx(i, j - 1)]) / dx + (B[idx(i, j)] - B[idx(i, j - 1)]) / dx); else grad_term = 0; diff --git a/projects/CuEulerian/volume/Transfer.cu b/projects/CuEulerian/volume/Transfer.cu index fb60e15696..e2d641758f 100644 --- a/projects/CuEulerian/volume/Transfer.cu +++ b/projects/CuEulerian/volume/Transfer.cu @@ -131,7 +131,7 @@ struct PrimitiveToZSLevelSet : INode { auto block = ls._grid.block(ls._table.query(c - cellcoord)); auto cellno = lsv_t::grid_view_t::coord_to_cellid(cellcoord); auto nodePos = ls.indexToWorld(c); - constexpr float eps = limits::epsilon(); + constexpr float eps = detail::deduce_numeric_epsilon(); auto dis = zs::sqrt((x - nodePos).l2NormSqr() + eps); atomic_min(exec_cuda, &block("sdf", cellno), dis); @@ -205,8 +205,8 @@ struct PrimitiveToZSLevelSet : INode { ls._table.reset(true); Vector mi{1, 
memsrc_e::device}, ma{1, memsrc_e::device}; - mi.setVal(IV::uniform(limits::max())); - ma.setVal(IV::uniform(limits::lowest())); + mi.setVal(IV::uniform(detail::deduce_numeric_max())); + ma.setVal(IV::uniform(detail::deduce_numeric_lowest())); iterate(cudaPol, nvoxels, xs, ls, mi, ma); if (numEles) iterate(cudaPol, nvoxels, elePos, ls, mi, ma); diff --git a/projects/FBX/DualQuaternion.cpp b/projects/FBX/DualQuaternion.cpp index 8f7c731f75..68568bc7f3 100644 --- a/projects/FBX/DualQuaternion.cpp +++ b/projects/FBX/DualQuaternion.cpp @@ -80,7 +80,7 @@ constexpr glm::quat dual_quat(const glm::quat& q,const glm::vec3& t) { auto ty = t[1]; auto tz = t[2]; - glm::quat qd; + glm::quat qd{}; qd.w = -0.5*( tx*qx + ty*qy + tz*qz); // qd.w qd.x = 0.5*( tx*qw + ty*qz - tz*qy); // qd.x qd.y = 0.5*(-tx*qz + ty*qw + tz*qx); // qd.y diff --git a/projects/PyZpc/interop/Vector_nodes.cu b/projects/PyZpc/interop/Vector_nodes.cu index 194924a039..e34b84a93f 100644 --- a/projects/PyZpc/interop/Vector_nodes.cu +++ b/projects/PyZpc/interop/Vector_nodes.cu @@ -75,11 +75,11 @@ struct ReduceZsVector : INode { zs::plus{}); else if (opStr == "max") zs::reduce(pol, std::begin(vector), std::end(vector), - std::begin(res), zs::limits::min(), + std::begin(res), zs::detail::deduce_numeric_min(), zs::getmax{}); else zs::reduce(pol, std::begin(vector), std::end(vector), - std::begin(res), zs::limits::max(), + std::begin(res), zs::detail::deduce_numeric_max(), zs::getmin{}); result = static_cast(res.getVal()); }, From 2365334d6c93acdada88db7c82e3fe90e91f26d6 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 21 Aug 2024 14:44:46 +0800 Subject: [PATCH 150/244] apply root transform --- projects/FBX/FBXSDK.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 013118ca88..c0ecd76369 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -343,6 +343,7 @@ struct ReadFBXFile: INode { // Import the contents of the file 
into the scene. lImporter->Import(fbx_object->lScene); + FbxRootNodeUtility::RemoveAllFbxRoots(fbx_object->lScene); // The file is imported; so get rid of the importer. lImporter->Destroy(); From 40daa9cad8ab8aece511c071af57ccb3e0721667 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 22 Aug 2024 14:47:45 +0800 Subject: [PATCH 151/244] temp --- projects/FBX/FBXSDK.cpp | 132 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index c0ecd76369..737c8106eb 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -10,6 +10,7 @@ #include #include "zeno/utils/log.h" +#include "zeno/utils/bit_operations.h" #include #include "zeno/types/PrimitiveObject.h" #include "zeno/utils/scope_exit.h" @@ -596,6 +597,85 @@ std::shared_ptr GetMesh(FbxNode* pNode) { return prim; } +std::shared_ptr GetSkeleton(FbxNode* pNode) { + FbxMesh* pMesh = pNode->GetMesh(); + if (!pMesh) return nullptr; + std::vector bone_names; + std::vector poss; + std::vector transform_r0; + std::vector transform_r1; + std::vector transform_r2; + std::map parent_mapping; + if (pMesh->GetDeformerCount(FbxDeformer::eSkin)) { + FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); + // Iterate over each cluster (bone) + for (int j = 0; j < pSkin->GetClusterCount(); ++j) { + FbxCluster* pCluster = pSkin->GetCluster(j); + + FbxNode* pBoneNode = pCluster->GetLink(); + if (!pBoneNode) continue; + FbxAMatrix transformLinkMatrix; + pCluster->GetTransformLinkMatrix(transformLinkMatrix); + + // The transformation of the mesh at binding time + FbxAMatrix transformMatrix; + pCluster->GetTransformMatrix(transformMatrix); + + // Inverse bind matrix. 
+ FbxAMatrix bindMatrix_ = transformMatrix.Inverse() * transformLinkMatrix; + auto bindMatrix = bit_cast(bindMatrix_); + auto t = bindMatrix.GetRow(3); + poss.emplace_back(t[0], t[1], t[2]); + + auto r0 = bindMatrix.GetRow(0); + auto r1 = bindMatrix.GetRow(1); + auto r2 = bindMatrix.GetRow(2); + transform_r0.emplace_back(r0[0], r0[1], r0[2]); + transform_r1.emplace_back(r1[0], r1[1], r1[2]); + transform_r2.emplace_back(r2[0], r2[1], r2[2]); + std::string boneName = pBoneNode->GetName(); + bone_names.emplace_back(boneName); + auto pParentNode = pBoneNode->GetParent(); + if (pParentNode) { + std::string parentName = pParentNode->GetName(); + parent_mapping[boneName] = parentName; + } + } + } + std::string nodeName = pNode->GetName(); + auto prim = std::make_shared(); + prim->userData().set2("RootName", nodeName); + prim->verts.resize(bone_names.size()); + prim->verts.values = poss; + prim->verts.add_attr("transform_r0") = transform_r0; + prim->verts.add_attr("transform_r1") = transform_r1; + prim->verts.add_attr("transform_r2") = transform_r2; + std::vector bone_connects; + for (auto bone_name: bone_names) { + if (parent_mapping.count(bone_name)) { + auto parent_name = parent_mapping[bone_name]; + if (std::count(bone_names.begin(), bone_names.end(), parent_name)) { + auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); + auto parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); + bone_connects.push_back(parent_index); + bone_connects.push_back(self_index); + } + } + } + prim->loops.values = bone_connects; + prim->polys.resize(bone_connects.size() / 2); + for (auto j = 0; j < bone_connects.size() / 2; j++) { + prim->polys[j] = {j * 2, 2}; + } + auto &boneNames = prim->verts.add_attr("boneName"); + std::iota(boneNames.begin(), boneNames.end(), 0); + prim->userData().set2("boneName_count", int(bone_names.size())); + for (auto i = 0; i < bone_names.size(); i++) { + 
prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); + } + return prim; +} + void TraverseNodesToGetNames(FbxNode* pNode, std::vector &names) { if (!pNode) return; @@ -629,6 +709,25 @@ void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared TraverseNodesToGetPrim(pNode->GetChild(i), target_name, prim); } } +void TraverseNodesToGetSkeleton(FbxNode* pNode, std::string target_name, std::shared_ptr &prim) { + if (!pNode) return; + + FbxMesh* mesh = pNode->GetMesh(); + if (mesh) { + auto name = pNode->GetName(); + if (target_name == name) { + auto sub_prim = GetSkeleton(pNode); + if (sub_prim) { + prim = sub_prim; + } + return; + } + } + + for (int i = 0; i < pNode->GetChildCount(); i++) { + TraverseNodesToGetSkeleton(pNode->GetChild(i), target_name, prim); + } +} void TraverseNodesToGetPrims(FbxNode* pNode, std::vector> &prims) { if (!pNode) return; @@ -885,6 +984,39 @@ ZENDEFNODE(NewFBXImportSkeleton, { {"primitive"}, }); +struct NewFBXImportSkeleton2 : INode { + virtual void apply() override { + auto fbx_object = get_input2("fbx_object"); + auto lSdkManager = fbx_object->lSdkManager; + auto lScene = fbx_object->lScene; + + auto prim = std::make_shared(); + + FbxNode* lRootNode = lScene->GetRootNode(); + std::vector availableRootNames; + TraverseNodesToGetNames(lRootNode, availableRootNames); + for (auto name: availableRootNames) { + zeno::log_info("fuck: {}", name); + } + TraverseNodesToGetSkeleton(lRootNode, get_input2("name"), prim); + + set_output("prim", prim); + } +}; + +ZENDEFNODE(NewFBXImportSkeleton2, { + { + "fbx_object", + {"bool", "ConvertUnits", "0"}, + {"string", "name", "0"}, + }, + { + "prim", + }, + {}, + {"deprecated"}, +}); + struct NewFBXImportAnimation : INode { virtual void apply() override { int frameid; From 29c67fa2df8e67de829af4efda88dc1cca3bd5ef Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 22 Aug 2024 14:48:56 +0800 Subject: [PATCH 152/244] isShadowRay --- zeno/src/nodes/mtl/ShaderAttrs.cpp | 4 
++-- zenovis/xinxinoptix/CallableDefault.cu | 2 +- zenovis/xinxinoptix/DeflMatShader.cu | 2 ++ zenovis/xinxinoptix/IOMat.h | 3 +++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/zeno/src/nodes/mtl/ShaderAttrs.cpp b/zeno/src/nodes/mtl/ShaderAttrs.cpp index 8b9caefdf3..e7698f805f 100644 --- a/zeno/src/nodes/mtl/ShaderAttrs.cpp +++ b/zeno/src/nodes/mtl/ShaderAttrs.cpp @@ -33,8 +33,8 @@ struct ShaderInputAttr : ShaderNodeClone { ZENDEFNODE(ShaderInputAttr, { { - {"enum pos clr nrm uv tang bitang NoL LoV N T L V H reflectance fresnel instPos instNrm instUv instClr instTang prd.rndf() attrs.localPosLazy() attrs.uniformPosLazy() rayLength worldNrm worldTan worldBTn camFront camUp camRight", "attr", "pos"}, - {"enum float vec2 vec3 vec4", "type", "vec3"}, + {"enum pos clr nrm uv tang bitang NoL LoV N T L V H reflectance fresnel instPos instNrm instUv instClr instTang prd.rndf() attrs.localPosLazy() attrs.uniformPosLazy() rayLength isShadowRay worldNrm worldTan worldBTn camFront camUp camRight", "attr", "pos"}, + {"enum float vec2 vec3 vec4 bool", "type", "vec3"}, }, { {"shader", "out"}, diff --git a/zenovis/xinxinoptix/CallableDefault.cu b/zenovis/xinxinoptix/CallableDefault.cu index 088c05873b..7b6fc8c4f4 100644 --- a/zenovis/xinxinoptix/CallableDefault.cu +++ b/zenovis/xinxinoptix/CallableDefault.cu @@ -24,7 +24,7 @@ extern "C" __device__ MatOutput __direct_callable__evalmat(cudaTextureObject_t z auto att_instTang = attrs.instTang; auto att_rayLength = attrs.rayLength; - + auto att_isShadowRay = attrs.isShadowRay ? 
1.0f:0.0f; vec3 b = normalize(cross(attrs.T, attrs.N)); vec3 t = normalize(cross(attrs.N, b)); diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 4750b192ca..14da41c4c9 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -157,6 +157,7 @@ extern "C" __global__ void __anyhit__shadow_cutout() #endif attrs.pos = attrs.pos + vec3(params.cam.eye); + attrs.isShadowRay = true; //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); MatOutput mats = optixDirectCall( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs ); @@ -438,6 +439,7 @@ extern "C" __global__ void __closesthit__radiance() attrs.T = attrs.tang; } attrs.V = -(ray_dir); + attrs.isShadowRay = false; //MatOutput mats = evalMaterial(rt_data->textures, rt_data->uniforms, attrs); MatOutput mats = optixDirectCall( rt_data->dc_index, rt_data->textures, rt_data->uniforms, attrs ); prd->mask_value = mats.mask_value; diff --git a/zenovis/xinxinoptix/IOMat.h b/zenovis/xinxinoptix/IOMat.h index 89b427c415..116a5d9396 100644 --- a/zenovis/xinxinoptix/IOMat.h +++ b/zenovis/xinxinoptix/IOMat.h @@ -66,7 +66,10 @@ struct MatInput { vec3 instTang; float NoL; float LoV; + float rayLength; + bool isShadowRay; + vec3 reflectance; vec3 N; vec3 T; From edbc21d6c9c51067a326d1d4006e0f0d9169c6e4 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 23 Aug 2024 15:36:11 +0800 Subject: [PATCH 153/244] improve skeleton import --- projects/FBX/FBXSDK.cpp | 322 +++++++++++++++++++++++----------------- 1 file changed, 188 insertions(+), 134 deletions(-) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index 737c8106eb..dbc62d6e32 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -465,7 +465,7 @@ void getAttr(T* arr, std::string name, std::shared_ptr prim) { } } -std::shared_ptr GetMesh(FbxNode* pNode) { +static std::shared_ptr GetMesh(FbxNode* pNode) { FbxMesh* pMesh = 
pNode->GetMesh(); if (!pMesh) return nullptr; std::string nodeName = pNode->GetName(); @@ -597,7 +597,7 @@ std::shared_ptr GetMesh(FbxNode* pNode) { return prim; } -std::shared_ptr GetSkeleton(FbxNode* pNode) { +static std::shared_ptr GetSkeleton(FbxNode* pNode) { FbxMesh* pMesh = pNode->GetMesh(); if (!pMesh) return nullptr; std::vector bone_names; @@ -676,7 +676,7 @@ std::shared_ptr GetSkeleton(FbxNode* pNode) { return prim; } -void TraverseNodesToGetNames(FbxNode* pNode, std::vector &names) { +static void TraverseNodesToGetNames(FbxNode* pNode, std::vector &names) { if (!pNode) return; FbxMesh* mesh = pNode->GetMesh(); @@ -690,7 +690,7 @@ void TraverseNodesToGetNames(FbxNode* pNode, std::vector &names) { } } -void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared_ptr &prim) { +static void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared_ptr &prim) { if (!pNode) return; FbxMesh* mesh = pNode->GetMesh(); @@ -709,26 +709,7 @@ void TraverseNodesToGetPrim(FbxNode* pNode, std::string target_name, std::shared TraverseNodesToGetPrim(pNode->GetChild(i), target_name, prim); } } -void TraverseNodesToGetSkeleton(FbxNode* pNode, std::string target_name, std::shared_ptr &prim) { - if (!pNode) return; - - FbxMesh* mesh = pNode->GetMesh(); - if (mesh) { - auto name = pNode->GetName(); - if (target_name == name) { - auto sub_prim = GetSkeleton(pNode); - if (sub_prim) { - prim = sub_prim; - } - return; - } - } - - for (int i = 0; i < pNode->GetChildCount(); i++) { - TraverseNodesToGetSkeleton(pNode->GetChild(i), target_name, prim); - } -} -void TraverseNodesToGetPrims(FbxNode* pNode, std::vector> &prims) { +static void TraverseNodesToGetPrims(FbxNode* pNode, std::vector> &prims) { if (!pNode) return; FbxMesh* mesh = pNode->GetMesh(); @@ -858,101 +839,206 @@ ZENDEFNODE(NewFBXImportSkin, { {"primitive"}, }); -struct NewFBXImportSkeleton : INode { - virtual void apply() override { - auto fbx_object = get_input2("fbx_object"); - 
auto lSdkManager = fbx_object->lSdkManager; - auto lScene = fbx_object->lScene; - - // Print the nodes of the scene and their attributes recursively. - // Note that we are not printing the root node because it should - // not contain any attributes. - auto prim = std::make_shared(); +static int GetSkeletonFromBindPose(FbxManager* lSdkManager, FbxScene* lScene, std::shared_ptr& prim) { + auto pose_count = lScene->GetPoseCount(); + bool found_bind_pose = false; + for (auto i = 0; i < pose_count; i++) { + auto pose = lScene->GetPose(i); + if (pose == nullptr || !pose->IsBindPose()) { + continue; + } + found_bind_pose = true; + } + if (found_bind_pose == false) { + lSdkManager->CreateMissingBindPoses(lScene); + } + pose_count = lScene->GetPoseCount(); - auto pose_count = lScene->GetPoseCount(); - bool found_bind_pose = false; - for (auto i = 0; i < pose_count; i++) { - auto pose = lScene->GetPose(i); - if (pose == nullptr || !pose->IsBindPose()) { + std::vector bone_names; + std::map parent_mapping; + std::vector poss; + std::vector transform_r0; + std::vector transform_r1; + std::vector transform_r2; + for (auto i = 0; i < pose_count; i++) { + auto pose = lScene->GetPose(i); + if (pose == nullptr || !pose->IsBindPose()) { + continue; + } + for (int j = 1; j < pose->GetCount(); ++j) { + std::string bone_name = pose->GetNode(j)->GetName(); + if (std::count(bone_names.begin(), bone_names.end(), bone_name)) { continue; } - found_bind_pose = true; + + FbxMatrix transformMatrix = pose->GetMatrix(j); + auto t = transformMatrix.GetRow(3); + poss.emplace_back(t[0], t[1], t[2]); + + auto r0 = transformMatrix.GetRow(0); + auto r1 = transformMatrix.GetRow(1); + auto r2 = transformMatrix.GetRow(2); + transform_r0.emplace_back(r0[0], r0[1], r0[2]); + transform_r1.emplace_back(r1[0], r1[1], r1[2]); + transform_r2.emplace_back(r2[0], r2[1], r2[2]); + + bone_names.emplace_back(pose->GetNode(j)->GetName()); } - if (found_bind_pose == false) { - 
lSdkManager->CreateMissingBindPoses(lScene); + for (int j = 1; j < pose->GetCount(); ++j) { + auto self_name = pose->GetNode(j)->GetName(); + auto parent = pose->GetNode(j)->GetParent(); + if (parent) { + auto parent_name = parent->GetName(); + parent_mapping[self_name] = parent_name; + } } - pose_count = lScene->GetPoseCount(); + } + { + prim->verts.resize(bone_names.size()); + prim->verts.values = poss; + prim->verts.add_attr("transform_r0") = transform_r0; + prim->verts.add_attr("transform_r1") = transform_r1; + prim->verts.add_attr("transform_r2") = transform_r2; + auto &boneNames = prim->verts.add_attr("boneName"); + std::iota(boneNames.begin(), boneNames.end(), 0); + std::vector bone_connects; + for (auto bone_name: bone_names) { + if (parent_mapping.count(bone_name)) { + auto parent_name = parent_mapping[bone_name]; + if (std::count(bone_names.begin(), bone_names.end(), parent_name)) { + auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); + auto parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); + bone_connects.push_back(parent_index); + bone_connects.push_back(self_index); + } + } + } + prim->loops.values = bone_connects; + prim->polys.resize(bone_connects.size() / 2); + for (auto j = 0; j < bone_connects.size() / 2; j++) { + prim->polys[j] = {j * 2, 2}; + } + + prim->userData().set2("boneName_count", int(bone_names.size())); + for (auto i = 0; i < bone_names.size(); i++) { + prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); + } + } +} + +static void TraverseNodesToGetSkeleton(FbxNode* pNode, std::vector &bone_names, std::vector &transforms, std::map &parent_mapping) { + if (!pNode) return; + + FbxMesh* pMesh = pNode->GetMesh(); + if (pMesh && pMesh->GetDeformerCount(FbxDeformer::eSkin)) { + FbxSkin* pSkin = (FbxSkin*)pMesh->GetDeformer(0, FbxDeformer::eSkin); + // Iterate over each cluster (bone) + for (int j = 0; j < pSkin->GetClusterCount(); 
++j) { + FbxCluster* pCluster = pSkin->GetCluster(j); + + FbxNode* pBoneNode = pCluster->GetLink(); + if (!pBoneNode) continue; + std::string boneName = pBoneNode->GetName(); + if (std::count(bone_names.begin(), bone_names.end(), boneName)) { + continue; + } + bone_names.emplace_back(boneName); + FbxAMatrix transformLinkMatrix; + pCluster->GetTransformLinkMatrix(transformLinkMatrix); + + // The transformation of the mesh at binding time + FbxAMatrix transformMatrix; + pCluster->GetTransformMatrix(transformMatrix); + + // Inverse bind matrix. + FbxAMatrix bindMatrix_ = transformMatrix.Inverse() * transformLinkMatrix; + auto bindMatrix = bit_cast(bindMatrix_); + transforms.emplace_back(bindMatrix); + + auto pParentNode = pBoneNode->GetParent(); + if (pParentNode) { + std::string parentName = pParentNode->GetName(); + parent_mapping[boneName] = parentName; + } + } + } + + for (int i = 0; i < pNode->GetChildCount(); i++) { + TraverseNodesToGetSkeleton(pNode->GetChild(i), bone_names, transforms, parent_mapping); + } +} +std::shared_ptr GetSkeletonFromMesh(FbxScene* lScene) { + auto prim = std::make_shared(); + + FbxNode* lRootNode = lScene->GetRootNode(); + if (lRootNode) { std::vector bone_names; + std::vector transforms; std::map parent_mapping; + TraverseNodesToGetSkeleton(lRootNode, bone_names, transforms, parent_mapping); std::vector poss; std::vector transform_r0; std::vector transform_r1; std::vector transform_r2; - for (auto i = 0; i < pose_count; i++) { - auto pose = lScene->GetPose(i); - if (pose == nullptr || !pose->IsBindPose()) { - continue; - } - for (int j = 1; j < pose->GetCount(); ++j) { - std::string bone_name = pose->GetNode(j)->GetName(); - if (std::count(bone_names.begin(), bone_names.end(), bone_name)) { - continue; - } - - FbxMatrix transformMatrix = pose->GetMatrix(j); - auto t = transformMatrix.GetRow(3); - poss.emplace_back(t[0], t[1], t[2]); - - auto r0 = transformMatrix.GetRow(0); - auto r1 = transformMatrix.GetRow(1); - auto r2 = 
transformMatrix.GetRow(2); - transform_r0.emplace_back(r0[0], r0[1], r0[2]); - transform_r1.emplace_back(r1[0], r1[1], r1[2]); - transform_r2.emplace_back(r2[0], r2[1], r2[2]); + for (auto i = 0; i < bone_names.size(); i++) { + auto bone_name = bone_names[i]; + auto bindMatrix = transforms[i]; + auto t = bindMatrix.GetRow(3); + poss.emplace_back(t[0], t[1], t[2]); - bone_names.emplace_back(pose->GetNode(j)->GetName()); - } - for (int j = 1; j < pose->GetCount(); ++j) { - auto self_name = pose->GetNode(j)->GetName(); - auto parent = pose->GetNode(j)->GetParent(); - if (parent) { - auto parent_name = parent->GetName(); - parent_mapping[self_name] = parent_name; + auto r0 = bindMatrix.GetRow(0); + auto r1 = bindMatrix.GetRow(1); + auto r2 = bindMatrix.GetRow(2); + transform_r0.emplace_back(r0[0], r0[1], r0[2]); + transform_r1.emplace_back(r1[0], r1[1], r1[2]); + transform_r2.emplace_back(r2[0], r2[1], r2[2]); + } + prim->verts.resize(bone_names.size()); + prim->verts.values = poss; + prim->verts.add_attr("transform_r0") = transform_r0; + prim->verts.add_attr("transform_r1") = transform_r1; + prim->verts.add_attr("transform_r2") = transform_r2; + std::vector bone_connects; + for (auto bone_name: bone_names) { + if (parent_mapping.count(bone_name)) { + auto parent_name = parent_mapping[bone_name]; + if (std::count(bone_names.begin(), bone_names.end(), parent_name)) { + auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); + auto parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); + bone_connects.push_back(parent_index); + bone_connects.push_back(self_index); } } } - { - prim->verts.resize(bone_names.size()); - prim->verts.values = poss; - prim->verts.add_attr("transform_r0") = transform_r0; - prim->verts.add_attr("transform_r1") = transform_r1; - prim->verts.add_attr("transform_r2") = transform_r2; - auto &boneNames = prim->verts.add_attr("boneName"); - 
std::iota(boneNames.begin(), boneNames.end(), 0); + prim->loops.values = bone_connects; + prim->polys.resize(bone_connects.size() / 2); + for (auto j = 0; j < bone_connects.size() / 2; j++) { + prim->polys[j] = {j * 2, 2}; + } + auto &boneNames = prim->verts.add_attr("boneName"); + std::iota(boneNames.begin(), boneNames.end(), 0); + prim->userData().set2("boneName_count", int(bone_names.size())); + for (auto i = 0; i < bone_names.size(); i++) { + prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); + } + } + return prim; +} +struct NewFBXImportSkeleton : INode { + virtual void apply() override { + auto fbx_object = get_input2("fbx_object"); + auto lSdkManager = fbx_object->lSdkManager; + auto lScene = fbx_object->lScene; - std::vector bone_connects; - for (auto bone_name: bone_names) { - if (parent_mapping.count(bone_name)) { - auto parent_name = parent_mapping[bone_name]; - if (std::count(bone_names.begin(), bone_names.end(), parent_name)) { - auto self_index = std::find(bone_names.begin(), bone_names.end(), bone_name) - bone_names.begin(); - auto parent_index = std::find(bone_names.begin(), bone_names.end(), parent_name) - bone_names.begin(); - bone_connects.push_back(parent_index); - bone_connects.push_back(self_index); - } - } - } - prim->loops.values = bone_connects; - prim->polys.resize(bone_connects.size() / 2); - for (auto j = 0; j < bone_connects.size() / 2; j++) { - prim->polys[j] = {j * 2, 2}; - } + // Print the nodes of the scene and their attributes recursively. + // Note that we are not printing the root node because it should + // not contain any attributes. 
+ auto prim = std::make_shared(); - prim->userData().set2("boneName_count", int(bone_names.size())); - for (auto i = 0; i < bone_names.size(); i++) { - prim->userData().set2(zeno::format("boneName_{}", i), bone_names[i]); - } + auto pose_count = GetSkeletonFromBindPose(lSdkManager, lScene, prim); + if (pose_count == 0 || get_input2("ForceFromMesh")) { + prim = GetSkeletonFromMesh(lScene); } if (get_input2("ConvertUnits")) { @@ -976,6 +1062,7 @@ ZENDEFNODE(NewFBXImportSkeleton, { { "fbx_object", {"bool", "ConvertUnits", "0"}, + {"bool", "ForceFromMesh", "0"}, }, { "prim", @@ -984,39 +1071,6 @@ ZENDEFNODE(NewFBXImportSkeleton, { {"primitive"}, }); -struct NewFBXImportSkeleton2 : INode { - virtual void apply() override { - auto fbx_object = get_input2("fbx_object"); - auto lSdkManager = fbx_object->lSdkManager; - auto lScene = fbx_object->lScene; - - auto prim = std::make_shared(); - - FbxNode* lRootNode = lScene->GetRootNode(); - std::vector availableRootNames; - TraverseNodesToGetNames(lRootNode, availableRootNames); - for (auto name: availableRootNames) { - zeno::log_info("fuck: {}", name); - } - TraverseNodesToGetSkeleton(lRootNode, get_input2("name"), prim); - - set_output("prim", prim); - } -}; - -ZENDEFNODE(NewFBXImportSkeleton2, { - { - "fbx_object", - {"bool", "ConvertUnits", "0"}, - {"string", "name", "0"}, - }, - { - "prim", - }, - {}, - {"deprecated"}, -}); - struct NewFBXImportAnimation : INode { virtual void apply() override { int frameid; From 4936feb12a902ecc6f3b8308c30ab156616c557a Mon Sep 17 00:00:00 2001 From: littlemine Date: Fri, 23 Aug 2024 18:26:17 +0800 Subject: [PATCH 154/244] upd zpc --- projects/CUDA/test.cu | 3 ++- projects/CUDA/utils/Primitives.cu | 2 +- projects/CUDA/utils/TopoUtils.cu | 6 +++--- projects/CUDA/zpc | 2 +- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/projects/CUDA/test.cu b/projects/CUDA/test.cu index 51717fe9f7..68578b3788 100644 --- a/projects/CUDA/test.cu +++ b/projects/CUDA/test.cu @@ -80,7 +80,8 @@ 
struct ZSCULinkTest : INode { { zs::VdbGrid<3, float, zs::index_sequence<3, 4, 5>> ag; using TT = RM_CVREF_T(ag); - fmt::print("adaptive grid type: {}\n", zs::get_var_type_str(ag)); + fmt::print("adaptive grid type: {}\n", + zs::get_var_type_str(ag).asChars()); // fmt::print("tile bits: {}\n", zs::get_type_str()); // fmt::print("hierarchy bits: {}\n", // zs::get_type_str()); diff --git a/projects/CUDA/utils/Primitives.cu b/projects/CUDA/utils/Primitives.cu index 8dd15b78c7..12665d8933 100644 --- a/projects/CUDA/utils/Primitives.cu +++ b/projects/CUDA/utils/Primitives.cu @@ -96,7 +96,7 @@ struct ZSParticlePerlinNoise : INode { auto &tv = zspars->getParticles(); if (!tv.hasProperty(tag)) - throw std::runtime_error(fmt::format("Attribute [{}] doesn't exist!", tag)); + throw std::runtime_error(fmt::format("Attribute [{}] doesn't exist!", tag.asChars())); const int nchns = tv.getPropertySize(tag); auto pol = zs::cuda_exec(); diff --git a/projects/CUDA/utils/TopoUtils.cu b/projects/CUDA/utils/TopoUtils.cu index d2a66d3eb6..6a36e10a8a 100644 --- a/projects/CUDA/utils/TopoUtils.cu +++ b/projects/CUDA/utils/TopoUtils.cu @@ -133,15 +133,15 @@ void update_surface_cell_normals(zs::CudaExecutionPolicy &pol, ZenoParticles::pa constexpr auto space = execspace_e::cuda; if (!verts.hasProperty(xTag)) - throw std::runtime_error(fmt::format("missing property [{}] for vertex positions.", xTag)); + throw std::runtime_error(fmt::format("missing property [{}] for vertex positions.", xTag.asChars())); if (!tris.hasProperty("inds")) throw std::runtime_error("missing property [inds] for surface triangles."); if (!lines.hasProperty("fe_inds") || !lines.hasProperty("inds")) throw std::runtime_error("missing property [fe_inds]/[inds] for surface edges."); if (!tris.hasProperty(triNrmTag)) - throw std::runtime_error(fmt::format("missing property [{}] for surface triangles.", triNrmTag)); + throw std::runtime_error(fmt::format("missing property [{}] for surface triangles.", triNrmTag.asChars())); 
if (!lines.hasProperty(biNrmTag)) - throw std::runtime_error(fmt::format("missing property [{}] for surface edges.", biNrmTag)); + throw std::runtime_error(fmt::format("missing property [{}] for surface edges.", biNrmTag.asChars())); pol(range(tris.size()), [verts = proxy(verts), xOffset = verts.getPropertyOffset(xTag), vOffset = vOffset, tris = proxy({}, tris), triNrmTag] ZS_LAMBDA(int ti) mutable { diff --git a/projects/CUDA/zpc b/projects/CUDA/zpc index b7ac10424c..de7227086f 160000 --- a/projects/CUDA/zpc +++ b/projects/CUDA/zpc @@ -1 +1 @@ -Subproject commit b7ac10424c58720992ea9795db328bfe68429c07 +Subproject commit de7227086f301191b96143533a80df1690f1a470 From 6d6b543a03c2649b3623e5374bd00d2c38f8cbe5 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Fri, 23 Aug 2024 20:57:45 +0800 Subject: [PATCH 155/244] xinxin ik code --- projects/FBX/FBXSDK.cpp | 168 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index dbc62d6e32..c49688f68d 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1996,4 +1996,172 @@ ZENDEFNODE(PrimAttrFlat, { {"debug"}, }); +#if 0 +float length(std::vector &b) +{ + float l = 0; + for(int i=0;i> &A, std::vector &b, std::vector&x, + int max_iter, float tol) + { + int iter=0; + float b_nrm = length(b); + while(iter &index, + std::vector &J, + std::vector &r, + //skeleton * skel_ptr, + vec3f e_curr + ) + { + J.resize(index.size()); + for(int i=0;i &J, std::vector> &JTJ, float alpha) + { + JTJ.resize(J.size()); + for(int i=0;i &index, + std::vector &dtheta, + std::vector &theta, + float &dist) +{ + dtheta.resize(theta.size()); + dtheta.assign(0); + //skeleton = FK(theta); + //e_curr = getJointPos(id, skeleton); + //de = e - tarPos; + dist = length(de); + if(dist<0.0001) + return; + //computeJointJacobian(); + //computeJTJ(..,..,0.01); + auto b = std::vector(index.size()); + for(int i=0;i(0); + GaussSeidelSolve(JTJ, b, x, 
100, 0.00001); + for(int i=0;i & theta, + std::vector & theta_constraints, + std::vector &targets, + std::vector endEffectorIDs, + std::vector startIDs, + int iter_max + ) +{ + std::vector> dtheta; + dtheta.resize(endEffectorIDs.size()); + int iter = 0; + std::vector old_theta; + old_theta = theta; + while(iter w; + w.resize(theta.size()); + w.assign(0); + std::vector total_dtheta; + total_dtheta.resize(theta.size()); + total_dtheta.assign(0); + + for(int j=0;j 0 ? 1 : 0; + } + + + for (int i = 0; i < theta.size(); ++i) { + theta[i] = theta[i] + total_dtheta[i] / (w[i] > 0 ? w[i] : 1); + theta[i] = clamp(theta[i], theta_constraints[i][0], theta_constraints[i][1]); + } + + float max_dtheta = 0; + for(int i=0;i Date: Mon, 26 Aug 2024 14:54:14 +0800 Subject: [PATCH 156/244] remove frame log --- zenovis/xinxinoptix/optixPathTracer.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/zenovis/xinxinoptix/optixPathTracer.cpp b/zenovis/xinxinoptix/optixPathTracer.cpp index 6910a8647e..729ad1b7a3 100644 --- a/zenovis/xinxinoptix/optixPathTracer.cpp +++ b/zenovis/xinxinoptix/optixPathTracer.cpp @@ -703,8 +703,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path ) ); //CUDA_SYNC_CHECK(); - - timer.tick(); OPTIX_CHECK( optixLaunch( state.pipeline, @@ -716,8 +714,6 @@ static void launchSubframe( sutil::CUDAOutputBuffer& output_buffer, Path state.params.tile_h, // launch height 1 // launch depth ) ); - - timer.tock("frametime"); } } output_buffer.unmap(); From 029519c3abcfa21c6a4775b708c6873cd6813480 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Wed, 28 Aug 2024 20:44:02 +0800 Subject: [PATCH 157/244] temp --- projects/FBX/FBXSDK.cpp | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/projects/FBX/FBXSDK.cpp b/projects/FBX/FBXSDK.cpp index c49688f68d..4736a559d2 100644 --- a/projects/FBX/FBXSDK.cpp +++ b/projects/FBX/FBXSDK.cpp @@ -1461,6 +1461,7 @@ static std::vector 
getBoneNames(PrimitiveObject *prim) { } return boneNames; } + static std::vector TopologicalSorting(std::map bone_connects, zeno::PrimitiveObject* skeleton) { std::vector ordering; std::set ordering_set; @@ -1996,6 +1997,134 @@ ZENDEFNODE(PrimAttrFlat, { {"debug"}, }); +struct IKChainsItemObject : PrimitiveObject { + std::string RootName; + std::string MidName; + std::string TipName; + bool MatchByName = true; + std::string TwistName; + std::string GoalName; + float Blend = 1; + bool OrientTip = true; +}; +struct IKChainsItem : INode { + virtual void apply() override { + auto item = std::make_shared(); + item->RootName = get_input2("RootName"); + item->MidName = get_input2("MidName"); + item->TipName = get_input2("TipName"); + item->MatchByName = get_input2("MatchByName"); + item->TwistName = get_input2("TwistName"); + item->Blend = get_input2("Blend"); + item->OrientTip = get_input2("OrientTip"); + + set_output2("poseItem", std::move(item)); + } +}; + +ZENDEFNODE(IKChainsItem, { + { + {"string", "RootName", ""}, + {"string", "MidName", ""}, + {"string", "TipName", ""}, + {"bool", "MatchByName", "1"}, + {"string", "TwistName", ""}, + {"string", "GoalName", ""}, + {"float", "Blend", "1"}, + {"bool", "OrientTip", "1"}, + }, + { + "poseItem", + }, + {}, + {"Animation"}, +}); + +float sqr(float v) { + return v * v; +} +// return: mid, tip +std::pair twoBoneIK( + vec3f root + , vec3f joint + , vec3f end + , vec3f jointTarget + , vec3f effector +) { + vec3f output_joint = {}; + vec3f output_end = {}; + + auto root_to_effect = effector - root; + auto root_to_jointTarget = jointTarget - root; + + auto upper_limb_length = zeno::length(root - joint); + auto lower_limb_length = zeno::length(joint - end); + auto desired_length = zeno::length(root_to_effect); + if (desired_length < abs(upper_limb_length - lower_limb_length)) { + zeno::log_info("A"); + output_joint = root + normalize(root_to_effect) * abs(upper_limb_length - lower_limb_length); + output_end = root + 
normalize(root_to_effect) * upper_limb_length; + } + else if (desired_length > upper_limb_length + lower_limb_length) { + zeno::log_info("B"); + + output_joint = root + normalize(root_to_effect) * upper_limb_length; + output_end = root + normalize(root_to_effect) * (upper_limb_length + lower_limb_length); + } + else { + zeno::log_info("C"); + + vec3f to_pole = normalize(cross(cross(root_to_effect, root_to_jointTarget), root_to_effect)); + float cos_theta = (sqr(upper_limb_length) + sqr(desired_length) - sqr(lower_limb_length)) / (2.0f * upper_limb_length * desired_length); + float sin_theta = sqrt(1 - sqr(cos_theta)); + output_joint = root + normalize(root_to_effect) * cos_theta + to_pole * sin_theta; + output_end = effector; + } + + return {output_joint, output_end}; +} + +struct IKChains : INode { + virtual void apply() override { + auto skeleton = get_input2("Skeleton"); + auto ikDrivers = get_input2("IK Drivers"); + auto items = get_input("items")->getRaw(); + auto skeletonBoneNameMapping = getBoneNameMapping(skeleton.get()); + auto ikDriversBoneNameMapping = getBoneNameMapping(ikDrivers.get()); + + for (auto item: items) { + std::string TwistName = item->MatchByName? item->MidName: item->TwistName; + std::string GoalName = item->MatchByName? item->TipName: item->GoalName; + vec3f root = skeleton->verts[skeletonBoneNameMapping[item->RootName]]; + vec3f &joint = skeleton->verts[skeletonBoneNameMapping[item->MidName]]; + vec3f &end = skeleton->verts[skeletonBoneNameMapping[item->TipName]]; + vec3f jointTarget = ikDrivers->verts[ikDriversBoneNameMapping[TwistName]]; + vec3f effector = ikDrivers->verts[ikDriversBoneNameMapping[GoalName]]; + zeno::log_info("{} {} {}", root, joint, end); + zeno::log_info("{} {}", jointTarget, effector); + auto [midPos, tipPos] = twoBoneIK(root, joint, end, jointTarget, effector); + // set ... 
+ joint = midPos; + end = tipPos; + } + + set_output("Skeleton", skeleton); + } +}; + +ZENDEFNODE(IKChains, { + { + "Skeleton", + "IK Drivers", + {"list", "items"}, + }, + { + "Skeleton", + }, + {}, + {"Animation"}, +}); + #if 0 float length(std::vector &b) { From ebf3a952d89c38adacfb1ff8d36dbd7732597338 Mon Sep 17 00:00:00 2001 From: zhouhang95 <765229842@qq.com> Date: Thu, 29 Aug 2024 15:37:46 +0800 Subject: [PATCH 158/244] abc-not-flip-lines --- projects/Alembic/GetAlembicPrim.cpp | 8 ++++---- zeno/include/zeno/funcs/PrimitiveUtils.h | 2 +- zeno/src/nodes/neo/PrimFlipFaces.cpp | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/projects/Alembic/GetAlembicPrim.cpp b/projects/Alembic/GetAlembicPrim.cpp index a8304e82a8..667c943df6 100644 --- a/projects/Alembic/GetAlembicPrim.cpp +++ b/projects/Alembic/GetAlembicPrim.cpp @@ -159,7 +159,7 @@ struct GetAlembicPrim : INode { prim = get_alembic_prim(abctree, index); } if (get_input2("flipFrontBack")) { - primFlipFaces(prim.get()); + primFlipFaces(prim.get(), true); } if (get_input2("triangulate")) { zeno::primTriangulate(prim.get()); @@ -198,7 +198,7 @@ struct AllAlembicPrim : INode { } auto outprim = zeno::primMerge(prims->getRaw()); if (get_input2("flipFrontBack")) { - primFlipFaces(outprim.get()); + primFlipFaces(outprim.get(), true); } if (get_input2("triangulate") == 1) { zeno::primTriangulate(outprim.get()); @@ -297,7 +297,7 @@ struct AlembicPrimList : INode { for (auto &prim: new_prims->arr) { auto _prim = std::dynamic_pointer_cast(prim); if (get_input2("flipFrontBack")) { - primFlipFaces(_prim.get()); + primFlipFaces(_prim.get(), true); } if (get_input2("splitByFaceset") && get_input2("killDeadVerts")) { primKillDeadVerts(_prim.get()); @@ -457,7 +457,7 @@ struct ImportAlembicPrim : INode { outprim = get_alembic_prim(abctree, index); } } - primFlipFaces(outprim.get()); + primFlipFaces(outprim.get(), true); if (get_input2("triangulate")) { zeno::primTriangulate(outprim.get()); } diff --git 
a/zeno/include/zeno/funcs/PrimitiveUtils.h b/zeno/include/zeno/funcs/PrimitiveUtils.h index 2fca7e1614..39585e683d 100644 --- a/zeno/include/zeno/funcs/PrimitiveUtils.h +++ b/zeno/include/zeno/funcs/PrimitiveUtils.h @@ -16,7 +16,7 @@ ZENO_API void primPolygonate(PrimitiveObject *prim, bool with_uv = true); ZENO_API void primSepTriangles(PrimitiveObject *prim, bool smoothNormal = true, bool keepTriFaces = true); //ZENO_API void primSmoothNormal(PrimitiveObject *prim, bool isFlipped = false); -ZENO_API void primFlipFaces(PrimitiveObject *prim); +ZENO_API void primFlipFaces(PrimitiveObject *prim, bool only_face = false); ZENO_API void primCalcNormal(PrimitiveObject *prim, float flip = 1.0f, std::string nrmAttr = "nrm"); //ZENO_API void primCalcInsetDir(PrimitiveObject *prim, float flip = 1.0f, std::string nrmAttr = "nrm"); diff --git a/zeno/src/nodes/neo/PrimFlipFaces.cpp b/zeno/src/nodes/neo/PrimFlipFaces.cpp index 44bbd6a711..e37b0bd6fb 100644 --- a/zeno/src/nodes/neo/PrimFlipFaces.cpp +++ b/zeno/src/nodes/neo/PrimFlipFaces.cpp @@ -10,8 +10,8 @@ namespace zeno { -ZENO_API void primFlipFaces(PrimitiveObject *prim) { - if (prim->lines.size()) +ZENO_API void primFlipFaces(PrimitiveObject *prim, bool only_face) { + if (!only_face && prim->lines.size()) parallel_for_each(prim->lines.begin(), prim->lines.end(), [&] (auto &line) { std::swap(line[1], line[0]); }); @@ -47,7 +47,7 @@ ZENO_API void primFlipFaces(PrimitiveObject *prim) { struct PrimFlipFaces : zeno::INode { virtual void apply() override { auto prim = get_input("prim"); - primFlipFaces(prim.get()); + primFlipFaces(prim.get(), false); set_output("prim", std::move(prim)); } }; From 50a091420605369845a4020fbe6808a6643f3842 Mon Sep 17 00:00:00 2001 From: iaomw Date: Thu, 29 Aug 2024 18:04:45 +0800 Subject: [PATCH 159/244] Hair & Curves (#1992) * remove unused * Refactor & curves support * remove unused * CurveGroupAux * hair transform * minor update * transform * fix crash * fix windows build * flex order * minor 
updates * Curves.h --- zeno/include/zeno/types/CurveType.h | 53 + zeno/src/nodes/prim/AsCurves.cpp | 35 + zeno/src/nodes/prim/CyHair.cpp | 43 + zeno/src/nodes/prim/SimpleGeometry.cpp | 8 +- zeno/src/nodes/prim/TransformPrimitive.cpp | 13 + zenovis/src/optx/RenderEngineOptx.cpp | 241 +- zenovis/xinxinoptix/CMakeLists.txt | 18 +- zenovis/xinxinoptix/Curves.h | 141 + zenovis/xinxinoptix/DeflMatShader.cu | 82 +- zenovis/xinxinoptix/GeometryAux.h | 18 + zenovis/xinxinoptix/OptiXStuff.h | 153 +- zenovis/xinxinoptix/SDK/cuda/curve.h | 301 +- .../xinxinoptix/SDK/support/CMakeLists.txt | 73 - .../xinxinoptix/SDK/support/tinyexr/tinyexr.h | 13315 ---------- .../xinxinoptix/SDK/support/tinygltf/LICENSE | 21 - .../xinxinoptix/SDK/support/tinygltf/json.hpp | 20406 ---------------- .../SDK/support/tinygltf/stb_image.h | 7530 ------ .../SDK/support/tinygltf/stb_image_write.h | 1621 -- .../SDK/support/tinygltf/tiny_gltf.h | 7718 ------ zenovis/xinxinoptix/SDK/sutil/Exception.h | 395 +- zenovis/xinxinoptix/SDK/sutil/sutil.cpp | 2 +- zenovis/xinxinoptix/hair/Hair.cpp | 301 + zenovis/xinxinoptix/hair/Hair.h | 160 + zenovis/xinxinoptix/hair/Util.h | 61 + zenovis/xinxinoptix/hair/optixHair.cpp | 285 + zenovis/xinxinoptix/hair/optixHair.h | 226 + zenovis/xinxinoptix/optixPathTracer.cpp | 367 +- zenovis/xinxinoptix/optixPathTracer.h | 4 +- zenovis/xinxinoptix/optixSphere.cpp | 21 +- zenovis/xinxinoptix/xinxinoptixapi.h | 38 +- 30 files changed, 2422 insertions(+), 51228 deletions(-) create mode 100644 zeno/include/zeno/types/CurveType.h create mode 100644 zeno/src/nodes/prim/AsCurves.cpp create mode 100644 zeno/src/nodes/prim/CyHair.cpp create mode 100644 zenovis/xinxinoptix/Curves.h create mode 100644 zenovis/xinxinoptix/GeometryAux.h delete mode 100644 zenovis/xinxinoptix/SDK/support/CMakeLists.txt delete mode 100644 zenovis/xinxinoptix/SDK/support/tinyexr/tinyexr.h delete mode 100644 zenovis/xinxinoptix/SDK/support/tinygltf/LICENSE delete mode 100644 
zenovis/xinxinoptix/SDK/support/tinygltf/json.hpp delete mode 100644 zenovis/xinxinoptix/SDK/support/tinygltf/stb_image.h delete mode 100644 zenovis/xinxinoptix/SDK/support/tinygltf/stb_image_write.h delete mode 100644 zenovis/xinxinoptix/SDK/support/tinygltf/tiny_gltf.h create mode 100644 zenovis/xinxinoptix/hair/Hair.cpp create mode 100644 zenovis/xinxinoptix/hair/Hair.h create mode 100644 zenovis/xinxinoptix/hair/Util.h create mode 100644 zenovis/xinxinoptix/hair/optixHair.cpp create mode 100644 zenovis/xinxinoptix/hair/optixHair.h diff --git a/zeno/include/zeno/types/CurveType.h b/zeno/include/zeno/types/CurveType.h new file mode 100644 index 0000000000..7292f1ab7f --- /dev/null +++ b/zeno/include/zeno/types/CurveType.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include + +namespace zeno { + +enum struct CurveType { + + QUADRATIC_BSPLINE, + RIBBON_BSPLINE, + CUBIC_BSPLINE, + + LINEAR, + BEZIER, + CATROM +}; + +static unsigned int CurveDegree(zeno::CurveType type) { + + switch( type ) { + case CurveType::LINEAR: + return 1; + + case CurveType::QUADRATIC_BSPLINE: + case CurveType::RIBBON_BSPLINE: + return 2; + + case CurveType::CUBIC_BSPLINE: + case CurveType::BEZIER: + case CurveType::CATROM: + return 3; + } + return 0; +} + +static std::string CurveTypeDefaultString() { + auto name = magic_enum::enum_name(CurveType::LINEAR); + return std::string(name); +} + +static std::string CurveTypeListString() { + auto list = magic_enum::enum_names(); + + std::string result; + for (auto& ele : list) { + result += " "; + result += ele; + } + return result; +} + +} //zeno \ No newline at end of file diff --git a/zeno/src/nodes/prim/AsCurves.cpp b/zeno/src/nodes/prim/AsCurves.cpp new file mode 100644 index 0000000000..79931a88d3 --- /dev/null +++ b/zeno/src/nodes/prim/AsCurves.cpp @@ -0,0 +1,35 @@ +#include +#include +#include +#include + +#include "magic_enum.hpp" + +namespace zeno { + +struct AsCurves : zeno::INode { + virtual void apply() override { + + auto prim = 
get_input2("prim"); + + auto typeString = get_input2("type:"); + auto typeEnum = magic_enum::enum_cast(typeString).value_or(CurveType::LINEAR); + auto typeIndex = (int)magic_enum::enum_index(typeEnum).value_or(0); + + prim->userData().set2("curve", typeIndex); + set_output("prim", std::move(prim)); + } +}; + +ZENDEFNODE(AsCurves, +{ { + {"prim"}, + }, + {"prim"}, //output + { + {"enum " + zeno::CurveTypeListString(), "type", zeno::CurveTypeDefaultString() } + }, //prim + {"prim"} +}); + +} // namespace \ No newline at end of file diff --git a/zeno/src/nodes/prim/CyHair.cpp b/zeno/src/nodes/prim/CyHair.cpp new file mode 100644 index 0000000000..7f7fc88d77 --- /dev/null +++ b/zeno/src/nodes/prim/CyHair.cpp @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +#include "magic_enum.hpp" + +#include +#include + +namespace zeno { + +struct CyHair : zeno::INode { + virtual void apply() override { + + auto path = get_input2("path"); + bool exist = std::filesystem::exists(path); + bool yup = get_input2("yup"); + + if (!exist) { + throw std::string("CyHair file doesn't exist"); + } + + auto out = std::make_shared(); + out->userData().set2("yup", yup); + out->userData().set2("path", path); + out->userData().set2("cyhair", true); + + set_output("out", std::move(out)); + } +}; + +ZENDEFNODE(CyHair, +{ { + {"readpath", "path"}, + {"bool", "yup", "1"}, + }, + {"out"}, //output + {}, + {"read"} +}); + +} // namespace \ No newline at end of file diff --git a/zeno/src/nodes/prim/SimpleGeometry.cpp b/zeno/src/nodes/prim/SimpleGeometry.cpp index 52c09cde8f..fc2c51c2ec 100644 --- a/zeno/src/nodes/prim/SimpleGeometry.cpp +++ b/zeno/src/nodes/prim/SimpleGeometry.cpp @@ -1283,10 +1283,10 @@ struct CreateSphere : zeno::INode { memcpy(row2.data(), transform_ptr+8, sizeof(float)*4); memcpy(row3.data(), transform_ptr+12, sizeof(float)*4); - prim->userData().set2("sphere_transform_row0", row0); - prim->userData().set2("sphere_transform_row1", row1); - 
prim->userData().set2("sphere_transform_row2", row2); - prim->userData().set2("sphere_transform_row3", row3); + prim->userData().set2("_transform_row0", row0); + prim->userData().set2("_transform_row1", row1); + prim->userData().set2("_transform_row2", row2); + prim->userData().set2("_transform_row3", row3); } set_output("prim",std::move(prim)); diff --git a/zeno/src/nodes/prim/TransformPrimitive.cpp b/zeno/src/nodes/prim/TransformPrimitive.cpp index 937a5140a9..ec7777f5c7 100644 --- a/zeno/src/nodes/prim/TransformPrimitive.cpp +++ b/zeno/src/nodes/prim/TransformPrimitive.cpp @@ -377,6 +377,19 @@ struct PrimitiveTransform : zeno::INode { } } + auto transform_ptr = glm::value_ptr(matrix); + + zeno::vec4f row0, row1, row2, row3; + memcpy(row0.data(), transform_ptr, sizeof(float)*4); + memcpy(row1.data(), transform_ptr+4, sizeof(float)*4); + memcpy(row2.data(), transform_ptr+8, sizeof(float)*4); + memcpy(row3.data(), transform_ptr+12, sizeof(float)*4); + + iObject->userData().set2("_transform_row0", row0); + iObject->userData().set2("_transform_row1", row1); + iObject->userData().set2("_transform_row2", row2); + iObject->userData().set2("_transform_row3", row3); + set_output("outPrim", std::move(iObject)); } }; diff --git a/zenovis/src/optx/RenderEngineOptx.cpp b/zenovis/src/optx/RenderEngineOptx.cpp index 8ef3ee2564..a511fb76d1 100644 --- a/zenovis/src/optx/RenderEngineOptx.cpp +++ b/zenovis/src/optx/RenderEngineOptx.cpp @@ -39,6 +39,9 @@ #include #include +#include +#include + namespace zenovis::optx { struct CppTimer { @@ -210,7 +213,82 @@ struct GraphicsManager { // ^^^ Don't wuhui, I mean: Literial Synthetic Lazy internal static Local Shared Pointer auto prim_in = prim_in_lslislSp.get(); - auto isInst = prim_in->userData().get2("isInst", 0); + if (prim_in->userData().has("curve") && prim_in->verts->size() && prim_in->verts.has_attr("width")) { + + auto& ud = prim_in->userData(); + auto mtlid = ud.get2("mtlid", "Default"); + auto curveTypeIndex = ud.get2("curve", 
0u); + auto curveTypeEnum = magic_enum::enum_cast(curveTypeIndex).value_or(zeno::CurveType::LINEAR); + + auto& widthArray = prim_in->verts.attr("width"); + auto& pointArray = prim_in->verts; + + std::vector dummy {}; + + auto& normals = prim_in->verts.has_attr("v") ? reinterpret_cast&>(prim_in->verts.attr("v")) : dummy; + auto& points = reinterpret_cast&>(pointArray); + auto& widths = reinterpret_cast&>(widthArray); + + std::vector strands {}; + + int begin = 0; + int end = 1; + + if (prim_in->lines[0][1] < prim_in->lines[0][0]) { + std::swap(begin, end); + } + + strands.push_back(prim_in->lines[0][begin]); + + for (size_t i=1; ilines->size(); ++i) { + auto& prev_segment = prim_in->lines[i-1]; + auto& this_segment = prim_in->lines[i]; + + if (prev_segment[end] != this_segment[begin]) { // new strand + strands.push_back(this_segment[begin]); + } + } + + loadCurveGroup(points, widths, normals, strands, curveTypeEnum, mtlid); + return; + } + + auto is_cyhair = prim_in_lslislSp->userData().has("cyhair"); + if (is_cyhair) { + auto& ud = prim_in_lslislSp->userData(); + auto mtlid = ud.get2("mtlid", "Default"); + + auto type_index = ud.get2("curve", 0u); + auto path_string = ud.get2("path", ""); + + glm::mat4 transform(1.0f); + auto transform_ptr = glm::value_ptr(transform); + + if (ud.has("_transform_row0") && ud.has("_transform_row1") && ud.has("_transform_row2") && ud.has("_transform_row3")) { + + auto row0 = ud.get2("_transform_row0"); + auto row1 = ud.get2("_transform_row1"); + auto row2 = ud.get2("_transform_row2"); + auto row3 = ud.get2("_transform_row3"); + + memcpy(transform_ptr, row0.data(), sizeof(float)*4); + memcpy(transform_ptr+4, row1.data(), sizeof(float)*4); + memcpy(transform_ptr+8, row2.data(), sizeof(float)*4); + memcpy(transform_ptr+12, row3.data(), sizeof(float)*4); + } + + auto yup = ud.get2("yup", true); + auto trans = yup? 
glm::mat4 { + 0, 0, 1, 0, + 1, 0, 0, 0, + 0, 1, 0, 0, + 0, 0, 0, 1 + } : glm::mat4(1.0); + + trans = transform * trans; + loadHair( path_string, mtlid, type_index, trans); + return; + } auto is_sphere = prim_in_lslislSp->userData().has("sphere_center"); if (is_sphere) { @@ -235,10 +313,10 @@ struct GraphicsManager { } else { //zeno::vec4f row0, row1, row2, row3; - auto row0 = ud.get2("sphere_transform_row0"); - auto row1 = ud.get2("sphere_transform_row1"); - auto row2 = ud.get2("sphere_transform_row2"); - auto row3 = ud.get2("sphere_transform_row3"); + auto row0 = ud.get2("_transform_row0"); + auto row1 = ud.get2("_transform_row1"); + auto row2 = ud.get2("_transform_row2"); + auto row3 = ud.get2("_transform_row3"); glm::mat4 sphere_transform; auto transform_ptr = glm::value_ptr(sphere_transform); @@ -279,6 +357,8 @@ struct GraphicsManager { auto isRealTimeObject = prim_in->userData().get2("isRealTimeObject", 0); auto isUniformCarrier = prim_in->userData().has("ShaderUniforms"); + + auto isInst = prim_in->userData().get2("isInst", 0); if (isInst == 1) { @@ -1026,8 +1106,10 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { ShaderTemplateInfo _light_shader_template { "Light.cu", false, {}, {}, {} }; - + std::set cachedMeshesMaterials, cachedSphereMaterials; + std::map> cachedCurvesMaterials; + std::map cachedMeshMatLUT; bool meshMatLUTChanged(std::map& newLUT) { bool changed = false; @@ -1118,8 +1200,6 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { scene->drawOptions->needRefresh = false; } - //std::cout << "Render Options: SimpleRender " << scene->drawOptions->simpleRender - // << " NeedRefresh " << scene->drawOptions->needRefresh << "\n"; if (sizeNeedUpdate) { zeno::log_debug("[zeno-optix] updating resolution"); @@ -1152,10 +1232,44 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } if (meshNeedUpdate || matNeedUpdate || staticNeedUpdate) { - //zeno::log_debug("[zeno-optix] updating scene"); - 
//zeno::log_debug("[zeno-optix] updating material"); + + if ( matNeedUpdate && (staticNeedUpdate || meshNeedUpdate) ) { + cachedMeshesMaterials = xinxinoptix::uniqueMatsForMesh(); + cachedSphereMaterials = xinxinoptix::uniqueMatsForSphere(); + + for (auto& [key, _] : hair_xxx_cache) + { + auto& [filePath, mode, mtid] = key; + + auto ctype = (zeno::CurveType)mode; + + if (cachedCurvesMaterials.count(mtid) > 0) { + auto& ref = cachedCurvesMaterials.at(mtid); + ref.push_back( ctype ); + continue; + } + cachedCurvesMaterials[mtid] = { ctype }; + } + + for (auto& ele : curveGroupCache) { + + auto ctype = ele->curveType; + auto mtlid = ele->mtlid; + + if (cachedCurvesMaterials.count(mtlid) > 0) { + auto& ref = cachedCurvesMaterials.at(mtlid); + ref.push_back( ctype ); + continue; + } + cachedCurvesMaterials[mtlid] = { ctype }; + } + + } // preserve material names for materials-only updating case + std::vector> _meshes_shader_list{}; std::vector> _sphere_shader_list{}; + std::vector> _curves_shader_list{}; + std::vector> _volume_shader_list{}; std::map meshMatLUT{}; @@ -1168,21 +1282,23 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { ensure_shadtmpl(_volume_shader_template); ensure_shadtmpl(_light_shader_template); - { + if (cachedMeshesMaterials.count("Default")) { auto tmp = std::make_shared(); - tmp->mark = ShaderMaker::Mesh; + tmp->mark = ShaderMark::Mesh; tmp->matid = "Default"; tmp->filename = _default_shader_template.name; tmp->callable = _default_callable_template.shadtmpl; _meshes_shader_list.push_back(tmp); + + meshMatLUT.insert({"Default", 0}); } - { + if (cachedSphereMaterials.count("Default")) { auto tmp = std::make_shared(); - tmp->mark = ShaderMaker::Sphere; + tmp->mark = ShaderMark::Sphere; tmp->matid = "Default"; tmp->filename = _default_shader_template.name; tmp->callable = _default_callable_template.shadtmpl; @@ -1190,8 +1306,28 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { _sphere_shader_list.push_back(tmp); } - 
meshMatLUT.clear(); - meshMatLUT.insert({"Default", 0}); + unsigned int usesCurveTypeFlags = 0; + auto mark_task = [&usesCurveTypeFlags](zeno::CurveType ele) { + + usesCurveTypeFlags |= CURVE_FLAG_MAP.at(ele); + return CURVE_SHADER_MARK.at(ele); + }; + + if (cachedCurvesMaterials.count("Default") ) { + + auto& ref = cachedCurvesMaterials.at("Default"); + + for (auto& ele : ref) { + + auto tmp = std::make_shared(); + tmp->matid = "Default"; + tmp->filename = _default_shader_template.name; + tmp->callable = _default_callable_template.shadtmpl; + + tmp->mark = mark_task(ele); + _curves_shader_list.push_back(tmp); + } + } OptixUtil::g_vdb_indice_visible.clear(); OptixUtil::g_vdb_list_for_each_shader.clear(); @@ -1220,16 +1356,13 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { } } - if ( matNeedUpdate && (staticNeedUpdate || meshNeedUpdate) ) { - cachedMeshesMaterials = xinxinoptix::uniqueMatsForMesh(); - cachedSphereMaterials = xinxinoptix::uniqueMatsForSphere(); - } // preserve material names for materials-only updating case // Auto unload unused texure { std::set realNeedTexPaths; for(auto const &[matkey, mtldet] : matMap) { if (mtldet->parameters.find("vol") != std::string::npos + || cachedCurvesMaterials.count(mtldet->mtlidkey) > 0 || cachedMeshesMaterials.count(mtldet->mtlidkey) > 0 || cachedSphereMaterials.count(mtldet->mtlidkey) > 0) { @@ -1339,42 +1472,75 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { if (isVol) { - shaderP.mark = ShaderMaker::Volume; + shaderP.mark = ShaderMark::Volume; _volume_shader_list.push_back(std::make_shared(shaderP)); } else { if (cachedMeshesMaterials.count(mtldet->mtlidkey) > 0) { meshMatLUT.insert({mtldet->mtlidkey, (int)_meshes_shader_list.size()}); - shaderP.mark = ShaderMaker::Mesh; + shaderP.mark = ShaderMark::Mesh; _meshes_shader_list.push_back(std::make_shared(shaderP)); } if (cachedSphereMaterials.count(mtldet->mtlidkey) > 0) { - shaderP.mark = ShaderMaker::Sphere; + shaderP.mark = 
ShaderMark::Sphere; _sphere_shader_list.push_back(std::make_shared(shaderP)); } + + if (cachedCurvesMaterials.count(mtldet->mtlidkey) > 0) { + + auto& ref = cachedCurvesMaterials.at(mtldet->mtlidkey); + for (auto& ele : ref) { + + shaderP.mark = mark_task(ele); + _curves_shader_list.push_back(std::make_shared(shaderP)); + } + } } } - { + const auto requireTriangObj = !_meshes_shader_list.empty(); + const auto requireSphereObj = !_sphere_shader_list.empty(); + const auto requireVolumeObj = !_volume_shader_list.empty(); + + bool requireSphereLight = false; + bool requireTriangLight = false; + + for (const auto& [_, ld] : xinxinoptix::get_lightdats()) { + + const auto shape_enum = magic_enum::enum_cast(ld.shape).value_or(zeno::LightShape::Point); + + if (shape_enum == zeno::LightShape::Sphere) { + requireSphereLight = true; + } else if (shape_enum != zeno::LightShape::Point) { + requireTriangLight = true; + } + + if (requireSphereLight && requireTriangLight) { + break; + } + continue; + } + + if (requireTriangLight) { auto tmp = std::make_shared(); tmp->filename = _light_shader_template.name; tmp->callable = _default_callable_template.shadtmpl; - tmp->mark = ShaderMaker::Mesh; + tmp->mark = ShaderMark::Mesh; tmp->matid = "Light"; _meshes_shader_list.push_back(tmp); } - { + if (requireSphereLight) { auto tmp = std::make_shared(); tmp->filename = _light_shader_template.name; tmp->callable = _default_callable_template.shadtmpl; - tmp->mark = ShaderMaker::Sphere; + tmp->mark = ShaderMark::Sphere; tmp->matid = "Light"; _sphere_shader_list.push_back(tmp); @@ -1387,6 +1553,8 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { allShaders.insert(allShaders.end(), _sphere_shader_list.begin(), _sphere_shader_list.end()); allShaders.insert(allShaders.end(), _volume_shader_list.begin(), _volume_shader_list.end()); + allShaders.insert(allShaders.end(), _curves_shader_list.begin(), _curves_shader_list.end()); + const size_t sphere_shader_offset = 
_meshes_shader_list.size(); const size_t volume_shader_offset = _meshes_shader_list.size() + _sphere_shader_list.size(); @@ -1404,7 +1572,23 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { if (matNeedUpdate) { std::cout<<"shaders size "<< allShaders.size() << std::endl; - xinxinoptix::optixupdatematerial(allShaders); + + unsigned int usesPrimitiveTypeFlags = 0u; + if (requireTriangObj || requireTriangLight) + usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; + if (requireSphereObj || requireSphereLight) + usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE; + if (requireVolumeObj) + usesPrimitiveTypeFlags |= OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; + if (usesCurveTypeFlags) + usesPrimitiveTypeFlags |= usesCurveTypeFlags; + + auto refresh = OptixUtil::configPipeline((OptixPrimitiveTypeFlags)usesPrimitiveTypeFlags); + + xinxinoptix::updateShaders(allShaders, + requireTriangObj, requireTriangLight, + requireSphereObj, requireSphereLight, + requireVolumeObj, usesCurveTypeFlags, refresh); xinxinoptix::updateVolume(volume_shader_offset); } @@ -1435,6 +1619,7 @@ struct RenderEngineOptx : RenderEngine, zeno::disable_copy { xinxinoptix::updateSphereXAS(); OptixUtil::logInfoVRAM("After update Sphere"); + xinxinoptix::updateCurves(); xinxinoptix::UpdateStaticInstMesh(meshMatLUT); xinxinoptix::UpdateDynamicInstMesh(meshMatLUT); diff --git a/zenovis/xinxinoptix/CMakeLists.txt b/zenovis/xinxinoptix/CMakeLists.txt index 0a927a9b76..7a7b2ca679 100644 --- a/zenovis/xinxinoptix/CMakeLists.txt +++ b/zenovis/xinxinoptix/CMakeLists.txt @@ -26,6 +26,8 @@ target_sources(zenovis PRIVATE TypeCaster.cpp TypeCaster.h + GeometryAux.h + SDK/sutil/Aabb.h SDK/sutil/Quaternion.h SDK/sutil/Camera.cpp SDK/sutil/Record.h SDK/sutil/Camera.h SDK/cuda/climits.h @@ -37,8 +39,12 @@ target_sources(zenovis PRIVATE SDK/sutil/PPMLoader.cpp SDK/sutil/Trackball.h SDK/sutil/PPMLoader.h SDK/sutil/vec_math.h SDK/sutil/Preprocessor.h SDK/sutil/WorkDistribution.h + ) +file(GLOB 
hair_src "hair/*.h" "hair/*.cpp") +target_sources(zenovis PRIVATE ${hair_src}) + find_package(CUDAToolkit REQUIRED COMPONENTS cudart nvrtc REQUIRED) target_link_libraries(zenovis PRIVATE CUDA::cudart CUDA::nvrtc) @@ -58,11 +64,8 @@ target_link_libraries(zenovis PRIVATE TBB::tbb TBB::tbbmalloc) target_include_directories(zenovis PRIVATE ${OPTIX_PATH}/include) target_include_directories(zenovis PRIVATE SDK) +target_include_directories(zenovis PRIVATE SDK/cuda) target_include_directories(zenovis PRIVATE SDK/sutil) -target_include_directories(zenovis PRIVATE SDK/sdk_cuda) -target_include_directories(zenovis PRIVATE SDK/support) -target_include_directories(zenovis PRIVATE SDK/support/GLFW/include) -target_include_directories(zenovis PRIVATE SDK/support/imgui) target_include_directories(zenovis PRIVATE .) configure_file(sampleConfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/sampleConfig.h @ONLY) target_include_directories(zenovis PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) @@ -110,7 +113,10 @@ set(FILE_LIST ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@cuda/helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@cuda/climits.h ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@cuda/cstdint.h - + + ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@cuda/curve.h + ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@cuda/BufferView.h + ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@sutil/vec_math.h ${CMAKE_CURRENT_SOURCE_DIR}/SDK/@sutil/Preprocessor.h #include @@ -132,6 +138,8 @@ set(FILE_LIST ${CMAKE_CURRENT_SOURCE_DIR}/@LightBounds.h ${CMAKE_CURRENT_SOURCE_DIR}/@LightTree.h + ${CMAKE_CURRENT_SOURCE_DIR}/@GeometryAux.h + ${CMAKE_CURRENT_SOURCE_DIR}/@Curves.h ${CMAKE_CURRENT_SOURCE_DIR}/@Portal.h ${CMAKE_CURRENT_SOURCE_DIR}/@Sampling.h diff --git a/zenovis/xinxinoptix/Curves.h b/zenovis/xinxinoptix/Curves.h new file mode 100644 index 0000000000..1d7d4bd540 --- /dev/null +++ b/zenovis/xinxinoptix/Curves.h @@ -0,0 +1,141 @@ +#pragma once + +#include +#include + +#include +#include "GeometryAux.h" + +// Get curve hit-point in world coordinates. 
+static __forceinline__ __device__ float3 getHitPoint() +{ + const float t = optixGetRayTmax(); + const float3 rayOrigin = optixGetWorldRayOrigin(); + const float3 rayDirection = optixGetWorldRayDirection(); + + return rayOrigin + t * rayDirection; +} + +// Compute surface normal of quadratic pimitive in world space. +static __forceinline__ __device__ float3 normalLinear( const int primitiveIndex ) +{ + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int gasSbtIndex = optixGetSbtGASIndex(); + float4 controlPoints[2]; + + optixGetLinearCurveVertexData( gas, primitiveIndex, gasSbtIndex, 0.0f, controlPoints ); + + LinearInterpolator interpolator; + interpolator.initialize(controlPoints); + + float3 hitPoint = getHitPoint(); + // interpolators work in object space + hitPoint = optixTransformPointFromWorldToObjectSpace( hitPoint ); + const float3 normal = surfaceNormal( interpolator, optixGetCurveParameter(), hitPoint ); + return optixTransformNormalFromObjectToWorldSpace( normal ); +} + +// Compute surface normal of quadratic pimitive in world space. +static __forceinline__ __device__ float3 normalQuadratic( const int primitiveIndex ) +{ + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int gasSbtIndex = optixGetSbtGASIndex(); + float4 controlPoints[3]; + + optixGetQuadraticBSplineVertexData( gas, primitiveIndex, gasSbtIndex, 0.0f, controlPoints ); + + QuadraticInterpolator interpolator; + interpolator.initializeFromBSpline(controlPoints); + + float3 hitPoint = getHitPoint(); + // interpolators work in object space + hitPoint = optixTransformPointFromWorldToObjectSpace( hitPoint ); + const float3 normal = surfaceNormal( interpolator, optixGetCurveParameter(), hitPoint ); + return optixTransformNormalFromObjectToWorldSpace( normal ); +} + +// Compute surface normal of cubic b-spline pimitive in world space. 
+static __forceinline__ __device__ float3 normalCubic( const int primitiveIndex ) +{ + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int gasSbtIndex = optixGetSbtGASIndex(); + float4 controlPoints[4]; + + optixGetCubicBSplineVertexData( gas, primitiveIndex, gasSbtIndex, 0.0f, controlPoints ); + + CubicInterpolator interpolator; + interpolator.initializeFromBSpline(controlPoints); + + float3 hitPoint = getHitPoint(); + // interpolators work in object space + hitPoint = optixTransformPointFromWorldToObjectSpace( hitPoint ); + const float3 normal = surfaceNormal( interpolator, optixGetCurveParameter(), hitPoint ); + return optixTransformNormalFromObjectToWorldSpace( normal ); +} + +// Compute surface normal of Catmull-Rom pimitive in world space. +static __forceinline__ __device__ float3 normalCatrom( const int primitiveIndex ) +{ + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int gasSbtIndex = optixGetSbtGASIndex(); + float4 controlPoints[4]; + + optixGetCatmullRomVertexData( gas, primitiveIndex, gasSbtIndex, 0.0f, controlPoints ); + + CubicInterpolator interpolator; + interpolator.initializeFromCatrom(controlPoints); + + float3 hitPoint = getHitPoint(); + // interpolators work in object space + hitPoint = optixTransformPointFromWorldToObjectSpace( hitPoint ); + const float3 normal = surfaceNormal( interpolator, optixGetCurveParameter(), hitPoint ); + return optixTransformNormalFromObjectToWorldSpace( normal ); +} + +// Compute surface normal of Catmull-Rom pimitive in world space. 
+static __forceinline__ __device__ float3 normalBezier( const int primitiveIndex ) +{ + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int gasSbtIndex = optixGetSbtGASIndex(); + float4 controlPoints[4]; + + optixGetCubicBezierVertexData( gas, primitiveIndex, gasSbtIndex, 0.0f, controlPoints ); + + CubicInterpolator interpolator; + interpolator.initializeFromBezier(controlPoints); + + float3 hitPoint = getHitPoint(); + // interpolators work in object space + hitPoint = optixTransformPointFromWorldToObjectSpace( hitPoint ); + const float3 normal = surfaceNormal( interpolator, optixGetCurveParameter(), hitPoint ); + return optixTransformNormalFromObjectToWorldSpace( normal ); +} + +// Compute normal +// +static __forceinline__ __device__ float3 computeCurveNormal( OptixPrimitiveType type, const int primitiveIndex ) +{ + switch( type ) { + case OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR: + return normalLinear( primitiveIndex ); + case OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE: + return normalQuadratic( primitiveIndex ); + case OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE: + return normalCubic( primitiveIndex ); + case OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM: + return normalCatrom( primitiveIndex ); + case OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BEZIER: + return normalBezier( primitiveIndex ); + + case OPTIX_PRIMITIVE_TYPE_FLAT_QUADRATIC_BSPLINE: + { + const unsigned int prim_idx = optixGetPrimitiveIndex(); + const OptixTraversableHandle gas = optixGetGASTraversableHandle(); + const unsigned int sbtGASIndex = optixGetSbtGASIndex(); + const float2 uv = optixGetRibbonParameters(); + auto normal = optixGetRibbonNormal( gas, prim_idx, sbtGASIndex, 0.f /*time*/, uv ); + return normalize(normal); + } + } + return make_float3(0.0f); +} diff --git a/zenovis/xinxinoptix/DeflMatShader.cu b/zenovis/xinxinoptix/DeflMatShader.cu index 14da41c4c9..1d14dc9c93 100644 --- a/zenovis/xinxinoptix/DeflMatShader.cu +++ b/zenovis/xinxinoptix/DeflMatShader.cu @@ -16,6 +16,14 
@@ #include +#ifndef __CUDACC_RTC__ +#define _P_TYPE_ 2 +#endif + +#if (_P_TYPE_==2) +#include "Curves.h" +#endif + static __inline__ __device__ bool isBadVector(const vec3& vector) { for (size_t i=0; i<3; ++i) { @@ -67,9 +75,21 @@ extern "C" __global__ void __anyhit__shadow_cutout() ShadowPRD* prd = getPRD(); MatInput attrs{}; + auto pType = optixGetPrimitiveType(); + if (pType != OPTIX_PRIMITIVE_TYPE_SPHERE && pType != OPTIX_PRIMITIVE_TYPE_TRIANGLE) { + + prd->attanuation = vec3(0); + optixTerminateRay(); + return; + } + bool sphere_external_ray = false; -#if (_SPHERE_) +#if (_P_TYPE_==2) + float3 N = {}; + printf("Should not reach here\n"); + return; +#elif (_P_TYPE_==1) float4 q; // sphere center (q.x, q.y, q.z), sphere radius q.w @@ -209,7 +229,7 @@ extern "C" __global__ void __anyhit__shadow_cutout() float p = rnd(prd->seed); float skip = opacity; - #if (_SPHERE_) + #if (_P_TYPE_==1) if (sphere_external_ray) { skip *= opacity; } @@ -258,7 +278,7 @@ extern "C" __global__ void __anyhit__shadow_cutout() vec3 fakeTrans = vec3(1)-BRDFBasics::fresnelSchlick(vec3(1) - mats.transColor,nDi); prd->attanuation = prd->attanuation * fakeTrans; - #if (_SPHERE_) + #if (_P_TYPE_==1) if (sphere_external_ray) { prd->attanuation *= vec3(1, 0, 0); if (nDi < (1.0f-_FLT_EPL_)) { @@ -309,7 +329,47 @@ extern "C" __global__ void __closesthit__radiance() MatInput attrs{}; float estimation = 0; -#if (_SPHERE_) +#if (_P_TYPE_==2) + + float3 N = {}; + + auto pType = optixGetPrimitiveType(); + if (pType == OPTIX_PRIMITIVE_TYPE_SPHERE || pType == OPTIX_PRIMITIVE_TYPE_TRIANGLE) { + prd->done = true; + return; + } + + float3 normal = computeCurveNormal( optixGetPrimitiveType(), primIdx ); + + if (dot(normal, -ray_dir) < 0) { + normal = -normal; + } + + N = normal; + + float3 wldPos = P; + float3 wldNorm = normal; + float wldOffset = 0.0f; + + prd->geometryNormal = N; + + attrs.pos = P; + attrs.nrm = N; + + auto hair_idx = optixGetInstanceId() - params.hairInstOffset; + auto hairAux = 
reinterpret_cast(params.hairAux); + + auto& aux = hairAux[hair_idx]; + + uint strandIndex = aux.strand_i[primIdx]; + + float segmentU = optixGetCurveParameter(); + float2 strand_u = aux.strand_u[primIdx]; + float u = strand_u.x + segmentU * strand_u.y; + + attrs.uv = {u, (float)strandIndex/ aux.strand_info.count, 0}; + +#elif (_P_TYPE_==1) float4 q; // sphere center (q.x, q.y, q.z), sphere radius q.w @@ -458,7 +518,12 @@ extern "C" __global__ void __closesthit__radiance() return; } -#if _SPHERE_ +#if (_P_TYPE_==2) + if(mats.doubleSide>0.5f||mats.thin>0.5f){ + N = faceforward( N, -ray_dir, N ); + prd->geometryNormal = N; + } +#elif (_P_TYPE_==1) if(mats.doubleSide>0.5f||mats.thin>0.5f){ N = faceforward( N, -ray_dir, N ); @@ -885,7 +950,12 @@ extern "C" __global__ void __closesthit__radiance() shadowPRD.nonThinTransHit = (mats.thin < 0.5f && mats.specTrans > 0) ? 1 : 0; float3 frontPos, backPos; - SelfIntersectionAvoidance::offsetSpawnPoint( frontPos, backPos, wldPos, prd->geometryNormal, wldOffset ); + if (wldOffset > 0) { + SelfIntersectionAvoidance::offsetSpawnPoint( frontPos, backPos, wldPos, prd->geometryNormal, wldOffset ); + } else { + frontPos = wldPos; + backPos = wldPos; + } shadowPRD.origin = dot(-ray_dir, wldNorm) > 0 ? 
frontPos : backPos; //auto shadingP = rtgems::offset_ray(shadowPRD.origin + params.cam.eye, prd->geometryNormal); // world space diff --git a/zenovis/xinxinoptix/GeometryAux.h b/zenovis/xinxinoptix/GeometryAux.h new file mode 100644 index 0000000000..90adbf1051 --- /dev/null +++ b/zenovis/xinxinoptix/GeometryAux.h @@ -0,0 +1,18 @@ +#pragma once + +#include +#include + +#ifndef __CUDACC_RTC__ +#include +#else +#define assert(x) /*nop*/ +#endif + +struct CurveGroupAux +{ + BufferView strand_u; // strand_u at segment start per segment + GenericBufferView strand_i; // strand index per segment + BufferView strand_info; // info.x = segment base + // info.y = strand length (segments) +}; diff --git a/zenovis/xinxinoptix/OptiXStuff.h b/zenovis/xinxinoptix/OptiXStuff.h index 9d7d0cb777..d9a64d14a5 100644 --- a/zenovis/xinxinoptix/OptiXStuff.h +++ b/zenovis/xinxinoptix/OptiXStuff.h @@ -68,13 +68,55 @@ namespace OptixUtil using namespace xinxinoptix; ////these are all material independent stuffs; inline raii context ; -inline OptixPipelineCompileOptions pipeline_compile_options {}; + +inline OptixPipelineCompileOptions pipeline_compile_options ; inline raii pipeline ; -inline raii ray_module ; -inline raii sphere_module ; + +inline raii raygen_module ; inline raii raygen_prog_group ; inline raii radiance_miss_group ; inline raii occlusion_miss_group ; + +inline raii d_raygen_record; +inline raii d_miss_records; +inline raii d_hitgroup_records; +inline raii d_callable_records; + +inline raii sphere_ism; + +inline raii round_linear_ism; +inline raii round_bezier_ism; +inline raii round_catrom_ism; + +inline raii round_quadratic_ism; +inline raii flat_quadratic_ism; +inline raii round_cubic_ism; + +inline std::vector< std::function > garbageTasks; + +inline void resetAll() { + + raygen_prog_group.reset(); + radiance_miss_group.reset(); + occlusion_miss_group.reset(); + + raygen_module.reset(); + + auto count = garbageTasks.size(); + for (auto& task : garbageTasks) { + task(); + 
} + garbageTasks.clear(); + + d_miss_records.reset(); + d_raygen_record.reset(); + d_hitgroup_records.reset(); + d_callable_records.reset(); + + pipeline.reset(); + context.reset(); +} + inline bool isPipelineCreated = false; ////end material independent stuffs @@ -106,6 +148,15 @@ inline void createContext() #endif options.validationMode = OPTIX_DEVICE_CONTEXT_VALIDATION_MODE_ALL; OPTIX_CHECK( optixDeviceContextCreate( cu_ctx, &options, &context ) ); +} + +inline uint CachedPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE; + +inline bool configPipeline(OptixPrimitiveTypeFlags usesPrimitiveTypeFlags) { + + if (usesPrimitiveTypeFlags == CachedPrimitiveTypeFlags) { return false; } + CachedPrimitiveTypeFlags = usesPrimitiveTypeFlags; + pipeline_compile_options = {}; pipeline_compile_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_ANY; //OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING | OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS; pipeline_compile_options.usesMotionBlur = false; @@ -114,17 +165,46 @@ inline void createContext() pipeline_compile_options.pipelineLaunchParamsVariableName = "params"; pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH | OPTIX_EXCEPTION_FLAG_DEBUG; - pipeline_compile_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE | OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE | OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM; + //pipeline_compile_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE | OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM | usesPrimitiveTypeFlags; + pipeline_compile_options.usesPrimitiveTypeFlags = usesPrimitiveTypeFlags; OptixModuleCompileOptions module_compile_options = DefaultCompileOptions(); OptixBuiltinISOptions builtin_is_options {}; + builtin_is_options.usesMotionBlur = false; + builtin_is_options.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS | 
OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS; + + const static auto PrimitiveTypeConfigs = std::vector> { + + { OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE, OPTIX_PRIMITIVE_TYPE_SPHERE, &sphere_ism }, + + { OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, &round_linear_ism }, + { OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, &round_catrom_ism }, + { OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BEZIER, OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BEZIER, &round_bezier_ism }, + + { OPTIX_PRIMITIVE_TYPE_FLAGS_FLAT_QUADRATIC_BSPLINE, OPTIX_PRIMITIVE_TYPE_FLAT_QUADRATIC_BSPLINE, &flat_quadratic_ism }, + { OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, &round_quadratic_ism }, + { OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, &round_cubic_ism } + }; - builtin_is_options.usesMotionBlur = false; - builtin_is_options.buildFlags = OPTIX_BUILD_FLAG_ALLOW_COMPACTION | OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS | OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS; - builtin_is_options.builtinISModuleType = OPTIX_PRIMITIVE_TYPE_SPHERE; - OPTIX_CHECK( optixBuiltinISModuleGet( context, &module_compile_options, &pipeline_compile_options, - &builtin_is_options, &sphere_module ) ); + auto count = garbageTasks.size(); + for (auto& task : garbageTasks) { + task(); + } + garbageTasks.clear(); + + for (auto& [pflag, ptype, module_ptr] : PrimitiveTypeConfigs) { + if (pflag & pipeline_compile_options.usesPrimitiveTypeFlags) { + builtin_is_options.builtinISModuleType = ptype; + OPTIX_CHECK( optixBuiltinISModuleGet( context, &module_compile_options, &pipeline_compile_options, &builtin_is_options, module_ptr ) ); + + garbageTasks.push_back([module_ptr=module_ptr](){ + optixModuleDestroy(*module_ptr); + *module_ptr = 0u; + }); + } //if + } + return true; } #define COMPILE_WITH_TASKS_CHECK( call ) check( call, #call, __FILE__, __LINE__ ) @@ -178,9 +258,9 @@ static 
std::vector readData(std::string const& filename) return data; } -inline bool createModule(OptixModule &module, OptixDeviceContext &context, const char *source, const char *name, const char *macro=nullptr, tbb::task_group* _c_group = nullptr) +inline bool createModule(OptixModule &module, OptixDeviceContext &context, const char *source, const char *name, const std::vector& macros={}, tbb::task_group* _c_group = nullptr) { - OptixModuleCompileOptions module_compile_options = DefaultCompileOptions(); + OptixModuleCompileOptions module_compile_options = OptixUtil::DefaultCompileOptions(); module_compile_options.maxRegisterCount = OPTIX_COMPILE_DEFAULT_MAX_REGISTER_COUNT; char log[2048]; @@ -202,11 +282,14 @@ inline bool createModule(OptixModule &module, OptixDeviceContext &context, const ,"--split-compile=0" }; - if (macro != nullptr) { - compilerOptions.push_back(macro); + std::string flat_macros = ""; + + for (auto &ele : macros) { + compilerOptions.push_back(ele.c_str()); + flat_macros += ele; } - const char* input = sutil::getCodePTX( source, macro, name, inputSize, is_success, nullptr, compilerOptions); + const char* input = sutil::getCodePTX( source, flat_macros.c_str(), name, inputSize, is_success, nullptr, compilerOptions); if(is_success==false) { @@ -257,7 +340,7 @@ inline void createRenderGroups(OptixDeviceContext &context, OptixModule &_module &program_group_options, log, &sizeof_log, - &raygen_prog_group + &raygen_prog_group.reset() ) ); } @@ -272,7 +355,7 @@ inline void createRenderGroups(OptixDeviceContext &context, OptixModule &_module 1, // num program groups &program_group_options, log, &sizeof_log, - &radiance_miss_group + &radiance_miss_group.reset() ) ); memset( &desc, 0, sizeof( OptixProgramGroupDesc ) ); desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; @@ -285,11 +368,9 @@ inline void createRenderGroups(OptixDeviceContext &context, OptixModule &_module &program_group_options, log, &sizeof_log, - &occlusion_miss_group + &occlusion_miss_group.reset() ) 
); } - - } inline void createRTProgramGroups(OptixDeviceContext &context, OptixModule &_module, @@ -305,17 +386,21 @@ inline void createRTProgramGroups(OptixDeviceContext &context, OptixModule &_mod OptixProgramGroupDesc desc = {}; desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP; + const char* entryName = entry.empty()? nullptr:entry.c_str(); + +if (entryName != nullptr) { if(kind == "OPTIX_PROGRAM_GROUP_KIND_CLOSEHITGROUP") { desc.hitgroup.moduleCH = _module; - desc.hitgroup.entryFunctionNameCH = entry.c_str(); + desc.hitgroup.entryFunctionNameCH = entryName; } else if(kind == "OPTIX_PROGRAM_GROUP_KIND_ANYHITGROUP") { - desc.hitgroup.moduleAH = _module; - desc.hitgroup.entryFunctionNameAH = entry.c_str(); + desc.hitgroup.moduleAH = _module; + desc.hitgroup.entryFunctionNameAH = entryName; } +} if (moduleIS != nullptr) { desc.hitgroup.moduleIS = *moduleIS; @@ -1053,15 +1138,18 @@ struct OptixShaderCore { _occlusionEntry = occlusionEntry; } - bool loadProgram(uint idx, const char* macro=nullptr, tbb::task_group* _c_group = nullptr) + bool loadProgram(uint idx, const std::vector ¯o_list = {}, tbb::task_group* _c_group = nullptr) { std::string tmp_name = "MatShader.cu"; tmp_name = "$" + std::to_string(idx) + tmp_name; - if(createModule(module.reset(), context, _source, tmp_name.c_str(), macro, _c_group)) + if(createModule(module.reset(), context, _source, tmp_name.c_str(), macro_list, _c_group)) { std::cout<<"module created"< macros {}; + + if (fallback) { + macros.push_back("--define-macro=_FALLBACK_"); } - auto callable_done = createModule(callable_module.reset(), context, callable.c_str(), tmp_name.c_str(), macro.empty()? 
nullptr:macro.c_str()); + auto callable_done = createModule(callable_module.reset(), context, callable.c_str(), tmp_name.c_str(), macros); if (callable_done) { // Callable programs @@ -1124,7 +1213,7 @@ struct OptixShaderWrapper size_t LOG_SIZE = sizeof( LOG ); OPTIX_CHECK( - optixProgramGroupCreate( context, callable_prog_group_descs, 1, &callable_prog_group_options, LOG, &LOG_SIZE, &callable_prog_group.reset()); + optixProgramGroupCreate( context, callable_prog_group_descs, 1, &callable_prog_group_options, LOG, &LOG_SIZE, &callable_prog_group.reset()) ); return true; } @@ -1164,7 +1253,7 @@ inline void createPipeline() size_t num_progs = 3 + rtMaterialShaders.size() * 2; num_progs += rtMaterialShaders.size(); // callables; - OptixProgramGroup* program_groups = new OptixProgramGroup[num_progs]; + std::vector program_groups(num_progs, {}); program_groups[0] = raygen_prog_group; program_groups[1] = radiance_miss_group; program_groups[2] = occlusion_miss_group; @@ -1187,7 +1276,7 @@ inline void createPipeline() context, &pipeline_compile_options, &pipeline_link_options, - program_groups, + program_groups.data(), num_progs, log, &sizeof_log, @@ -1229,8 +1318,6 @@ inline void createPipeline() continuation_stack_size, max_traversal_depth ) ); - delete[]program_groups; - } diff --git a/zenovis/xinxinoptix/SDK/cuda/curve.h b/zenovis/xinxinoptix/SDK/cuda/curve.h index b24ffa8189..29e839389e 100644 --- a/zenovis/xinxinoptix/SDK/cuda/curve.h +++ b/zenovis/xinxinoptix/SDK/cuda/curve.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -31,209 +31,248 @@ #include #include + // // First order polynomial interpolator // -struct LinearBSplineSegment +struct LinearInterpolator { - __device__ __forceinline__ LinearBSplineSegment() {} - __device__ __forceinline__ LinearBSplineSegment( const float4* q ) { initialize( q ); } + __device__ __forceinline__ LinearInterpolator() {} __device__ __forceinline__ void initialize( const float4* q ) { p[0] = q[0]; - p[1] = q[1] - q[0]; // pre-transform p[] for fast evaluation + p[1] = q[1] - q[0]; } - __device__ __forceinline__ float radius( const float& u ) const { return p[0].w + p[1].w * u; } - __device__ __forceinline__ float3 position3( float u ) const { return (float3&)p[0] + u * (float3&)p[1]; } - __device__ __forceinline__ float4 position4( float u ) const { return p[0] + u * p[1]; } + __device__ __forceinline__ float4 position4( float u ) const + { + return p[0] + u * p[1]; // Horner scheme + } - __device__ __forceinline__ float min_radius( float u1, float u2 ) const + __device__ __forceinline__ float3 position3( float u ) const { - return fminf( radius( u1 ), radius( u2 ) ); + return make_float3( position4( u ) ); } - __device__ __forceinline__ float max_radius( float u1, float u2 ) const + __device__ __forceinline__ float radius( const float& u ) const { - if( !p[1].w ) - return p[0].w; // a quick bypass for constant width - return fmaxf( radius( u1 ), radius( u2 ) ); + return position4( u ).w; } - __device__ __forceinline__ float3 velocity3( float u ) const { return (float3&)p[1]; } - __device__ __forceinline__ float4 velocity4( float u ) const { return p[1]; } + __device__ __forceinline__ float4 velocity4( float u ) const + { + return p[1]; + } + + __device__ __forceinline__ float3 velocity3( float u ) const + { + return make_float3( velocity4( u ) ); + } + + __device__ __forceinline__ float 
derivative_of_radius( float u ) const + { + return velocity4( u ).w; + } __device__ __forceinline__ float3 acceleration3( float u ) const { return make_float3( 0.f ); } __device__ __forceinline__ float4 acceleration4( float u ) const { return make_float4( 0.f ); } - __device__ __forceinline__ float derivative_of_radius( float u ) const { return p[1].w; } - float4 p[2]; // pre-transformed "control points" for fast evaluation + float4 p[2]; }; // // Second order polynomial interpolator // -struct QuadraticBSplineSegment +struct QuadraticInterpolator { - __device__ __forceinline__ QuadraticBSplineSegment() {} - __device__ __forceinline__ QuadraticBSplineSegment( const float4* q ) { initializeFromBSpline( q ); } + __device__ __forceinline__ QuadraticInterpolator() {} __device__ __forceinline__ void initializeFromBSpline( const float4* q ) { - // pre-transform control-points for fast evaluation - p[0] = q[1] / 2.0f + q[0] / 2.0f; - p[1] = q[1] - q[0]; - p[2] = q[0] / 2.0f - q[1] + q[2] / 2.0f; + // Bspline-to-Poly = Matrix([[1/2, -1, 1/2], + // [-1, 1, 0], + // [1/2, 1/2, 0]]) + p[0] = ( q[0] - 2.0f * q[1] + q[2] ) / 2.0f; + p[1] = ( -2.0f * q[0] + 2.0f * q[1] ) / 2.0f; + p[2] = ( q[0] + q[1] ) / 2.0f; } __device__ __forceinline__ void export2BSpline( float4 bs[3] ) const { + // inverse of initializeFromBSpline + // Bspline-to-Poly = Matrix([[1/2, -1, 1/2], + // [-1, 1, 0], + // [1/2, 1/2, 0]]) + // invert to get: + // Poly-to-Bspline = Matrix([[0, -1/2, 1], + // [0, 1/2, 1], + // [2, 3/2, 1]]) bs[0] = p[0] - p[1] / 2; bs[1] = p[0] + p[1] / 2; bs[2] = p[0] + 1.5f * p[1] + 2 * p[2]; } + __device__ __forceinline__ float4 position4( float u ) const + { + return ( p[0] * u + p[1] ) * u + p[2]; // Horner scheme + } + __device__ __forceinline__ float3 position3( float u ) const { - return (float3&)p[0] + u * (float3&)p[1] + u * u * (float3&)p[2]; + return make_float3( position4( u ) ); } - __device__ __forceinline__ float4 position4( float u ) const { return p[0] + u * p[1] + 
u * u * p[2]; } - __device__ __forceinline__ float radius( float u ) const { return p[0].w + u * ( p[1].w + u * p[2].w ); } + __device__ __forceinline__ float radius( float u ) const + { + return position4( u ).w; + } + + __device__ __forceinline__ float4 velocity4( float u ) const + { + return 2.0f * p[0] * u + p[1]; + } - __device__ __forceinline__ float min_radius( float u1, float u2 ) const + __device__ __forceinline__ float3 velocity3( float u ) const { - float root1 = clamp( -0.5f * p[1].w / p[2].w, u1, u2 ); - return fminf( fminf( radius( u1 ), radius( u2 ) ), radius( root1 ) ); + return make_float3( velocity4( u ) ); } - __device__ __forceinline__ float max_radius( float u1, float u2 ) const + __device__ __forceinline__ float derivative_of_radius( float u ) const { - if( !p[1].w && !p[2].w ) - return p[0].w; // a quick bypass for constant width - float root1 = clamp( -0.5f * p[1].w / p[2].w, u1, u2 ); - return fmaxf( fmaxf( radius( u1 ), radius( u2 ) ), radius( root1 ) ); + return velocity4( u ).w; } - __device__ __forceinline__ float3 velocity3( float u ) const { return (float3&)p[1] + 2 * u * (float3&)p[2]; } - __device__ __forceinline__ float4 velocity4( float u ) const { return p[1] + 2 * u * p[2]; } + __device__ __forceinline__ float4 acceleration4( float u ) const + { + return 2.0f * p[0]; + } - __device__ __forceinline__ float3 acceleration3( float u ) const { return 2 * (float3&)p[2]; } - __device__ __forceinline__ float4 acceleration4( float u ) const { return 2 * p[2]; } + __device__ __forceinline__ float3 acceleration3( float u ) const + { + return make_float3( acceleration4( u ) ); + } - __device__ __forceinline__ float derivative_of_radius( float u ) const { return p[1].w + 2 * u * p[2].w; } - float4 p[3]; // pre-transformed "control points" for fast evaluation + float4 p[3]; }; // // Third order polynomial interpolator // -struct CubicBSplineSegment +// Storing {p0, p1, p2, p3} for evaluation: +// P(u) = p0 * u^3 + p1 * u^2 + p2 * u + p3 +// 
+struct CubicInterpolator { - __device__ __forceinline__ CubicBSplineSegment() {} - __device__ __forceinline__ CubicBSplineSegment( const float4* q ) { initializeFromBSpline( q ); } + __device__ __forceinline__ CubicInterpolator() {} __device__ __forceinline__ void initializeFromBSpline( const float4* q ) { - // pre-transform control points for fast evaluation - p[0] = ( q[2] + q[0] ) / 6 + ( 4 / 6.0f ) * q[1]; - p[1] = q[2] - q[0]; - p[2] = q[2] - q[1]; - p[3] = q[3] - q[1]; + // Bspline-to-Poly = Matrix([[-1/6, 1/2, -1/2, 1/6], + // [ 1/2, -1, 1/2, 0], + // [-1/2, 0, 1/2, 0], + // [ 1/6, 2/3, 1/6, 0]]) + p[0] = ( q[0] * ( -1.0f ) + q[1] * ( 3.0f ) + q[2] * ( -3.0f ) + q[3] ) / 6.0f; + p[1] = ( q[0] * ( 3.0f ) + q[1] * ( -6.0f ) + q[2] * ( 3.0f ) ) / 6.0f; + p[2] = ( q[0] * ( -3.0f ) + q[2] * ( 3.0f ) ) / 6.0f; + p[3] = ( q[0] * ( 1.0f ) + q[1] * ( 4.0f ) + q[2] * ( 1.0f ) ) / 6.0f; } __device__ __forceinline__ void export2BSpline( float4 bs[4] ) const { // inverse of initializeFromBSpline - bs[0] = p[0] + ( 4 * p[2] - 5 * p[1] ) / 6; - bs[1] = p[0] + ( p[1] - 2 * p[2] ) / 6; - bs[2] = p[0] + ( p[1] + 4 * p[2] ) / 6; - bs[3] = p[0] + p[3] + ( p[1] - 2 * p[2] ) / 6; + // Bspline-to-Poly = Matrix([[-1/6, 1/2, -1/2, 1/6], + // [ 1/2, -1, 1/2, 0], + // [-1/2, 0, 1/2, 0], + // [ 1/6, 2/3, 1/6, 0]]) + // invert to get: + // Poly-to-Bspline = Matrix([[0, 2/3, -1, 1], + // [0, -1/3, 0, 1], + // [0, 2/3, 1, 1], + // [6, 11/3, 2, 1]]) + bs[0] = ( p[1] * ( 2.0f ) + p[2] * ( -1.0f ) + p[3] ) / 3.0f; + bs[1] = ( p[1] * ( -1.0f ) + p[3] ) / 3.0f; + bs[2] = ( p[1] * ( 2.0f ) + p[2] * ( 1.0f ) + p[3] ) / 3.0f; + bs[3] = ( p[0] + p[1] * ( 11.0f ) + p[2] * ( 2.0f ) + p[3] ) / 3.0f; } - __device__ __forceinline__ static float3 terms( float u ) + + __device__ __forceinline__ void initializeFromCatrom(const float4* q) { - float uu = u * u; - float u3 = ( 1 / 6.0f ) * uu * u; - return make_float3( u3 + 0.5f * ( u - uu ), uu - 4 * u3, u3 ); + // Catrom-to-Poly = Matrix([[-1/2, 3/2, 
-3/2, 1/2], + // [1, -5/2, 2, -1/2], + // [-1/2, 0, 1/2, 0], + // [0, 1, 0, 0]]) + p[0] = ( -1.0f * q[0] + ( 3.0f ) * q[1] + ( -3.0f ) * q[2] + ( 1.0f ) * q[3] ) / 2.0f; + p[1] = ( 2.0f * q[0] + ( -5.0f ) * q[1] + ( 4.0f ) * q[2] + ( -1.0f ) * q[3] ) / 2.0f; + p[2] = ( -1.0f * q[0] + ( 1.0f ) * q[2] ) / 2.0f; + p[3] = ( ( 2.0f ) * q[1] ) / 2.0f; } - __device__ __forceinline__ float3 position3( float u ) const + __device__ __forceinline__ void export2Catrom(float4 cr[4]) const { - float3 q = terms( u ); - return (float3&)p[0] + q.x * (float3&)p[1] + q.y * (float3&)p[2] + q.z * (float3&)p[3]; + // Catrom-to-Poly = Matrix([[-1/2, 3/2, -3/2, 1/2], + // [1, -5/2, 2, -1/2], + // [-1/2, 0, 1/2, 0], + // [0, 1, 0, 0]]) + // invert to get: + // Poly-to-Catrom = Matrix([[1, 1, -1, 1], + // [0, 0, 0, 1], + // [1, 1, 1, 1], + // [6, 4, 2, 1]]) + cr[0] = ( p[0] * 6.f/6.f ) - ( p[1] * 5.f/6.f ) + ( p[2] * 2.f/6.f ) + ( p[3] * 1.f/6.f ); + cr[1] = ( p[0] * 6.f/6.f ) ; + cr[2] = ( p[0] * 6.f/6.f ) + ( p[1] * 1.f/6.f ) + ( p[2] * 2.f/6.f ) + ( p[3] * 1.f/6.f ); + cr[3] = ( p[0] * 6.f/6.f ) + ( p[3] * 6.f/6.f ); } - __device__ __forceinline__ float4 position4( float u ) const + + __device__ __forceinline__ void initializeFromBezier(const float4* q) { - float3 q = terms( u ); - return p[0] + q.x * p[1] + q.y * p[2] + q.z * p[3]; + // Bezier-to-Poly = Matrix([[-1, 3, -3, 1], + // [ 3, -6, 3, 0], + // [-3, 3, 0, 0], + // [ 1, 0, 0, 0]]) + p[0] = q[0] * ( -1.0f ) + q[1] * ( 3.0f ) + q[2] * ( -3.0f ) + q[3]; + p[1] = q[0] * ( 3.0f ) + q[1] * ( -6.0f ) + q[2] * ( 3.0f ); + p[2] = q[0] * ( -3.0f ) + q[1] * ( 3.0f ); + p[3] = q[0]; } - __device__ __forceinline__ float radius( float u ) const + __device__ __forceinline__ void export2Bezier(float4 bz[4]) const { - return p[0].w + u * ( p[1].w / 2 + u * ( ( p[2].w - p[1].w / 2 ) + u * ( p[1].w - 4 * p[2].w + p[3].w ) / 6 ) ); + // inverse of initializeFromBezier + // Bezier-to-Poly = Matrix([[-1, 3, -3, 1], + // [ 3, -6, 3, 0], + // [-3, 3, 0, 
0], + // [ 1, 0, 0, 0]]) + // invert to get: + // Poly-to-Bezier = Matrix([[0, 0, 0, 1], + // [0, 0, 1/3, 1], + // [0, 1/3, 2/3, 1], + // [1, 1, 1, 1]]) + bz[0] = p[3]; + bz[1] = p[2] * (1.f/3.f) + p[3]; + bz[2] = p[1] * (1.f/3.f) + p[2] * (2.f/3.f) + p[3]; + bz[3] = p[0] + p[1] + p[2] + p[3]; } - __device__ __forceinline__ float min_radius( float u1, float u2 ) const + __device__ __forceinline__ float4 position4( float u ) const { - // a + 2 b u - c u^2 - float a = p[1].w; - float b = 2 * p[2].w - p[1].w; - float c = 4 * p[2].w - p[1].w - p[3].w; - float rmin = fminf( radius( u1 ), radius( u2 ) ); - if( fabsf( c ) < 1e-5f ) - { - float root1 = clamp( -0.5f * a / b, u1, u2 ); - return fminf( rmin, radius( root1 ) ); - } - else - { - float det = b * b + a * c; - det = det <= 0.0f ? 0.0f : sqrt( det ); - float root1 = clamp( ( b + det ) / c, u1, u2 ); - float root2 = clamp( ( b - det ) / c, u1, u2 ); - return fminf( rmin, fminf( radius( root1 ), radius( root2 ) ) ); - } - } + return ( ( ( p[0] * u ) + p[1] ) * u + p[2] ) * u + p[3]; // Horner scheme + } - __device__ __forceinline__ float max_radius( float u1, float u2 ) const + __device__ __forceinline__ float3 position3( float u ) const { - if( !p[1].w && !p[2].w && !p[3].w ) - return p[0].w; // a quick bypass for constant width - // a + 2 b u - c u^2 - float a = p[1].w; - float b = 2 * p[2].w - p[1].w; - float c = 4 * p[2].w - p[1].w - p[3].w; - float rmax = fmaxf( radius( u1 ), radius( u2 ) ); - if( fabsf( c ) < 1e-5f ) - { - float root1 = clamp( -0.5f * a / b, u1, u2 ); - return fmaxf( rmax, radius( root1 ) ); - } - else - { - float det = b * b + a * c; - det = det <= 0.0f ? 
0.0f : sqrt( det ); - float root1 = clamp( ( b + det ) / c, u1, u2 ); - float root2 = clamp( ( b - det ) / c, u1, u2 ); - return fmaxf( rmax, fmaxf( radius( root1 ), radius( root2 ) ) ); - } + // rely on compiler and inlining for dead code removal + return make_float3( position4( u ) ); } - - __device__ __forceinline__ float3 velocity3( float u ) const + __device__ __forceinline__ float radius( float u ) const { - // adjust u to avoid problems with tripple knots. - if( u == 0 ) - u = 0.000001f; - if( u == 1 ) - u = 0.999999f; - float v = 1 - u; - return 0.5f * v * v * (float3&)p[1] + 2 * v * u * (float3&)p[2] + 0.5f * u * u * (float3&)p[3]; + return position4( u ).w; } __device__ __forceinline__ float4 velocity4( float u ) const @@ -243,25 +282,33 @@ struct CubicBSplineSegment u = 0.000001f; if( u == 1 ) u = 0.999999f; - float v = 1 - u; - return 0.5f * v * v * p[1] + 2 * v * u * p[2] + 0.5f * u * u * p[3]; + return ( ( 3.0f * p[0] * u ) + 2.0f * p[1] ) * u + p[2]; } - __device__ __forceinline__ float3 acceleration3( float u ) const { return make_float3( acceleration4( u ) ); } - __device__ __forceinline__ float4 acceleration4( float u ) const + __device__ __forceinline__ float3 velocity3( float u ) const { - return 2 * p[2] - p[1] + ( p[1] - 4 * p[2] + p[3] ) * u; + return make_float3( velocity4( u ) ); } __device__ __forceinline__ float derivative_of_radius( float u ) const { - float v = 1 - u; - return 0.5f * v * v * p[1].w + 2 * v * u * p[2].w + 0.5f * u * u * p[3].w; + return velocity4( u ).w; + } + + __device__ __forceinline__ float4 acceleration4( float u ) const + { + return 6.0f * p[0] * u + 2.0f * p[1]; // Horner scheme } - float4 p[4]; // pre-transformed "control points" for fast evaluation + __device__ __forceinline__ float3 acceleration3( float u ) const + { + return make_float3( acceleration4( u ) ); + } + + float4 p[4]; }; + // Compute curve primitive surface normal in object space. 
// // Template parameters: @@ -332,18 +379,18 @@ __device__ __forceinline__ float3 surfaceNormal( const CurveType& bc, float u, f } template -__device__ __forceinline__ float3 surfaceNormal( const LinearBSplineSegment& bc, float u, float3& ps ) +__device__ __forceinline__ float3 surfaceNormal( const LinearInterpolator& bc, float u, float3& ps ) { float3 normal; if( u == 0.0f ) { - normal = ps - (float3&)(bc.p[0]); // special handling for round endcaps + normal = ps - ( float3 & )( bc.p[0] ); // special handling for round endcaps } else if( u >= 1.0f ) { // reconstruct second control point (Note: the interpolator pre-transforms // the control-points to speed up repeated evaluation. - const float3 p1 = (float3&)(bc.p[1]) + (float3&)(bc.p[0]); + const float3 p1 = ( float3 & ) (bc.p[1] ) + ( float3 & )( bc.p[0] ); normal = ps - p1; // special handling for round endcaps } else diff --git a/zenovis/xinxinoptix/SDK/support/CMakeLists.txt b/zenovis/xinxinoptix/SDK/support/CMakeLists.txt deleted file mode 100644 index bf99db65c0..0000000000 --- a/zenovis/xinxinoptix/SDK/support/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -# -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# - -find_package( OpenGL REQUIRED ) - -if( UNIX ) - # On Unix-like systems, shared libraries can use the soname system. - set(GLFW_LIB_NAME glfw) -else() - set(GLFW_LIB_NAME glfw3) -endif() - -# Filter out warnings that cause problems with GLFW. 
-if( WARNINGS_AS_ERRORS AND (USING_GNU_CXX OR USING_CLANG_CXX)) - string( REPLACE "-Wdeclaration-after-statement" "" filtered_c_flags ${CMAKE_C_FLAGS} ) - string( REPLACE "-Wsign-compare" "" filtered_c_flags ${filtered_c_flags} ) - push_variable( CMAKE_C_FLAGS "${filtered_c_flags} -Wno-format-truncation -Wno-deprecated" ) -endif() -add_subdirectory( GLFW ) -set_property( TARGET glfw PROPERTY C_STANDARD 99 ) -if( WARNINGS_AS_ERRORS AND (USING_GNU_CXX OR USING_CLANG_CXX)) - pop_variable( CMAKE_C_FLAGS ) -endif() - -if( WARNINGS_AS_ERRORS AND (USING_GNU_CXX OR USING_CLANG_CXX)) - push_variable( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess" ) -endif() -add_subdirectory( imgui ) -if( WARNINGS_AS_ERRORS AND (USING_GNU_CXX OR USING_CLANG_CXX)) - pop_variable( CMAKE_CXX_FLAGS ) -endif() - -add_library( glad SHARED - KHR/khrplatform.h - glad/glad.c - glad/glad.h -) -target_compile_definitions( glad - PRIVATE GLAD_GLAPI_EXPORT_BUILD - PUBLIC GLAD_GLAPI_EXPORT ) -target_include_directories( glad PUBLIC . ) -target_link_libraries( glad PUBLIC ${OPENGL_LIBRARIES} ) - - -# Set IDE folders for targets -set_property( TARGET glad PROPERTY FOLDER ${OPTIX_IDE_FOLDER} ) -set_property( TARGET glfw PROPERTY FOLDER ${OPTIX_IDE_FOLDER} ) -set_property( TARGET imgui PROPERTY FOLDER ${OPTIX_IDE_FOLDER} ) diff --git a/zenovis/xinxinoptix/SDK/support/tinyexr/tinyexr.h b/zenovis/xinxinoptix/SDK/support/tinyexr/tinyexr.h deleted file mode 100644 index 20adfeffbb..0000000000 --- a/zenovis/xinxinoptix/SDK/support/tinyexr/tinyexr.h +++ /dev/null @@ -1,13315 +0,0 @@ -/* -Copyright (c) 2014 - 2019, Syoyo Fujita and many contributors. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Syoyo Fujita nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// TinyEXR contains some OpenEXR code, which is licensed under ------------ - -/////////////////////////////////////////////////////////////////////////// -// -// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas -// Digital Ltd. LLC -// -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. 
-// * Neither the name of Industrial Light & Magic nor the names of -// its contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -/////////////////////////////////////////////////////////////////////////// - -// End of OpenEXR license ------------------------------------------------- - -#ifndef TINYEXR_H_ -#define TINYEXR_H_ - -// -// -// Do this: -// #define TINYEXR_IMPLEMENTATION -// before you include this file in *one* C or C++ file to create the -// implementation. -// -// // i.e. it should look like this: -// #include ... -// #include ... -// #include ... -// #define TINYEXR_IMPLEMENTATION -// #include "tinyexr.h" -// -// - -#include // for size_t -#include // guess stdint.h is available(C99) - -#ifdef __cplusplus -extern "C" { -#endif - -// Use embedded miniz or not to decode ZIP format pixel. Linking with zlib -// required if this flas is 0. -#ifndef TINYEXR_USE_MINIZ -#define TINYEXR_USE_MINIZ (1) -#endif - -// Disable PIZ comporession when applying cpplint. 
-#ifndef TINYEXR_USE_PIZ -#define TINYEXR_USE_PIZ (1) -#endif - -#ifndef TINYEXR_USE_ZFP -#define TINYEXR_USE_ZFP (0) // TinyEXR extension. -// http://computation.llnl.gov/projects/floating-point-compression -#endif - -#define TINYEXR_SUCCESS (0) -#define TINYEXR_ERROR_INVALID_MAGIC_NUMBER (-1) -#define TINYEXR_ERROR_INVALID_EXR_VERSION (-2) -#define TINYEXR_ERROR_INVALID_ARGUMENT (-3) -#define TINYEXR_ERROR_INVALID_DATA (-4) -#define TINYEXR_ERROR_INVALID_FILE (-5) -#define TINYEXR_ERROR_INVALID_PARAMETER (-6) -#define TINYEXR_ERROR_CANT_OPEN_FILE (-7) -#define TINYEXR_ERROR_UNSUPPORTED_FORMAT (-8) -#define TINYEXR_ERROR_INVALID_HEADER (-9) -#define TINYEXR_ERROR_UNSUPPORTED_FEATURE (-10) -#define TINYEXR_ERROR_CANT_WRITE_FILE (-11) -#define TINYEXR_ERROR_SERIALZATION_FAILED (-12) - -// @note { OpenEXR file format: http://www.openexr.com/openexrfilelayout.pdf } - -// pixel type: possible values are: UINT = 0 HALF = 1 FLOAT = 2 -#define TINYEXR_PIXELTYPE_UINT (0) -#define TINYEXR_PIXELTYPE_HALF (1) -#define TINYEXR_PIXELTYPE_FLOAT (2) - -#define TINYEXR_MAX_HEADER_ATTRIBUTES (1024) -#define TINYEXR_MAX_CUSTOM_ATTRIBUTES (128) - -#define TINYEXR_COMPRESSIONTYPE_NONE (0) -#define TINYEXR_COMPRESSIONTYPE_RLE (1) -#define TINYEXR_COMPRESSIONTYPE_ZIPS (2) -#define TINYEXR_COMPRESSIONTYPE_ZIP (3) -#define TINYEXR_COMPRESSIONTYPE_PIZ (4) -#define TINYEXR_COMPRESSIONTYPE_ZFP (128) // TinyEXR extension - -#define TINYEXR_ZFP_COMPRESSIONTYPE_RATE (0) -#define TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION (1) -#define TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY (2) - -#define TINYEXR_TILE_ONE_LEVEL (0) -#define TINYEXR_TILE_MIPMAP_LEVELS (1) -#define TINYEXR_TILE_RIPMAP_LEVELS (2) - -#define TINYEXR_TILE_ROUND_DOWN (0) -#define TINYEXR_TILE_ROUND_UP (1) - -typedef struct _EXRVersion { - int version; // this must be 2 - int tiled; // tile format image - int long_name; // long name attribute - int non_image; // deep image(EXR 2.0) - int multipart; // multi-part(EXR 2.0) -} EXRVersion; - 
-typedef struct _EXRAttribute { - char name[256]; // name and type are up to 255 chars long. - char type[256]; - unsigned char *value; // uint8_t* - int size; - int pad0; -} EXRAttribute; - -typedef struct _EXRChannelInfo { - char name[256]; // less than 255 bytes long - int pixel_type; - int x_sampling; - int y_sampling; - unsigned char p_linear; - unsigned char pad[3]; -} EXRChannelInfo; - -typedef struct _EXRTile { - int offset_x; - int offset_y; - int level_x; - int level_y; - - int width; // actual width in a tile. - int height; // actual height int a tile. - - unsigned char **images; // image[channels][pixels] -} EXRTile; - -typedef struct _EXRHeader { - float pixel_aspect_ratio; - int line_order; - int data_window[4]; - int display_window[4]; - float screen_window_center[2]; - float screen_window_width; - - int chunk_count; - - // Properties for tiled format(`tiledesc`). - int tiled; - int tile_size_x; - int tile_size_y; - int tile_level_mode; - int tile_rounding_mode; - - int long_name; - int non_image; - int multipart; - unsigned int header_len; - - // Custom attributes(exludes required attributes(e.g. `channels`, - // `compression`, etc) - int num_custom_attributes; - EXRAttribute *custom_attributes; // array of EXRAttribute. size = - // `num_custom_attributes`. - - EXRChannelInfo *channels; // [num_channels] - - int *pixel_types; // Loaded pixel type(TINYEXR_PIXELTYPE_*) of `images` for - // each channel. This is overwritten with `requested_pixel_types` when - // loading. - int num_channels; - - int compression_type; // compression type(TINYEXR_COMPRESSIONTYPE_*) - int *requested_pixel_types; // Filled initially by - // ParseEXRHeaderFrom(Meomory|File), then users - // can edit it(only valid for HALF pixel type - // channel) - -} EXRHeader; - -typedef struct _EXRMultiPartHeader { - int num_headers; - EXRHeader *headers; - -} EXRMultiPartHeader; - -typedef struct _EXRImage { - EXRTile *tiles; // Tiled pixel data. 
The application must reconstruct image - // from tiles manually. NULL if scanline format. - unsigned char **images; // image[channels][pixels]. NULL if tiled format. - - int width; - int height; - int num_channels; - - // Properties for tile format. - int num_tiles; - -} EXRImage; - -typedef struct _EXRMultiPartImage { - int num_images; - EXRImage *images; - -} EXRMultiPartImage; - -typedef struct _DeepImage { - const char **channel_names; - float ***image; // image[channels][scanlines][samples] - int **offset_table; // offset_table[scanline][offsets] - int num_channels; - int width; - int height; - int pad0; -} DeepImage; - -// @deprecated { to be removed. } -// Loads single-frame OpenEXR image. Assume EXR image contains A(single channel -// alpha) or RGB(A) channels. -// Application must free image data as returned by `out_rgba` -// Result image format is: float x RGBA x width x hight -// Returns negative value and may set error string in `err` when there's an -// error -extern int LoadEXR(float **out_rgba, int *width, int *height, - const char *filename, const char **err); - -// @deprecated { to be removed. } -// Simple wrapper API for ParseEXRHeaderFromFile. -// checking given file is a EXR file(by just look up header) -// @return TINYEXR_SUCCEES for EXR image, TINYEXR_ERROR_INVALID_HEADER for -// others -extern int IsEXR(const char *filename); - -// @deprecated { to be removed. } -// Saves single-frame OpenEXR image. Assume EXR image contains RGB(A) channels. -// components must be 1(Grayscale), 3(RGB) or 4(RGBA). -// Input image format is: `float x width x height`, or `float x RGB(A) x width x -// hight` -// Save image as fp16(HALF) format when `save_as_fp16` is positive non-zero -// value. -// Save image as fp32(FLOAT) format when `save_as_fp16` is 0. -// Use ZIP compression by default. 
-// Returns negative value and may set error string in `err` when there's an -// error -extern int SaveEXR(const float *data, const int width, const int height, - const int components, const int save_as_fp16, - const char *filename, const char **err); - -// Initialize EXRHeader struct -extern void InitEXRHeader(EXRHeader *exr_header); - -// Initialize EXRImage struct -extern void InitEXRImage(EXRImage *exr_image); - -// Free's internal data of EXRHeader struct -extern int FreeEXRHeader(EXRHeader *exr_header); - -// Free's internal data of EXRImage struct -extern int FreeEXRImage(EXRImage *exr_image); - -// Free's error message -extern void FreeEXRErrorMessage(const char *msg); - -// Parse EXR version header of a file. -extern int ParseEXRVersionFromFile(EXRVersion *version, const char *filename); - -// Parse EXR version header from memory-mapped EXR data. -extern int ParseEXRVersionFromMemory(EXRVersion *version, - const unsigned char *memory, size_t size); - -// Parse single-part OpenEXR header from a file and initialize `EXRHeader`. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRHeaderFromFile(EXRHeader *header, const EXRVersion *version, - const char *filename, const char **err); - -// Parse single-part OpenEXR header from a memory and initialize `EXRHeader`. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRHeaderFromMemory(EXRHeader *header, - const EXRVersion *version, - const unsigned char *memory, size_t size, - const char **err); - -// Parse multi-part OpenEXR headers from a file and initialize `EXRHeader*` -// array. 
-// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRMultipartHeaderFromFile(EXRHeader ***headers, - int *num_headers, - const EXRVersion *version, - const char *filename, - const char **err); - -// Parse multi-part OpenEXR headers from a memory and initialize `EXRHeader*` -// array -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int ParseEXRMultipartHeaderFromMemory(EXRHeader ***headers, - int *num_headers, - const EXRVersion *version, - const unsigned char *memory, - size_t size, const char **err); - -// Loads single-part OpenEXR image from a file. -// Application must setup `ParseEXRHeaderFromFile` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRImageFromFile(EXRImage *image, const EXRHeader *header, - const char *filename, const char **err); - -// Loads single-part OpenEXR image from a memory. -// Application must setup `EXRHeader` with -// `ParseEXRHeaderFromMemory` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRImageFromMemory(EXRImage *image, const EXRHeader *header, - const unsigned char *memory, - const size_t size, const char **err); - -// Loads multi-part OpenEXR image from a file. -// Application must setup `ParseEXRMultipartHeaderFromFile` before calling this -// function. 
-// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRMultipartImageFromFile(EXRImage *images, - const EXRHeader **headers, - unsigned int num_parts, - const char *filename, - const char **err); - -// Loads multi-part OpenEXR image from a memory. -// Application must setup `EXRHeader*` array with -// `ParseEXRMultipartHeaderFromMemory` before calling this function. -// Application can free EXRImage using `FreeEXRImage` -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRMultipartImageFromMemory(EXRImage *images, - const EXRHeader **headers, - unsigned int num_parts, - const unsigned char *memory, - const size_t size, const char **err); - -// Saves multi-channel, single-frame OpenEXR image to a file. -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int SaveEXRImageToFile(const EXRImage *image, - const EXRHeader *exr_header, const char *filename, - const char **err); - -// Saves multi-channel, single-frame OpenEXR image to a memory. -// Image is compressed using EXRImage.compression value. -// Return the number of bytes if success. -// Return zero and will set error string in `err` when there's an -// error. -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern size_t SaveEXRImageToMemory(const EXRImage *image, - const EXRHeader *exr_header, - unsigned char **memory, const char **err); - -// Loads single-frame OpenEXR deep image. 
-// Application must free memory of variables in DeepImage(image, offset_table) -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadDeepEXR(DeepImage *out_image, const char *filename, - const char **err); - -// NOT YET IMPLEMENTED: -// Saves single-frame OpenEXR deep image. -// Returns negative value and may set error string in `err` when there's an -// error -// extern int SaveDeepEXR(const DeepImage *in_image, const char *filename, -// const char **err); - -// NOT YET IMPLEMENTED: -// Loads multi-part OpenEXR deep image. -// Application must free memory of variables in DeepImage(image, offset_table) -// extern int LoadMultiPartDeepEXR(DeepImage **out_image, int num_parts, const -// char *filename, -// const char **err); - -// For emscripten. -// Loads single-frame OpenEXR image from memory. Assume EXR image contains -// RGB(A) channels. -// Returns negative value and may set error string in `err` when there's an -// error -// When there was an error message, Application must free `err` with -// FreeEXRErrorMessage() -extern int LoadEXRFromMemory(float **out_rgba, int *width, int *height, - const unsigned char *memory, size_t size, - const char **err); - -#ifdef __cplusplus -} -#endif - -#endif // TINYEXR_H_ - -#ifdef TINYEXR_IMPLEMENTATION -#ifndef TINYEXR_IMPLEMENTATION_DEIFNED -#define TINYEXR_IMPLEMENTATION_DEIFNED - -#include -#include -#include -#include -#include -#include - -//#include // debug - -#include -#include -#include - -#if __cplusplus > 199711L -// C++11 -#include -#endif // __cplusplus > 199711L - -#ifdef _OPENMP -#include -#endif - -#if TINYEXR_USE_MINIZ -#else -// Issue #46. 
Please include your own zlib-compatible API header before -// including `tinyexr.h` -//#include "zlib.h" -#endif - -#if TINYEXR_USE_ZFP -#include "zfp.h" -#endif - -namespace tinyexr { - -#if __cplusplus > 199711L -// C++11 -typedef uint64_t tinyexr_uint64; -typedef int64_t tinyexr_int64; -#else -// Although `long long` is not a standard type pre C++11, assume it is defined -// as a compiler's extension. -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#endif -typedef unsigned long long tinyexr_uint64; -typedef long long tinyexr_int64; -#ifdef __clang__ -#pragma clang diagnostic pop -#endif -#endif - -#if TINYEXR_USE_MINIZ - -namespace miniz { - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wpadded" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++11-extensions" -#pragma clang diagnostic ignored "-Wconversion" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#pragma clang diagnostic ignored "-Wundef" - -#if __has_warning("-Wcomma") -#pragma clang diagnostic ignored "-Wcomma" -#endif - -#if __has_warning("-Wmacro-redefined") -#pragma clang diagnostic ignored "-Wmacro-redefined" -#endif - -#if __has_warning("-Wcast-qual") -#pragma clang diagnostic ignored "-Wcast-qual" -#endif - -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#if __has_warning("-Wtautological-constant-compare") -#pragma clang diagnostic ignored "-Wtautological-constant-compare" -#endif - -#endif - -/* miniz.c v1.15 - public domain deflate/inflate, zlib-subset, ZIP - reading/writing/appending, PNG writing - See "unlicense" statement at the end of this file. - Rich Geldreich , last updated Oct. 
13, 2013 - Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: - http://www.ietf.org/rfc/rfc1951.txt - - Most API's defined in miniz.c are optional. For example, to disable the - archive related functions just define - MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO - (see the list below for more macros). - - * Change History - 10/13/13 v1.15 r4 - Interim bugfix release while I work on the next major - release with Zip64 support (almost there!): - - Critical fix for the MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY bug - (thanks kahmyong.moon@hp.com) which could cause locate files to not find - files. This bug - would only have occured in earlier versions if you explicitly used this - flag, OR if you used mz_zip_extract_archive_file_to_heap() or - mz_zip_add_mem_to_archive_file_in_place() - (which used this flag). If you can't switch to v1.15 but want to fix - this bug, just remove the uses of this flag from both helper funcs (and of - course don't use the flag). - - Bugfix in mz_zip_reader_extract_to_mem_no_alloc() from kymoon when - pUser_read_buf is not NULL and compressed size is > uncompressed size - - Fixing mz_zip_reader_extract_*() funcs so they don't try to extract - compressed data from directory entries, to account for weird zipfiles which - contain zero-size compressed data on dir entries. - Hopefully this fix won't cause any issues on weird zip archives, - because it assumes the low 16-bits of zip external attributes are DOS - attributes (which I believe they always are in practice). - - Fixing mz_zip_reader_is_file_a_directory() so it doesn't check the - internal attributes, just the filename and external attributes - - mz_zip_reader_init_file() - missing MZ_FCLOSE() call if the seek failed - - Added cmake support for Linux builds which builds all the examples, - tested with clang v3.3 and gcc v4.6. 
- - Clang fix for tdefl_write_image_to_png_file_in_memory() from toffaletti - - Merged MZ_FORCEINLINE fix from hdeanclark - - Fix include before config #ifdef, thanks emil.brink - - Added tdefl_write_image_to_png_file_in_memory_ex(): supports Y flipping - (super useful for OpenGL apps), and explicit control over the compression - level (so you can - set it to 1 for real-time compression). - - Merged in some compiler fixes from paulharris's github repro. - - Retested this build under Windows (VS 2010, including static analysis), - tcc 0.9.26, gcc v4.6 and clang v3.3. - - Added example6.c, which dumps an image of the mandelbrot set to a PNG - file. - - Modified example2 to help test the - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY flag more. - - In r3: Bugfix to mz_zip_writer_add_file() found during merge: Fix - possible src file fclose() leak if alignment bytes+local header file write - faiiled - - In r4: Minor bugfix to mz_zip_writer_add_from_zip_reader(): - Was pushing the wrong central dir header offset, appears harmless in this - release, but it became a problem in the zip64 branch - 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, - #include (thanks fermtect). - 5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix - mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. - - Temporarily/locally slammed in "typedef unsigned long mz_ulong" and - re-ran a randomized regression test on ~500k files. - - Eliminated a bunch of warnings when compiling with GCC 32-bit/64. - - Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze - (static analysis) option and fixed all warnings (except for the silly - "Use of the comma-operator in a tested expression.." analysis warning, - which I purposely use to work around a MSVC compiler warning). - - Created 32-bit and 64-bit Codeblocks projects/workspace. Built and - tested Linux executables. The codeblocks workspace is compatible with - Linux+Win32/x64. 
- - Added miniz_tester solution/project, which is a useful little app - derived from LZHAM's tester app that I use as part of the regression test. - - Ran miniz.c and tinfl.c through another series of regression testing on - ~500,000 files and archives. - - Modified example5.c so it purposely disables a bunch of high-level - functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the - MINIZ_NO_STDIO bug report.) - - Fix ftell() usage in examples so they exit with an error on files which - are too large (a limitation of the examples, not miniz itself). - 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple - minor level_and_flags issues in the archive API's. - level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce - Dawson for the feedback/bug report. - 5/28/11 v1.11 - Added statement from unlicense.org - 5/27/11 v1.10 - Substantial compressor optimizations: - - Level 1 is now ~4x faster than before. The L1 compressor's throughput - now varies between 70-110MB/sec. on a - - Core i7 (actual throughput varies depending on the type of data, and x64 - vs. x86). - - Improved baseline L2-L9 compression perf. Also, greatly improved - compression perf. issues on some file types. - - Refactored the compression code for better readability and - maintainability. - - Added level 10 compression level (L10 has slightly better ratio than - level 9, but could have a potentially large - drop in throughput on some files). - 5/15/11 v1.09 - Initial stable release. - - * Low-level Deflate/Inflate implementation notes: - - Compression: Use the "tdefl" API's. The compressor supports raw, static, - and dynamic blocks, lazy or - greedy parsing, match length filtering, RLE-only, and Huffman-only streams. - It performs and compresses - approximately as well as zlib. - - Decompression: Use the "tinfl" API's. The entire decompressor is - implemented as a single function - coroutine: see tinfl_decompress(). 
It supports decompression into a 32KB - (or larger power of 2) wrapping buffer, or into a memory - block large enough to hold the entire file. - - The low-level tdefl/tinfl API's do not make any use of dynamic memory - allocation. - - * zlib-style API notes: - - miniz.c implements a fairly large subset of zlib. There's enough - functionality present for it to be a drop-in - zlib replacement in many apps: - The z_stream struct, optional memory allocation callbacks - deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound - inflateInit/inflateInit2/inflate/inflateEnd - compress, compress2, compressBound, uncompress - CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly - routines. - Supports raw deflate streams or standard zlib streams with adler-32 - checking. - - Limitations: - The callback API's are not implemented yet. No support for gzip headers or - zlib static dictionaries. - I've tried to closely emulate zlib's various flavors of stream flushing - and return status codes, but - there are no guarantees that miniz.c pulls this off perfectly. - - * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, - originally written by - Alex Evans. Supports 1-4 bytes/pixel images. - - * ZIP archive API notes: - - The ZIP archive API's where designed with simplicity and efficiency in - mind, with just enough abstraction to - get the job done with minimal fuss. There are simple API's to retrieve file - information, read files from - existing archives, create new archives, append new files to existing - archives, or clone archive data from - one archive to another. It supports archives located in memory or the heap, - on disk (using stdio.h), - or you can specify custom file read/write callbacks. 
- - - Archive reading: Just call this function to read a single file from a - disk archive: - - void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const - char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - - For more complex cases, use the "mz_zip_reader" functions. Upon opening an - archive, the entire central - directory is located and read as-is into memory, and subsequent file access - only occurs when reading individual files. - - - Archives file scanning: The simple way is to use this function to scan a - loaded archive for a specific file: - - int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - - The locate operation can optionally check file comments too, which (as one - example) can be used to identify - multiple versions of the same file in an archive. This function uses a - simple linear search through the central - directory, so it's not very fast. - - Alternately, you can iterate through all the files in an archive (using - mz_zip_reader_get_num_files()) and - retrieve detailed info on each file by calling mz_zip_reader_file_stat(). - - - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer - immediately writes compressed file data - to disk and builds an exact image of the central directory in memory. The - central directory image is written - all at once at the end of the archive file when the archive is finalized. - - The archive writer can optionally align each file's local header and file - data to any power of 2 alignment, - which can be useful when the archive will be read from optical media. Also, - the writer supports placing - arbitrary data blobs at the very beginning of ZIP archives. Archives - written using either feature are still - readable by any ZIP tool. 
- - - Archive appending: The simple way to add a single file to an archive is - to call this function: - - mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, - const char *pArchive_name, - const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 - comment_size, mz_uint level_and_flags); - - The archive will be created if it doesn't already exist, otherwise it'll be - appended to. - Note the appending is done in-place and is not an atomic operation, so if - something goes wrong - during the operation it's possible the archive could be left without a - central directory (although the local - file headers and file data will be fine, so the archive will be - recoverable). - - For more complex archive modification scenarios: - 1. The safest way is to use a mz_zip_reader to read the existing archive, - cloning only those bits you want to - preserve into a new archive using using the - mz_zip_writer_add_from_zip_reader() function (which compiles the - compressed file data as-is). When you're done, delete the old archive and - rename the newly written archive, and - you're done. This is safe but requires a bunch of temporary disk space or - heap memory. - - 2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using - mz_zip_writer_init_from_reader(), - append new files as needed, then finalize the archive which will write an - updated central directory to the - original archive. (This is basically what - mz_zip_add_mem_to_archive_file_in_place() does.) There's a - possibility that the archive's central directory could be lost with this - method if anything goes wrong, though. - - - ZIP archive support limitations: - No zip64 or spanning support. Extraction functions can only handle - unencrypted, stored or deflated files. - Requires streams capable of seeking. - - * This is a header file library, like stb_image.c. 
To get only a header file, - either cut and paste the - below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then - include miniz.c from it. - - * Important: For best perf. be sure to customize the below macros for your - target platform: - #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 - #define MINIZ_LITTLE_ENDIAN 1 - #define MINIZ_HAS_64BIT_REGISTERS 1 - - * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before - including miniz.c to ensure miniz - uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be - able to process large files - (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes). -*/ - -#ifndef MINIZ_HEADER_INCLUDED -#define MINIZ_HEADER_INCLUDED - -//#include - -// Defines to completely disable specific portions of miniz.c: -// If all macros here are defined the only functionality remaining will be -// CRC-32, adler-32, tinfl, and tdefl. - -// Define MINIZ_NO_STDIO to disable all usage and any functions which rely on -// stdio for file I/O. -//#define MINIZ_NO_STDIO - -// If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able -// to get the current time, or -// get/set file times, and the C run-time funcs that get/set times won't be -// called. -// The current downside is the times written to your archives will be from 1979. -#define MINIZ_NO_TIME - -// Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. -#define MINIZ_NO_ARCHIVE_APIS - -// Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive -// API's. -//#define MINIZ_NO_ARCHIVE_WRITING_APIS - -// Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression -// API's. -//#define MINIZ_NO_ZLIB_APIS - -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent -// conflicts against stock zlib. -//#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -// Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. 
-// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom -// user alloc/free/realloc -// callbacks to the zlib and archive API's, and a few stand-alone helper API's -// which don't provide custom user -// functions (such as tdefl_compress_mem_to_heap() and -// tinfl_decompress_mem_to_heap()) won't work. -//#define MINIZ_NO_MALLOC - -#if defined(__TINYC__) && (defined(__linux) || defined(__linux__)) -// TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc -// on Linux -#define MINIZ_NO_TIME -#endif - -#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) -//#include -#endif - -#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \ - defined(__i386) || defined(__i486__) || defined(__i486) || \ - defined(i386) || defined(__ia64__) || defined(__x86_64__) -// MINIZ_X86_OR_X64_CPU is only used to help set the below macros. -#define MINIZ_X86_OR_X64_CPU 1 -#endif - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. -#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#if MINIZ_X86_OR_X64_CPU -// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient -// integer loads and stores from unaligned addresses. -//#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 -#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES \ - 0 // disable to suppress compiler warnings -#endif - -#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || \ - defined(_LP64) || defined(__LP64__) || defined(__ia64__) || \ - defined(__x86_64__) -// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are -// reasonably fast (and don't involve compiler generated calls to helper -// functions). -#define MINIZ_HAS_64BIT_REGISTERS 1 -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API Definitions. 
- -// For more compatibility with zlib, miniz.c uses unsigned long for some -// parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! -typedef unsigned long mz_ulong; - -// mz_free() internally uses the MZ_FREE() macro (which by default calls free() -// unless you've modified the MZ_MALLOC macro) to release a block allocated from -// the heap. -void mz_free(void *p); - -#define MZ_ADLER32_INIT (1) -// mz_adler32() returns the initial adler-32 value to use when called with -// ptr==NULL. -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len); - -#define MZ_CRC32_INIT (0) -// mz_crc32() returns the initial CRC-32 value to use when called with -// ptr==NULL. -mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len); - -// Compression strategies. -enum { - MZ_DEFAULT_STRATEGY = 0, - MZ_FILTERED = 1, - MZ_HUFFMAN_ONLY = 2, - MZ_RLE = 3, - MZ_FIXED = 4 -}; - -// Method -#define MZ_DEFLATED 8 - -#ifndef MINIZ_NO_ZLIB_APIS - -// Heap allocation callbacks. -// Note that mz_alloc_func parameter types purpsosely differ from zlib's: -// items/size is size_t, not unsigned long. -typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size); -typedef void (*mz_free_func)(void *opaque, void *address); -typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, - size_t size); - -#define MZ_VERSION "9.1.15" -#define MZ_VERNUM 0x91F0 -#define MZ_VER_MAJOR 9 -#define MZ_VER_MINOR 1 -#define MZ_VER_REVISION 15 -#define MZ_VER_SUBREVISION 0 - -// Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The -// other values are for advanced use (refer to the zlib docs). -enum { - MZ_NO_FLUSH = 0, - MZ_PARTIAL_FLUSH = 1, - MZ_SYNC_FLUSH = 2, - MZ_FULL_FLUSH = 3, - MZ_FINISH = 4, - MZ_BLOCK = 5 -}; - -// Return status codes. MZ_PARAM_ERROR is non-standard. 
-enum { - MZ_OK = 0, - MZ_STREAM_END = 1, - MZ_NEED_DICT = 2, - MZ_ERRNO = -1, - MZ_STREAM_ERROR = -2, - MZ_DATA_ERROR = -3, - MZ_MEM_ERROR = -4, - MZ_BUF_ERROR = -5, - MZ_VERSION_ERROR = -6, - MZ_PARAM_ERROR = -10000 -}; - -// Compression levels: 0-9 are the standard zlib-style levels, 10 is best -// possible compression (not zlib compatible, and may be very slow), -// MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. -enum { - MZ_NO_COMPRESSION = 0, - MZ_BEST_SPEED = 1, - MZ_BEST_COMPRESSION = 9, - MZ_UBER_COMPRESSION = 10, - MZ_DEFAULT_LEVEL = 6, - MZ_DEFAULT_COMPRESSION = -1 -}; - -// Window bits -#define MZ_DEFAULT_WINDOW_BITS 15 - -struct mz_internal_state; - -// Compression/decompression stream struct. -typedef struct mz_stream_s { - const unsigned char *next_in; // pointer to next byte to read - unsigned int avail_in; // number of bytes available at next_in - mz_ulong total_in; // total number of bytes consumed so far - - unsigned char *next_out; // pointer to next byte to write - unsigned int avail_out; // number of bytes that can be written to next_out - mz_ulong total_out; // total number of bytes produced so far - - char *msg; // error msg (unused) - struct mz_internal_state *state; // internal state, allocated by zalloc/zfree - - mz_alloc_func - zalloc; // optional heap allocation function (defaults to malloc) - mz_free_func zfree; // optional heap free function (defaults to free) - void *opaque; // heap alloc function user pointer - - int data_type; // data_type (unused) - mz_ulong adler; // adler32 of the source or uncompressed data - mz_ulong reserved; // not used -} mz_stream; - -typedef mz_stream *mz_streamp; - -// Returns the version string of miniz.c. -const char *mz_version(void); - -// mz_deflateInit() initializes a compressor with default options: -// Parameters: -// pStream must point to an initialized mz_stream struct. -// level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. 
-// level 1 enables a specially optimized compression function that's been -// optimized purely for performance, not ratio. -// (This special func. is currently only enabled when -// MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if the input parameters are bogus. -// MZ_MEM_ERROR on out of memory. -int mz_deflateInit(mz_streamp pStream, int level); - -// mz_deflateInit2() is like mz_deflate(), except with more control: -// Additional parameters: -// method must be MZ_DEFLATED -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with -// zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no -// header or footer) -// mem_level must be between [1, 9] (it's checked but ignored by miniz.c) -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy); - -// Quickly resets a compressor without having to reallocate anything. Same as -// calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). -int mz_deflateReset(mz_streamp pStream); - -// mz_deflate() compresses the input to output, consuming as much of the input -// and producing as much output as possible. -// Parameters: -// pStream is the stream to read from and write to. You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or -// MZ_FINISH. -// Return values: -// MZ_OK on success (when flushing, or if more input is needed but not -// available, and/or there's more output to be written but the output buffer -// is full). -// MZ_STREAM_END if all input has been consumed and all output bytes have been -// written. Don't call mz_deflate() on the stream anymore. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_PARAM_ERROR if one of the parameters is invalid. 
-// MZ_BUF_ERROR if no forward progress is possible because the input and/or -// output buffers are empty. (Fill up the input buffer or free up some output -// space and try again.) -int mz_deflate(mz_streamp pStream, int flush); - -// mz_deflateEnd() deinitializes a compressor: -// Return values: -// MZ_OK on success. -// MZ_STREAM_ERROR if the stream is bogus. -int mz_deflateEnd(mz_streamp pStream); - -// mz_deflateBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by deflate(), assuming flush is set to only -// MZ_NO_FLUSH or MZ_FINISH. -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len); - -// Single-call compression functions mz_compress() and mz_compress2(): -// Returns MZ_OK on success, or one of the error codes from mz_deflate() on -// failure. -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level); - -// mz_compressBound() returns a (very) conservative upper bound on the amount of -// data that could be generated by calling mz_compress(). -mz_ulong mz_compressBound(mz_ulong source_len); - -// Initializes a decompressor. -int mz_inflateInit(mz_streamp pStream); - -// mz_inflateInit2() is like mz_inflateInit() with an additional option that -// controls the window size and whether or not the stream has been wrapped with -// a zlib header/footer: -// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -// -MZ_DEFAULT_WINDOW_BITS (raw deflate). -int mz_inflateInit2(mz_streamp pStream, int window_bits); - -// Decompresses the input stream to the output, consuming only as much of the -// input as needed, and writing as much to the output as possible. -// Parameters: -// pStream is the stream to read from and write to. 
You must initialize/update -// the next_in, avail_in, next_out, and avail_out members. -// flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. -// On the first call, if flush is MZ_FINISH it's assumed the input and output -// buffers are both sized large enough to decompress the entire stream in a -// single call (this is slightly faster). -// MZ_FINISH implies that there are no more source bytes available beside -// what's already in the input buffer, and that the output buffer is large -// enough to hold the rest of the decompressed data. -// Return values: -// MZ_OK on success. Either more input is needed but not available, and/or -// there's more output to be written but the output buffer is full. -// MZ_STREAM_END if all needed input has been consumed and all output bytes -// have been written. For zlib streams, the adler-32 of the decompressed data -// has also been verified. -// MZ_STREAM_ERROR if the stream is bogus. -// MZ_DATA_ERROR if the deflate stream is invalid. -// MZ_PARAM_ERROR if one of the parameters is invalid. -// MZ_BUF_ERROR if no forward progress is possible because the input buffer is -// empty but the inflater needs more input to continue, or if the output -// buffer is not large enough. Call mz_inflate() again -// with more input data, or with more room in the output buffer (except when -// using single call decompression, described above). -int mz_inflate(mz_streamp pStream, int flush); - -// Deinitializes a decompressor. -int mz_inflateEnd(mz_streamp pStream); - -// Single-call decompression. -// Returns MZ_OK on success, or one of the error codes from mz_inflate() on -// failure. -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len); - -// Returns a string description of the specified error code, or NULL if the -// error code is invalid. 
-const char *mz_error(int err); - -// Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used -// as a drop-in replacement for the subset of zlib that miniz.c supports. -// Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you -// use zlib in the same project. -#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES -typedef unsigned char Byte; -typedef unsigned int uInt; -typedef mz_ulong uLong; -typedef Byte Bytef; -typedef uInt uIntf; -typedef char charf; -typedef int intf; -typedef void *voidpf; -typedef uLong uLongf; -typedef void *voidp; -typedef void *const voidpc; -#define Z_NULL 0 -#define Z_NO_FLUSH MZ_NO_FLUSH -#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH -#define Z_SYNC_FLUSH MZ_SYNC_FLUSH -#define Z_FULL_FLUSH MZ_FULL_FLUSH -#define Z_FINISH MZ_FINISH -#define Z_BLOCK MZ_BLOCK -#define Z_OK MZ_OK -#define Z_STREAM_END MZ_STREAM_END -#define Z_NEED_DICT MZ_NEED_DICT -#define Z_ERRNO MZ_ERRNO -#define Z_STREAM_ERROR MZ_STREAM_ERROR -#define Z_DATA_ERROR MZ_DATA_ERROR -#define Z_MEM_ERROR MZ_MEM_ERROR -#define Z_BUF_ERROR MZ_BUF_ERROR -#define Z_VERSION_ERROR MZ_VERSION_ERROR -#define Z_PARAM_ERROR MZ_PARAM_ERROR -#define Z_NO_COMPRESSION MZ_NO_COMPRESSION -#define Z_BEST_SPEED MZ_BEST_SPEED -#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION -#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION -#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY -#define Z_FILTERED MZ_FILTERED -#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY -#define Z_RLE MZ_RLE -#define Z_FIXED MZ_FIXED -#define Z_DEFLATED MZ_DEFLATED -#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS -#define alloc_func mz_alloc_func -#define free_func mz_free_func -#define internal_state mz_internal_state -#define z_stream mz_stream -#define deflateInit mz_deflateInit -#define deflateInit2 mz_deflateInit2 -#define deflateReset mz_deflateReset -#define deflate mz_deflate -#define deflateEnd mz_deflateEnd -#define deflateBound mz_deflateBound -#define compress mz_compress -#define compress2 
mz_compress2 -#define compressBound mz_compressBound -#define inflateInit mz_inflateInit -#define inflateInit2 mz_inflateInit2 -#define inflate mz_inflate -#define inflateEnd mz_inflateEnd -#define uncompress mz_uncompress -#define crc32 mz_crc32 -#define adler32 mz_adler32 -#define MAX_WBITS 15 -#define MAX_MEM_LEVEL 9 -#define zError mz_error -#define ZLIB_VERSION MZ_VERSION -#define ZLIB_VERNUM MZ_VERNUM -#define ZLIB_VER_MAJOR MZ_VER_MAJOR -#define ZLIB_VER_MINOR MZ_VER_MINOR -#define ZLIB_VER_REVISION MZ_VER_REVISION -#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION -#define zlibVersion mz_version -#define zlib_version mz_version() -#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Types and macros - -typedef unsigned char mz_uint8; -typedef signed short mz_int16; -typedef unsigned short mz_uint16; -typedef unsigned int mz_uint32; -typedef unsigned int mz_uint; -typedef long long mz_int64; -typedef unsigned long long mz_uint64; -typedef int mz_bool; - -#define MZ_FALSE (0) -#define MZ_TRUE (1) - -// An attempt to work around MSVC's spammy "warning C4127: conditional -// expression is constant" message. 
-#ifdef _MSC_VER -#define MZ_MACRO_END while (0, 0) -#else -#define MZ_MACRO_END while (0) -#endif - -// ------------------- ZIP archive reading/writing - -#ifndef MINIZ_NO_ARCHIVE_APIS - -enum { - MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024, - MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260, - MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256 -}; - -typedef struct { - mz_uint32 m_file_index; - mz_uint32 m_central_dir_ofs; - mz_uint16 m_version_made_by; - mz_uint16 m_version_needed; - mz_uint16 m_bit_flag; - mz_uint16 m_method; -#ifndef MINIZ_NO_TIME - time_t m_time; -#endif - mz_uint32 m_crc32; - mz_uint64 m_comp_size; - mz_uint64 m_uncomp_size; - mz_uint16 m_internal_attr; - mz_uint32 m_external_attr; - mz_uint64 m_local_header_ofs; - mz_uint32 m_comment_size; - char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE]; - char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE]; -} mz_zip_archive_file_stat; - -typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n); -typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n); - -struct mz_zip_internal_state_tag; -typedef struct mz_zip_internal_state_tag mz_zip_internal_state; - -typedef enum { - MZ_ZIP_MODE_INVALID = 0, - MZ_ZIP_MODE_READING = 1, - MZ_ZIP_MODE_WRITING = 2, - MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3 -} mz_zip_mode; - -typedef struct mz_zip_archive_tag { - mz_uint64 m_archive_size; - mz_uint64 m_central_directory_file_ofs; - mz_uint m_total_files; - mz_zip_mode m_zip_mode; - - mz_uint m_file_offset_alignment; - - mz_alloc_func m_pAlloc; - mz_free_func m_pFree; - mz_realloc_func m_pRealloc; - void *m_pAlloc_opaque; - - mz_file_read_func m_pRead; - mz_file_write_func m_pWrite; - void *m_pIO_opaque; - - mz_zip_internal_state *m_pState; - -} mz_zip_archive; - -typedef enum { - MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100, - MZ_ZIP_FLAG_IGNORE_PATH = 0x0200, - MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400, - MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800 -} mz_zip_flags; 
- -// ZIP archive reading - -// Inits a ZIP archive reader. -// These functions read and validate the archive's central directory. -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags); -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags); -#endif - -// Returns the total number of files in the archive. -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip); - -// Returns detailed information about an archive file entry. -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat); - -// Determines if an archive file entry is a directory entry. -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index); -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index); - -// Retrieves the filename of an archive file entry. -// Returns the number of bytes written to pFilename, or if filename_buf_size is -// 0 this function returns the number of bytes needed to fully store the -// filename. -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size); - -// Attempts to locates a file in the archive's central directory. -// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH -// Returns -1 if the file cannot be found. -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags); - -// Extracts a archive file to a memory buffer using no memory allocation. 
-mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size); -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); - -// Extracts a archive file to a memory buffer. -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags); - -// Extracts a archive file to a dynamically allocated heap buffer. -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags); -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags); - -// Extracts a archive file using a callback function to output the file's data. -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags); - -#ifndef MINIZ_NO_STDIO -// Extracts a archive file to a disk file and sets its last accessed and -// modified times. -// This function only extracts files, not archive directory records. 
-mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, mz_uint flags); -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags); -#endif - -// Ends archive reading, freeing all allocations, and closing the input archive -// file if mz_zip_reader_init_file() was used. -mz_bool mz_zip_reader_end(mz_zip_archive *pZip); - -// ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -// Inits a ZIP archive writer. -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size); -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t size_to_reserve_at_beginning, - size_t initial_allocation_size); - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning); -#endif - -// Converts a ZIP archive reader object into a writer object, to allow efficient -// in-place file appends to occur on an existing archive. -// For archives opened using mz_zip_reader_init_file, pFilename must be the -// archive's filename so it can be reopened for writing. If the file can't be -// reopened, mz_zip_reader_end() will be called. -// For archives opened using mz_zip_reader_init_mem, the memory block must be -// growable using the realloc callback (which defaults to realloc unless you've -// overridden it). -// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's -// user provided m_pWrite function cannot be NULL. -// Note: In-place archive modification is not recommended unless you know what -// you're doing, because if execution stops or something goes wrong before -// the archive is finalized the file's central directory will be hosed. -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char *pFilename); - -// Adds the contents of a memory buffer to an archive. 
These functions record -// the current local time into the archive. -// To add a directory entry, call this method with an archive name ending in a -// forwardslash with empty buffer. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags); -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32); - -#ifndef MINIZ_NO_STDIO -// Adds the contents of a disk file to an archive. This function also records -// the disk file's modified time into the archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, mz_uint level_and_flags); -#endif - -// Adds a file to an archive by fully cloning the data from another archive. -// This function fully clones the source file's compressed data (no -// recompression), along with its full filename, extra data, and comment fields. -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index); - -// Finalizes the archive by writing the central directory records followed by -// the end of central directory record. -// After an archive is finalized, the only valid call on the mz_zip_archive -// struct is mz_zip_writer_end(). -// An archive must be manually finalized by calling this function for it to be -// valid. 
-mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip); -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize); - -// Ends archive writing, freeing all allocations, and closing the output file if -// mz_zip_writer_init_file() was used. -// Note for the archive to be valid, it must have been finalized before ending. -mz_bool mz_zip_writer_end(mz_zip_archive *pZip); - -// Misc. high-level helper functions: - -// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) -// appends a memory blob to a ZIP archive. -// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or -// just set to MZ_DEFAULT_COMPRESSION. -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags); - -// Reads a single file from an archive into a heap block. -// Returns NULL on failure. -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint zip_flags); - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -// ------------------- Low-level Decompression API Definitions - -// Decompression flags used by tinfl_decompress(). -// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and -// ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the -// input is a raw deflate stream. -// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available -// beyond the end of the supplied input buffer. If clear, the input buffer -// contains all remaining input. -// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large -// enough to hold the entire decompressed stream. 
If clear, the output buffer is -// at least the size of the dictionary (typically 32KB). -// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the -// decompressed bytes. -enum { - TINFL_FLAG_PARSE_ZLIB_HEADER = 1, - TINFL_FLAG_HAS_MORE_INPUT = 2, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4, - TINFL_FLAG_COMPUTE_ADLER32 = 8 -}; - -// High level decompression functions: -// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data -// to decompress. -// On return: -// Function returns a pointer to the decompressed data, or NULL on failure. -// *pOut_len will be set to the decompressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must call mz_free() on the returned block when it's no longer -// needed. -void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block -// in memory. -// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes -// written on success. -#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1)) -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// tinfl_decompress_mem_to_callback() decompresses a block in memory to an -// internal 32KB buffer, and a user provided callback function will be called to -// flush the buffer. -// Returns 1 on success or 0 on failure. 
-typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser); -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -struct tinfl_decompressor_tag; -typedef struct tinfl_decompressor_tag tinfl_decompressor; - -// Max size of LZ dictionary. -#define TINFL_LZ_DICT_SIZE 32768 - -// Return status. -typedef enum { - TINFL_STATUS_BAD_PARAM = -3, - TINFL_STATUS_ADLER32_MISMATCH = -2, - TINFL_STATUS_FAILED = -1, - TINFL_STATUS_DONE = 0, - TINFL_STATUS_NEEDS_MORE_INPUT = 1, - TINFL_STATUS_HAS_MORE_OUTPUT = 2 -} tinfl_status; - -// Initializes the decompressor to its initial state. -#define tinfl_init(r) \ - do { \ - (r)->m_state = 0; \ - } \ - MZ_MACRO_END -#define tinfl_get_adler32(r) (r)->m_check_adler32 - -// Main low-level decompressor coroutine function. This is the only function -// actually needed for decompression. All the other functions are just -// high-level helpers for improved usability. -// This is a universal API, i.e. it can be used as a building block to build any -// desired higher level decompression API. In the limit case, it can be called -// once per every byte input or output. -tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags); - -// Internal/private bits follow. 
-enum { - TINFL_MAX_HUFF_TABLES = 3, - TINFL_MAX_HUFF_SYMBOLS_0 = 288, - TINFL_MAX_HUFF_SYMBOLS_1 = 32, - TINFL_MAX_HUFF_SYMBOLS_2 = 19, - TINFL_FAST_LOOKUP_BITS = 10, - TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS -}; - -typedef struct { - mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; - mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], - m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; -} tinfl_huff_table; - -#if MINIZ_HAS_64BIT_REGISTERS -#define TINFL_USE_64BIT_BITBUF 1 -#endif - -#if TINFL_USE_64BIT_BITBUF -typedef mz_uint64 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (64) -#else -typedef mz_uint32 tinfl_bit_buf_t; -#define TINFL_BITBUF_SIZE (32) -#endif - -struct tinfl_decompressor_tag { - mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, - m_check_adler32, m_dist, m_counter, m_num_extra, - m_table_sizes[TINFL_MAX_HUFF_TABLES]; - tinfl_bit_buf_t m_bit_buf; - size_t m_dist_from_out_buf_start; - tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; - mz_uint8 m_raw_header[4], - m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; -}; - -// ------------------- Low-level Compression API Definitions - -// Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly -// slower, and raw/dynamic blocks will be output more frequently). -#define TDEFL_LESS_MEMORY 0 - -// tdefl_init() compression flags logically OR'd together (low 12 bits contain -// the max. number of probes per dictionary search): -// TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes -// per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap -// compression), 4095=Huffman+LZ (slowest/best compression). -enum { - TDEFL_HUFFMAN_ONLY = 0, - TDEFL_DEFAULT_MAX_PROBES = 128, - TDEFL_MAX_PROBES_MASK = 0xFFF -}; - -// TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before -// the deflate data, and the Adler-32 of the source data at the end. Otherwise, -// you'll get raw deflate data. 
-// TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even -// when not writing zlib headers). -// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more -// efficient lazy parsing. -// TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's -// initialization time to the minimum, but the output may vary from run to run -// given the same input (depending on the contents of memory). -// TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) -// TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. -// TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. -// TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. -// The low 12 bits are reserved to control the max # of hash probes per -// dictionary lookup (see TDEFL_MAX_PROBES_MASK). -enum { - TDEFL_WRITE_ZLIB_HEADER = 0x01000, - TDEFL_COMPUTE_ADLER32 = 0x02000, - TDEFL_GREEDY_PARSING_FLAG = 0x04000, - TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, - TDEFL_RLE_MATCHES = 0x10000, - TDEFL_FILTER_MATCHES = 0x20000, - TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, - TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 -}; - -// High level compression functions: -// tdefl_compress_mem_to_heap() compresses a block in memory to a heap block -// allocated via malloc(). -// On entry: -// pSrc_buf, src_buf_len: Pointer and size of source block to compress. -// flags: The max match finder probes (default is 128) logically OR'd against -// the above flags. Higher probes are slower but improve compression. -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. -// *pOut_len will be set to the compressed data's size, which could be larger -// than src_buf_len on uncompressible data. -// The caller must free() the returned block when it's no longer needed. 
-void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags); - -// tdefl_compress_mem_to_mem() compresses a block in memory to another block in -// memory. -// Returns 0 on failure. -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags); - -// Compresses an image to a compressed PNG file in memory. -// On entry: -// pImage, w, h, and num_chans describe the image to compress. num_chans may be -// 1, 2, 3, or 4. -// The image pitch in bytes per scanline will be w*num_chans. The leftmost -// pixel on the top scanline is stored first in memory. -// level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, -// MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL -// If flip is true, the image will be flipped on the Y axis (useful for OpenGL -// apps). -// On return: -// Function returns a pointer to the compressed data, or NULL on failure. -// *pLen_out will be set to the size of the PNG image file. -// The caller must mz_free() the returned heap block (which will typically be -// larger than *pLen_out) when it's no longer needed. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip); -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out); - -// Output stream interface. The compressor uses this interface to write -// compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. -typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, - void *pUser); - -// tdefl_compress_mem_to_output() compresses a block to an output stream. The -// above helpers use this function internally. 
-mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -enum { - TDEFL_MAX_HUFF_TABLES = 3, - TDEFL_MAX_HUFF_SYMBOLS_0 = 288, - TDEFL_MAX_HUFF_SYMBOLS_1 = 32, - TDEFL_MAX_HUFF_SYMBOLS_2 = 19, - TDEFL_LZ_DICT_SIZE = 32768, - TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, - TDEFL_MIN_MATCH_LEN = 3, - TDEFL_MAX_MATCH_LEN = 258 -}; - -// TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed -// output block (using static/fixed Huffman codes). -#if TDEFL_LESS_MEMORY -enum { - TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 12, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#else -enum { - TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024, - TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10, - TDEFL_MAX_HUFF_SYMBOLS = 288, - TDEFL_LZ_HASH_BITS = 15, - TDEFL_LEVEL1_HASH_SIZE_MASK = 4095, - TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3, - TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS -}; -#endif - -// The low-level tdefl functions below may be used directly if the above helper -// functions aren't flexible enough. The low-level functions don't make any heap -// allocations, unlike the above helper functions. -typedef enum { - TDEFL_STATUS_BAD_PARAM = -2, - TDEFL_STATUS_PUT_BUF_FAILED = -1, - TDEFL_STATUS_OKAY = 0, - TDEFL_STATUS_DONE = 1 -} tdefl_status; - -// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums -typedef enum { - TDEFL_NO_FLUSH = 0, - TDEFL_SYNC_FLUSH = 2, - TDEFL_FULL_FLUSH = 3, - TDEFL_FINISH = 4 -} tdefl_flush; - -// tdefl's compression state structure. 
-typedef struct { - tdefl_put_buf_func_ptr m_pPut_buf_func; - void *m_pPut_buf_user; - mz_uint m_flags, m_max_probes[2]; - int m_greedy_parsing; - mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; - mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; - mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, - m_bit_buffer; - mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, - m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, - m_wants_to_finish; - tdefl_status m_prev_return_status; - const void *m_pIn_buf; - void *m_pOut_buf; - size_t *m_pIn_buf_size, *m_pOut_buf_size; - tdefl_flush m_flush; - const mz_uint8 *m_pSrc; - size_t m_src_buf_left, m_out_buf_ofs; - mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; - mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; - mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; - mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; - mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; - mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; -} tdefl_compressor; - -// Initializes the compressor. -// There is no corresponding deinit() function because the tdefl API's do not -// dynamically allocate memory. -// pBut_buf_func: If NULL, output data will be supplied to the specified -// callback. In this case, the user should call the tdefl_compress_buffer() API -// for compression. -// If pBut_buf_func is NULL the user should always call the tdefl_compress() -// API. -// flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, -// etc.) 
-tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags); - -// Compresses a block of data, consuming as much of the specified input buffer -// as possible, and writing as much compressed data to the specified output -// buffer as possible. -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush); - -// tdefl_compress_buffer() is only usable when the tdefl_init() is called with a -// non-NULL tdefl_put_buf_func_ptr. -// tdefl_compress_buffer() always consumes the entire input buffer. -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush); - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d); -mz_uint32 tdefl_get_adler32(tdefl_compressor *d); - -// Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't -// defined, because it uses some of its macros. -#ifndef MINIZ_NO_ZLIB_APIS -// Create tdefl_compress() flags given zlib-style compression parameters. -// level may range from [0,10] (where 10 is absolute max compression, but may be -// much slower on some files) -// window_bits may be -15 (raw deflate) or 15 (zlib) -// strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, -// MZ_RLE, or MZ_FIXED -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy); -#endif // #ifndef MINIZ_NO_ZLIB_APIS - -#ifdef __cplusplus -} -#endif - -#endif // MINIZ_HEADER_INCLUDED - -// ------------------- End of Header: Implementation follows. (If you only want -// the header, define MINIZ_HEADER_FILE_ONLY.) - -#ifndef MINIZ_HEADER_FILE_ONLY - -typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; -typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; -typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 
1 : -1]; - -//#include -//#include - -#define MZ_ASSERT(x) assert(x) - -#ifdef MINIZ_NO_MALLOC -#define MZ_MALLOC(x) NULL -#define MZ_FREE(x) (void)x, ((void)0) -#define MZ_REALLOC(p, x) NULL -#else -#define MZ_MALLOC(x) malloc(x) -#define MZ_FREE(x) free(x) -#define MZ_REALLOC(p, x) realloc(p, x) -#endif - -#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b)) -#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -#define MZ_READ_LE16(p) *((const mz_uint16 *)(p)) -#define MZ_READ_LE32(p) *((const mz_uint32 *)(p)) -#else -#define MZ_READ_LE16(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U)) -#define MZ_READ_LE32(p) \ - ((mz_uint32)(((const mz_uint8 *)(p))[0]) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | \ - ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U)) -#endif - -#ifdef _MSC_VER -#define MZ_FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define MZ_FORCEINLINE inline __attribute__((__always_inline__)) -#else -#define MZ_FORCEINLINE inline -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// ------------------- zlib-style API's - -mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len) { - mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); - size_t block_len = buf_len % 5552; - if (!ptr) return MZ_ADLER32_INIT; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - return (s2 << 16) + s1; -} - -// Karl Malbrain's compact 
CRC-32. See "A compact CCITT crc16 and crc32 C -// implementation that balances processor cache usage against speed": -// http://www.geocities.com/malbrain/ -mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len) { - static const mz_uint32 s_crc32[16] = { - 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, - 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, - 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; - mz_uint32 crcu32 = (mz_uint32)crc; - if (!ptr) return MZ_CRC32_INIT; - crcu32 = ~crcu32; - while (buf_len--) { - mz_uint8 b = *ptr++; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; - crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; - } - return ~crcu32; -} - -void mz_free(void *p) { MZ_FREE(p); } - -#ifndef MINIZ_NO_ZLIB_APIS - -static void *def_alloc_func(void *opaque, size_t items, size_t size) { - (void)opaque, (void)items, (void)size; - return MZ_MALLOC(items * size); -} -static void def_free_func(void *opaque, void *address) { - (void)opaque, (void)address; - MZ_FREE(address); -} -// static void *def_realloc_func(void *opaque, void *address, size_t items, -// size_t size) { -// (void)opaque, (void)address, (void)items, (void)size; -// return MZ_REALLOC(address, items * size); -//} - -const char *mz_version(void) { return MZ_VERSION; } - -int mz_deflateInit(mz_streamp pStream, int level) { - return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, - MZ_DEFAULT_STRATEGY); -} - -int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, - int mem_level, int strategy) { - tdefl_compressor *pComp; - mz_uint comp_flags = - TDEFL_COMPUTE_ADLER32 | - tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); - - if (!pStream) return MZ_STREAM_ERROR; - if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || - ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != MZ_DEFAULT_WINDOW_BITS))) - return 
MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = MZ_ADLER32_INIT; - pStream->msg = NULL; - pStream->reserved = 0; - pStream->total_in = 0; - pStream->total_out = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pComp = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, - sizeof(tdefl_compressor)); - if (!pComp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pComp; - - if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { - mz_deflateEnd(pStream); - return MZ_PARAM_ERROR; - } - - return MZ_OK; -} - -int mz_deflateReset(mz_streamp pStream) { - if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || - (!pStream->zfree)) - return MZ_STREAM_ERROR; - pStream->total_in = pStream->total_out = 0; - tdefl_init((tdefl_compressor *)pStream->state, NULL, NULL, - ((tdefl_compressor *)pStream->state)->m_flags); - return MZ_OK; -} - -int mz_deflate(mz_streamp pStream, int flush) { - size_t in_bytes, out_bytes; - mz_ulong orig_total_in, orig_total_out; - int mz_status = MZ_OK; - - if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || - (!pStream->next_out)) - return MZ_STREAM_ERROR; - if (!pStream->avail_out) return MZ_BUF_ERROR; - - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - - if (((tdefl_compressor *)pStream->state)->m_prev_return_status == - TDEFL_STATUS_DONE) - return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; - - orig_total_in = pStream->total_in; - orig_total_out = pStream->total_out; - for (;;) { - tdefl_status defl_status; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - - defl_status = tdefl_compress((tdefl_compressor *)pStream->state, - pStream->next_in, &in_bytes, pStream->next_out, - &out_bytes, (tdefl_flush)flush); - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state); - - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (defl_status < 0) { - mz_status = MZ_STREAM_ERROR; - break; - } else if (defl_status == TDEFL_STATUS_DONE) { - mz_status = MZ_STREAM_END; - break; - } else if (!pStream->avail_out) - break; - else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { - if ((flush) || (pStream->total_in != orig_total_in) || - (pStream->total_out != orig_total_out)) - break; - return MZ_BUF_ERROR; // Can't make forward progress without some input. - } - } - return mz_status; -} - -int mz_deflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { - (void)pStream; - // This is really over conservative. (And lame, but it's actually pretty - // tricky to compute a true upper bound given the way tdefl's blocking works.) - return MZ_MAX(128 + (source_len * 110) / 100, - 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); -} - -int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len, int level) { - int status; - mz_stream stream; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). 
- if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_deflateInit(&stream, level); - if (status != MZ_OK) return status; - - status = mz_deflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_deflateEnd(&stream); - return (status == MZ_OK) ? MZ_BUF_ERROR : status; - } - - *pDest_len = stream.total_out; - return mz_deflateEnd(&stream); -} - -int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - return mz_compress2(pDest, pDest_len, pSource, source_len, - MZ_DEFAULT_COMPRESSION); -} - -mz_ulong mz_compressBound(mz_ulong source_len) { - return mz_deflateBound(NULL, source_len); -} - -typedef struct { - tinfl_decompressor m_decomp; - mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; - int m_window_bits; - mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; - tinfl_status m_last_status; -} inflate_state; - -int mz_inflateInit2(mz_streamp pStream, int window_bits) { - inflate_state *pDecomp; - if (!pStream) return MZ_STREAM_ERROR; - if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && - (-window_bits != MZ_DEFAULT_WINDOW_BITS)) - return MZ_PARAM_ERROR; - - pStream->data_type = 0; - pStream->adler = 0; - pStream->msg = NULL; - pStream->total_in = 0; - pStream->total_out = 0; - pStream->reserved = 0; - if (!pStream->zalloc) pStream->zalloc = def_alloc_func; - if (!pStream->zfree) pStream->zfree = def_free_func; - - pDecomp = (inflate_state *)pStream->zalloc(pStream->opaque, 1, - sizeof(inflate_state)); - if (!pDecomp) return MZ_MEM_ERROR; - - pStream->state = (struct mz_internal_state *)pDecomp; - - tinfl_init(&pDecomp->m_decomp); - pDecomp->m_dict_ofs = 0; - pDecomp->m_dict_avail = 0; - pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; - pDecomp->m_first_call = 1; - pDecomp->m_has_flushed = 0; - pDecomp->m_window_bits = 
window_bits; - - return MZ_OK; -} - -int mz_inflateInit(mz_streamp pStream) { - return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); -} - -int mz_inflate(mz_streamp pStream, int flush) { - inflate_state *pState; - mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32; - size_t in_bytes, out_bytes, orig_avail_in; - tinfl_status status; - - if ((!pStream) || (!pStream->state)) return MZ_STREAM_ERROR; - if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; - if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH)) - return MZ_STREAM_ERROR; - - pState = (inflate_state *)pStream->state; - if (pState->m_window_bits > 0) decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER; - orig_avail_in = pStream->avail_in; - - first_call = pState->m_first_call; - pState->m_first_call = 0; - if (pState->m_last_status < 0) return MZ_DATA_ERROR; - - if (pState->m_has_flushed && (flush != MZ_FINISH)) return MZ_STREAM_ERROR; - pState->m_has_flushed |= (flush == MZ_FINISH); - - if ((flush == MZ_FINISH) && (first_call)) { - // MZ_FINISH on the first call implies that the input and output buffers are - // large enough to hold the entire compressed/decompressed file. 
- decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF; - in_bytes = pStream->avail_in; - out_bytes = pStream->avail_out; - status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, - pStream->next_out, pStream->next_out, &out_bytes, - decomp_flags); - pState->m_last_status = status; - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - pStream->next_out += (mz_uint)out_bytes; - pStream->avail_out -= (mz_uint)out_bytes; - pStream->total_out += (mz_uint)out_bytes; - - if (status < 0) - return MZ_DATA_ERROR; - else if (status != TINFL_STATUS_DONE) { - pState->m_last_status = TINFL_STATUS_FAILED; - return MZ_BUF_ERROR; - } - return MZ_STREAM_END; - } - // flush != MZ_FINISH then we must assume there's more input. - if (flush != MZ_FINISH) decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT; - - if (pState->m_dict_avail) { - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - return ((pState->m_last_status == TINFL_STATUS_DONE) && - (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; - } - - for (;;) { - in_bytes = pStream->avail_in; - out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs; - - status = tinfl_decompress( - &pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, - pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags); - pState->m_last_status = status; - - pStream->next_in += (mz_uint)in_bytes; - pStream->avail_in -= (mz_uint)in_bytes; - pStream->total_in += (mz_uint)in_bytes; - pStream->adler = tinfl_get_adler32(&pState->m_decomp); - - pState->m_dict_avail = (mz_uint)out_bytes; - - n = MZ_MIN(pState->m_dict_avail, pStream->avail_out); - memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n); - pStream->next_out += n; - pStream->avail_out -= n; - pStream->total_out += n; - pState->m_dict_avail -= n; - pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1); - - if (status < 0) - return MZ_DATA_ERROR; // Stream is corrupted (there could be some - // uncompressed data left in the output dictionary - - // oh well). - else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in)) - return MZ_BUF_ERROR; // Signal caller that we can't make forward progress - // without supplying more input or by setting flush - // to MZ_FINISH. - else if (flush == MZ_FINISH) { - // The output buffer MUST be large to hold the remaining uncompressed data - // when flush==MZ_FINISH. - if (status == TINFL_STATUS_DONE) - return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END; - // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's - // at least 1 more byte on the way. If there's no more room left in the - // output buffer then something is wrong. - else if (!pStream->avail_out) - return MZ_BUF_ERROR; - } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || - (!pStream->avail_out) || (pState->m_dict_avail)) - break; - } - - return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) - ? 
MZ_STREAM_END - : MZ_OK; -} - -int mz_inflateEnd(mz_streamp pStream) { - if (!pStream) return MZ_STREAM_ERROR; - if (pStream->state) { - pStream->zfree(pStream->opaque, pStream->state); - pStream->state = NULL; - } - return MZ_OK; -} - -int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, - const unsigned char *pSource, mz_ulong source_len) { - mz_stream stream; - int status; - memset(&stream, 0, sizeof(stream)); - - // In case mz_ulong is 64-bits (argh I hate longs). - if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; - - stream.next_in = pSource; - stream.avail_in = (mz_uint32)source_len; - stream.next_out = pDest; - stream.avail_out = (mz_uint32)*pDest_len; - - status = mz_inflateInit(&stream); - if (status != MZ_OK) return status; - - status = mz_inflate(&stream, MZ_FINISH); - if (status != MZ_STREAM_END) { - mz_inflateEnd(&stream); - return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR - : status; - } - *pDest_len = stream.total_out; - - return mz_inflateEnd(&stream); -} - -const char *mz_error(int err) { - static struct { - int m_err; - const char *m_pDesc; - } s_error_descs[] = {{MZ_OK, ""}, - {MZ_STREAM_END, "stream end"}, - {MZ_NEED_DICT, "need dictionary"}, - {MZ_ERRNO, "file error"}, - {MZ_STREAM_ERROR, "stream error"}, - {MZ_DATA_ERROR, "data error"}, - {MZ_MEM_ERROR, "out of memory"}, - {MZ_BUF_ERROR, "buf error"}, - {MZ_VERSION_ERROR, "version error"}, - {MZ_PARAM_ERROR, "parameter error"}}; - mz_uint i; - for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i) - if (s_error_descs[i].m_err == err) return s_error_descs[i].m_pDesc; - return NULL; -} - -#endif // MINIZ_NO_ZLIB_APIS - -// ------------------- Low-level Decompression (completely independent from all -// compression API's) - -#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l) -#define TINFL_MEMSET(p, c, l) memset(p, c, l) - -#define TINFL_CR_BEGIN \ - switch (r->m_state) { \ - case 0: -#define TINFL_CR_RETURN(state_index, result) \ - do 
{ \ - status = result; \ - r->m_state = state_index; \ - goto common_exit; \ - case state_index:; \ - } \ - MZ_MACRO_END -#define TINFL_CR_RETURN_FOREVER(state_index, result) \ - do { \ - for (;;) { \ - TINFL_CR_RETURN(state_index, result); \ - } \ - } \ - MZ_MACRO_END -#define TINFL_CR_FINISH } - -// TODO: If the caller has indicated that there's no more input, and we attempt -// to read beyond the input buf, then something is wrong with the input because -// the inflator never -// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of -// the stream with 0's in this scenario. -#define TINFL_GET_BYTE(state_index, c) \ - do { \ - if (pIn_buf_cur >= pIn_buf_end) { \ - for (;;) { \ - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { \ - TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \ - if (pIn_buf_cur < pIn_buf_end) { \ - c = *pIn_buf_cur++; \ - break; \ - } \ - } else { \ - c = 0; \ - break; \ - } \ - } \ - } else \ - c = *pIn_buf_cur++; \ - } \ - MZ_MACRO_END - -#define TINFL_NEED_BITS(state_index, n) \ - do { \ - mz_uint c; \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < (mz_uint)(n)) -#define TINFL_SKIP_BITS(state_index, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END -#define TINFL_GET_BITS(state_index, b, n) \ - do { \ - if (num_bits < (mz_uint)(n)) { \ - TINFL_NEED_BITS(state_index, n); \ - } \ - b = bit_buf & ((1 << (n)) - 1); \ - bit_buf >>= (n); \ - num_bits -= (n); \ - } \ - MZ_MACRO_END - -// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes -// remaining in the input buffer falls below 2. -// It reads just enough bytes from the input stream that are needed to decode -// the next Huffman code (and absolutely no more). 
It works by trying to fully -// decode a -// Huffman code by using whatever bits are currently present in the bit buffer. -// If this fails, it reads another byte, and tries again until it succeeds or -// until the -// bit buffer contains >=15 bits (deflate's max. Huffman code size). -#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff) \ - do { \ - temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \ - if (temp >= 0) { \ - code_len = temp >> 9; \ - if ((code_len) && (num_bits >= code_len)) break; \ - } else if (num_bits > TINFL_FAST_LOOKUP_BITS) { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while ((temp < 0) && (num_bits >= (code_len + 1))); \ - if (temp >= 0) break; \ - } \ - TINFL_GET_BYTE(state_index, c); \ - bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \ - num_bits += 8; \ - } while (num_bits < 15); - -// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex -// than you would initially expect because the zlib API expects the decompressor -// to never read -// beyond the final byte of the deflate stream. (In other words, when this macro -// wants to read another byte from the input, it REALLY needs another byte in -// order to fully -// decode the next Huffman code.) Handling this properly is particularly -// important on raw deflate (non-zlib) streams, which aren't followed by a byte -// aligned adler-32. -// The slow path is only executed at the very end of the input buffer. 
-#define TINFL_HUFF_DECODE(state_index, sym, pHuff) \ - do { \ - int temp; \ - mz_uint code_len, c; \ - if (num_bits < 15) { \ - if ((pIn_buf_end - pIn_buf_cur) < 2) { \ - TINFL_HUFF_BITBUF_FILL(state_index, pHuff); \ - } else { \ - bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | \ - (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \ - pIn_buf_cur += 2; \ - num_bits += 16; \ - } \ - } \ - if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= \ - 0) \ - code_len = temp >> 9, temp &= 511; \ - else { \ - code_len = TINFL_FAST_LOOKUP_BITS; \ - do { \ - temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \ - } while (temp < 0); \ - } \ - sym = temp; \ - bit_buf >>= code_len; \ - num_bits -= code_len; \ - } \ - MZ_MACRO_END - -tinfl_status tinfl_decompress(tinfl_decompressor *r, - const mz_uint8 *pIn_buf_next, - size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, - mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, - const mz_uint32 decomp_flags) { - static const int s_length_base[31] = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, - 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, - 4, 4, 5, 5, 5, 5, 0, 0, 0}; - static const int s_dist_base[32] = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, - 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, - 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0}; - static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - static const mz_uint8 s_length_dezigzag[19] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - static const int s_min_table_sizes[3] = {257, 1, 4}; - - tinfl_status status = TINFL_STATUS_FAILED; - mz_uint32 num_bits, dist, counter, num_extra; - tinfl_bit_buf_t bit_buf; - const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const 
pIn_buf_end = - pIn_buf_next + *pIn_buf_size; - mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = - pOut_buf_next + *pOut_buf_size; - size_t out_buf_size_mask = - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) - ? (size_t)-1 - : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, - dist_from_out_buf_start; - - // Ensure the output buffer's size is a power of 2, unless the output buffer - // is large enough to hold the entire output file (in which case it doesn't - // matter). - if (((out_buf_size_mask + 1) & out_buf_size_mask) || - (pOut_buf_next < pOut_buf_start)) { - *pIn_buf_size = *pOut_buf_size = 0; - return TINFL_STATUS_BAD_PARAM; - } - - num_bits = r->m_num_bits; - bit_buf = r->m_bit_buf; - dist = r->m_dist; - counter = r->m_counter; - num_extra = r->m_num_extra; - dist_from_out_buf_start = r->m_dist_from_out_buf_start; - TINFL_CR_BEGIN - - bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0; - r->m_z_adler32 = r->m_check_adler32 = 1; - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_GET_BYTE(1, r->m_zhdr0); - TINFL_GET_BYTE(2, r->m_zhdr1); - counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || - (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8)); - if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) - counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || - ((out_buf_size_mask + 1) < - (size_t)(1ULL << (8U + (r->m_zhdr0 >> 4))))); - if (counter) { - TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED); - } - } - - do { - TINFL_GET_BITS(3, r->m_final, 3); - r->m_type = r->m_final >> 1; - if (r->m_type == 0) { - TINFL_SKIP_BITS(5, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - if (num_bits) - TINFL_GET_BITS(6, r->m_raw_header[counter], 8); - else - TINFL_GET_BYTE(7, r->m_raw_header[counter]); - } - if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != - (mz_uint)(0xFFFF ^ - (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) { - 
TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED); - } - while ((counter) && (num_bits)) { - TINFL_GET_BITS(51, dist, 8); - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)dist; - counter--; - } - while (counter) { - size_t n; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT); - } - while (pIn_buf_cur >= pIn_buf_end) { - if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) { - TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT); - } else { - TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED); - } - } - n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), - (size_t)(pIn_buf_end - pIn_buf_cur)), - counter); - TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n); - pIn_buf_cur += n; - pOut_buf_cur += n; - counter -= (mz_uint)n; - } - } else if (r->m_type == 3) { - TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED); - } else { - if (r->m_type == 1) { - mz_uint8 *p = r->m_tables[0].m_code_size; - mz_uint i; - r->m_table_sizes[0] = 288; - r->m_table_sizes[1] = 32; - TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32); - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - } else { - for (counter = 0; counter < 3; counter++) { - TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]); - r->m_table_sizes[counter] += s_min_table_sizes[counter]; - } - MZ_CLEAR_OBJ(r->m_tables[2].m_code_size); - for (counter = 0; counter < r->m_table_sizes[2]; counter++) { - mz_uint s; - TINFL_GET_BITS(14, s, 3); - r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s; - } - r->m_table_sizes[2] = 19; - } - for (; (int)r->m_type >= 0; r->m_type--) { - int tree_next, tree_cur; - tinfl_huff_table *pTable; - mz_uint i, j, used_syms, total, sym_index, next_code[17], - total_syms[16]; - pTable = &r->m_tables[r->m_type]; - MZ_CLEAR_OBJ(total_syms); - MZ_CLEAR_OBJ(pTable->m_look_up); - 
MZ_CLEAR_OBJ(pTable->m_tree); - for (i = 0; i < r->m_table_sizes[r->m_type]; ++i) - total_syms[pTable->m_code_size[i]]++; - used_syms = 0, total = 0; - next_code[0] = next_code[1] = 0; - for (i = 1; i <= 15; ++i) { - used_syms += total_syms[i]; - next_code[i + 1] = (total = ((total + total_syms[i]) << 1)); - } - if ((65536 != total) && (used_syms > 1)) { - TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED); - } - for (tree_next = -1, sym_index = 0; - sym_index < r->m_table_sizes[r->m_type]; ++sym_index) { - mz_uint rev_code = 0, l, cur_code, - code_size = pTable->m_code_size[sym_index]; - if (!code_size) continue; - cur_code = next_code[code_size]++; - for (l = code_size; l > 0; l--, cur_code >>= 1) - rev_code = (rev_code << 1) | (cur_code & 1); - if (code_size <= TINFL_FAST_LOOKUP_BITS) { - mz_int16 k = (mz_int16)((code_size << 9) | sym_index); - while (rev_code < TINFL_FAST_LOOKUP_SIZE) { - pTable->m_look_up[rev_code] = k; - rev_code += (1 << code_size); - } - continue; - } - if (0 == - (tree_cur = pTable->m_look_up[rev_code & - (TINFL_FAST_LOOKUP_SIZE - 1)])) { - pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = - (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } - rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1); - for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) { - tree_cur -= ((rev_code >>= 1) & 1); - if (!pTable->m_tree[-tree_cur - 1]) { - pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next; - tree_cur = tree_next; - tree_next -= 2; - } else - tree_cur = pTable->m_tree[-tree_cur - 1]; - } - tree_cur -= ((rev_code >>= 1) & 1); - pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index; - } - if (r->m_type == 2) { - for (counter = 0; - counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) { - mz_uint s; - TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]); - if (dist < 16) { - r->m_len_codes[counter++] = (mz_uint8)dist; - continue; - } - if ((dist == 16) && (!counter)) { - TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED); - } - num_extra = 
"\02\03\07"[dist - 16]; - TINFL_GET_BITS(18, s, num_extra); - s += "\03\03\013"[dist - 16]; - TINFL_MEMSET(r->m_len_codes + counter, - (dist == 16) ? r->m_len_codes[counter - 1] : 0, s); - counter += s; - } - if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) { - TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED); - } - TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, - r->m_table_sizes[0]); - TINFL_MEMCPY(r->m_tables[1].m_code_size, - r->m_len_codes + r->m_table_sizes[0], - r->m_table_sizes[1]); - } - } - for (;;) { - mz_uint8 *pSrc; - for (;;) { - if (((pIn_buf_end - pIn_buf_cur) < 4) || - ((pOut_buf_end - pOut_buf_cur) < 2)) { - TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]); - if (counter >= 256) break; - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = (mz_uint8)counter; - } else { - int sym2; - mz_uint code_len; -#if TINFL_USE_64BIT_BITBUF - if (num_bits < 30) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 4; - num_bits += 32; - } -#else - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + ((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - counter = sym2; - bit_buf >>= code_len; - num_bits -= code_len; - if (counter & 256) break; - -#if !TINFL_USE_64BIT_BITBUF - if (num_bits < 15) { - bit_buf |= - (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits); - pIn_buf_cur += 2; - num_bits += 16; - } -#endif - if ((sym2 = - r->m_tables[0] - .m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= - 0) - code_len = sym2 >> 9; - else { - code_len = TINFL_FAST_LOOKUP_BITS; - do { - sym2 = r->m_tables[0] - .m_tree[~sym2 + 
((bit_buf >> code_len++) & 1)]; - } while (sym2 < 0); - } - bit_buf >>= code_len; - num_bits -= code_len; - - pOut_buf_cur[0] = (mz_uint8)counter; - if (sym2 & 256) { - pOut_buf_cur++; - counter = sym2; - break; - } - pOut_buf_cur[1] = (mz_uint8)sym2; - pOut_buf_cur += 2; - } - } - if ((counter &= 511) == 256) break; - - num_extra = s_length_extra[counter - 257]; - counter = s_length_base[counter - 257]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(25, extra_bits, num_extra); - counter += extra_bits; - } - - TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]); - num_extra = s_dist_extra[dist]; - dist = s_dist_base[dist]; - if (num_extra) { - mz_uint extra_bits; - TINFL_GET_BITS(27, extra_bits, num_extra); - dist += extra_bits; - } - - dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start; - if ((dist > dist_from_out_buf_start) && - (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) { - TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED); - } - - pSrc = pOut_buf_start + - ((dist_from_out_buf_start - dist) & out_buf_size_mask); - - if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) { - while (counter--) { - while (pOut_buf_cur >= pOut_buf_end) { - TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT); - } - *pOut_buf_cur++ = - pOut_buf_start[(dist_from_out_buf_start++ - dist) & - out_buf_size_mask]; - } - continue; - } -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - else if ((counter >= 9) && (counter <= dist)) { - const mz_uint8 *pSrc_end = pSrc + (counter & ~7); - do { - ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0]; - ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1]; - pOut_buf_cur += 8; - } while ((pSrc += 8) < pSrc_end); - if ((counter &= 7) < 3) { - if (counter) { - pOut_buf_cur[0] = pSrc[0]; - if (counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - continue; - } - } -#endif - do { - pOut_buf_cur[0] = pSrc[0]; - pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur[2] = pSrc[2]; - pOut_buf_cur += 3; - pSrc += 
3; - } while ((int)(counter -= 3) > 2); - if ((int)counter > 0) { - pOut_buf_cur[0] = pSrc[0]; - if ((int)counter > 1) pOut_buf_cur[1] = pSrc[1]; - pOut_buf_cur += counter; - } - } - } - } while (!(r->m_final & 1)); - if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) { - TINFL_SKIP_BITS(32, num_bits & 7); - for (counter = 0; counter < 4; ++counter) { - mz_uint s; - if (num_bits) - TINFL_GET_BITS(41, s, 8); - else - TINFL_GET_BYTE(42, s); - r->m_z_adler32 = (r->m_z_adler32 << 8) | s; - } - } - TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE); - TINFL_CR_FINISH - -common_exit: - r->m_num_bits = num_bits; - r->m_bit_buf = bit_buf; - r->m_dist = dist; - r->m_counter = counter; - r->m_num_extra = num_extra; - r->m_dist_from_out_buf_start = dist_from_out_buf_start; - *pIn_buf_size = pIn_buf_cur - pIn_buf_next; - *pOut_buf_size = pOut_buf_cur - pOut_buf_next; - if ((decomp_flags & - (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && - (status >= 0)) { - const mz_uint8 *ptr = pOut_buf_next; - size_t buf_len = *pOut_buf_size; - mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, - s2 = r->m_check_adler32 >> 16; - size_t block_len = buf_len % 5552; - while (buf_len) { - for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { - s1 += ptr[0], s2 += s1; - s1 += ptr[1], s2 += s1; - s1 += ptr[2], s2 += s1; - s1 += ptr[3], s2 += s1; - s1 += ptr[4], s2 += s1; - s1 += ptr[5], s2 += s1; - s1 += ptr[6], s2 += s1; - s1 += ptr[7], s2 += s1; - } - for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; - s1 %= 65521U, s2 %= 65521U; - buf_len -= block_len; - block_len = 5552; - } - r->m_check_adler32 = (s2 << 16) + s1; - if ((status == TINFL_STATUS_DONE) && - (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && - (r->m_check_adler32 != r->m_z_adler32)) - status = TINFL_STATUS_ADLER32_MISMATCH; - } - return status; -} - -// Higher level helper functions. 
-void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tinfl_decompressor decomp; - void *pBuf = NULL, *pNew_buf; - size_t src_buf_ofs = 0, out_buf_capacity = 0; - *pOut_len = 0; - tinfl_init(&decomp); - for (;;) { - size_t src_buf_size = src_buf_len - src_buf_ofs, - dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity; - tinfl_status status = tinfl_decompress( - &decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, - (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, - &dst_buf_size, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - src_buf_ofs += src_buf_size; - *pOut_len += dst_buf_size; - if (status == TINFL_STATUS_DONE) break; - new_out_buf_capacity = out_buf_capacity * 2; - if (new_out_buf_capacity < 128) new_out_buf_capacity = 128; - pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity); - if (!pNew_buf) { - MZ_FREE(pBuf); - *pOut_len = 0; - return NULL; - } - pBuf = pNew_buf; - out_buf_capacity = new_out_buf_capacity; - } - return pBuf; -} - -size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tinfl_decompressor decomp; - tinfl_status status; - tinfl_init(&decomp); - status = - tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, - (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, - (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF); - return (status != TINFL_STATUS_DONE) ? 
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED - : out_buf_len; -} - -int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, - tinfl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - int result = 0; - tinfl_decompressor decomp; - mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE); - size_t in_buf_ofs = 0, dict_ofs = 0; - if (!pDict) return TINFL_STATUS_FAILED; - tinfl_init(&decomp); - for (;;) { - size_t in_buf_size = *pIn_buf_size - in_buf_ofs, - dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; - tinfl_status status = - tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, - &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, - (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); - in_buf_ofs += in_buf_size; - if ((dst_buf_size) && - (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) - break; - if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { - result = (status == TINFL_STATUS_DONE); - break; - } - dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); - } - MZ_FREE(pDict); - *pIn_buf_size = in_buf_ofs; - return result; -} - -// ------------------- Low-level Compression (independent from all decompression -// API's) - -// Purposely making these tables static for faster init and thread safety. 
-static const mz_uint16 s_tdefl_len_sym[256] = { - 257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, - 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, - 272, 272, 273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, - 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, - 276, 276, 276, 276, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, - 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, - 278, 278, 278, 278, 278, 278, 279, 279, 279, 279, 279, 279, 279, 279, 279, - 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, - 280, 280, 280, 280, 280, 280, 280, 280, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, - 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, - 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, - 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, - 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, - 285}; - -static const mz_uint8 s_tdefl_len_extra[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0}; - -static const mz_uint8 s_tdefl_small_dist_sym[512] = { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, - 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, - 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 
17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17}; - -static const mz_uint8 s_tdefl_small_dist_extra[512] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; - -static const 
mz_uint8 s_tdefl_large_dist_sym[128] = { - 0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, - 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; - -static const mz_uint8 s_tdefl_large_dist_extra[128] = { - 0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13}; - -// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted -// values. 
-typedef struct { - mz_uint16 m_key, m_sym_index; -} tdefl_sym_freq; -static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, - tdefl_sym_freq *pSyms0, - tdefl_sym_freq *pSyms1) { - mz_uint32 total_passes = 2, pass_shift, pass, i, hist[256 * 2]; - tdefl_sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; - MZ_CLEAR_OBJ(hist); - for (i = 0; i < num_syms; i++) { - mz_uint freq = pSyms0[i].m_key; - hist[freq & 0xFF]++; - hist[256 + ((freq >> 8) & 0xFF)]++; - } - while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) - total_passes--; - for (pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { - const mz_uint32 *pHist = &hist[pass << 8]; - mz_uint offsets[256], cur_ofs = 0; - for (i = 0; i < 256; i++) { - offsets[i] = cur_ofs; - cur_ofs += pHist[i]; - } - for (i = 0; i < num_syms; i++) - pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = - pCur_syms[i]; - { - tdefl_sym_freq *t = pCur_syms; - pCur_syms = pNew_syms; - pNew_syms = t; - } - } - return pCur_syms; -} - -// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, -// alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
-static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n) { - int root, leaf, next, avbl, used, dpth; - if (n == 0) - return; - else if (n == 1) { - A[0].m_key = 1; - return; - } - A[0].m_key += A[1].m_key; - root = 0; - leaf = 2; - for (next = 1; next < n - 1; next++) { - if (leaf >= n || A[root].m_key < A[leaf].m_key) { - A[next].m_key = A[root].m_key; - A[root++].m_key = (mz_uint16)next; - } else - A[next].m_key = A[leaf++].m_key; - if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) { - A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key); - A[root++].m_key = (mz_uint16)next; - } else - A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key); - } - A[n - 2].m_key = 0; - for (next = n - 3; next >= 0; next--) - A[next].m_key = A[A[next].m_key].m_key + 1; - avbl = 1; - used = dpth = 0; - root = n - 2; - next = n - 1; - while (avbl > 0) { - while (root >= 0 && (int)A[root].m_key == dpth) { - used++; - root--; - } - while (avbl > used) { - A[next--].m_key = (mz_uint16)(dpth); - avbl--; - } - avbl = 2 * used; - dpth++; - used = 0; - } -} - -// Limits canonical Huffman code table's max code size. 
-enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; -static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, - int code_list_len, - int max_code_size) { - int i; - mz_uint32 total = 0; - if (code_list_len <= 1) return; - for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) - pNum_codes[max_code_size] += pNum_codes[i]; - for (i = max_code_size; i > 0; i--) - total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); - while (total != (1UL << max_code_size)) { - pNum_codes[max_code_size]--; - for (i = max_code_size - 1; i > 0; i--) - if (pNum_codes[i]) { - pNum_codes[i]--; - pNum_codes[i + 1] += 2; - break; - } - total--; - } -} - -static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, - int table_len, int code_size_limit, - int static_table) { - int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; - mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; - MZ_CLEAR_OBJ(num_codes); - if (static_table) { - for (i = 0; i < table_len; i++) - num_codes[d->m_huff_code_sizes[table_num][i]]++; - } else { - tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], - *pSyms; - int num_used_syms = 0; - const mz_uint16 *pSym_count = &d->m_huff_count[table_num][0]; - for (i = 0; i < table_len; i++) - if (pSym_count[i]) { - syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; - syms0[num_used_syms++].m_sym_index = (mz_uint16)i; - } - - pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); - tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); - - for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; - - tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, - code_size_limit); - - MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); - MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); - for (i = 1, j = num_used_syms; i <= code_size_limit; i++) - for (l = num_codes[i]; l > 0; l--) - d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); - } - - next_code[1] = 0; - 
for (j = 0, i = 2; i <= code_size_limit; i++) - next_code[i] = j = ((j + num_codes[i - 1]) << 1); - - for (i = 0; i < table_len; i++) { - mz_uint rev_code = 0, code, code_size; - if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; - code = next_code[code_size]++; - for (l = code_size; l > 0; l--, code >>= 1) - rev_code = (rev_code << 1) | (code & 1); - d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; - } -} - -#define TDEFL_PUT_BITS(b, l) \ - do { \ - mz_uint bits = b; \ - mz_uint len = l; \ - MZ_ASSERT(bits <= ((1U << len) - 1U)); \ - d->m_bit_buffer |= (bits << d->m_bits_in); \ - d->m_bits_in += len; \ - while (d->m_bits_in >= 8) { \ - if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ - *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ - d->m_bit_buffer >>= 8; \ - d->m_bits_in -= 8; \ - } \ - } \ - MZ_MACRO_END - -#define TDEFL_RLE_PREV_CODE_SIZE() \ - { \ - if (rle_repeat_count) { \ - if (rle_repeat_count < 3) { \ - d->m_huff_count[2][prev_code_size] = (mz_uint16)( \ - d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ - while (rle_repeat_count--) \ - packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ - } else { \ - d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 16; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_repeat_count - 3); \ - } \ - rle_repeat_count = 0; \ - } \ - } - -#define TDEFL_RLE_ZERO_CODE_SIZE() \ - { \ - if (rle_z_count) { \ - if (rle_z_count < 3) { \ - d->m_huff_count[2][0] = \ - (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ - while (rle_z_count--) packed_code_sizes[num_packed_code_sizes++] = 0; \ - } else if (rle_z_count <= 10) { \ - d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ - packed_code_sizes[num_packed_code_sizes++] = 17; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 3); \ - } else { \ - d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 
1); \ - packed_code_sizes[num_packed_code_sizes++] = 18; \ - packed_code_sizes[num_packed_code_sizes++] = \ - (mz_uint8)(rle_z_count - 11); \ - } \ - rle_z_count = 0; \ - } \ - } - -static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -static void tdefl_start_dynamic_block(tdefl_compressor *d) { - int num_lit_codes, num_dist_codes, num_bit_lengths; - mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, - rle_repeat_count, packed_code_sizes_index; - mz_uint8 - code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], - prev_code_size = 0xFF; - - d->m_huff_count[0][256] = 1; - - tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); - tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); - - for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) - if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; - for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) - if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; - - memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); - memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], - num_dist_codes); - total_code_sizes_to_pack = num_lit_codes + num_dist_codes; - num_packed_code_sizes = 0; - rle_z_count = 0; - rle_repeat_count = 0; - - memset(&d->m_huff_count[2][0], 0, - sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); - for (i = 0; i < total_code_sizes_to_pack; i++) { - mz_uint8 code_size = code_sizes_to_pack[i]; - if (!code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - if (++rle_z_count == 138) { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - if (code_size != prev_code_size) { - TDEFL_RLE_PREV_CODE_SIZE(); - d->m_huff_count[2][code_size] = - (mz_uint16)(d->m_huff_count[2][code_size] + 1); - 
packed_code_sizes[num_packed_code_sizes++] = code_size; - } else if (++rle_repeat_count == 6) { - TDEFL_RLE_PREV_CODE_SIZE(); - } - } - prev_code_size = code_size; - } - if (rle_repeat_count) { - TDEFL_RLE_PREV_CODE_SIZE(); - } else { - TDEFL_RLE_ZERO_CODE_SIZE(); - } - - tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); - - TDEFL_PUT_BITS(2, 2); - - TDEFL_PUT_BITS(num_lit_codes - 257, 5); - TDEFL_PUT_BITS(num_dist_codes - 1, 5); - - for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) - if (d->m_huff_code_sizes - [2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) - break; - num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); - TDEFL_PUT_BITS(num_bit_lengths - 4, 4); - for (i = 0; (int)i < num_bit_lengths; i++) - TDEFL_PUT_BITS( - d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); - - for (packed_code_sizes_index = 0; - packed_code_sizes_index < num_packed_code_sizes;) { - mz_uint code = packed_code_sizes[packed_code_sizes_index++]; - MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); - TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); - if (code >= 16) - TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], - "\02\03\07"[code - 16]); - } -} - -static void tdefl_start_static_block(tdefl_compressor *d) { - mz_uint i; - mz_uint8 *p = &d->m_huff_code_sizes[0][0]; - - for (i = 0; i <= 143; ++i) *p++ = 8; - for (; i <= 255; ++i) *p++ = 9; - for (; i <= 279; ++i) *p++ = 7; - for (; i <= 287; ++i) *p++ = 8; - - memset(d->m_huff_code_sizes[1], 5, 32); - - tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); - tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); - - TDEFL_PUT_BITS(1, 2); -} - -static const mz_uint mz_bitmasks[17] = { - 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, - 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && \ - MINIZ_HAS_64BIT_REGISTERS -static 
mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - mz_uint8 *pOutput_buf = d->m_pOutput_buf; - mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf; - mz_uint64 bit_buffer = d->m_bit_buffer; - mz_uint bits_in = d->m_bits_in; - -#define TDEFL_PUT_BITS_FAST(b, l) \ - { \ - bit_buffer |= (((mz_uint64)(b)) << bits_in); \ - bits_in += (l); \ - } - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - - if (flags & 1) { - mz_uint s0, s1, n0, n1, sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = *(const mz_uint16 *)(pLZ_codes + 1); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - // This sequence coaxes MSVC into using cmov's vs. jmp's. - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - n0 = s_tdefl_small_dist_extra[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[match_dist >> 8]; - n1 = s_tdefl_large_dist_extra[match_dist >> 8]; - sym = (match_dist < 512) ? s0 : s1; - num_extra_bits = (match_dist < 512) ? 
n0 : n1; - - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], - d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], - num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - - if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { - flags >>= 1; - lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], - d->m_huff_code_sizes[0][lit]); - } - } - } - - if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; - - *(mz_uint64 *)pOutput_buf = bit_buffer; - pOutput_buf += (bits_in >> 3); - bit_buffer >>= (bits_in & ~7); - bits_in &= 7; - } - -#undef TDEFL_PUT_BITS_FAST - - d->m_pOutput_buf = pOutput_buf; - d->m_bits_in = 0; - d->m_bit_buffer = 0; - - while (bits_in) { - mz_uint32 n = MZ_MIN(bits_in, 16); - TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); - bit_buffer >>= n; - bits_in -= n; - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#else -static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d) { - mz_uint flags; - mz_uint8 *pLZ_codes; - - flags = 1; - for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; - flags >>= 1) { - if (flags == 1) flags = *pLZ_codes++ | 0x100; - if (flags & 1) { - mz_uint sym, num_extra_bits; - mz_uint match_len = pLZ_codes[0], - match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); - pLZ_codes += 3; - - MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], - 
d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); - TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], - s_tdefl_len_extra[match_len]); - - if (match_dist < 512) { - sym = s_tdefl_small_dist_sym[match_dist]; - num_extra_bits = s_tdefl_small_dist_extra[match_dist]; - } else { - sym = s_tdefl_large_dist_sym[match_dist >> 8]; - num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; - } - MZ_ASSERT(d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); - TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); - } else { - mz_uint lit = *pLZ_codes++; - MZ_ASSERT(d->m_huff_code_sizes[0][lit]); - TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); - } - } - - TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); - - return (d->m_pOutput_buf < d->m_pOutput_buf_end); -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && - // MINIZ_HAS_64BIT_REGISTERS - -static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block) { - if (static_block) - tdefl_start_static_block(d); - else - tdefl_start_dynamic_block(d); - return tdefl_compress_lz_codes(d); -} - -static int tdefl_flush_block(tdefl_compressor *d, int flush) { - mz_uint saved_bit_buf, saved_bits_in; - mz_uint8 *pSaved_output_buf; - mz_bool comp_block_succeeded = MZ_FALSE; - int n, use_raw_block = - ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && - (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; - mz_uint8 *pOutput_buf_start = - ((d->m_pPut_buf_func == NULL) && - ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) - ? 
((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) - : d->m_output_buf; - - d->m_pOutput_buf = pOutput_buf_start; - d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; - - MZ_ASSERT(!d->m_output_flush_remaining); - d->m_output_flush_ofs = 0; - d->m_output_flush_remaining = 0; - - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); - d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); - - if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { - TDEFL_PUT_BITS(0x78, 8); - TDEFL_PUT_BITS(0x01, 8); - } - - TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); - - pSaved_output_buf = d->m_pOutput_buf; - saved_bit_buf = d->m_bit_buffer; - saved_bits_in = d->m_bits_in; - - if (!use_raw_block) - comp_block_succeeded = - tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || - (d->m_total_lz_bytes < 48)); - - // If the block gets expanded, forget the current contents of the output - // buffer and send a raw block instead. - if (((use_raw_block) || - ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= - d->m_total_lz_bytes))) && - ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { - mz_uint i; - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - TDEFL_PUT_BITS(0, 2); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { - TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); - } - for (i = 0; i < d->m_total_lz_bytes; ++i) { - TDEFL_PUT_BITS( - d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], - 8); - } - } - // Check for the extremely unlikely (if not impossible) case of the compressed - // block not fitting into the output buffer when using dynamic codes. 
- else if (!comp_block_succeeded) { - d->m_pOutput_buf = pSaved_output_buf; - d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; - tdefl_compress_block(d, MZ_TRUE); - } - - if (flush) { - if (flush == TDEFL_FINISH) { - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { - mz_uint i, a = d->m_adler32; - for (i = 0; i < 4; i++) { - TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); - a <<= 8; - } - } - } else { - mz_uint i, z = 0; - TDEFL_PUT_BITS(0, 3); - if (d->m_bits_in) { - TDEFL_PUT_BITS(0, 8 - d->m_bits_in); - } - for (i = 2; i; --i, z ^= 0xFFFF) { - TDEFL_PUT_BITS(z & 0xFFFF, 16); - } - } - } - - MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); - - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; - d->m_total_lz_bytes = 0; - d->m_block_index++; - - if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { - if (d->m_pPut_buf_func) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) - return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); - } else if (pOutput_buf_start == d->m_output_buf) { - int bytes_to_copy = (int)MZ_MIN( - (size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, - bytes_to_copy); - d->m_out_buf_ofs += bytes_to_copy; - if ((n -= bytes_to_copy) != 0) { - d->m_output_flush_ofs = bytes_to_copy; - d->m_output_flush_remaining = n; - } - } else { - d->m_out_buf_ofs += n; - } - } - - return d->m_output_flush_remaining; -} - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES -#define TDEFL_READ_UNALIGNED_WORD(p) *(const 
mz_uint16 *)(p) -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q; - mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), - s01 = TDEFL_READ_UNALIGNED_WORD(s); - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - q = (const mz_uint16 *)(d->m_dict + probe_pos); - if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; - p = s; - probe_len = 32; - do { - } while ( - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - if (!probe_len) { - *pMatch_dist = dist; - *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); - break; - } else if ((probe_len = ((mz_uint)(p - s) * 2) + - (mz_uint)(*(const mz_uint8 *)p == - *(const mz_uint8 *)q)) > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == - max_match_len) - break; - c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + 
match_len - 1]); - } - } -} -#else -static MZ_FORCEINLINE void tdefl_find_match( - tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, - mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len) { - mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, - match_len = *pMatch_len, probe_pos = pos, next_probe_pos, - probe_len; - mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; - const mz_uint8 *s = d->m_dict + pos, *p, *q; - mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; - MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); - if (max_match_len <= match_len) return; - for (;;) { - for (;;) { - if (--num_probes_left == 0) return; -#define TDEFL_PROBE \ - next_probe_pos = d->m_next[probe_pos]; \ - if ((!next_probe_pos) || \ - ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ - return; \ - probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ - if ((d->m_dict[probe_pos + match_len] == c0) && \ - (d->m_dict[probe_pos + match_len - 1] == c1)) \ - break; - TDEFL_PROBE; - TDEFL_PROBE; - TDEFL_PROBE; - } - if (!dist) break; - p = s; - q = d->m_dict + probe_pos; - for (probe_len = 0; probe_len < max_match_len; probe_len++) - if (*p++ != *q++) break; - if (probe_len > match_len) { - *pMatch_dist = dist; - if ((*pMatch_len = match_len = probe_len) == max_match_len) return; - c0 = d->m_dict[pos + match_len]; - c1 = d->m_dict[pos + match_len - 1]; - } - } -} -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN -static mz_bool tdefl_compress_fast(tdefl_compressor *d) { - // Faster, minimally featured LZRW1-style match+parse loop with better - // register utilization. Intended for applications where raw throughput is - // valued more highly than ratio. 
- mz_uint lookahead_pos = d->m_lookahead_pos, - lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, - total_lz_bytes = d->m_total_lz_bytes, - num_flags_left = d->m_num_flags_left; - mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; - mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - - while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { - const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; - mz_uint dst_pos = - (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); - d->m_src_buf_left -= num_bytes_to_process; - lookahead_size += num_bytes_to_process; - - while (num_bytes_to_process) { - mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); - memcpy(d->m_dict + dst_pos, d->m_pSrc, n); - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, - MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); - d->m_pSrc += n; - dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; - num_bytes_to_process -= n; - } - - dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); - if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) - break; - - while (lookahead_size >= 4) { - mz_uint cur_match_dist, cur_match_len = 1; - mz_uint8 *pCur_dict = d->m_dict + cur_pos; - mz_uint first_trigram = (*(const mz_uint32 *)pCur_dict) & 0xFFFFFF; - mz_uint hash = - (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & - TDEFL_LEVEL1_HASH_SIZE_MASK; - mz_uint probe_pos = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)lookahead_pos; - - if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= - dict_size) && - ((*(const mz_uint32 *)(d->m_dict + - (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & - 0xFFFFFF) == first_trigram)) { - const mz_uint16 *p = (const mz_uint16 *)pCur_dict; - const mz_uint16 *q = 
(const mz_uint16 *)(d->m_dict + probe_pos); - mz_uint32 probe_len = 32; - do { - } while ((TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (TDEFL_READ_UNALIGNED_WORD(++p) == - TDEFL_READ_UNALIGNED_WORD(++q)) && - (--probe_len > 0)); - cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + - (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q); - if (!probe_len) - cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; - - if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || - ((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U))) { - cur_match_len = 1; - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } else { - mz_uint32 s0, s1; - cur_match_len = MZ_MIN(cur_match_len, lookahead_size); - - MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 1) && - (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); - - cur_match_dist--; - - pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); - *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; - pLZ_code_buf += 3; - *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); - - s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; - s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; - d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; - - d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - - TDEFL_MIN_MATCH_LEN]]++; - } - } else { - *pLZ_code_buf++ = (mz_uint8)first_trigram; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - d->m_huff_count[0][(mz_uint8)first_trigram]++; - } - - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - total_lz_bytes += cur_match_len; - lookahead_pos += cur_match_len; - dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; - MZ_ASSERT(lookahead_size >= cur_match_len); - lookahead_size -= cur_match_len; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? 
MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - - while (lookahead_size) { - mz_uint8 lit = d->m_dict[cur_pos]; - - total_lz_bytes++; - *pLZ_code_buf++ = lit; - *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); - if (--num_flags_left == 0) { - num_flags_left = 8; - pLZ_flags = pLZ_code_buf++; - } - - d->m_huff_count[0][lit]++; - - lookahead_pos++; - dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); - cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - lookahead_size--; - - if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { - int n; - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - total_lz_bytes = d->m_total_lz_bytes; - pLZ_code_buf = d->m_pLZ_code_buf; - pLZ_flags = d->m_pLZ_flags; - num_flags_left = d->m_num_flags_left; - } - } - } - - d->m_lookahead_pos = lookahead_pos; - d->m_lookahead_size = lookahead_size; - d->m_dict_size = dict_size; - d->m_total_lz_bytes = total_lz_bytes; - d->m_pLZ_code_buf = pLZ_code_buf; - d->m_pLZ_flags = pLZ_flags; - d->m_num_flags_left = num_flags_left; - return MZ_TRUE; -} -#endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - -static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, - mz_uint8 lit) { - d->m_total_lz_bytes++; - *d->m_pLZ_code_buf++ = lit; - *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - d->m_huff_count[0][lit]++; -} - -static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, - mz_uint match_len, - mz_uint match_dist) { - mz_uint32 s0, s1; - - 
MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && - (match_dist <= TDEFL_LZ_DICT_SIZE)); - - d->m_total_lz_bytes += match_len; - - d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN); - - match_dist -= 1; - d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF); - d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8); - d->m_pLZ_code_buf += 3; - - *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80); - if (--d->m_num_flags_left == 0) { - d->m_num_flags_left = 8; - d->m_pLZ_flags = d->m_pLZ_code_buf++; - } - - s0 = s_tdefl_small_dist_sym[match_dist & 511]; - s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127]; - d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++; - - if (match_len >= TDEFL_MIN_MATCH_LEN) - d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++; -} - -static mz_bool tdefl_compress_normal(tdefl_compressor *d) { - const mz_uint8 *pSrc = d->m_pSrc; - size_t src_buf_left = d->m_src_buf_left; - tdefl_flush flush = d->m_flush; - - while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) { - mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos; - // Update dictionary and hash chains. Keeps the lookahead size equal to - // TDEFL_MAX_MATCH_LEN. 
- if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) { - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK, - ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2; - mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK]; - mz_uint num_bytes_to_process = (mz_uint)MZ_MIN( - src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size); - const mz_uint8 *pSrc_end = pSrc + num_bytes_to_process; - src_buf_left -= num_bytes_to_process; - d->m_lookahead_size += num_bytes_to_process; - while (pSrc != pSrc_end) { - mz_uint8 c = *pSrc++; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; - ins_pos++; - } - } else { - while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) { - mz_uint8 c = *pSrc++; - mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & - TDEFL_LZ_DICT_SIZE_MASK; - src_buf_left--; - d->m_dict[dst_pos] = c; - if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) - d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c; - if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) { - mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2; - mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] - << (TDEFL_LZ_HASH_SHIFT * 2)) ^ - (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] - << TDEFL_LZ_HASH_SHIFT) ^ - c) & - (TDEFL_LZ_HASH_SIZE - 1); - d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash]; - d->m_hash[hash] = (mz_uint16)(ins_pos); - } - } - } - d->m_dict_size = - MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size); - if ((!flush) && (d->m_lookahead_size < 
TDEFL_MAX_MATCH_LEN)) break; - - // Simple lazy/greedy parsing state machine. - len_to_move = 1; - cur_match_dist = 0; - cur_match_len = - d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1); - cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; - if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) { - if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) { - mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK]; - cur_match_len = 0; - while (cur_match_len < d->m_lookahead_size) { - if (d->m_dict[cur_pos + cur_match_len] != c) break; - cur_match_len++; - } - if (cur_match_len < TDEFL_MIN_MATCH_LEN) - cur_match_len = 0; - else - cur_match_dist = 1; - } - } else { - tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, - d->m_lookahead_size, &cur_match_dist, &cur_match_len); - } - if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && - (cur_match_dist >= 8U * 1024U)) || - (cur_pos == cur_match_dist) || - ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) { - cur_match_dist = cur_match_len = 0; - } - if (d->m_saved_match_len) { - if (cur_match_len > d->m_saved_match_len) { - tdefl_record_literal(d, (mz_uint8)d->m_saved_lit); - if (cur_match_len >= 128) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - d->m_saved_match_len = 0; - len_to_move = cur_match_len; - } else { - d->m_saved_lit = d->m_dict[cur_pos]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - } else { - tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist); - len_to_move = d->m_saved_match_len - 1; - d->m_saved_match_len = 0; - } - } else if (!cur_match_dist) - tdefl_record_literal(d, - d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]); - else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || - (cur_match_len >= 128)) { - tdefl_record_match(d, cur_match_len, cur_match_dist); - len_to_move = cur_match_len; - } else { - d->m_saved_lit = 
d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]; - d->m_saved_match_dist = cur_match_dist; - d->m_saved_match_len = cur_match_len; - } - // Move the lookahead forward by len_to_move bytes. - d->m_lookahead_pos += len_to_move; - MZ_ASSERT(d->m_lookahead_size >= len_to_move); - d->m_lookahead_size -= len_to_move; - d->m_dict_size = - MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE); - // Check if it's time to flush the current LZ codes to the internal output - // buffer. - if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) || - ((d->m_total_lz_bytes > 31 * 1024) && - (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= - d->m_total_lz_bytes) || - (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) { - int n; - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - if ((n = tdefl_flush_block(d, 0)) != 0) - return (n < 0) ? MZ_FALSE : MZ_TRUE; - } - } - - d->m_pSrc = pSrc; - d->m_src_buf_left = src_buf_left; - return MZ_TRUE; -} - -static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d) { - if (d->m_pIn_buf_size) { - *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf; - } - - if (d->m_pOut_buf_size) { - size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, - d->m_output_flush_remaining); - memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, - d->m_output_buf + d->m_output_flush_ofs, n); - d->m_output_flush_ofs += (mz_uint)n; - d->m_output_flush_remaining -= (mz_uint)n; - d->m_out_buf_ofs += n; - - *d->m_pOut_buf_size = d->m_out_buf_ofs; - } - - return (d->m_finished && !d->m_output_flush_remaining) ? 
TDEFL_STATUS_DONE - : TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, - size_t *pIn_buf_size, void *pOut_buf, - size_t *pOut_buf_size, tdefl_flush flush) { - if (!d) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return TDEFL_STATUS_BAD_PARAM; - } - - d->m_pIn_buf = pIn_buf; - d->m_pIn_buf_size = pIn_buf_size; - d->m_pOut_buf = pOut_buf; - d->m_pOut_buf_size = pOut_buf_size; - d->m_pSrc = (const mz_uint8 *)(pIn_buf); - d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; - d->m_out_buf_ofs = 0; - d->m_flush = flush; - - if (((d->m_pPut_buf_func != NULL) == - ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || - (d->m_prev_return_status != TDEFL_STATUS_OKAY) || - (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || - (pIn_buf_size && *pIn_buf_size && !pIn_buf) || - (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { - if (pIn_buf_size) *pIn_buf_size = 0; - if (pOut_buf_size) *pOut_buf_size = 0; - return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); - } - d->m_wants_to_finish |= (flush == TDEFL_FINISH); - - if ((d->m_output_flush_remaining) || (d->m_finished)) - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); - -#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && - ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && - ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | - TDEFL_RLE_MATCHES)) == 0)) { - if (!tdefl_compress_fast(d)) return d->m_prev_return_status; - } else -#endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN - { - if (!tdefl_compress_normal(d)) return d->m_prev_return_status; - } - - if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && - (pIn_buf)) - d->m_adler32 = - (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, - d->m_pSrc - (const mz_uint8 *)pIn_buf); - - if ((flush) && (!d->m_lookahead_size) && 
(!d->m_src_buf_left) && - (!d->m_output_flush_remaining)) { - if (tdefl_flush_block(d, flush) < 0) return d->m_prev_return_status; - d->m_finished = (flush == TDEFL_FINISH); - if (flush == TDEFL_FULL_FLUSH) { - MZ_CLEAR_OBJ(d->m_hash); - MZ_CLEAR_OBJ(d->m_next); - d->m_dict_size = 0; - } - } - - return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); -} - -tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, - size_t in_buf_size, tdefl_flush flush) { - MZ_ASSERT(d->m_pPut_buf_func); - return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); -} - -tdefl_status tdefl_init(tdefl_compressor *d, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - d->m_pPut_buf_func = pPut_buf_func; - d->m_pPut_buf_user = pPut_buf_user; - d->m_flags = (mz_uint)(flags); - d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; - d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; - d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; - if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); - d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = - d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; - d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = - d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; - d->m_pLZ_code_buf = d->m_lz_code_buf + 1; - d->m_pLZ_flags = d->m_lz_code_buf; - d->m_num_flags_left = 8; - d->m_pOutput_buf = d->m_output_buf; - d->m_pOutput_buf_end = d->m_output_buf; - d->m_prev_return_status = TDEFL_STATUS_OKAY; - d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; - d->m_adler32 = 1; - d->m_pIn_buf = NULL; - d->m_pOut_buf = NULL; - d->m_pIn_buf_size = NULL; - d->m_pOut_buf_size = NULL; - d->m_flush = TDEFL_NO_FLUSH; - d->m_pSrc = NULL; - d->m_src_buf_left = 0; - d->m_out_buf_ofs = 0; - memset(&d->m_huff_count[0][0], 0, - sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); - 
memset(&d->m_huff_count[1][0], 0, - sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); - return TDEFL_STATUS_OKAY; -} - -tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d) { - return d->m_prev_return_status; -} - -mz_uint32 tdefl_get_adler32(tdefl_compressor *d) { return d->m_adler32; } - -mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, - tdefl_put_buf_func_ptr pPut_buf_func, - void *pPut_buf_user, int flags) { - tdefl_compressor *pComp; - mz_bool succeeded; - if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; - pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - if (!pComp) return MZ_FALSE; - succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == - TDEFL_STATUS_OKAY); - succeeded = - succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == - TDEFL_STATUS_DONE); - MZ_FREE(pComp); - return succeeded; -} - -typedef struct { - size_t m_size, m_capacity; - mz_uint8 *m_pBuf; - mz_bool m_expandable; -} tdefl_output_buffer; - -static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, - void *pUser) { - tdefl_output_buffer *p = (tdefl_output_buffer *)pUser; - size_t new_size = p->m_size + len; - if (new_size > p->m_capacity) { - size_t new_capacity = p->m_capacity; - mz_uint8 *pNew_buf; - if (!p->m_expandable) return MZ_FALSE; - do { - new_capacity = MZ_MAX(128U, new_capacity << 1U); - } while (new_size > new_capacity); - pNew_buf = (mz_uint8 *)MZ_REALLOC(p->m_pBuf, new_capacity); - if (!pNew_buf) return MZ_FALSE; - p->m_pBuf = pNew_buf; - p->m_capacity = new_capacity; - } - memcpy((mz_uint8 *)p->m_pBuf + p->m_size, pBuf, len); - p->m_size = new_size; - return MZ_TRUE; -} - -void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, - size_t *pOut_len, int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_len) - return MZ_FALSE; - else - *pOut_len = 0; - out_buf.m_expandable = MZ_TRUE; - if 
(!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return NULL; - *pOut_len = out_buf.m_size; - return out_buf.m_pBuf; -} - -size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, - const void *pSrc_buf, size_t src_buf_len, - int flags) { - tdefl_output_buffer out_buf; - MZ_CLEAR_OBJ(out_buf); - if (!pOut_buf) return 0; - out_buf.m_pBuf = (mz_uint8 *)pOut_buf; - out_buf.m_capacity = out_buf_len; - if (!tdefl_compress_mem_to_output( - pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) - return 0; - return out_buf.m_size; -} - -#ifndef MINIZ_NO_ZLIB_APIS -static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, - 128, 256, 512, 768, 1500}; - -// level may actually range from [0,10] (10 is a "hidden" max level, where we -// want a bit more compression and it's fine if throughput to fall off a cliff -// on some files). -mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, - int strategy) { - mz_uint comp_flags = - s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | - ((level <= 3) ? 
TDEFL_GREEDY_PARSING_FLAG : 0); - if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; - - if (!level) - comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; - else if (strategy == MZ_FILTERED) - comp_flags |= TDEFL_FILTER_MATCHES; - else if (strategy == MZ_HUFFMAN_ONLY) - comp_flags &= ~TDEFL_MAX_PROBES_MASK; - else if (strategy == MZ_FIXED) - comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; - else if (strategy == MZ_RLE) - comp_flags |= TDEFL_RLE_MATCHES; - - return comp_flags; -} -#endif // MINIZ_NO_ZLIB_APIS - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4204) // nonstandard extension used : non-constant - // aggregate initializer (also supported by GNU - // C and C99, so no big deal) -#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is - // deprecated. Instead, use the ISO C and C++ - // conformant name: _strdup. -#endif - -// Simple PNG writer function by Alex Evans, 2011. Released into the public -// domain: https://gist.github.com/908299, more context at -// http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. -// This is actually a modification of Alex's original code so PNG files -// generated by this function pass pngcheck. -void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, - int h, int num_chans, - size_t *pLen_out, - mz_uint level, mz_bool flip) { - // Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was - // defined. 
- static const mz_uint s_tdefl_png_num_probes[11] = { - 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; - tdefl_compressor *pComp = - (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor)); - tdefl_output_buffer out_buf; - int i, bpl = w * num_chans, y, z; - mz_uint32 c; - *pLen_out = 0; - if (!pComp) return NULL; - MZ_CLEAR_OBJ(out_buf); - out_buf.m_expandable = MZ_TRUE; - out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); - if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity))) { - MZ_FREE(pComp); - return NULL; - } - // write dummy header - for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); - // compress image data - tdefl_init( - pComp, tdefl_output_buffer_putter, &out_buf, - s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER); - for (y = 0; y < h; ++y) { - tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); - tdefl_compress_buffer(pComp, - (mz_uint8 *)pImage + (flip ? (h - 1 - y) : y) * bpl, - bpl, TDEFL_NO_FLUSH); - } - if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != - TDEFL_STATUS_DONE) { - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - // write real header - *pLen_out = out_buf.m_size - 41; - { - static const mz_uint8 chans[] = {0x00, 0x00, 0x04, 0x02, 0x06}; - mz_uint8 pnghdr[41] = {0x89, - 0x50, - 0x4e, - 0x47, - 0x0d, - 0x0a, - 0x1a, - 0x0a, - 0x00, - 0x00, - 0x00, - 0x0d, - 0x49, - 0x48, - 0x44, - 0x52, - 0, - 0, - (mz_uint8)(w >> 8), - (mz_uint8)w, - 0, - 0, - (mz_uint8)(h >> 8), - (mz_uint8)h, - 8, - chans[num_chans], - 0, - 0, - 0, - 0, - 0, - 0, - 0, - (mz_uint8)(*pLen_out >> 24), - (mz_uint8)(*pLen_out >> 16), - (mz_uint8)(*pLen_out >> 8), - (mz_uint8)*pLen_out, - 0x49, - 0x44, - 0x41, - 0x54}; - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); - for (i = 0; i < 4; ++i, c <<= 8) - ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); - memcpy(out_buf.m_pBuf, pnghdr, 41); - } - // write footer (IDAT CRC-32, followed by IEND chunk) - if 
(!tdefl_output_buffer_putter( - "\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { - *pLen_out = 0; - MZ_FREE(pComp); - MZ_FREE(out_buf.m_pBuf); - return NULL; - } - c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, - *pLen_out + 4); - for (i = 0; i < 4; ++i, c <<= 8) - (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); - // compute final size of file, grab compressed data buffer and return - *pLen_out += 57; - MZ_FREE(pComp); - return out_buf.m_pBuf; -} -void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, - int num_chans, size_t *pLen_out) { - // Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we - // can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's - // where #defined out) - return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, - pLen_out, 6, MZ_FALSE); -} - -// ------------------- .ZIP archive reading - -#ifndef MINIZ_NO_ARCHIVE_APIS -#error "No arvhive APIs" - -#ifdef MINIZ_NO_STDIO -#define MZ_FILE void * -#else -#include -#include - -#if defined(_MSC_VER) || defined(__MINGW64__) -static FILE *mz_fopen(const char *pFilename, const char *pMode) { - FILE *pFile = NULL; - fopen_s(&pFile, pFilename, pMode); - return pFile; -} -static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream) { - FILE *pFile = NULL; - if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; - return pFile; -} -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN mz_fopen -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 _ftelli64 -#define MZ_FSEEK64 _fseeki64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN mz_freopen -#define MZ_DELETE_FILE remove -#elif defined(__MINGW32__) -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose 
-#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT _stat -#define MZ_FILE_STAT _stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__TINYC__) -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftell -#define MZ_FSEEK64 fseek -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#elif defined(__GNUC__) && defined(_LARGEFILE64_SOURCE) && _LARGEFILE64_SOURCE -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen64(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello64 -#define MZ_FSEEK64 fseeko64 -#define MZ_FILE_STAT_STRUCT stat64 -#define MZ_FILE_STAT stat64 -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(p, m, s) freopen64(p, m, s) -#define MZ_DELETE_FILE remove -#else -#ifndef MINIZ_NO_TIME -#include -#endif -#define MZ_FILE FILE -#define MZ_FOPEN(f, m) fopen(f, m) -#define MZ_FCLOSE fclose -#define MZ_FREAD fread -#define MZ_FWRITE fwrite -#define MZ_FTELL64 ftello -#define MZ_FSEEK64 fseeko -#define MZ_FILE_STAT_STRUCT stat -#define MZ_FILE_STAT stat -#define MZ_FFLUSH fflush -#define MZ_FREOPEN(f, m, s) freopen(f, m, s) -#define MZ_DELETE_FILE remove -#endif // #ifdef _MSC_VER -#endif // #ifdef MINIZ_NO_STDIO - -#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) - -// Various ZIP archive enums. To completely avoid cross platform compiler -// alignment and platform endian issues, miniz.c doesn't use structs for any of -// this stuff. 
-enum { - // ZIP archive identifiers and record sizes - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, - MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, - // Central directory header record offsets - MZ_ZIP_CDH_SIG_OFS = 0, - MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, - MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, - MZ_ZIP_CDH_BIT_FLAG_OFS = 8, - MZ_ZIP_CDH_METHOD_OFS = 10, - MZ_ZIP_CDH_FILE_TIME_OFS = 12, - MZ_ZIP_CDH_FILE_DATE_OFS = 14, - MZ_ZIP_CDH_CRC32_OFS = 16, - MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, - MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, - MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, - MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, - MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, - MZ_ZIP_CDH_DISK_START_OFS = 34, - MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, - MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, - MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, - // Local directory header offsets - MZ_ZIP_LDH_SIG_OFS = 0, - MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, - MZ_ZIP_LDH_BIT_FLAG_OFS = 6, - MZ_ZIP_LDH_METHOD_OFS = 8, - MZ_ZIP_LDH_FILE_TIME_OFS = 10, - MZ_ZIP_LDH_FILE_DATE_OFS = 12, - MZ_ZIP_LDH_CRC32_OFS = 14, - MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, - MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, - MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, - MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, - // End of central directory offsets - MZ_ZIP_ECDH_SIG_OFS = 0, - MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, - MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, - MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, - MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, - MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, - MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, - MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, -}; - -typedef struct { - void *m_p; - size_t m_size, m_capacity; - mz_uint m_element_size; -} mz_zip_array; - -struct mz_zip_internal_state_tag { - mz_zip_array m_central_dir; - mz_zip_array m_central_dir_offsets; - mz_zip_array m_sorted_central_dir_offsets; - MZ_FILE *m_pFile; - void *m_pMem; - size_t 
m_mem_size; - size_t m_mem_capacity; -}; - -#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) \ - (array_ptr)->m_element_size = element_size -#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) \ - ((element_type *)((array_ptr)->m_p))[index] - -static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, - mz_zip_array *pArray) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); - memset(pArray, 0, sizeof(mz_zip_array)); -} - -static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t min_new_capacity, - mz_uint growing) { - void *pNew_p; - size_t new_capacity = min_new_capacity; - MZ_ASSERT(pArray->m_element_size); - if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; - if (growing) { - new_capacity = MZ_MAX(1, pArray->m_capacity); - while (new_capacity < min_new_capacity) new_capacity *= 2; - } - if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, - pArray->m_element_size, new_capacity))) - return MZ_FALSE; - pArray->m_p = pNew_p; - pArray->m_capacity = new_capacity; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_capacity, - mz_uint growing) { - if (new_capacity > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) - return MZ_FALSE; - } - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t new_size, - mz_uint growing) { - if (new_size > pArray->m_capacity) { - if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) - return MZ_FALSE; - } - pArray->m_size = new_size; - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, - mz_zip_array *pArray, - size_t n) { - return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); -} - -static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive 
*pZip, - mz_zip_array *pArray, - const void *pElements, - size_t n) { - size_t orig_size = pArray->m_size; - if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) - return MZ_FALSE; - memcpy((mz_uint8 *)pArray->m_p + orig_size * pArray->m_element_size, - pElements, n * pArray->m_element_size); - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { - struct tm tm; - memset(&tm, 0, sizeof(tm)); - tm.tm_isdst = -1; - tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; - tm.tm_mon = ((dos_date >> 5) & 15) - 1; - tm.tm_mday = dos_date & 31; - tm.tm_hour = (dos_time >> 11) & 31; - tm.tm_min = (dos_time >> 5) & 63; - tm.tm_sec = (dos_time << 1) & 62; - return mktime(&tm); -} - -static void mz_zip_time_to_dos_time(time_t time, mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef _MSC_VER - struct tm tm_struct; - struct tm *tm = &tm_struct; - errno_t err = localtime_s(tm, &time); - if (err) { - *pDOS_date = 0; - *pDOS_time = 0; - return; - } -#else - struct tm *tm = localtime(&time); -#endif - *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + - ((tm->tm_sec) >> 1)); - *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + - ((tm->tm_mon + 1) << 5) + tm->tm_mday); -} -#endif - -#ifndef MINIZ_NO_STDIO -static mz_bool mz_zip_get_file_modified_time(const char *pFilename, - mz_uint16 *pDOS_time, - mz_uint16 *pDOS_date) { -#ifdef MINIZ_NO_TIME - (void)pFilename; - *pDOS_date = *pDOS_time = 0; -#else - struct MZ_FILE_STAT_STRUCT file_stat; - // On Linux with x86 glibc, this call will fail on large files (>= 0x80000000 - // bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. 
- if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; - mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); -#endif // #ifdef MINIZ_NO_TIME - return MZ_TRUE; -} - -#ifndef MINIZ_NO_TIME -static mz_bool mz_zip_set_file_times(const char *pFilename, time_t access_time, - time_t modified_time) { - struct utimbuf t; - t.actime = access_time; - t.modtime = modified_time; - return !utime(pFilename, &t); -} -#endif // #ifndef MINIZ_NO_TIME -#endif // #ifndef MINIZ_NO_STDIO - -static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, - mz_uint32 flags) { - (void)flags; - if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_READING; - pZip->m_archive_size = 0; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static MZ_FORCEINLINE mz_bool -mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, - mz_uint l_index, mz_uint r_index) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - const mz_uint8 *pR = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, 
mz_uint32, r_index)); - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), - r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; - pL++; - pR++; - } - return (pL == pE) ? (l_len < r_len) : (l < r); -} - -#define MZ_SWAP_UINT32(a, b) \ - do { \ - mz_uint32 t = a; \ - a = b; \ - b = t; \ - } \ - MZ_MACRO_END - -// Heap sort of lowercased filenames, used to help accelerate plain central -// directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), -// but it could allocate memory.) -static void mz_zip_reader_sort_central_dir_offsets_by_filename( - mz_zip_archive *pZip) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - int start = (size - 2) >> 1, end; - while (start >= 0) { - int child, root = start; - for (;;) { - if ((child = (root << 1) + 1) >= size) break; - child += - (((child + 1) < size) && - (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1]))); - if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - start--; - } - - end = size - 1; - while (end > 0) { - int child, root = 0; - MZ_SWAP_UINT32(pIndices[end], pIndices[0]); - for (;;) { - if ((child = (root << 1) + 1) >= end) break; - child += - (((child + 1) < end) && - mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[child], pIndices[child + 1])); - if 
(!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, - pIndices[root], pIndices[child])) - break; - MZ_SWAP_UINT32(pIndices[root], pIndices[child]); - root = child; - } - end--; - } -} - -static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, - mz_uint32 flags) { - mz_uint cdir_size, num_this_disk, cdir_disk_index; - mz_uint64 cdir_ofs; - mz_int64 cur_file_ofs; - const mz_uint8 *p; - mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; - mz_uint8 *pBuf = (mz_uint8 *)buf_u32; - mz_bool sort_central_dir = - ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0); - // Basic sanity checks - reject files which are too small, and check the first - // 4 bytes of the file to make sure a local header is there. - if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - // Find the end of central directory record by scanning the file from the end - // towards the beginning. - cur_file_ofs = - MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); - for (;;) { - int i, - n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) - return MZ_FALSE; - for (i = n - 4; i >= 0; --i) - if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; - if (i >= 0) { - cur_file_ofs += i; - break; - } - if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= - (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) - return MZ_FALSE; - cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); - } - // Read and verify the end of central directory record. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || - ((pZip->m_total_files = - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != - MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) - return MZ_FALSE; - - num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); - cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); - if (((num_this_disk | cdir_disk_index) != 0) && - ((num_this_disk != 1) || (cdir_disk_index != 1))) - return MZ_FALSE; - - if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < - pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) - return MZ_FALSE; - - cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); - if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; - - pZip->m_central_directory_file_ofs = cdir_ofs; - - if (pZip->m_total_files) { - mz_uint i, n; - - // Read the entire central directory into a heap block, and allocate another - // heap block to hold the unsorted central dir file record offsets, and - // another to hold the sorted indices. - if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, - MZ_FALSE)) || - (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, - pZip->m_total_files, MZ_FALSE))) - return MZ_FALSE; - - if (sort_central_dir) { - if (!mz_zip_array_resize(pZip, - &pZip->m_pState->m_sorted_central_dir_offsets, - pZip->m_total_files, MZ_FALSE)) - return MZ_FALSE; - } - - if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, - pZip->m_pState->m_central_dir.m_p, - cdir_size) != cdir_size) - return MZ_FALSE; - - // Now create an index into the central directory file records, do some - // basic sanity checking on each record, and check for zip64 entries (which - // are not yet supported). 
- p = (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p; - for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { - mz_uint total_header_size, comp_size, decomp_size, disk_index; - if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || - (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) - return MZ_FALSE; - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - i) = - (mz_uint32)(p - (const mz_uint8 *)pZip->m_pState->m_central_dir.m_p); - if (sort_central_dir) - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, - mz_uint32, i) = i; - comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && - (decomp_size != comp_size)) || - (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || - (comp_size == 0xFFFFFFFF)) - return MZ_FALSE; - disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); - if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; - if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) - return MZ_FALSE; - if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > - n) - return MZ_FALSE; - n -= total_header_size; - p += total_header_size; - } - } - - if (sort_central_dir) - mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); - - return MZ_TRUE; -} - -mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, - mz_uint32 flags) { - if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -static size_t mz_zip_mem_read_func(void 
*pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - size_t s = (file_ofs >= pZip->m_archive_size) - ? 0 - : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); - memcpy(pBuf, (const mz_uint8 *)pZip->m_pState->m_pMem + file_ofs, s); - return s; -} - -mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, - size_t size, mz_uint32 flags) { - if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; - pZip->m_archive_size = size; - pZip->m_pRead = mz_zip_mem_read_func; - pZip->m_pIO_opaque = pZip; -#ifdef __cplusplus - pZip->m_pState->m_pMem = const_cast(pMem); -#else - pZip->m_pState->m_pMem = (void *)pMem; -#endif - pZip->m_pState->m_mem_size = size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_read_func(void *pOpaque, mz_uint64 file_ofs, - void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint32 flags) { - mz_uint64 file_size; - MZ_FILE *pFile = MZ_FOPEN(pFilename, "rb"); - if (!pFile) return MZ_FALSE; - if (MZ_FSEEK64(pFile, 0, SEEK_END)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - file_size = MZ_FTELL64(pFile); - if (!mz_zip_reader_init_internal(pZip, flags)) { - MZ_FCLOSE(pFile); - return MZ_FALSE; - } - pZip->m_pRead = mz_zip_file_read_func; - pZip->m_pIO_opaque = pZip; - pZip->m_pState->m_pFile = pFile; - pZip->m_archive_size = file_size; - if (!mz_zip_reader_read_central_dir(pZip, flags)) { - mz_zip_reader_end(pZip); - return MZ_FALSE; - } - return MZ_TRUE; -} -#endif 
// #ifndef MINIZ_NO_STDIO - -mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip) { - return pZip ? pZip->m_total_files : 0; -} - -static MZ_FORCEINLINE const mz_uint8 *mz_zip_reader_get_cdh( - mz_zip_archive *pZip, mz_uint file_index) { - if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return NULL; - return &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); -} - -mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint m_bit_flag; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - return (m_bit_flag & 1); -} - -mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, - mz_uint file_index) { - mz_uint filename_len, external_attr; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) return MZ_FALSE; - - // First see if the filename ends with a '/' character. - filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_len) { - if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') - return MZ_TRUE; - } - - // Bugfix: This code was also checking if the internal attribute was non-zero, - // which wasn't correct. - // Most/all zip writers (hopefully) set DOS file/directory attributes in the - // low 16-bits, so check for the DOS directory flag and ignore the source OS - // ID in the created by field. - // FIXME: Remove this check? Is it necessary - we already check the filename. 
- external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - if ((external_attr & 0x10) != 0) return MZ_TRUE; - - return MZ_FALSE; -} - -mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, - mz_zip_archive_file_stat *pStat) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if ((!p) || (!pStat)) return MZ_FALSE; - - // Unpack the central directory record. - pStat->m_file_index = file_index; - pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); - pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); - pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); - pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); - pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); -#ifndef MINIZ_NO_TIME - pStat->m_time = - mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), - MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); -#endif - pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); - pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); - pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); - pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - - // Copy as much of the filename and comment as possible. 
- n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); - memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pStat->m_filename[n] = '\0'; - - n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); - n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); - pStat->m_comment_size = n; - memcpy(pStat->m_comment, - p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), - n); - pStat->m_comment[n] = '\0'; - - return MZ_TRUE; -} - -mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, - char *pFilename, mz_uint filename_buf_size) { - mz_uint n; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - if (!p) { - if (filename_buf_size) pFilename[0] = '\0'; - return 0; - } - n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); - if (filename_buf_size) { - n = MZ_MIN(n, filename_buf_size - 1); - memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); - pFilename[n] = '\0'; - } - return n + 1; -} - -static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char *pA, - const char *pB, - mz_uint len, - mz_uint flags) { - mz_uint i; - if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); - for (i = 0; i < len; ++i) - if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; - return MZ_TRUE; -} - -static MZ_FORCEINLINE int mz_zip_reader_filename_compare( - const mz_zip_array *pCentral_dir_array, - const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, const char *pR, - mz_uint r_len) { - const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT( - pCentral_dir_array, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, - l_index)), - *pE; - mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); - mz_uint8 l = 0, r = 0; - pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - pE = pL + MZ_MIN(l_len, r_len); - while (pL < pE) { - if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) 
break; - pL++; - pR++; - } - return (pL == pE) ? (int)(l_len - r_len) : (l - r); -} - -static int mz_zip_reader_locate_file_binary_search(mz_zip_archive *pZip, - const char *pFilename) { - mz_zip_internal_state *pState = pZip->m_pState; - const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets; - const mz_zip_array *pCentral_dir = &pState->m_central_dir; - mz_uint32 *pIndices = &MZ_ZIP_ARRAY_ELEMENT( - &pState->m_sorted_central_dir_offsets, mz_uint32, 0); - const int size = pZip->m_total_files; - const mz_uint filename_len = (mz_uint)strlen(pFilename); - int l = 0, h = size - 1; - while (l <= h) { - int m = (l + h) >> 1, file_index = pIndices[m], - comp = - mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, - file_index, pFilename, filename_len); - if (!comp) - return file_index; - else if (comp < 0) - l = m + 1; - else - h = m - 1; - } - return -1; -} - -int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, - const char *pComment, mz_uint flags) { - mz_uint file_index; - size_t name_len, comment_len; - if ((!pZip) || (!pZip->m_pState) || (!pName) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return -1; - if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && - (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_size)) - return mz_zip_reader_locate_file_binary_search(pZip, pName); - name_len = strlen(pName); - if (name_len > 0xFFFF) return -1; - comment_len = pComment ? 
strlen(pComment) : 0; - if (comment_len > 0xFFFF) return -1; - for (file_index = 0; file_index < pZip->m_total_files; file_index++) { - const mz_uint8 *pHeader = &MZ_ZIP_ARRAY_ELEMENT( - &pZip->m_pState->m_central_dir, mz_uint8, - MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, - file_index)); - mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); - const char *pFilename = - (const char *)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; - if (filename_len < name_len) continue; - if (comment_len) { - mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), - file_comment_len = - MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); - const char *pFile_comment = pFilename + filename_len + file_extra_len; - if ((file_comment_len != comment_len) || - (!mz_zip_reader_string_equal(pComment, pFile_comment, - file_comment_len, flags))) - continue; - } - if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { - int ofs = filename_len - 1; - do { - if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || - (pFilename[ofs] == ':')) - break; - } while (--ofs >= 0); - ofs++; - pFilename += ofs; - filename_len -= ofs; - } - if ((filename_len == name_len) && - (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) - return file_index; - } - return -1; -} - -mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, - mz_uint file_index, void *pBuf, - size_t buf_size, mz_uint flags, - void *pUser_read_buf, - size_t user_read_buf_size) { - int status = TINFL_STATUS_DONE; - mz_uint64 needed_size, cur_file_ofs, comp_remaining, - out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; - mz_zip_archive_file_stat file_stat; - void *pRead_buf; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - tinfl_decompressor inflator; - - if ((buf_size) && (!pBuf)) return MZ_FALSE; - - if 
(!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory (I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Ensure supplied output buffer is large enough. - needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size - : file_stat.m_uncomp_size; - if (buf_size < needed_size) return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, - (size_t)needed_size) != needed_size) - return MZ_FALSE; - return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); - } - - // Decompress the file either directly from memory or from a file input - // buffer. - tinfl_init(&inflator); - - if (pZip->m_pState->m_pMem) { - // Read directly from the archive in memory. - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else if (pUser_read_buf) { - // Use a user provided read buffer. - if (!user_read_buf_size) return MZ_FALSE; - pRead_buf = (mz_uint8 *)pUser_read_buf; - read_buf_size = user_read_buf_size; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } else { - // Temporarily allocate a read buffer. - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (read_buf_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) -#endif - return MZ_FALSE; - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - do { - size_t in_buf_size, - out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (mz_uint8 *)pRead_buf + read_buf_ofs, 
&in_buf_size, - (mz_uint8 *)pBuf, (mz_uint8 *)pBuf + out_buf_ofs, &out_buf_size, - TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | - (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - out_buf_ofs += out_buf_size; - } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); - - if (status == TINFL_STATUS_DONE) { - // Make sure the entire file was decompressed, and check its CRC. - if ((out_buf_ofs != file_stat.m_uncomp_size) || - (mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, - (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_mem_no_alloc( - mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, - mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, pUser_read_buf, - user_read_buf_size); -} - -mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, - void *pBuf, size_t buf_size, - mz_uint flags) { - return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, - flags, NULL, 0); -} - -mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, - const char *pFilename, void *pBuf, - size_t buf_size, mz_uint flags) { - return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, - buf_size, flags, NULL, 0); -} - -void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, - size_t *pSize, mz_uint flags) { - mz_uint64 comp_size, uncomp_size, alloc_size; - const mz_uint8 *p = mz_zip_reader_get_cdh(pZip, file_index); - void *pBuf; - - if (pSize) *pSize = 0; - if (!p) return NULL; - 
- comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); - uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); - - alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? comp_size : uncomp_size; -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) -#endif - return NULL; - if (NULL == - (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) - return NULL; - - if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, - flags)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return NULL; - } - - if (pSize) *pSize = (size_t)alloc_size; - return pBuf; -} - -void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, - const char *pFilename, size_t *pSize, - mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) { - if (pSize) *pSize = 0; - return MZ_FALSE; - } - return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); -} - -mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, - mz_uint file_index, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int status = TINFL_STATUS_DONE; - mz_uint file_crc32 = MZ_CRC32_INIT; - mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, - out_buf_ofs = 0, cur_file_ofs; - mz_zip_archive_file_stat file_stat; - void *pRead_buf = NULL; - void *pWrite_buf = NULL; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 *pLocal_header = (mz_uint8 *)local_header_u32; - - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - - // Empty file, or a directory (but not always a directory - I've seen odd zips - // with directories that have compressed data which inflates to 0 bytes) - if (!file_stat.m_comp_size) return MZ_TRUE; - - // Entry is a subdirectory 
(I've seen old zips with dir entries which have - // compressed deflate data which inflates to 0 bytes, but these entries claim - // to uncompress to 512 bytes in the headers). - // I'm torn how to handle this case - should it fail instead? - if (mz_zip_reader_is_file_a_directory(pZip, file_index)) return MZ_TRUE; - - // Encryption and patch files are not supported. - if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; - - // This function only supports stored and deflate. - if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && - (file_stat.m_method != MZ_DEFLATED)) - return MZ_FALSE; - - // Read and parse the local directory entry. - cur_file_ofs = file_stat.m_local_header_ofs; - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - - cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) - return MZ_FALSE; - - // Decompress the file either directly from memory or from a file input - // buffer. - if (pZip->m_pState->m_pMem) { - pRead_buf = (mz_uint8 *)pZip->m_pState->m_pMem + cur_file_ofs; - read_buf_size = read_buf_avail = file_stat.m_comp_size; - comp_remaining = 0; - } else { - read_buf_size = - MZ_MIN(file_stat.m_comp_size, (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - (size_t)read_buf_size))) - return MZ_FALSE; - read_buf_avail = 0; - comp_remaining = file_stat.m_comp_size; - } - - if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { - // The file is stored or the caller has requested the compressed data. 
- if (pZip->m_pState->m_pMem) { -#ifdef _MSC_VER - if (((0, sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#else - if (((sizeof(size_t) == sizeof(mz_uint32))) && - (file_stat.m_comp_size > 0xFFFFFFFF)) -#endif - return MZ_FALSE; - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) - status = TINFL_STATUS_FAILED; - else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, (const mz_uint8 *)pRead_buf, - (size_t)file_stat.m_comp_size); - cur_file_ofs += file_stat.m_comp_size; - out_buf_ofs += file_stat.m_comp_size; - comp_remaining = 0; - } else { - while (comp_remaining) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - - if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) - file_crc32 = (mz_uint32)mz_crc32( - file_crc32, (const mz_uint8 *)pRead_buf, (size_t)read_buf_avail); - - if (pCallback(pOpaque, out_buf_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; - break; - } - cur_file_ofs += read_buf_avail; - out_buf_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - } - } - } else { - tinfl_decompressor inflator; - tinfl_init(&inflator); - - if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, - TINFL_LZ_DICT_SIZE))) - status = TINFL_STATUS_FAILED; - else { - do { - mz_uint8 *pWrite_buf_cur = - (mz_uint8 *)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - size_t in_buf_size, - out_buf_size = - TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); - if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { - read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); - if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, - (size_t)read_buf_avail) != read_buf_avail) { - status = TINFL_STATUS_FAILED; 
- break; - } - cur_file_ofs += read_buf_avail; - comp_remaining -= read_buf_avail; - read_buf_ofs = 0; - } - - in_buf_size = (size_t)read_buf_avail; - status = tinfl_decompress( - &inflator, (const mz_uint8 *)pRead_buf + read_buf_ofs, &in_buf_size, - (mz_uint8 *)pWrite_buf, pWrite_buf_cur, &out_buf_size, - comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); - read_buf_avail -= in_buf_size; - read_buf_ofs += in_buf_size; - - if (out_buf_size) { - if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != - out_buf_size) { - status = TINFL_STATUS_FAILED; - break; - } - file_crc32 = - (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); - if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { - status = TINFL_STATUS_FAILED; - break; - } - } - } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || - (status == TINFL_STATUS_HAS_MORE_OUTPUT)); - } - } - - if ((status == TINFL_STATUS_DONE) && - (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { - // Make sure the entire file was decompressed, and check its CRC. 
- if ((out_buf_ofs != file_stat.m_uncomp_size) || - (file_crc32 != file_stat.m_crc32)) - status = TINFL_STATUS_FAILED; - } - - if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); - - return status == TINFL_STATUS_DONE; -} - -mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, - const char *pFilename, - mz_file_write_func pCallback, - void *pOpaque, mz_uint flags) { - int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, - flags); -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_callback(void *pOpaque, mz_uint64 ofs, - const void *pBuf, size_t n) { - (void)ofs; - return MZ_FWRITE(pBuf, 1, n, (MZ_FILE *)pOpaque); -} - -mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, - const char *pDst_filename, - mz_uint flags) { - mz_bool status; - mz_zip_archive_file_stat file_stat; - MZ_FILE *pFile; - if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; - pFile = MZ_FOPEN(pDst_filename, "wb"); - if (!pFile) return MZ_FALSE; - status = mz_zip_reader_extract_to_callback( - pZip, file_index, mz_zip_file_write_callback, pFile, flags); - if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; -#ifndef MINIZ_NO_TIME - if (status) - mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); -#endif - return status; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_reader_end(mz_zip_archive *pZip) { - if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - - if (pZip->m_pState) { - mz_zip_internal_state *pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - 
mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - } - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, - const char *pArchive_filename, - const char *pDst_filename, - mz_uint flags) { - int file_index = - mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); - if (file_index < 0) return MZ_FALSE; - return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); -} -#endif - -// ------------------- .ZIP archive writing - -#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -static void mz_write_le16(mz_uint8 *p, mz_uint16 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); -} -static void mz_write_le32(mz_uint8 *p, mz_uint32 v) { - p[0] = (mz_uint8)v; - p[1] = (mz_uint8)(v >> 8); - p[2] = (mz_uint8)(v >> 16); - p[3] = (mz_uint8)(v >> 24); -} -#define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8 *)(p), (mz_uint16)(v)) -#define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8 *)(p), (mz_uint32)(v)) - -mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size) { - if ((!pZip) || (pZip->m_pState) || (!pZip->m_pWrite) || - (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) - return MZ_FALSE; - - if (pZip->m_file_offset_alignment) { - // Ensure user specified file offset alignment is a power of 2. 
- if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) - return MZ_FALSE; - } - - if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; - if (!pZip->m_pFree) pZip->m_pFree = def_free_func; - if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_archive_size = existing_size; - pZip->m_central_directory_file_ofs = 0; - pZip->m_total_files = 0; - - if (NULL == (pZip->m_pState = (mz_zip_internal_state *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) - return MZ_FALSE; - memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, - sizeof(mz_uint8)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, - sizeof(mz_uint32)); - MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, - sizeof(mz_uint32)); - return MZ_TRUE; -} - -static size_t mz_zip_heap_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); -#ifdef _MSC_VER - if ((!n) || - ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#else - if ((!n) || - ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) -#endif - return 0; - if (new_size > pState->m_mem_capacity) { - void *pNew_block; - size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); - while (new_capacity < new_size) new_capacity *= 2; - if (NULL == (pNew_block = pZip->m_pRealloc( - pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) - return 0; - pState->m_pMem = pNew_block; - pState->m_mem_capacity = new_capacity; - } - memcpy((mz_uint8 *)pState->m_pMem + file_ofs, pBuf, n); - pState->m_mem_size = (size_t)new_size; - return n; -} - -mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, - size_t 
size_to_reserve_at_beginning, - size_t initial_allocation_size) { - pZip->m_pWrite = mz_zip_heap_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, - size_to_reserve_at_beginning))) { - if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_mem_capacity = initial_allocation_size; - } - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -static size_t mz_zip_file_write_func(void *pOpaque, mz_uint64 file_ofs, - const void *pBuf, size_t n) { - mz_zip_archive *pZip = (mz_zip_archive *)pOpaque; - mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); - if (((mz_int64)file_ofs < 0) || - (((cur_ofs != (mz_int64)file_ofs)) && - (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) - return 0; - return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); -} - -mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, - mz_uint64 size_to_reserve_at_beginning) { - MZ_FILE *pFile; - pZip->m_pWrite = mz_zip_file_write_func; - pZip->m_pIO_opaque = pZip; - if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; - if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - pZip->m_pState->m_pFile = pFile; - if (size_to_reserve_at_beginning) { - mz_uint64 cur_ofs = 0; - char buf[4096]; - MZ_CLEAR_OBJ(buf); - do { - size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { - mz_zip_writer_end(pZip); - return MZ_FALSE; - } - cur_ofs += n; - size_to_reserve_at_beginning -= n; - } while (size_to_reserve_at_beginning); - } - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, - const char 
*pFilename) { - mz_zip_internal_state *pState; - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) - return MZ_FALSE; - // No sense in trying to write to an archive that's already at the support max - // size - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if (pState->m_pFile) { -#ifdef MINIZ_NO_STDIO - pFilename; - return MZ_FALSE; -#else - // Archive is being read from stdio - try to reopen as writable. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - if (!pFilename) return MZ_FALSE; - pZip->m_pWrite = mz_zip_file_write_func; - if (NULL == - (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { - // The mz_zip_archive is now in a bogus state because pState->m_pFile is - // NULL, so just close it. - mz_zip_reader_end(pZip); - return MZ_FALSE; - } -#endif // #ifdef MINIZ_NO_STDIO - } else if (pState->m_pMem) { - // Archive lives in a memory block. Assume it's from the heap that we can - // resize using the realloc callback. - if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; - pState->m_mem_capacity = pState->m_mem_size; - pZip->m_pWrite = mz_zip_heap_write_func; - } - // Archive is being read via a user provided read function - make sure the - // user has specified a write function too. - else if (!pZip->m_pWrite) - return MZ_FALSE; - - // Start writing new files at the archive's current central directory - // location. 
- pZip->m_archive_size = pZip->m_central_directory_file_ofs; - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; - pZip->m_central_directory_file_ofs = 0; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, - const void *pBuf, size_t buf_size, - mz_uint level_and_flags) { - return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, - level_and_flags, 0, 0); -} - -typedef struct { - mz_zip_archive *m_pZip; - mz_uint64 m_cur_archive_file_ofs; - mz_uint64 m_comp_size; -} mz_zip_writer_add_state; - -static mz_bool mz_zip_writer_add_put_buf_callback(const void *pBuf, int len, - void *pUser) { - mz_zip_writer_add_state *pState = (mz_zip_writer_add_state *)pUser; - if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, - pState->m_cur_archive_file_ofs, pBuf, - len) != len) - return MZ_FALSE; - pState->m_cur_archive_file_ofs += len; - pState->m_comp_size += len; - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_local_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_create_central_dir_header( - mz_zip_archive *pZip, mz_uint8 *pDst, mz_uint16 filename_size, - mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, - mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, - mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, - mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { - (void)pZip; - memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); - MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); - MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_add_to_central_dir( - mz_zip_archive *pZip, const char *pFilename, mz_uint16 filename_size, - const void *pExtra, mz_uint16 extra_size, const void *pComment, - mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, - mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, - mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, - mz_uint32 ext_attributes) { - mz_zip_internal_state *pState = pZip->m_pState; - mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; - size_t orig_central_dir_size = pState->m_central_dir.m_size; - mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - - // No zip64 support yet - if ((local_header_ofs > 0xFFFFFFFF) || - (((mz_uint64)pState->m_central_dir.m_size + - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + - comment_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_central_dir_header( - pZip, central_dir_header, filename_size, extra_size, comment_size, - uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, - dos_date, local_header_ofs, ext_attributes)) - return MZ_FALSE; - - if ((!mz_zip_array_push_back(pZip, 
&pState->m_central_dir, central_dir_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, - filename_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, - extra_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, - comment_size)) || - (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, - ¢ral_dir_ofs, 1))) { - // Try to push the central directory array back into its original state. - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - return MZ_TRUE; -} - -static mz_bool mz_zip_writer_validate_archive_name(const char *pArchive_name) { - // Basic ZIP archive filename validity checks: Valid filenames cannot start - // with a forward slash, cannot contain a drive letter, and cannot use - // DOS-style backward slashes. - if (*pArchive_name == '/') return MZ_FALSE; - while (*pArchive_name) { - if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; - pArchive_name++; - } - return MZ_TRUE; -} - -static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment( - mz_zip_archive *pZip) { - mz_uint32 n; - if (!pZip->m_file_offset_alignment) return 0; - n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); - return (pZip->m_file_offset_alignment - n) & - (pZip->m_file_offset_alignment - 1); -} - -static mz_bool mz_zip_writer_write_zeros(mz_zip_archive *pZip, - mz_uint64 cur_file_ofs, mz_uint32 n) { - char buf[4096]; - memset(buf, 0, MZ_MIN(sizeof(buf), n)); - while (n) { - mz_uint32 s = MZ_MIN(sizeof(buf), n); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) - return MZ_FALSE; - cur_file_ofs += s; - n -= s; - } - return MZ_TRUE; -} - -mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, - const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags, 
mz_uint64 uncomp_size, - mz_uint32 uncomp_crc32) { - mz_uint16 method = 0, dos_time = 0, dos_date = 0; - mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; - mz_uint64 local_dir_header_ofs = pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - tdefl_compressor *pComp = NULL; - mz_bool store_data_uncompressed; - mz_zip_internal_state *pState; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - store_data_uncompressed = - ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || - (!pArchive_name) || ((comment_size) && (!pComment)) || - (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - - pState = pZip->m_pState; - - if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) - return MZ_FALSE; - // No zip64 support yet - if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - -#ifndef MINIZ_NO_TIME - { - time_t cur_time; - time(&cur_time); - mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); - } -#endif // #ifndef MINIZ_NO_TIME - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { - // Set DOS Subdirectory attribute bit. 
- ext_attributes |= 0x10; - // Subdirectories cannot contain data. - if ((buf_size) || (uncomp_size)) return MZ_FALSE; - } - - // Try to do any allocations before writing to the archive, so if an - // allocation fails the file remains unmodified. (A good idea if we're doing - // an in-place modification.) - if ((!mz_zip_array_ensure_room( - pZip, &pState->m_central_dir, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || - (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) - return MZ_FALSE; - - if ((!store_data_uncompressed) && (buf_size)) { - if (NULL == (pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) - return MZ_FALSE; - } - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { - uncomp_crc32 = - (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8 *)pBuf, buf_size); - uncomp_size = buf_size; - if (uncomp_size <= 3) { - level = 0; - store_data_uncompressed = MZ_TRUE; - } - } - - if (store_data_uncompressed) { - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, - buf_size) != buf_size) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - cur_archive_file_ofs += buf_size; - comp_size = buf_size; - - if 
(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; - } else if (buf_size) { - mz_zip_writer_add_state state; - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) || - (tdefl_compress_buffer(pComp, pBuf, buf_size, TDEFL_FINISH) != - TDEFL_STATUS_DONE)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pComp = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, - const char *pSrc_filename, const void *pComment, - mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; - mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; - mz_uint64 local_dir_header_ofs = 
pZip->m_archive_size, - cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, - comp_size = 0; - size_t archive_name_size; - mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; - MZ_FILE *pSrc_file = NULL; - - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - level = level_and_flags & 0xF; - - if ((!pZip) || (!pZip->m_pState) || - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || - ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - - archive_name_size = strlen(pArchive_name); - if (archive_name_size > 0xFFFF) return MZ_FALSE; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + - comment_size + archive_name_size) > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) - return MZ_FALSE; - - pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); - if (!pSrc_file) return MZ_FALSE; - MZ_FSEEK64(pSrc_file, 0, SEEK_END); - uncomp_size = MZ_FTELL64(pSrc_file); - MZ_FSEEK64(pSrc_file, 0, SEEK_SET); - - if (uncomp_size > 0xFFFFFFFF) { - // No zip64 support yet - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - if (uncomp_size <= 3) level = 0; - - if (!mz_zip_writer_write_zeros( - pZip, cur_archive_file_ofs, - num_alignment_padding_bytes + sizeof(local_dir_header))) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - local_dir_header_ofs += num_alignment_padding_bytes; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - cur_archive_file_ofs += - num_alignment_padding_bytes + sizeof(local_dir_header); - - 
MZ_CLEAR_OBJ(local_dir_header); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, - archive_name_size) != archive_name_size) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - cur_archive_file_ofs += archive_name_size; - - if (uncomp_size) { - mz_uint64 uncomp_remaining = uncomp_size; - void *pRead_buf = - pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); - if (!pRead_buf) { - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - if (!level) { - while (uncomp_remaining) { - mz_uint n = - (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); - if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || - (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, - n) != n)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - uncomp_crc32 = - (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8 *)pRead_buf, n); - uncomp_remaining -= n; - cur_archive_file_ofs += n; - } - comp_size = uncomp_size; - } else { - mz_bool result = MZ_FALSE; - mz_zip_writer_add_state state; - tdefl_compressor *pComp = (tdefl_compressor *)pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); - if (!pComp) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - state.m_pZip = pZip; - state.m_cur_archive_file_ofs = cur_archive_file_ofs; - state.m_comp_size = 0; - - if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, - tdefl_create_comp_flags_from_zip_params( - level, -15, MZ_DEFAULT_STRATEGY)) != - TDEFL_STATUS_OKAY) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - for (;;) { - size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, - (mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE); - tdefl_status status; - - if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) - break; - - uncomp_crc32 = (mz_uint32)mz_crc32( - 
uncomp_crc32, (const mz_uint8 *)pRead_buf, in_buf_size); - uncomp_remaining -= in_buf_size; - - status = tdefl_compress_buffer( - pComp, pRead_buf, in_buf_size, - uncomp_remaining ? TDEFL_NO_FLUSH : TDEFL_FINISH); - if (status == TDEFL_STATUS_DONE) { - result = MZ_TRUE; - break; - } else if (status != TDEFL_STATUS_OKAY) - break; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); - - if (!result) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - MZ_FCLOSE(pSrc_file); - return MZ_FALSE; - } - - comp_size = state.m_comp_size; - cur_archive_file_ofs = state.m_cur_archive_file_ofs; - - method = MZ_DEFLATED; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); - } - - MZ_FCLOSE(pSrc_file); - pSrc_file = NULL; - - // no zip64 support yet - if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) - return MZ_FALSE; - - if (!mz_zip_writer_create_local_dir_header( - pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, - comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) - return MZ_FALSE; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, - sizeof(local_dir_header)) != sizeof(local_dir_header)) - return MZ_FALSE; - - if (!mz_zip_writer_add_to_central_dir( - pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, - comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, - dos_time, dos_date, local_dir_header_ofs, ext_attributes)) - return MZ_FALSE; - - pZip->m_total_files++; - pZip->m_archive_size = cur_archive_file_ofs; - - return MZ_TRUE; -} -#endif // #ifndef MINIZ_NO_STDIO - -mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, - mz_zip_archive *pSource_zip, - mz_uint file_index) { - mz_uint n, bit_flags, num_alignment_padding_bytes; - mz_uint64 comp_bytes_remaining, local_dir_header_ofs; - mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; - mz_uint32 - local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / - sizeof(mz_uint32)]; - mz_uint8 
*pLocal_header = (mz_uint8 *)local_header_u32; - mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; - size_t orig_central_dir_size; - mz_zip_internal_state *pState; - void *pBuf; - const mz_uint8 *pSrc_central_header; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - if (NULL == - (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) - return MZ_FALSE; - pState = pZip->m_pState; - - num_alignment_padding_bytes = - mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); - - // no zip64 support yet - if ((pZip->m_total_files == 0xFFFF) || - ((pZip->m_archive_size + num_alignment_padding_bytes + - MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > - 0xFFFFFFFF)) - return MZ_FALSE; - - cur_src_file_ofs = - MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); - cur_dst_file_ofs = pZip->m_archive_size; - - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, - pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) - return MZ_FALSE; - cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, - num_alignment_padding_bytes)) - return MZ_FALSE; - cur_dst_file_ofs += num_alignment_padding_bytes; - local_dir_header_ofs = cur_dst_file_ofs; - if (pZip->m_file_offset_alignment) { - MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == - 0); - } - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != - MZ_ZIP_LOCAL_DIR_HEADER_SIZE) - return MZ_FALSE; - cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; - - n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); - comp_bytes_remaining = - n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); 
- - if (NULL == (pBuf = pZip->m_pAlloc( - pZip->m_pAlloc_opaque, 1, - (size_t)MZ_MAX(sizeof(mz_uint32) * 4, - MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, - comp_bytes_remaining))))) - return MZ_FALSE; - - while (comp_bytes_remaining) { - n = (mz_uint)MZ_MIN((mz_uint)MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_src_file_ofs += n; - - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - cur_dst_file_ofs += n; - - comp_bytes_remaining -= n; - } - - bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); - if (bit_flags & 8) { - // Copy data descriptor - if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, - sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 
4 : 3); - if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - return MZ_FALSE; - } - - cur_src_file_ofs += n; - cur_dst_file_ofs += n; - } - pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); - - // no zip64 support yet - if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; - - orig_central_dir_size = pState->m_central_dir.m_size; - - memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); - MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, - local_dir_header_ofs); - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, - MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) - return MZ_FALSE; - - n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + - MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); - if (!mz_zip_array_push_back( - pZip, &pState->m_central_dir, - pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; - n = (mz_uint32)orig_central_dir_size; - if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { - mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, - MZ_FALSE); - return MZ_FALSE; - } - - pZip->m_total_files++; - pZip->m_archive_size = cur_dst_file_ofs; - - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_uint64 central_dir_ofs, central_dir_size; - mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; - - if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) - return MZ_FALSE; - - pState = pZip->m_pState; - - // no zip64 support yet - if ((pZip->m_total_files > 0xFFFF) || - ((pZip->m_archive_size + pState->m_central_dir.m_size + - 
MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) - return MZ_FALSE; - - central_dir_ofs = 0; - central_dir_size = 0; - if (pZip->m_total_files) { - // Write central directory - central_dir_ofs = pZip->m_archive_size; - central_dir_size = pState->m_central_dir.m_size; - pZip->m_central_directory_file_ofs = central_dir_ofs; - if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, - pState->m_central_dir.m_p, - (size_t)central_dir_size) != central_dir_size) - return MZ_FALSE; - pZip->m_archive_size += central_dir_size; - } - - // Write end of central directory record - MZ_CLEAR_OBJ(hdr); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, - MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, - pZip->m_total_files); - MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); - MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); - - if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, - sizeof(hdr)) != sizeof(hdr)) - return MZ_FALSE; -#ifndef MINIZ_NO_STDIO - if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; -#endif // #ifndef MINIZ_NO_STDIO - - pZip->m_archive_size += sizeof(hdr); - - pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **pBuf, - size_t *pSize) { - if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; - if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; - if (!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; - - *pBuf = pZip->m_pState->m_pMem; - *pSize = pZip->m_pState->m_mem_size; - pZip->m_pState->m_pMem = NULL; - pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; - return MZ_TRUE; -} - -mz_bool mz_zip_writer_end(mz_zip_archive *pZip) { - mz_zip_internal_state *pState; - mz_bool status = MZ_TRUE; - if ((!pZip) || 
(!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || - ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && - (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) - return MZ_FALSE; - - pState = pZip->m_pState; - pZip->m_pState = NULL; - mz_zip_array_clear(pZip, &pState->m_central_dir); - mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); - mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); - -#ifndef MINIZ_NO_STDIO - if (pState->m_pFile) { - MZ_FCLOSE(pState->m_pFile); - pState->m_pFile = NULL; - } -#endif // #ifndef MINIZ_NO_STDIO - - if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { - pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); - pState->m_pMem = NULL; - } - - pZip->m_pFree(pZip->m_pAlloc_opaque, pState); - pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; - return status; -} - -#ifndef MINIZ_NO_STDIO -mz_bool mz_zip_add_mem_to_archive_file_in_place( - const char *pZip_filename, const char *pArchive_name, const void *pBuf, - size_t buf_size, const void *pComment, mz_uint16 comment_size, - mz_uint level_and_flags) { - mz_bool status, created_new_archive = MZ_FALSE; - mz_zip_archive zip_archive; - struct MZ_FILE_STAT_STRUCT file_stat; - MZ_CLEAR_OBJ(zip_archive); - if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; - if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || - ((comment_size) && (!pComment)) || - ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) - return MZ_FALSE; - if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; - if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { - // Create a new archive. - if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) - return MZ_FALSE; - created_new_archive = MZ_TRUE; - } else { - // Append to an existing archive. 
- if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return MZ_FALSE; - if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { - mz_zip_reader_end(&zip_archive); - return MZ_FALSE; - } - } - status = - mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, - pComment, comment_size, level_and_flags, 0, 0); - // Always finalize, even if adding failed for some reason, so we have a valid - // central directory. (This may not always succeed, but we can try.) - if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; - if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; - if ((!status) && (created_new_archive)) { - // It's a new archive and something went wrong, so just delete it. - int ignoredStatus = MZ_DELETE_FILE(pZip_filename); - (void)ignoredStatus; - } - return status; -} - -void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, - const char *pArchive_name, - size_t *pSize, mz_uint flags) { - int file_index; - mz_zip_archive zip_archive; - void *p = NULL; - - if (pSize) *pSize = 0; - - if ((!pZip_filename) || (!pArchive_name)) return NULL; - - MZ_CLEAR_OBJ(zip_archive); - if (!mz_zip_reader_init_file( - &zip_archive, pZip_filename, - flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) - return NULL; - - if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, NULL, - flags)) >= 0) - p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); - - mz_zip_reader_end(&zip_archive); - return p; -} - -#endif // #ifndef MINIZ_NO_STDIO - -#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS - -#endif // #ifndef MINIZ_NO_ARCHIVE_APIS - -#ifdef __cplusplus -} -#endif - -#endif // MINIZ_HEADER_FILE_ONLY - -/* - This is free and unencumbered software released into the public domain. 
- - Anyone is free to copy, modify, publish, use, compile, sell, or - distribute this software, either in source code form or as a compiled - binary, for any purpose, commercial or non-commercial, and by any - means. - - In jurisdictions that recognize copyright laws, the author or authors - of this software dedicate any and all copyright interest in the - software to the public domain. We make this dedication for the benefit - of the public at large and to the detriment of our heirs and - successors. We intend this dedication to be an overt act of - relinquishment in perpetuity of all present and future rights to this - software under copyright law. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR - OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. - - For more information, please refer to -*/ - -// ---------------------- end of miniz ---------------------------------------- - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#ifdef _MSC_VER -#pragma warning(pop) -#endif -} // namespace miniz -#else - -// Reuse MINIZ_LITTE_ENDIAN macro - -#if defined(__sparcv9) -// Big endian -#else -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || MINIZ_X86_OR_X64_CPU -// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. 
-#define MINIZ_LITTLE_ENDIAN 1 -#endif -#endif - -#endif // TINYEXR_USE_MINIZ - -// static bool IsBigEndian(void) { -// union { -// unsigned int i; -// char c[4]; -// } bint = {0x01020304}; -// -// return bint.c[0] == 1; -//} - -static void SetErrorMessage(const std::string &msg, const char **err) { - if (err) { -#ifdef _WIN32 - (*err) = _strdup(msg.c_str()); -#else - (*err) = strdup(msg.c_str()); -#endif - } -} - -static const int kEXRVersionSize = 8; - -static void cpy2(unsigned short *dst_val, const unsigned short *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; -} - -static void swap2(unsigned short *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - unsigned short tmp = *val; - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[1]; - dst[1] = src[0]; -#endif -} - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunused-function" -#endif - -#ifdef __GNUC__ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-function" -#endif -static void cpy4(int *dst_val, const int *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} - -static void cpy4(unsigned int *dst_val, const unsigned int *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} - -static void cpy4(float *dst_val, const float *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; -} -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -#ifdef __GNUC__ 
-#pragma GCC diagnostic pop -#endif - -static void swap4(unsigned int *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - unsigned int tmp = *val; - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[3]; - dst[1] = src[2]; - dst[2] = src[1]; - dst[3] = src[0]; -#endif -} - -#if 0 -static void cpy8(tinyexr::tinyexr_uint64 *dst_val, const tinyexr::tinyexr_uint64 *src_val) { - unsigned char *dst = reinterpret_cast(dst_val); - const unsigned char *src = reinterpret_cast(src_val); - - dst[0] = src[0]; - dst[1] = src[1]; - dst[2] = src[2]; - dst[3] = src[3]; - dst[4] = src[4]; - dst[5] = src[5]; - dst[6] = src[6]; - dst[7] = src[7]; -} -#endif - -static void swap8(tinyexr::tinyexr_uint64 *val) { -#ifdef MINIZ_LITTLE_ENDIAN - (void)val; -#else - tinyexr::tinyexr_uint64 tmp = (*val); - unsigned char *dst = reinterpret_cast(val); - unsigned char *src = reinterpret_cast(&tmp); - - dst[0] = src[7]; - dst[1] = src[6]; - dst[2] = src[5]; - dst[3] = src[4]; - dst[4] = src[3]; - dst[5] = src[2]; - dst[6] = src[1]; - dst[7] = src[0]; -#endif -} - -// https://gist.github.com/rygorous/2156668 -// Reuse MINIZ_LITTLE_ENDIAN flag from miniz. 
-union FP32 { - unsigned int u; - float f; - struct { -#if MINIZ_LITTLE_ENDIAN - unsigned int Mantissa : 23; - unsigned int Exponent : 8; - unsigned int Sign : 1; -#else - unsigned int Sign : 1; - unsigned int Exponent : 8; - unsigned int Mantissa : 23; -#endif - } s; -}; - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wpadded" -#endif - -union FP16 { - unsigned short u; - struct { -#if MINIZ_LITTLE_ENDIAN - unsigned int Mantissa : 10; - unsigned int Exponent : 5; - unsigned int Sign : 1; -#else - unsigned int Sign : 1; - unsigned int Exponent : 5; - unsigned int Mantissa : 10; -#endif - } s; -}; - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -static FP32 half_to_float(FP16 h) { - static const FP32 magic = {113 << 23}; - static const unsigned int shifted_exp = 0x7c00 - << 13; // exponent mask after shift - FP32 o; - - o.u = (h.u & 0x7fffU) << 13U; // exponent/mantissa bits - unsigned int exp_ = shifted_exp & o.u; // just the exponent - o.u += (127 - 15) << 23; // exponent adjust - - // handle exponent special cases - if (exp_ == shifted_exp) // Inf/NaN? - o.u += (128 - 16) << 23; // extra exp adjust - else if (exp_ == 0) // Zero/Denormal? - { - o.u += 1 << 23; // extra exp adjust - o.f -= magic.f; // renormalize - } - - o.u |= (h.u & 0x8000U) << 16U; // sign bit - return o; -} - -static FP16 float_to_half_full(FP32 f) { - FP16 o = {0}; - - // Based on ISPC reference code (with minor modifications) - if (f.s.Exponent == 0) // Signed zero/denormal (which will underflow) - o.s.Exponent = 0; - else if (f.s.Exponent == 255) // Inf or NaN (all exponent bits set) - { - o.s.Exponent = 31; - o.s.Mantissa = f.s.Mantissa ? 
0x200 : 0; // NaN->qNaN and Inf->Inf - } else // Normalized number - { - // Exponent unbias the single, then bias the halfp - int newexp = f.s.Exponent - 127 + 15; - if (newexp >= 31) // Overflow, return signed infinity - o.s.Exponent = 31; - else if (newexp <= 0) // Underflow - { - if ((14 - newexp) <= 24) // Mantissa might be non-zero - { - unsigned int mant = f.s.Mantissa | 0x800000; // Hidden 1 bit - o.s.Mantissa = mant >> (14 - newexp); - if ((mant >> (13 - newexp)) & 1) // Check for rounding - o.u++; // Round, might overflow into exp bit, but this is OK - } - } else { - o.s.Exponent = static_cast(newexp); - o.s.Mantissa = f.s.Mantissa >> 13; - if (f.s.Mantissa & 0x1000) // Check for rounding - o.u++; // Round, might overflow to inf, this is OK - } - } - - o.s.Sign = f.s.Sign; - return o; -} - -// NOTE: From OpenEXR code -// #define IMF_INCREASING_Y 0 -// #define IMF_DECREASING_Y 1 -// #define IMF_RAMDOM_Y 2 -// -// #define IMF_NO_COMPRESSION 0 -// #define IMF_RLE_COMPRESSION 1 -// #define IMF_ZIPS_COMPRESSION 2 -// #define IMF_ZIP_COMPRESSION 3 -// #define IMF_PIZ_COMPRESSION 4 -// #define IMF_PXR24_COMPRESSION 5 -// #define IMF_B44_COMPRESSION 6 -// #define IMF_B44A_COMPRESSION 7 - -#ifdef __clang__ -#pragma clang diagnostic push - -#if __has_warning("-Wzero-as-null-pointer-constant") -#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" -#endif - -#endif - -static const char *ReadString(std::string *s, const char *ptr, size_t len) { - // Read untile NULL(\0). 
- const char *p = ptr; - const char *q = ptr; - while ((size_t(q - ptr) < len) && (*q) != 0) { - q++; - } - - if (size_t(q - ptr) >= len) { - (*s) = std::string(); - return NULL; - } - - (*s) = std::string(p, q); - - return q + 1; // skip '\0' -} - -static bool ReadAttribute(std::string *name, std::string *type, - std::vector *data, size_t *marker_size, - const char *marker, size_t size) { - size_t name_len = strnlen(marker, size); - if (name_len == size) { - // String does not have a terminating character. - return false; - } - *name = std::string(marker, name_len); - - marker += name_len + 1; - size -= name_len + 1; - - size_t type_len = strnlen(marker, size); - if (type_len == size) { - return false; - } - *type = std::string(marker, type_len); - - marker += type_len + 1; - size -= type_len + 1; - - if (size < sizeof(uint32_t)) { - return false; - } - - uint32_t data_len; - memcpy(&data_len, marker, sizeof(uint32_t)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (data_len == 0) { - if ((*type).compare("string") == 0) { - // Accept empty string attribute. 
- - marker += sizeof(uint32_t); - size -= sizeof(uint32_t); - - *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t); - - data->resize(1); - (*data)[0] = '\0'; - - return true; - } else { - return false; - } - } - - marker += sizeof(uint32_t); - size -= sizeof(uint32_t); - - if (size < data_len) { - return false; - } - - data->resize(static_cast(data_len)); - memcpy(&data->at(0), marker, static_cast(data_len)); - - *marker_size = name_len + 1 + type_len + 1 + sizeof(uint32_t) + data_len; - return true; -} - -static void WriteAttributeToMemory(std::vector *out, - const char *name, const char *type, - const unsigned char *data, int len) { - out->insert(out->end(), name, name + strlen(name) + 1); - out->insert(out->end(), type, type + strlen(type) + 1); - - int outLen = len; - tinyexr::swap4(reinterpret_cast(&outLen)); - out->insert(out->end(), reinterpret_cast(&outLen), - reinterpret_cast(&outLen) + sizeof(int)); - out->insert(out->end(), data, data + len); -} - -typedef struct { - std::string name; // less than 255 bytes long - int pixel_type; - int x_sampling; - int y_sampling; - unsigned char p_linear; - unsigned char pad[3]; -} ChannelInfo; - -typedef struct HeaderInfo { - std::vector channels; - std::vector attributes; - - int data_window[4]; - int line_order; - int display_window[4]; - float screen_window_center[2]; - float screen_window_width; - float pixel_aspect_ratio; - - int chunk_count; - - // Tiled format - int tile_size_x; - int tile_size_y; - int tile_level_mode; - int tile_rounding_mode; - - unsigned int header_len; - - int compression_type; - - void clear() { - channels.clear(); - attributes.clear(); - - data_window[0] = 0; - data_window[1] = 0; - data_window[2] = 0; - data_window[3] = 0; - line_order = 0; - display_window[0] = 0; - display_window[1] = 0; - display_window[2] = 0; - display_window[3] = 0; - screen_window_center[0] = 0.0f; - screen_window_center[1] = 0.0f; - screen_window_width = 0.0f; - pixel_aspect_ratio = 0.0f; - - 
chunk_count = 0; - - // Tiled format - tile_size_x = 0; - tile_size_y = 0; - tile_level_mode = 0; - tile_rounding_mode = 0; - - header_len = 0; - compression_type = 0; - } -} HeaderInfo; - -static bool ReadChannelInfo(std::vector &channels, - const std::vector &data) { - const char *p = reinterpret_cast(&data.at(0)); - - for (;;) { - if ((*p) == 0) { - break; - } - ChannelInfo info; - - tinyexr_int64 data_len = static_cast(data.size()) - - (p - reinterpret_cast(data.data())); - if (data_len < 0) { - return false; - } - - p = ReadString(&info.name, p, size_t(data_len)); - if ((p == NULL) && (info.name.empty())) { - // Buffer overrun. Issue #51. - return false; - } - - const unsigned char *data_end = - reinterpret_cast(p) + 16; - if (data_end >= (data.data() + data.size())) { - return false; - } - - memcpy(&info.pixel_type, p, sizeof(int)); - p += 4; - info.p_linear = static_cast(p[0]); // uchar - p += 1 + 3; // reserved: uchar[3] - memcpy(&info.x_sampling, p, sizeof(int)); // int - p += 4; - memcpy(&info.y_sampling, p, sizeof(int)); // int - p += 4; - - tinyexr::swap4(reinterpret_cast(&info.pixel_type)); - tinyexr::swap4(reinterpret_cast(&info.x_sampling)); - tinyexr::swap4(reinterpret_cast(&info.y_sampling)); - - channels.push_back(info); - } - - return true; -} - -static void WriteChannelInfo(std::vector &data, - const std::vector &channels) { - size_t sz = 0; - - // Calculate total size. 
- for (size_t c = 0; c < channels.size(); c++) { - sz += strlen(channels[c].name.c_str()) + 1; // +1 for \0 - sz += 16; // 4 * int - } - data.resize(sz + 1); - - unsigned char *p = &data.at(0); - - for (size_t c = 0; c < channels.size(); c++) { - memcpy(p, channels[c].name.c_str(), strlen(channels[c].name.c_str())); - p += strlen(channels[c].name.c_str()); - (*p) = '\0'; - p++; - - int pixel_type = channels[c].pixel_type; - int x_sampling = channels[c].x_sampling; - int y_sampling = channels[c].y_sampling; - tinyexr::swap4(reinterpret_cast(&pixel_type)); - tinyexr::swap4(reinterpret_cast(&x_sampling)); - tinyexr::swap4(reinterpret_cast(&y_sampling)); - - memcpy(p, &pixel_type, sizeof(int)); - p += sizeof(int); - - (*p) = channels[c].p_linear; - p += 4; - - memcpy(p, &x_sampling, sizeof(int)); - p += sizeof(int); - - memcpy(p, &y_sampling, sizeof(int)); - p += sizeof(int); - } - - (*p) = '\0'; -} - -static void CompressZip(unsigned char *dst, - tinyexr::tinyexr_uint64 &compressedSize, - const unsigned char *src, unsigned long src_size) { - std::vector tmpBuf(src_size); - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfZipCompressor.cpp - // - - // - // Reorder the pixel data. - // - - const char *srcPtr = reinterpret_cast(src); - - { - char *t1 = reinterpret_cast(&tmpBuf.at(0)); - char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; - const char *stop = srcPtr + src_size; - - for (;;) { - if (srcPtr < stop) - *(t1++) = *(srcPtr++); - else - break; - - if (srcPtr < stop) - *(t2++) = *(srcPtr++); - else - break; - } - } - - // - // Predictor. 
- // - - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + src_size; - int p = t[-1]; - - while (t < stop) { - int d = int(t[0]) - p + (128 + 256); - p = t[0]; - t[0] = static_cast(d); - ++t; - } - } - -#if TINYEXR_USE_MINIZ - // - // Compress the data using miniz - // - - miniz::mz_ulong outSize = miniz::mz_compressBound(src_size); - int ret = miniz::mz_compress( - dst, &outSize, static_cast(&tmpBuf.at(0)), - src_size); - assert(ret == miniz::MZ_OK); - (void)ret; - - compressedSize = outSize; -#else - uLong outSize = compressBound(static_cast(src_size)); - int ret = compress(dst, &outSize, static_cast(&tmpBuf.at(0)), - src_size); - assert(ret == Z_OK); - - compressedSize = outSize; -#endif - - // Use uncompressed data when compressed data is larger than uncompressed. - // (Issue 40) - if (compressedSize >= src_size) { - compressedSize = src_size; - memcpy(dst, src, src_size); - } -} - -static bool DecompressZip(unsigned char *dst, - unsigned long *uncompressed_size /* inout */, - const unsigned char *src, unsigned long src_size) { - if ((*uncompressed_size) == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - return true; - } - std::vector tmpBuf(*uncompressed_size); - -#if TINYEXR_USE_MINIZ - int ret = - miniz::mz_uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (miniz::MZ_OK != ret) { - return false; - } -#else - int ret = uncompress(&tmpBuf.at(0), uncompressed_size, src, src_size); - if (Z_OK != ret) { - return false; - } -#endif - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfZipCompressor.cpp - // - - // Predictor. - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + (*uncompressed_size); - - while (t < stop) { - int d = int(t[-1]) + int(t[0]) - 128; - t[0] = static_cast(d); - ++t; - } - } - - // Reorder the pixel data. 
- { - const char *t1 = reinterpret_cast(&tmpBuf.at(0)); - const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + - (*uncompressed_size + 1) / 2; - char *s = reinterpret_cast(dst); - char *stop = s + (*uncompressed_size); - - for (;;) { - if (s < stop) - *(s++) = *(t1++); - else - break; - - if (s < stop) - *(s++) = *(t2++); - else - break; - } - } - - return true; -} - -// RLE code from OpenEXR -------------------------------------- - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wsign-conversion" -#endif - -#ifdef _MSC_VER -#pragma warning(push) -#pragma warning(disable : 4204) // nonstandard extension used : non-constant - // aggregate initializer (also supported by GNU - // C and C99, so no big deal) -#pragma warning(disable : 4244) // 'initializing': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4267) // 'argument': conversion from '__int64' to - // 'int', possible loss of data -#pragma warning(disable : 4996) // 'strdup': The POSIX name for this item is - // deprecated. Instead, use the ISO C and C++ - // conformant name: _strdup. -#endif - -const int MIN_RUN_LENGTH = 3; -const int MAX_RUN_LENGTH = 127; - -// -// Compress an array of bytes, using run-length encoding, -// and return the length of the compressed data. 
-// - -static int rleCompress(int inLength, const char in[], signed char out[]) { - const char *inEnd = in + inLength; - const char *runStart = in; - const char *runEnd = in + 1; - signed char *outWrite = out; - - while (runStart < inEnd) { - while (runEnd < inEnd && *runStart == *runEnd && - runEnd - runStart - 1 < MAX_RUN_LENGTH) { - ++runEnd; - } - - if (runEnd - runStart >= MIN_RUN_LENGTH) { - // - // Compressable run - // - - *outWrite++ = static_cast(runEnd - runStart) - 1; - *outWrite++ = *(reinterpret_cast(runStart)); - runStart = runEnd; - } else { - // - // Uncompressable run - // - - while (runEnd < inEnd && - ((runEnd + 1 >= inEnd || *runEnd != *(runEnd + 1)) || - (runEnd + 2 >= inEnd || *(runEnd + 1) != *(runEnd + 2))) && - runEnd - runStart < MAX_RUN_LENGTH) { - ++runEnd; - } - - *outWrite++ = static_cast(runStart - runEnd); - - while (runStart < runEnd) { - *outWrite++ = *(reinterpret_cast(runStart++)); - } - } - - ++runEnd; - } - - return static_cast(outWrite - out); -} - -// -// Uncompress an array of bytes compressed with rleCompress(). -// Returns the length of the oncompressed data, or 0 if the -// length of the uncompressed data would be more than maxLength. -// - -static int rleUncompress(int inLength, int maxLength, const signed char in[], - char out[]) { - char *outStart = out; - - while (inLength > 0) { - if (*in < 0) { - int count = -(static_cast(*in++)); - inLength -= count + 1; - - // Fixes #116: Add bounds check to in buffer. 
- if ((0 > (maxLength -= count)) || (inLength < 0)) return 0; - - memcpy(out, in, count); - out += count; - in += count; - } else { - int count = *in++; - inLength -= 2; - - if (0 > (maxLength -= count + 1)) return 0; - - memset(out, *reinterpret_cast(in), count + 1); - out += count + 1; - - in++; - } - } - - return static_cast(out - outStart); -} - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif - -// End of RLE code from OpenEXR ----------------------------------- - -static void CompressRle(unsigned char *dst, - tinyexr::tinyexr_uint64 &compressedSize, - const unsigned char *src, unsigned long src_size) { - std::vector tmpBuf(src_size); - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfRleCompressor.cpp - // - - // - // Reorder the pixel data. - // - - const char *srcPtr = reinterpret_cast(src); - - { - char *t1 = reinterpret_cast(&tmpBuf.at(0)); - char *t2 = reinterpret_cast(&tmpBuf.at(0)) + (src_size + 1) / 2; - const char *stop = srcPtr + src_size; - - for (;;) { - if (srcPtr < stop) - *(t1++) = *(srcPtr++); - else - break; - - if (srcPtr < stop) - *(t2++) = *(srcPtr++); - else - break; - } - } - - // - // Predictor. - // - - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + src_size; - int p = t[-1]; - - while (t < stop) { - int d = int(t[0]) - p + (128 + 256); - p = t[0]; - t[0] = static_cast(d); - ++t; - } - } - - // outSize will be (srcSiz * 3) / 2 at max. - int outSize = rleCompress(static_cast(src_size), - reinterpret_cast(&tmpBuf.at(0)), - reinterpret_cast(dst)); - assert(outSize > 0); - - compressedSize = static_cast(outSize); - - // Use uncompressed data when compressed data is larger than uncompressed. 
- // (Issue 40) - if (compressedSize >= src_size) { - compressedSize = src_size; - memcpy(dst, src, src_size); - } -} - -static bool DecompressRle(unsigned char *dst, - const unsigned long uncompressed_size, - const unsigned char *src, unsigned long src_size) { - if (uncompressed_size == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - return true; - } - - // Workaround for issue #112. - // TODO(syoyo): Add more robust out-of-bounds check in `rleUncompress`. - if (src_size <= 2) { - return false; - } - - std::vector tmpBuf(uncompressed_size); - - int ret = rleUncompress(static_cast(src_size), - static_cast(uncompressed_size), - reinterpret_cast(src), - reinterpret_cast(&tmpBuf.at(0))); - if (ret != static_cast(uncompressed_size)) { - return false; - } - - // - // Apply EXR-specific? postprocess. Grabbed from OpenEXR's - // ImfRleCompressor.cpp - // - - // Predictor. - { - unsigned char *t = &tmpBuf.at(0) + 1; - unsigned char *stop = &tmpBuf.at(0) + uncompressed_size; - - while (t < stop) { - int d = int(t[-1]) + int(t[0]) - 128; - t[0] = static_cast(d); - ++t; - } - } - - // Reorder the pixel data. 
- { - const char *t1 = reinterpret_cast(&tmpBuf.at(0)); - const char *t2 = reinterpret_cast(&tmpBuf.at(0)) + - (uncompressed_size + 1) / 2; - char *s = reinterpret_cast(dst); - char *stop = s + uncompressed_size; - - for (;;) { - if (s < stop) - *(s++) = *(t1++); - else - break; - - if (s < stop) - *(s++) = *(t2++); - else - break; - } - } - - return true; -} - -#if TINYEXR_USE_PIZ - -#ifdef __clang__ -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wc++11-long-long" -#pragma clang diagnostic ignored "-Wold-style-cast" -#pragma clang diagnostic ignored "-Wpadded" -#pragma clang diagnostic ignored "-Wsign-conversion" -#pragma clang diagnostic ignored "-Wc++11-extensions" -#pragma clang diagnostic ignored "-Wconversion" -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" - -#if __has_warning("-Wcast-qual") -#pragma clang diagnostic ignored "-Wcast-qual" -#endif - -#endif - -// -// PIZ compress/uncompress, based on OpenEXR's ImfPizCompressor.cpp -// -// ----------------------------------------------------------------- -// Copyright (c) 2004, Industrial Light & Magic, a division of Lucas -// Digital Ltd. LLC) -// (3 clause BSD license) -// - -struct PIZChannelData { - unsigned short *start; - unsigned short *end; - int nx; - int ny; - int ys; - int size; -}; - -//----------------------------------------------------------------------------- -// -// 16-bit Haar Wavelet encoding and decoding -// -// The source code in this file is derived from the encoding -// and decoding routines written by Christian Rouet for his -// PIZ image file format. -// -//----------------------------------------------------------------------------- - -// -// Wavelet basis functions without modulo arithmetic; they produce -// the best compression ratios when the wavelet-transformed data are -// Huffman-encoded, but the wavelet transform works only for 14-bit -// data (untransformed data values must be less than (1 << 14)). 
-// - -inline void wenc14(unsigned short a, unsigned short b, unsigned short &l, - unsigned short &h) { - short as = static_cast(a); - short bs = static_cast(b); - - short ms = (as + bs) >> 1; - short ds = as - bs; - - l = static_cast(ms); - h = static_cast(ds); -} - -inline void wdec14(unsigned short l, unsigned short h, unsigned short &a, - unsigned short &b) { - short ls = static_cast(l); - short hs = static_cast(h); - - int hi = hs; - int ai = ls + (hi & 1) + (hi >> 1); - - short as = static_cast(ai); - short bs = static_cast(ai - hi); - - a = static_cast(as); - b = static_cast(bs); -} - -// -// Wavelet basis functions with modulo arithmetic; they work with full -// 16-bit data, but Huffman-encoding the wavelet-transformed data doesn't -// compress the data quite as well. -// - -const int NBITS = 16; -const int A_OFFSET = 1 << (NBITS - 1); -const int M_OFFSET = 1 << (NBITS - 1); -const int MOD_MASK = (1 << NBITS) - 1; - -inline void wenc16(unsigned short a, unsigned short b, unsigned short &l, - unsigned short &h) { - int ao = (a + A_OFFSET) & MOD_MASK; - int m = ((ao + b) >> 1); - int d = ao - b; - - if (d < 0) m = (m + M_OFFSET) & MOD_MASK; - - d &= MOD_MASK; - - l = static_cast(m); - h = static_cast(d); -} - -inline void wdec16(unsigned short l, unsigned short h, unsigned short &a, - unsigned short &b) { - int m = l; - int d = h; - int bb = (m - (d >> 1)) & MOD_MASK; - int aa = (d + bb - A_OFFSET) & MOD_MASK; - b = static_cast(bb); - a = static_cast(aa); -} - -// -// 2D Wavelet encoding: -// - -static void wav2Encode( - unsigned short *in, // io: values are transformed in place - int nx, // i : x size - int ox, // i : x offset - int ny, // i : y size - int oy, // i : y offset - unsigned short mx) // i : maximum in[x][y] value -{ - bool w14 = (mx < (1 << 14)); - int n = (nx > ny) ? 
ny : nx; - int p = 1; // == 1 << level - int p2 = 2; // == 1 << (level+1) - - // - // Hierachical loop on smaller dimension n - // - - while (p2 <= n) { - unsigned short *py = in; - unsigned short *ey = in + oy * (ny - p2); - int oy1 = oy * p; - int oy2 = oy * p2; - int ox1 = ox * p; - int ox2 = ox * p2; - unsigned short i00, i01, i10, i11; - - // - // Y loop - // - - for (; py <= ey; py += oy2) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - // - // X loop - // - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - unsigned short *p10 = px + oy1; - unsigned short *p11 = p10 + ox1; - - // - // 2D wavelet encoding - // - - if (w14) { - wenc14(*px, *p01, i00, i01); - wenc14(*p10, *p11, i10, i11); - wenc14(i00, i10, *px, *p10); - wenc14(i01, i11, *p01, *p11); - } else { - wenc16(*px, *p01, i00, i01); - wenc16(*p10, *p11, i10, i11); - wenc16(i00, i10, *px, *p10); - wenc16(i01, i11, *p01, *p11); - } - } - - // - // Encode (1D) odd column (still in Y loop) - // - - if (nx & p) { - unsigned short *p10 = px + oy1; - - if (w14) - wenc14(*px, *p10, i00, *p10); - else - wenc16(*px, *p10, i00, *p10); - - *px = i00; - } - } - - // - // Encode (1D) odd line (must loop in X) - // - - if (ny & p) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - - if (w14) - wenc14(*px, *p01, i00, *p01); - else - wenc16(*px, *p01, i00, *p01); - - *px = i00; - } - } - - // - // Next level - // - - p = p2; - p2 <<= 1; - } -} - -// -// 2D Wavelet decoding: -// - -static void wav2Decode( - unsigned short *in, // io: values are transformed in place - int nx, // i : x size - int ox, // i : x offset - int ny, // i : y size - int oy, // i : y offset - unsigned short mx) // i : maximum in[x][y] value -{ - bool w14 = (mx < (1 << 14)); - int n = (nx > ny) ? 
ny : nx; - int p = 1; - int p2; - - // - // Search max level - // - - while (p <= n) p <<= 1; - - p >>= 1; - p2 = p; - p >>= 1; - - // - // Hierarchical loop on smaller dimension n - // - - while (p >= 1) { - unsigned short *py = in; - unsigned short *ey = in + oy * (ny - p2); - int oy1 = oy * p; - int oy2 = oy * p2; - int ox1 = ox * p; - int ox2 = ox * p2; - unsigned short i00, i01, i10, i11; - - // - // Y loop - // - - for (; py <= ey; py += oy2) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - // - // X loop - // - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - unsigned short *p10 = px + oy1; - unsigned short *p11 = p10 + ox1; - - // - // 2D wavelet decoding - // - - if (w14) { - wdec14(*px, *p10, i00, i10); - wdec14(*p01, *p11, i01, i11); - wdec14(i00, i01, *px, *p01); - wdec14(i10, i11, *p10, *p11); - } else { - wdec16(*px, *p10, i00, i10); - wdec16(*p01, *p11, i01, i11); - wdec16(i00, i01, *px, *p01); - wdec16(i10, i11, *p10, *p11); - } - } - - // - // Decode (1D) odd column (still in Y loop) - // - - if (nx & p) { - unsigned short *p10 = px + oy1; - - if (w14) - wdec14(*px, *p10, i00, *p10); - else - wdec16(*px, *p10, i00, *p10); - - *px = i00; - } - } - - // - // Decode (1D) odd line (must loop in X) - // - - if (ny & p) { - unsigned short *px = py; - unsigned short *ex = py + ox * (nx - p2); - - for (; px <= ex; px += ox2) { - unsigned short *p01 = px + ox1; - - if (w14) - wdec14(*px, *p01, i00, *p01); - else - wdec16(*px, *p01, i00, *p01); - - *px = i00; - } - } - - // - // Next level - // - - p2 = p; - p >>= 1; - } -} - -//----------------------------------------------------------------------------- -// -// 16-bit Huffman compression and decompression. -// -// The source code in this file is derived from the 8-bit -// Huffman compression and decompression routines written -// by Christian Rouet for his PIZ image file format. 
-// -//----------------------------------------------------------------------------- - -// Adds some modification for tinyexr. - -const int HUF_ENCBITS = 16; // literal (value) bit length -const int HUF_DECBITS = 14; // decoding bit size (>= 8) - -const int HUF_ENCSIZE = (1 << HUF_ENCBITS) + 1; // encoding table size -const int HUF_DECSIZE = 1 << HUF_DECBITS; // decoding table size -const int HUF_DECMASK = HUF_DECSIZE - 1; - -struct HufDec { // short code long code - //------------------------------- - int len : 8; // code length 0 - int lit : 24; // lit p size - int *p; // 0 lits -}; - -inline long long hufLength(long long code) { return code & 63; } - -inline long long hufCode(long long code) { return code >> 6; } - -inline void outputBits(int nBits, long long bits, long long &c, int &lc, - char *&out) { - c <<= nBits; - lc += nBits; - - c |= bits; - - while (lc >= 8) *out++ = static_cast((c >> (lc -= 8))); -} - -inline long long getBits(int nBits, long long &c, int &lc, const char *&in) { - while (lc < nBits) { - c = (c << 8) | *(reinterpret_cast(in++)); - lc += 8; - } - - lc -= nBits; - return (c >> lc) & ((1 << nBits) - 1); -} - -// -// ENCODING TABLE BUILDING & (UN)PACKING -// - -// -// Build a "canonical" Huffman code table: -// - for each (uncompressed) symbol, hcode contains the length -// of the corresponding code (in the compressed data) -// - canonical codes are computed and stored in hcode -// - the rules for constructing canonical codes are as follows: -// * shorter codes (if filled with zeroes to the right) -// have a numerically higher value than longer codes -// * for codes with the same length, numerical values -// increase with numerical symbol values -// - because the canonical code table can be constructed from -// symbol lengths alone, the code table can be transmitted -// without sending the actual code values -// - see http://www.compressconsult.com/huffman/ -// - -static void hufCanonicalCodeTable(long long hcode[HUF_ENCSIZE]) { - long long 
n[59]; - - // - // For each i from 0 through 58, count the - // number of different codes of length i, and - // store the count in n[i]. - // - - for (int i = 0; i <= 58; ++i) n[i] = 0; - - for (int i = 0; i < HUF_ENCSIZE; ++i) n[hcode[i]] += 1; - - // - // For each i from 58 through 1, compute the - // numerically lowest code with length i, and - // store that code in n[i]. - // - - long long c = 0; - - for (int i = 58; i > 0; --i) { - long long nc = ((c + n[i]) >> 1); - n[i] = c; - c = nc; - } - - // - // hcode[i] contains the length, l, of the - // code for symbol i. Assign the next available - // code of length l to the symbol and store both - // l and the code in hcode[i]. - // - - for (int i = 0; i < HUF_ENCSIZE; ++i) { - int l = static_cast(hcode[i]); - - if (l > 0) hcode[i] = l | (n[l]++ << 6); - } -} - -// -// Compute Huffman codes (based on frq input) and store them in frq: -// - code structure is : [63:lsb - 6:msb] | [5-0: bit length]; -// - max code length is 58 bits; -// - codes outside the range [im-iM] have a null length (unused values); -// - original frequencies are destroyed; -// - encoding tables are used by hufEncode() and hufBuildDecTable(); -// - -struct FHeapCompare { - bool operator()(long long *a, long long *b) { return *a > *b; } -}; - -static void hufBuildEncTable( - long long *frq, // io: input frequencies [HUF_ENCSIZE], output table - int *im, // o: min frq index - int *iM) // o: max frq index -{ - // - // This function assumes that when it is called, array frq - // indicates the frequency of all possible symbols in the data - // that are to be Huffman-encoded. (frq[i] contains the number - // of occurrences of symbol i in the data.) 
- // - // The loop below does three things: - // - // 1) Finds the minimum and maximum indices that point - // to non-zero entries in frq: - // - // frq[im] != 0, and frq[i] == 0 for all i < im - // frq[iM] != 0, and frq[i] == 0 for all i > iM - // - // 2) Fills array fHeap with pointers to all non-zero - // entries in frq. - // - // 3) Initializes array hlink such that hlink[i] == i - // for all array entries. - // - - std::vector hlink(HUF_ENCSIZE); - std::vector fHeap(HUF_ENCSIZE); - - *im = 0; - - while (!frq[*im]) (*im)++; - - int nf = 0; - - for (int i = *im; i < HUF_ENCSIZE; i++) { - hlink[i] = i; - - if (frq[i]) { - fHeap[nf] = &frq[i]; - nf++; - *iM = i; - } - } - - // - // Add a pseudo-symbol, with a frequency count of 1, to frq; - // adjust the fHeap and hlink array accordingly. Function - // hufEncode() uses the pseudo-symbol for run-length encoding. - // - - (*iM)++; - frq[*iM] = 1; - fHeap[nf] = &frq[*iM]; - nf++; - - // - // Build an array, scode, such that scode[i] contains the number - // of bits assigned to symbol i. Conceptually this is done by - // constructing a tree whose leaves are the symbols with non-zero - // frequency: - // - // Make a heap that contains all symbols with a non-zero frequency, - // with the least frequent symbol on top. - // - // Repeat until only one symbol is left on the heap: - // - // Take the two least frequent symbols off the top of the heap. - // Create a new node that has first two nodes as children, and - // whose frequency is the sum of the frequencies of the first - // two nodes. Put the new node back into the heap. - // - // The last node left on the heap is the root of the tree. For each - // leaf node, the distance between the root and the leaf is the length - // of the code for the corresponding symbol. - // - // The loop below doesn't actually build the tree; instead we compute - // the distances of the leaves from the root on the fly. 
When a new - // node is added to the heap, then that node's descendants are linked - // into a single linear list that starts at the new node, and the code - // lengths of the descendants (that is, their distance from the root - // of the tree) are incremented by one. - // - - std::make_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - std::vector scode(HUF_ENCSIZE); - memset(scode.data(), 0, sizeof(long long) * HUF_ENCSIZE); - - while (nf > 1) { - // - // Find the indices, mm and m, of the two smallest non-zero frq - // values in fHeap, add the smallest frq to the second-smallest - // frq, and remove the smallest frq value from fHeap. - // - - int mm = fHeap[0] - frq; - std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - --nf; - - int m = fHeap[0] - frq; - std::pop_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - frq[m] += frq[mm]; - std::push_heap(&fHeap[0], &fHeap[nf], FHeapCompare()); - - // - // The entries in scode are linked into lists with the - // entries in hlink serving as "next" pointers and with - // the end of a list marked by hlink[j] == j. - // - // Traverse the lists that start at scode[m] and scode[mm]. - // For each element visited, increment the length of the - // corresponding code by one bit. (If we visit scode[j] - // during the traversal, then the code for symbol j becomes - // one bit longer.) - // - // Merge the lists that start at scode[m] and scode[mm] - // into a single list that starts at scode[m]. - // - - // - // Add a bit to all codes in the first list. - // - - for (int j = m;; j = hlink[j]) { - scode[j]++; - - assert(scode[j] <= 58); - - if (hlink[j] == j) { - // - // Merge the two lists. - // - - hlink[j] = mm; - break; - } - } - - // - // Add a bit to all codes in the second list - // - - for (int j = mm;; j = hlink[j]) { - scode[j]++; - - assert(scode[j] <= 58); - - if (hlink[j] == j) break; - } - } - - // - // Build a canonical Huffman code table, replacing the code - // lengths in scode with (code, code length) pairs. 
Copy the - // code table from scode into frq. - // - - hufCanonicalCodeTable(scode.data()); - memcpy(frq, scode.data(), sizeof(long long) * HUF_ENCSIZE); -} - -// -// Pack an encoding table: -// - only code lengths, not actual codes, are stored -// - runs of zeroes are compressed as follows: -// -// unpacked packed -// -------------------------------- -// 1 zero 0 (6 bits) -// 2 zeroes 59 -// 3 zeroes 60 -// 4 zeroes 61 -// 5 zeroes 62 -// n zeroes (6 or more) 63 n-6 (6 + 8 bits) -// - -const int SHORT_ZEROCODE_RUN = 59; -const int LONG_ZEROCODE_RUN = 63; -const int SHORTEST_LONG_RUN = 2 + LONG_ZEROCODE_RUN - SHORT_ZEROCODE_RUN; -const int LONGEST_LONG_RUN = 255 + SHORTEST_LONG_RUN; - -static void hufPackEncTable( - const long long *hcode, // i : encoding table [HUF_ENCSIZE] - int im, // i : min hcode index - int iM, // i : max hcode index - char **pcode) // o: ptr to packed table (updated) -{ - char *p = *pcode; - long long c = 0; - int lc = 0; - - for (; im <= iM; im++) { - int l = hufLength(hcode[im]); - - if (l == 0) { - int zerun = 1; - - while ((im < iM) && (zerun < LONGEST_LONG_RUN)) { - if (hufLength(hcode[im + 1]) > 0) break; - im++; - zerun++; - } - - if (zerun >= 2) { - if (zerun >= SHORTEST_LONG_RUN) { - outputBits(6, LONG_ZEROCODE_RUN, c, lc, p); - outputBits(8, zerun - SHORTEST_LONG_RUN, c, lc, p); - } else { - outputBits(6, SHORT_ZEROCODE_RUN + zerun - 2, c, lc, p); - } - continue; - } - } - - outputBits(6, l, c, lc, p); - } - - if (lc > 0) *p++ = (unsigned char)(c << (8 - lc)); - - *pcode = p; -} - -// -// Unpack an encoding table packed by hufPackEncTable(): -// - -static bool hufUnpackEncTable( - const char **pcode, // io: ptr to packed table (updated) - int ni, // i : input size (in bytes) - int im, // i : min hcode index - int iM, // i : max hcode index - long long *hcode) // o: encoding table [HUF_ENCSIZE] -{ - memset(hcode, 0, sizeof(long long) * HUF_ENCSIZE); - - const char *p = *pcode; - long long c = 0; - int lc = 0; - - for (; im <= iM; 
im++) { - if (p - *pcode >= ni) { - return false; - } - - long long l = hcode[im] = getBits(6, c, lc, p); // code length - - if (l == (long long)LONG_ZEROCODE_RUN) { - if (p - *pcode > ni) { - return false; - } - - int zerun = getBits(8, c, lc, p) + SHORTEST_LONG_RUN; - - if (im + zerun > iM + 1) { - return false; - } - - while (zerun--) hcode[im++] = 0; - - im--; - } else if (l >= (long long)SHORT_ZEROCODE_RUN) { - int zerun = l - SHORT_ZEROCODE_RUN + 2; - - if (im + zerun > iM + 1) { - return false; - } - - while (zerun--) hcode[im++] = 0; - - im--; - } - } - - *pcode = const_cast(p); - - hufCanonicalCodeTable(hcode); - - return true; -} - -// -// DECODING TABLE BUILDING -// - -// -// Clear a newly allocated decoding table so that it contains only zeroes. -// - -static void hufClearDecTable(HufDec *hdecod) // io: (allocated by caller) -// decoding table [HUF_DECSIZE] -{ - for (int i = 0; i < HUF_DECSIZE; i++) { - hdecod[i].len = 0; - hdecod[i].lit = 0; - hdecod[i].p = NULL; - } - // memset(hdecod, 0, sizeof(HufDec) * HUF_DECSIZE); -} - -// -// Build a decoding hash table based on the encoding table hcode: -// - short codes (<= HUF_DECBITS) are resolved with a single table access; -// - long code entry allocations are not optimized, because long codes are -// unfrequent; -// - decoding tables are used by hufDecode(); -// - -static bool hufBuildDecTable(const long long *hcode, // i : encoding table - int im, // i : min index in hcode - int iM, // i : max index in hcode - HufDec *hdecod) // o: (allocated by caller) -// decoding table [HUF_DECSIZE] -{ - // - // Init hashtable & loop on all codes. - // Assumes that hufClearDecTable(hdecod) has already been called. - // - - for (; im <= iM; im++) { - long long c = hufCode(hcode[im]); - int l = hufLength(hcode[im]); - - if (c >> l) { - // - // Error: c is supposed to be an l-bit code, - // but c contains a value that is greater - // than the largest l-bit number. 
- // - - // invalidTableEntry(); - return false; - } - - if (l > HUF_DECBITS) { - // - // Long code: add a secondary entry - // - - HufDec *pl = hdecod + (c >> (l - HUF_DECBITS)); - - if (pl->len) { - // - // Error: a short code has already - // been stored in table entry *pl. - // - - // invalidTableEntry(); - return false; - } - - pl->lit++; - - if (pl->p) { - int *p = pl->p; - pl->p = new int[pl->lit]; - - for (int i = 0; i < pl->lit - 1; ++i) pl->p[i] = p[i]; - - delete[] p; - } else { - pl->p = new int[1]; - } - - pl->p[pl->lit - 1] = im; - } else if (l) { - // - // Short code: init all primary entries - // - - HufDec *pl = hdecod + (c << (HUF_DECBITS - l)); - - for (long long i = 1ULL << (HUF_DECBITS - l); i > 0; i--, pl++) { - if (pl->len || pl->p) { - // - // Error: a short code or a long code has - // already been stored in table entry *pl. - // - - // invalidTableEntry(); - return false; - } - - pl->len = l; - pl->lit = im; - } - } - } - - return true; -} - -// -// Free the long code entries of a decoding table built by hufBuildDecTable() -// - -static void hufFreeDecTable(HufDec *hdecod) // io: Decoding table -{ - for (int i = 0; i < HUF_DECSIZE; i++) { - if (hdecod[i].p) { - delete[] hdecod[i].p; - hdecod[i].p = 0; - } - } -} - -// -// ENCODING -// - -inline void outputCode(long long code, long long &c, int &lc, char *&out) { - outputBits(hufLength(code), hufCode(code), c, lc, out); -} - -inline void sendCode(long long sCode, int runCount, long long runCode, - long long &c, int &lc, char *&out) { - // - // Output a run of runCount instances of the symbol sCount. - // Output the symbols explicitly, or if that is shorter, output - // the sCode symbol once followed by a runCode symbol and runCount - // expressed as an 8-bit number. 
- // - - if (hufLength(sCode) + hufLength(runCode) + 8 < hufLength(sCode) * runCount) { - outputCode(sCode, c, lc, out); - outputCode(runCode, c, lc, out); - outputBits(8, runCount, c, lc, out); - } else { - while (runCount-- >= 0) outputCode(sCode, c, lc, out); - } -} - -// -// Encode (compress) ni values based on the Huffman encoding table hcode: -// - -static int hufEncode // return: output size (in bits) - (const long long *hcode, // i : encoding table - const unsigned short *in, // i : uncompressed input buffer - const int ni, // i : input buffer size (in bytes) - int rlc, // i : rl code - char *out) // o: compressed output buffer -{ - char *outStart = out; - long long c = 0; // bits not yet written to out - int lc = 0; // number of valid bits in c (LSB) - int s = in[0]; - int cs = 0; - - // - // Loop on input values - // - - for (int i = 1; i < ni; i++) { - // - // Count same values or send code - // - - if (s == in[i] && cs < 255) { - cs++; - } else { - sendCode(hcode[s], cs, hcode[rlc], c, lc, out); - cs = 0; - } - - s = in[i]; - } - - // - // Send remaining code - // - - sendCode(hcode[s], cs, hcode[rlc], c, lc, out); - - if (lc) *out = (c << (8 - lc)) & 0xff; - - return (out - outStart) * 8 + lc; -} - -// -// DECODING -// - -// -// In order to force the compiler to inline them, -// getChar() and getCode() are implemented as macros -// instead of "inline" functions. 
-// - -#define getChar(c, lc, in) \ - { \ - c = (c << 8) | *(unsigned char *)(in++); \ - lc += 8; \ - } - -#if 0 -#define getCode(po, rlc, c, lc, in, out, ob, oe) \ - { \ - if (po == rlc) { \ - if (lc < 8) getChar(c, lc, in); \ - \ - lc -= 8; \ - \ - unsigned char cs = (c >> lc); \ - \ - if (out + cs > oe) return false; \ - \ - /* TinyEXR issue 78 */ \ - unsigned short s = out[-1]; \ - \ - while (cs-- > 0) *out++ = s; \ - } else if (out < oe) { \ - *out++ = po; \ - } else { \ - return false; \ - } \ - } -#else -static bool getCode(int po, int rlc, long long &c, int &lc, const char *&in, - const char *in_end, unsigned short *&out, - const unsigned short *ob, const unsigned short *oe) { - (void)ob; - if (po == rlc) { - if (lc < 8) { - /* TinyEXR issue 78 */ - if ((in + 1) >= in_end) { - return false; - } - - getChar(c, lc, in); - } - - lc -= 8; - - unsigned char cs = (c >> lc); - - if (out + cs > oe) return false; - - // Bounds check for safety - // Issue 100. - if ((out - 1) < ob) return false; - unsigned short s = out[-1]; - - while (cs-- > 0) *out++ = s; - } else if (out < oe) { - *out++ = po; - } else { - return false; - } - return true; -} -#endif - -// -// Decode (uncompress) ni bits based on encoding & decoding tables: -// - -static bool hufDecode(const long long *hcode, // i : encoding table - const HufDec *hdecod, // i : decoding table - const char *in, // i : compressed input buffer - int ni, // i : input size (in bits) - int rlc, // i : run-length code - int no, // i : expected output size (in bytes) - unsigned short *out) // o: uncompressed output buffer -{ - long long c = 0; - int lc = 0; - unsigned short *outb = out; // begin - unsigned short *oe = out + no; // end - const char *ie = in + (ni + 7) / 8; // input byte size - - // - // Loop on input bytes - // - - while (in < ie) { - getChar(c, lc, in); - - // - // Access decoding table - // - - while (lc >= HUF_DECBITS) { - const HufDec pl = hdecod[(c >> (lc - HUF_DECBITS)) & HUF_DECMASK]; - - if (pl.len) 
{ - // - // Get short code - // - - lc -= pl.len; - // std::cout << "lit = " << pl.lit << std::endl; - // std::cout << "rlc = " << rlc << std::endl; - // std::cout << "c = " << c << std::endl; - // std::cout << "lc = " << lc << std::endl; - // std::cout << "in = " << in << std::endl; - // std::cout << "out = " << out << std::endl; - // std::cout << "oe = " << oe << std::endl; - if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - } else { - if (!pl.p) { - return false; - } - // invalidCode(); // wrong code - - // - // Search long code - // - - int j; - - for (j = 0; j < pl.lit; j++) { - int l = hufLength(hcode[pl.p[j]]); - - while (lc < l && in < ie) // get more bits - getChar(c, lc, in); - - if (lc >= l) { - if (hufCode(hcode[pl.p[j]]) == - ((c >> (lc - l)) & (((long long)(1) << l) - 1))) { - // - // Found : get long code - // - - lc -= l; - if (!getCode(pl.p[j], rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - break; - } - } - } - - if (j == pl.lit) { - return false; - // invalidCode(); // Not found - } - } - } - } - - // - // Get remaining (short) codes - // - - int i = (8 - ni) & 7; - c >>= i; - lc -= i; - - while (lc > 0) { - const HufDec pl = hdecod[(c << (HUF_DECBITS - lc)) & HUF_DECMASK]; - - if (pl.len) { - lc -= pl.len; - if (!getCode(pl.lit, rlc, c, lc, in, ie, out, outb, oe)) { - return false; - } - } else { - return false; - // invalidCode(); // wrong (long) code - } - } - - if (out - outb != no) { - return false; - } - // notEnoughData (); - - return true; -} - -static void countFrequencies(std::vector &freq, - const unsigned short data[/*n*/], int n) { - for (int i = 0; i < HUF_ENCSIZE; ++i) freq[i] = 0; - - for (int i = 0; i < n; ++i) ++freq[data[i]]; -} - -static void writeUInt(char buf[4], unsigned int i) { - unsigned char *b = (unsigned char *)buf; - - b[0] = i; - b[1] = i >> 8; - b[2] = i >> 16; - b[3] = i >> 24; -} - -static unsigned int readUInt(const char buf[4]) { - const unsigned char *b = (const 
unsigned char *)buf; - - return (b[0] & 0x000000ff) | ((b[1] << 8) & 0x0000ff00) | - ((b[2] << 16) & 0x00ff0000) | ((b[3] << 24) & 0xff000000); -} - -// -// EXTERNAL INTERFACE -// - -static int hufCompress(const unsigned short raw[], int nRaw, - char compressed[]) { - if (nRaw == 0) return 0; - - std::vector freq(HUF_ENCSIZE); - - countFrequencies(freq, raw, nRaw); - - int im = 0; - int iM = 0; - hufBuildEncTable(freq.data(), &im, &iM); - - char *tableStart = compressed + 20; - char *tableEnd = tableStart; - hufPackEncTable(freq.data(), im, iM, &tableEnd); - int tableLength = tableEnd - tableStart; - - char *dataStart = tableEnd; - int nBits = hufEncode(freq.data(), raw, nRaw, iM, dataStart); - int data_length = (nBits + 7) / 8; - - writeUInt(compressed, im); - writeUInt(compressed + 4, iM); - writeUInt(compressed + 8, tableLength); - writeUInt(compressed + 12, nBits); - writeUInt(compressed + 16, 0); // room for future extensions - - return dataStart + data_length - compressed; -} - -static bool hufUncompress(const char compressed[], int nCompressed, - std::vector *raw) { - if (nCompressed == 0) { - if (raw->size() != 0) return false; - - return false; - } - - int im = readUInt(compressed); - int iM = readUInt(compressed + 4); - // int tableLength = readUInt (compressed + 8); - int nBits = readUInt(compressed + 12); - - if (im < 0 || im >= HUF_ENCSIZE || iM < 0 || iM >= HUF_ENCSIZE) return false; - - const char *ptr = compressed + 20; - - // - // Fast decoder needs at least 2x64-bits of compressed data, and - // needs to be run-able on this platform. 
Otherwise, fall back - // to the original decoder - // - - // if (FastHufDecoder::enabled() && nBits > 128) - //{ - // FastHufDecoder fhd (ptr, nCompressed - (ptr - compressed), im, iM, iM); - // fhd.decode ((unsigned char*)ptr, nBits, raw, nRaw); - //} - // else - { - std::vector freq(HUF_ENCSIZE); - std::vector hdec(HUF_DECSIZE); - - hufClearDecTable(&hdec.at(0)); - - hufUnpackEncTable(&ptr, nCompressed - (ptr - compressed), im, iM, - &freq.at(0)); - - { - if (nBits > 8 * (nCompressed - (ptr - compressed))) { - return false; - } - - hufBuildDecTable(&freq.at(0), im, iM, &hdec.at(0)); - hufDecode(&freq.at(0), &hdec.at(0), ptr, nBits, iM, raw->size(), - raw->data()); - } - // catch (...) - //{ - // hufFreeDecTable (hdec); - // throw; - //} - - hufFreeDecTable(&hdec.at(0)); - } - - return true; -} - -// -// Functions to compress the range of values in the pixel data -// - -const int USHORT_RANGE = (1 << 16); -const int BITMAP_SIZE = (USHORT_RANGE >> 3); - -static void bitmapFromData(const unsigned short data[/*nData*/], int nData, - unsigned char bitmap[BITMAP_SIZE], - unsigned short &minNonZero, - unsigned short &maxNonZero) { - for (int i = 0; i < BITMAP_SIZE; ++i) bitmap[i] = 0; - - for (int i = 0; i < nData; ++i) bitmap[data[i] >> 3] |= (1 << (data[i] & 7)); - - bitmap[0] &= ~1; // zero is not explicitly stored in - // the bitmap; we assume that the - // data always contain zeroes - minNonZero = BITMAP_SIZE - 1; - maxNonZero = 0; - - for (int i = 0; i < BITMAP_SIZE; ++i) { - if (bitmap[i]) { - if (minNonZero > i) minNonZero = i; - if (maxNonZero < i) maxNonZero = i; - } - } -} - -static unsigned short forwardLutFromBitmap( - const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { - int k = 0; - - for (int i = 0; i < USHORT_RANGE; ++i) { - if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) - lut[i] = k++; - else - lut[i] = 0; - } - - return k - 1; // maximum value stored in lut[], -} // i.e. 
number of ones in bitmap minus 1 - -static unsigned short reverseLutFromBitmap( - const unsigned char bitmap[BITMAP_SIZE], unsigned short lut[USHORT_RANGE]) { - int k = 0; - - for (int i = 0; i < USHORT_RANGE; ++i) { - if ((i == 0) || (bitmap[i >> 3] & (1 << (i & 7)))) lut[k++] = i; - } - - int n = k - 1; - - while (k < USHORT_RANGE) lut[k++] = 0; - - return n; // maximum k where lut[k] is non-zero, -} // i.e. number of ones in bitmap minus 1 - -static void applyLut(const unsigned short lut[USHORT_RANGE], - unsigned short data[/*nData*/], int nData) { - for (int i = 0; i < nData; ++i) data[i] = lut[data[i]]; -} - -#ifdef __clang__ -#pragma clang diagnostic pop -#endif // __clang__ - -#ifdef _MSC_VER -#pragma warning(pop) -#endif - -static bool CompressPiz(unsigned char *outPtr, unsigned int *outSize, - const unsigned char *inPtr, size_t inSize, - const std::vector &channelInfo, - int data_width, int num_lines) { - std::vector bitmap(BITMAP_SIZE); - unsigned short minNonZero; - unsigned short maxNonZero; - -#if !MINIZ_LITTLE_ENDIAN - // @todo { PIZ compression on BigEndian architecture. } - assert(0); - return false; -#endif - - // Assume `inSize` is multiple of 2 or 4. 
- std::vector tmpBuffer(inSize / sizeof(unsigned short)); - - std::vector channelData(channelInfo.size()); - unsigned short *tmpBufferEnd = &tmpBuffer.at(0); - - for (size_t c = 0; c < channelData.size(); c++) { - PIZChannelData &cd = channelData[c]; - - cd.start = tmpBufferEnd; - cd.end = cd.start; - - cd.nx = data_width; - cd.ny = num_lines; - // cd.ys = c.channel().ySampling; - - size_t pixelSize = sizeof(int); // UINT and FLOAT - if (channelInfo[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - pixelSize = sizeof(short); - } - - cd.size = static_cast(pixelSize / sizeof(short)); - - tmpBufferEnd += cd.nx * cd.ny * cd.size; - } - - const unsigned char *ptr = inPtr; - for (int y = 0; y < num_lines; ++y) { - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - // if (modp (y, cd.ys) != 0) - // continue; - - size_t n = static_cast(cd.nx * cd.size); - memcpy(cd.end, ptr, n * sizeof(unsigned short)); - ptr += n * sizeof(unsigned short); - cd.end += n; - } - } - - bitmapFromData(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), - bitmap.data(), minNonZero, maxNonZero); - - std::vector lut(USHORT_RANGE); - unsigned short maxValue = forwardLutFromBitmap(bitmap.data(), lut.data()); - applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBuffer.size())); - - // - // Store range compression info in _outBuffer - // - - char *buf = reinterpret_cast(outPtr); - - memcpy(buf, &minNonZero, sizeof(unsigned short)); - buf += sizeof(unsigned short); - memcpy(buf, &maxNonZero, sizeof(unsigned short)); - buf += sizeof(unsigned short); - - if (minNonZero <= maxNonZero) { - memcpy(buf, reinterpret_cast(&bitmap[0] + minNonZero), - maxNonZero - minNonZero + 1); - buf += maxNonZero - minNonZero + 1; - } - - // - // Apply wavelet encoding - // - - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - for (int j = 0; j < cd.size; ++j) { - wav2Encode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, - maxValue); - } 
- } - - // - // Apply Huffman encoding; append the result to _outBuffer - // - - // length header(4byte), then huff data. Initialize length header with zero, - // then later fill it by `length`. - char *lengthPtr = buf; - int zero = 0; - memcpy(buf, &zero, sizeof(int)); - buf += sizeof(int); - - int length = - hufCompress(&tmpBuffer.at(0), static_cast(tmpBuffer.size()), buf); - memcpy(lengthPtr, &length, sizeof(int)); - - (*outSize) = static_cast( - (reinterpret_cast(buf) - outPtr) + - static_cast(length)); - - // Use uncompressed data when compressed data is larger than uncompressed. - // (Issue 40) - if ((*outSize) >= inSize) { - (*outSize) = static_cast(inSize); - memcpy(outPtr, inPtr, inSize); - } - return true; -} - -static bool DecompressPiz(unsigned char *outPtr, const unsigned char *inPtr, - size_t tmpBufSize, size_t inLen, int num_channels, - const EXRChannelInfo *channels, int data_width, - int num_lines) { - if (inLen == tmpBufSize) { - // Data is not compressed(Issue 40). - memcpy(outPtr, inPtr, inLen); - return true; - } - - std::vector bitmap(BITMAP_SIZE); - unsigned short minNonZero; - unsigned short maxNonZero; - -#if !MINIZ_LITTLE_ENDIAN - // @todo { PIZ compression on BigEndian architecture. 
} - assert(0); - return false; -#endif - - memset(bitmap.data(), 0, BITMAP_SIZE); - - const unsigned char *ptr = inPtr; - // minNonZero = *(reinterpret_cast(ptr)); - tinyexr::cpy2(&minNonZero, reinterpret_cast(ptr)); - // maxNonZero = *(reinterpret_cast(ptr + 2)); - tinyexr::cpy2(&maxNonZero, reinterpret_cast(ptr + 2)); - ptr += 4; - - if (maxNonZero >= BITMAP_SIZE) { - return false; - } - - if (minNonZero <= maxNonZero) { - memcpy(reinterpret_cast(&bitmap[0] + minNonZero), ptr, - maxNonZero - minNonZero + 1); - ptr += maxNonZero - minNonZero + 1; - } - - std::vector lut(USHORT_RANGE); - memset(lut.data(), 0, sizeof(unsigned short) * USHORT_RANGE); - unsigned short maxValue = reverseLutFromBitmap(bitmap.data(), lut.data()); - - // - // Huffman decoding - // - - int length; - - // length = *(reinterpret_cast(ptr)); - tinyexr::cpy4(&length, reinterpret_cast(ptr)); - ptr += sizeof(int); - - if (size_t((ptr - inPtr) + length) > inLen) { - return false; - } - - std::vector tmpBuffer(tmpBufSize); - hufUncompress(reinterpret_cast(ptr), length, &tmpBuffer); - - // - // Wavelet decoding - // - - std::vector channelData(static_cast(num_channels)); - - unsigned short *tmpBufferEnd = &tmpBuffer.at(0); - - for (size_t i = 0; i < static_cast(num_channels); ++i) { - const EXRChannelInfo &chan = channels[i]; - - size_t pixelSize = sizeof(int); // UINT and FLOAT - if (chan.pixel_type == TINYEXR_PIXELTYPE_HALF) { - pixelSize = sizeof(short); - } - - channelData[i].start = tmpBufferEnd; - channelData[i].end = channelData[i].start; - channelData[i].nx = data_width; - channelData[i].ny = num_lines; - // channelData[i].ys = 1; - channelData[i].size = static_cast(pixelSize / sizeof(short)); - - tmpBufferEnd += channelData[i].nx * channelData[i].ny * channelData[i].size; - } - - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - for (int j = 0; j < cd.size; ++j) { - wav2Decode(cd.start + j, cd.nx, cd.size, cd.ny, cd.nx * cd.size, - maxValue); - } 
- } - - // - // Expand the pixel data to their original range - // - - applyLut(lut.data(), &tmpBuffer.at(0), static_cast(tmpBufSize)); - - for (int y = 0; y < num_lines; y++) { - for (size_t i = 0; i < channelData.size(); ++i) { - PIZChannelData &cd = channelData[i]; - - // if (modp (y, cd.ys) != 0) - // continue; - - size_t n = static_cast(cd.nx * cd.size); - memcpy(outPtr, cd.end, static_cast(n * sizeof(unsigned short))); - outPtr += n * sizeof(unsigned short); - cd.end += n; - } - } - - return true; -} -#endif // TINYEXR_USE_PIZ - -#if TINYEXR_USE_ZFP -struct ZFPCompressionParam { - double rate; - int precision; - double tolerance; - int type; // TINYEXR_ZFP_COMPRESSIONTYPE_* - - ZFPCompressionParam() { - type = TINYEXR_ZFP_COMPRESSIONTYPE_RATE; - rate = 2.0; - precision = 0; - tolerance = 0.0f; - } -}; - -bool FindZFPCompressionParam(ZFPCompressionParam *param, - const EXRAttribute *attributes, - int num_attributes) { - bool foundType = false; - - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionType") == 0) && - (attributes[i].size == 1)) { - param->type = static_cast(attributes[i].value[0]); - - foundType = true; - } - } - - if (!foundType) { - return false; - } - - if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionRate") == 0) && - (attributes[i].size == 8)) { - param->rate = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionPrecision") == 0) && - (attributes[i].size == 4)) { - param->rate = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else if (param->type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - for (int i = 0; i < num_attributes; i++) { - if ((strcmp(attributes[i].name, "zfpCompressionTolerance") == 0) && - 
(attributes[i].size == 8)) { - param->tolerance = *(reinterpret_cast(attributes[i].value)); - return true; - } - } - } else { - assert(0); - } - - return false; -} - -// Assume pixel format is FLOAT for all channels. -static bool DecompressZfp(float *dst, int dst_width, int dst_num_lines, - int num_channels, const unsigned char *src, - unsigned long src_size, - const ZFPCompressionParam ¶m) { - size_t uncompressed_size = dst_width * dst_num_lines * num_channels; - - if (uncompressed_size == src_size) { - // Data is not compressed(Issue 40). - memcpy(dst, src, src_size); - } - - zfp_stream *zfp = NULL; - zfp_field *field = NULL; - - assert((dst_width % 4) == 0); - assert((dst_num_lines % 4) == 0); - - if ((dst_width & 3U) || (dst_num_lines & 3U)) { - return false; - } - - field = - zfp_field_2d(reinterpret_cast(const_cast(src)), - zfp_type_float, dst_width, dst_num_lines * num_channels); - zfp = zfp_stream_open(NULL); - - if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - zfp_stream_set_rate(zfp, param.rate, zfp_type_float, /* dimention */ 2, - /* write random access */ 0); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - zfp_stream_set_precision(zfp, param.precision, zfp_type_float); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); - } else { - assert(0); - } - - size_t buf_size = zfp_stream_maximum_size(zfp, field); - std::vector buf(buf_size); - memcpy(&buf.at(0), src, src_size); - - bitstream *stream = stream_open(&buf.at(0), buf_size); - zfp_stream_set_bit_stream(zfp, stream); - zfp_stream_rewind(zfp); - - size_t image_size = dst_width * dst_num_lines; - - for (int c = 0; c < num_channels; c++) { - // decompress 4x4 pixel block. 
- for (int y = 0; y < dst_num_lines; y += 4) { - for (int x = 0; x < dst_width; x += 4) { - float fblock[16]; - zfp_decode_block_float_2(zfp, fblock); - for (int j = 0; j < 4; j++) { - for (int i = 0; i < 4; i++) { - dst[c * image_size + ((y + j) * dst_width + (x + i))] = - fblock[j * 4 + i]; - } - } - } - } - } - - zfp_field_free(field); - zfp_stream_close(zfp); - stream_close(stream); - - return true; -} - -// Assume pixel format is FLOAT for all channels. -bool CompressZfp(std::vector *outBuf, unsigned int *outSize, - const float *inPtr, int width, int num_lines, int num_channels, - const ZFPCompressionParam ¶m) { - zfp_stream *zfp = NULL; - zfp_field *field = NULL; - - assert((width % 4) == 0); - assert((num_lines % 4) == 0); - - if ((width & 3U) || (num_lines & 3U)) { - return false; - } - - // create input array. - field = zfp_field_2d(reinterpret_cast(const_cast(inPtr)), - zfp_type_float, width, num_lines * num_channels); - - zfp = zfp_stream_open(NULL); - - if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_RATE) { - zfp_stream_set_rate(zfp, param.rate, zfp_type_float, 2, 0); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_PRECISION) { - zfp_stream_set_precision(zfp, param.precision, zfp_type_float); - } else if (param.type == TINYEXR_ZFP_COMPRESSIONTYPE_ACCURACY) { - zfp_stream_set_accuracy(zfp, param.tolerance, zfp_type_float); - } else { - assert(0); - } - - size_t buf_size = zfp_stream_maximum_size(zfp, field); - - outBuf->resize(buf_size); - - bitstream *stream = stream_open(&outBuf->at(0), buf_size); - zfp_stream_set_bit_stream(zfp, stream); - zfp_field_free(field); - - size_t image_size = width * num_lines; - - for (int c = 0; c < num_channels; c++) { - // compress 4x4 pixel block. 
- for (int y = 0; y < num_lines; y += 4) { - for (int x = 0; x < width; x += 4) { - float fblock[16]; - for (int j = 0; j < 4; j++) { - for (int i = 0; i < 4; i++) { - fblock[j * 4 + i] = - inPtr[c * image_size + ((y + j) * width + (x + i))]; - } - } - zfp_encode_block_float_2(zfp, fblock); - } - } - } - - zfp_stream_flush(zfp); - (*outSize) = zfp_stream_compressed_size(zfp); - - zfp_stream_close(zfp); - - return true; -} - -#endif - -// -// ----------------------------------------------------------------- -// - -// TODO(syoyo): Refactor function arguments. -static bool DecodePixelData(/* out */ unsigned char **out_images, - const int *requested_pixel_types, - const unsigned char *data_ptr, size_t data_len, - int compression_type, int line_order, int width, - int height, int x_stride, int y, int line_no, - int num_lines, size_t pixel_data_size, - size_t num_attributes, - const EXRAttribute *attributes, size_t num_channels, - const EXRChannelInfo *channels, - const std::vector &channel_offset_list) { - if (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { // PIZ -#if TINYEXR_USE_PIZ - if ((width == 0) || (num_lines == 0) || (pixel_data_size == 0)) { - // Invalid input #90 - return false; - } - - // Allocate original data size. - std::vector outBuf(static_cast( - static_cast(width * num_lines) * pixel_data_size)); - size_t tmpBufLen = outBuf.size(); - - bool ret = tinyexr::DecompressPiz( - reinterpret_cast(&outBuf.at(0)), data_ptr, tmpBufLen, - data_len, static_cast(num_channels), channels, width, num_lines); - - if (!ret) { - return false; - } - - // For PIZ_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... 
for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... - for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - FP16 hf; - - // hf.u = line_ptr[u]; - // use `cpy` to avoid unaligned memory access when compiler's - // optimization is on. - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - size_t offset = 0; - if (line_order == 0) { - offset = (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - offset = static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - image += offset; - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 
0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast(&outBuf.at( - v * pixel_data_size * static_cast(x_stride) + - channel_offset_list[c] * static_cast(x_stride))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += static_cast( - (height - 1 - (line_no + static_cast(v)))) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - } - } -#else - assert(0 && "PIZ is enabled in this build"); - return false; -#endif - - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS || - compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = static_cast(outBuf.size()); - assert(dstLen > 0); - if (!tinyexr::DecompressZip( - reinterpret_cast(&outBuf.at(0)), &dstLen, data_ptr, - static_cast(data_len))) { - return false; - } - - // For ZIP_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... 
for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... - for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * static_cast(pixel_data_size) * - static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - tinyexr::FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - size_t offset = 0; - if (line_order == 0) { - offset = (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - offset = (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - image += offset; - - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - 
static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = static_cast(outBuf.size()); - if (dstLen == 0) { - return false; - } - - if (!tinyexr::DecompressRle(reinterpret_cast(&outBuf.at(0)), - dstLen, data_ptr, - static_cast(data_len))) { - return false; - } - - // For RLE_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... 
- for (size_t c = 0; c < static_cast(num_channels); c++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned short *line_ptr = reinterpret_cast( - &outBuf.at(v * static_cast(pixel_data_size) * - static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = hf.u; - } else { // HALF -> FLOAT - tinyexr::FP32 f32 = half_to_float(hf); - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = f32.f; - } - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT); - - for (size_t v = 0; v < static_cast(num_lines); v++) { - const unsigned int *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - unsigned int val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(&val); - - unsigned int *image = - reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - 
static_cast(x_stride) + - u; - } - *image = val; - } - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - // val = line_ptr[u]; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - tinyexr::ZFPCompressionParam zfp_compression_param; - if (!FindZFPCompressionParam(&zfp_compression_param, attributes, - num_attributes)) { - assert(0); - return false; - } - - // Allocate original data size. - std::vector outBuf(static_cast(width) * - static_cast(num_lines) * - pixel_data_size); - - unsigned long dstLen = outBuf.size(); - assert(dstLen > 0); - tinyexr::DecompressZfp(reinterpret_cast(&outBuf.at(0)), width, - num_lines, num_channels, data_ptr, - static_cast(data_len), - zfp_compression_param); - - // For ZFP_COMPRESSION: - // pixel sample data for channel 0 for scanline 0 - // pixel sample data for channel 1 for scanline 0 - // pixel sample data for channel ... for scanline 0 - // pixel sample data for channel n for scanline 0 - // pixel sample data for channel 0 for scanline 1 - // pixel sample data for channel 1 for scanline 1 - // pixel sample data for channel ... for scanline 1 - // pixel sample data for channel n for scanline 1 - // ... 
- for (size_t c = 0; c < static_cast(num_channels); c++) { - assert(channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - assert(requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT); - for (size_t v = 0; v < static_cast(num_lines); v++) { - const float *line_ptr = reinterpret_cast( - &outBuf.at(v * pixel_data_size * static_cast(width) + - channel_offset_list[c] * static_cast(width))); - for (size_t u = 0; u < static_cast(width); u++) { - float val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - float *image = reinterpret_cast(out_images)[c]; - if (line_order == 0) { - image += (static_cast(line_no) + v) * - static_cast(x_stride) + - u; - } else { - image += (static_cast(height) - 1U - - (static_cast(line_no) + v)) * - static_cast(x_stride) + - u; - } - *image = val; - } - } - } else { - assert(0); - return false; - } - } -#else - (void)attributes; - (void)num_attributes; - (void)num_channels; - assert(0); - return false; -#endif - } else if (compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { - for (size_t c = 0; c < num_channels; c++) { - for (size_t v = 0; v < static_cast(num_lines); v++) { - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - const unsigned short *line_ptr = - reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - unsigned short *outLine = - reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - for (int u = 0; u < width; u++) { - tinyexr::FP16 hf; - - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - outLine[u] = hf.u; - } - } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - float *outLine = 
reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - if (reinterpret_cast(line_ptr + width) > - (data_ptr + data_len)) { - // Insufficient data size - return false; - } - - for (int u = 0; u < width; u++) { - tinyexr::FP16 hf; - - // address may not be aliged. use byte-wise copy for safety.#76 - // hf.u = line_ptr[u]; - tinyexr::cpy2(&(hf.u), line_ptr + u); - - tinyexr::swap2(reinterpret_cast(&hf.u)); - - tinyexr::FP32 f32 = half_to_float(hf); - - outLine[u] = f32.f; - } - } else { - assert(0); - return false; - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - const float *line_ptr = reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - float *outLine = reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - if (reinterpret_cast(line_ptr + width) > - (data_ptr + data_len)) { - // Insufficient data size - return false; - } - - for (int u = 0; u < width; u++) { - float val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - outLine[u] = val; - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - const unsigned int *line_ptr = reinterpret_cast( - data_ptr + v * pixel_data_size * size_t(width) + - channel_offset_list[c] * static_cast(width)); - - unsigned int *outLine = - reinterpret_cast(out_images[c]); - if (line_order == 0) { - outLine += (size_t(y) + v) * size_t(x_stride); - } else { - outLine += - (size_t(height) - 1 - (size_t(y) + v)) * size_t(x_stride); - } - - for (int u = 0; u < width; u++) { - if (reinterpret_cast(line_ptr + u) >= - (data_ptr + data_len)) { - // Corrupsed data? 
- return false; - } - - unsigned int val; - tinyexr::cpy4(&val, line_ptr + u); - - tinyexr::swap4(reinterpret_cast(&val)); - - outLine[u] = val; - } - } - } - } - } - - return true; -} - -static void DecodeTiledPixelData( - unsigned char **out_images, int *width, int *height, - const int *requested_pixel_types, const unsigned char *data_ptr, - size_t data_len, int compression_type, int line_order, int data_width, - int data_height, int tile_offset_x, int tile_offset_y, int tile_size_x, - int tile_size_y, size_t pixel_data_size, size_t num_attributes, - const EXRAttribute *attributes, size_t num_channels, - const EXRChannelInfo *channels, - const std::vector &channel_offset_list) { - assert(tile_offset_x * tile_size_x < data_width); - assert(tile_offset_y * tile_size_y < data_height); - - // Compute actual image size in a tile. - if ((tile_offset_x + 1) * tile_size_x >= data_width) { - (*width) = data_width - (tile_offset_x * tile_size_x); - } else { - (*width) = tile_size_x; - } - - if ((tile_offset_y + 1) * tile_size_y >= data_height) { - (*height) = data_height - (tile_offset_y * tile_size_y); - } else { - (*height) = tile_size_y; - } - - // Image size = tile size. 
- DecodePixelData(out_images, requested_pixel_types, data_ptr, data_len, - compression_type, line_order, (*width), tile_size_y, - /* stride */ tile_size_x, /* y */ 0, /* line_no */ 0, - (*height), pixel_data_size, num_attributes, attributes, - num_channels, channels, channel_offset_list); -} - -static bool ComputeChannelLayout(std::vector *channel_offset_list, - int *pixel_data_size, size_t *channel_offset, - int num_channels, - const EXRChannelInfo *channels) { - channel_offset_list->resize(static_cast(num_channels)); - - (*pixel_data_size) = 0; - (*channel_offset) = 0; - - for (size_t c = 0; c < static_cast(num_channels); c++) { - (*channel_offset_list)[c] = (*channel_offset); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - (*pixel_data_size) += sizeof(unsigned short); - (*channel_offset) += sizeof(unsigned short); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - (*pixel_data_size) += sizeof(float); - (*channel_offset) += sizeof(float); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - (*pixel_data_size) += sizeof(unsigned int); - (*channel_offset) += sizeof(unsigned int); - } else { - // ??? - return false; - } - } - return true; -} - -static unsigned char **AllocateImage(int num_channels, - const EXRChannelInfo *channels, - const int *requested_pixel_types, - int data_width, int data_height) { - unsigned char **images = - reinterpret_cast(static_cast( - malloc(sizeof(float *) * static_cast(num_channels)))); - - for (size_t c = 0; c < static_cast(num_channels); c++) { - size_t data_len = - static_cast(data_width) * static_cast(data_height); - if (channels[c].pixel_type == TINYEXR_PIXELTYPE_HALF) { - // pixel_data_size += sizeof(unsigned short); - // channel_offset += sizeof(unsigned short); - // Alloc internal image for half type. 
- if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - images[c] = - reinterpret_cast(static_cast( - malloc(sizeof(unsigned short) * data_len))); - } else if (requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(float) * data_len))); - } else { - assert(0); - } - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_FLOAT) { - // pixel_data_size += sizeof(float); - // channel_offset += sizeof(float); - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(float) * data_len))); - } else if (channels[c].pixel_type == TINYEXR_PIXELTYPE_UINT) { - // pixel_data_size += sizeof(unsigned int); - // channel_offset += sizeof(unsigned int); - images[c] = reinterpret_cast( - static_cast(malloc(sizeof(unsigned int) * data_len))); - } else { - assert(0); - } - } - - return images; -} - -static int ParseEXRHeader(HeaderInfo *info, bool *empty_header, - const EXRVersion *version, std::string *err, - const unsigned char *buf, size_t size) { - const char *marker = reinterpret_cast(&buf[0]); - - if (empty_header) { - (*empty_header) = false; - } - - if (version->multipart) { - if (size > 0 && marker[0] == '\0') { - // End of header list. 
- if (empty_header) { - (*empty_header) = true; - } - return TINYEXR_SUCCESS; - } - } - - // According to the spec, the header of every OpenEXR file must contain at - // least the following attributes: - // - // channels chlist - // compression compression - // dataWindow box2i - // displayWindow box2i - // lineOrder lineOrder - // pixelAspectRatio float - // screenWindowCenter v2f - // screenWindowWidth float - bool has_channels = false; - bool has_compression = false; - bool has_data_window = false; - bool has_display_window = false; - bool has_line_order = false; - bool has_pixel_aspect_ratio = false; - bool has_screen_window_center = false; - bool has_screen_window_width = false; - - info->data_window[0] = 0; - info->data_window[1] = 0; - info->data_window[2] = 0; - info->data_window[3] = 0; - info->line_order = 0; // @fixme - info->display_window[0] = 0; - info->display_window[1] = 0; - info->display_window[2] = 0; - info->display_window[3] = 0; - info->screen_window_center[0] = 0.0f; - info->screen_window_center[1] = 0.0f; - info->screen_window_width = -1.0f; - info->pixel_aspect_ratio = -1.0f; - - info->tile_size_x = -1; - info->tile_size_y = -1; - info->tile_level_mode = -1; - info->tile_rounding_mode = -1; - - info->attributes.clear(); - - // Read attributes - size_t orig_size = size; - for (size_t nattr = 0; nattr < TINYEXR_MAX_HEADER_ATTRIBUTES; nattr++) { - if (0 == size) { - if (err) { - (*err) += "Insufficient data size for attributes.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } else if (marker[0] == '\0') { - size--; - break; - } - - std::string attr_name; - std::string attr_type; - std::vector data; - size_t marker_size; - if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, - marker, size)) { - if (err) { - (*err) += "Failed to read attribute.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - marker += marker_size; - size -= marker_size; - - if (version->tiled && attr_name.compare("tiles") == 0) { - unsigned int x_size, 
y_size; - unsigned char tile_mode; - assert(data.size() == 9); - memcpy(&x_size, &data.at(0), sizeof(int)); - memcpy(&y_size, &data.at(4), sizeof(int)); - tile_mode = data[8]; - tinyexr::swap4(&x_size); - tinyexr::swap4(&y_size); - - info->tile_size_x = static_cast(x_size); - info->tile_size_y = static_cast(y_size); - - // mode = levelMode + roundingMode * 16 - info->tile_level_mode = tile_mode & 0x3; - info->tile_rounding_mode = (tile_mode >> 4) & 0x1; - - } else if (attr_name.compare("compression") == 0) { - bool ok = false; - if (data[0] < TINYEXR_COMPRESSIONTYPE_PIZ) { - ok = true; - } - - if (data[0] == TINYEXR_COMPRESSIONTYPE_PIZ) { -#if TINYEXR_USE_PIZ - ok = true; -#else - if (err) { - (*err) = "PIZ compression is not supported."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; -#endif - } - - if (data[0] == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - ok = true; -#else - if (err) { - (*err) = "ZFP compression is not supported."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; -#endif - } - - if (!ok) { - if (err) { - (*err) = "Unknown compression type."; - } - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - info->compression_type = static_cast(data[0]); - has_compression = true; - - } else if (attr_name.compare("channels") == 0) { - // name: zero-terminated string, from 1 to 255 bytes long - // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 - // pLinear: unsigned char, possible values are 0 and 1 - // reserved: three chars, should be zero - // xSampling: int - // ySampling: int - - if (!ReadChannelInfo(info->channels, data)) { - if (err) { - (*err) += "Failed to parse channel info.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (info->channels.size() < 1) { - if (err) { - (*err) += "# of channels is zero.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - has_channels = true; - - } else if (attr_name.compare("dataWindow") == 0) { - if (data.size() >= 16) { - memcpy(&info->data_window[0], &data.at(0), sizeof(int)); - 
memcpy(&info->data_window[1], &data.at(4), sizeof(int)); - memcpy(&info->data_window[2], &data.at(8), sizeof(int)); - memcpy(&info->data_window[3], &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&info->data_window[0])); - tinyexr::swap4(reinterpret_cast(&info->data_window[1])); - tinyexr::swap4(reinterpret_cast(&info->data_window[2])); - tinyexr::swap4(reinterpret_cast(&info->data_window[3])); - has_data_window = true; - } - } else if (attr_name.compare("displayWindow") == 0) { - if (data.size() >= 16) { - memcpy(&info->display_window[0], &data.at(0), sizeof(int)); - memcpy(&info->display_window[1], &data.at(4), sizeof(int)); - memcpy(&info->display_window[2], &data.at(8), sizeof(int)); - memcpy(&info->display_window[3], &data.at(12), sizeof(int)); - tinyexr::swap4( - reinterpret_cast(&info->display_window[0])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[1])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[2])); - tinyexr::swap4( - reinterpret_cast(&info->display_window[3])); - - has_display_window = true; - } - } else if (attr_name.compare("lineOrder") == 0) { - if (data.size() >= 1) { - info->line_order = static_cast(data[0]); - has_line_order = true; - } - } else if (attr_name.compare("pixelAspectRatio") == 0) { - if (data.size() >= sizeof(float)) { - memcpy(&info->pixel_aspect_ratio, &data.at(0), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->pixel_aspect_ratio)); - has_pixel_aspect_ratio = true; - } - } else if (attr_name.compare("screenWindowCenter") == 0) { - if (data.size() >= 8) { - memcpy(&info->screen_window_center[0], &data.at(0), sizeof(float)); - memcpy(&info->screen_window_center[1], &data.at(4), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_center[0])); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_center[1])); - has_screen_window_center = true; - } - } else if (attr_name.compare("screenWindowWidth") == 0) { - if (data.size() >= sizeof(float)) { - 
memcpy(&info->screen_window_width, &data.at(0), sizeof(float)); - tinyexr::swap4( - reinterpret_cast(&info->screen_window_width)); - - has_screen_window_width = true; - } - } else if (attr_name.compare("chunkCount") == 0) { - if (data.size() >= sizeof(int)) { - memcpy(&info->chunk_count, &data.at(0), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&info->chunk_count)); - } - } else { - // Custom attribute(up to TINYEXR_MAX_CUSTOM_ATTRIBUTES) - if (info->attributes.size() < TINYEXR_MAX_CUSTOM_ATTRIBUTES) { - EXRAttribute attrib; -#ifdef _MSC_VER - strncpy_s(attrib.name, attr_name.c_str(), 255); - strncpy_s(attrib.type, attr_type.c_str(), 255); -#else - strncpy(attrib.name, attr_name.c_str(), 255); - strncpy(attrib.type, attr_type.c_str(), 255); -#endif - attrib.name[255] = '\0'; - attrib.type[255] = '\0'; - attrib.size = static_cast(data.size()); - attrib.value = static_cast(malloc(data.size())); - memcpy(reinterpret_cast(attrib.value), &data.at(0), - data.size()); - info->attributes.push_back(attrib); - } - } - } - - // Check if required attributes exist - { - std::stringstream ss_err; - - if (!has_compression) { - ss_err << "\"compression\" attribute not found in the header." - << std::endl; - } - - if (!has_channels) { - ss_err << "\"channels\" attribute not found in the header." << std::endl; - } - - if (!has_line_order) { - ss_err << "\"lineOrder\" attribute not found in the header." << std::endl; - } - - if (!has_display_window) { - ss_err << "\"displayWindow\" attribute not found in the header." - << std::endl; - } - - if (!has_data_window) { - ss_err << "\"dataWindow\" attribute not found in the header or invalid." - << std::endl; - } - - if (!has_pixel_aspect_ratio) { - ss_err << "\"pixelAspectRatio\" attribute not found in the header." - << std::endl; - } - - if (!has_screen_window_width) { - ss_err << "\"screenWindowWidth\" attribute not found in the header." 
- << std::endl; - } - - if (!has_screen_window_center) { - ss_err << "\"screenWindowCenter\" attribute not found in the header." - << std::endl; - } - - if (!(ss_err.str().empty())) { - if (err) { - (*err) += ss_err.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - } - - info->header_len = static_cast(orig_size - size); - - return TINYEXR_SUCCESS; -} - -// C++ HeaderInfo to C EXRHeader conversion. -static void ConvertHeader(EXRHeader *exr_header, const HeaderInfo &info) { - exr_header->pixel_aspect_ratio = info.pixel_aspect_ratio; - exr_header->screen_window_center[0] = info.screen_window_center[0]; - exr_header->screen_window_center[1] = info.screen_window_center[1]; - exr_header->screen_window_width = info.screen_window_width; - exr_header->chunk_count = info.chunk_count; - exr_header->display_window[0] = info.display_window[0]; - exr_header->display_window[1] = info.display_window[1]; - exr_header->display_window[2] = info.display_window[2]; - exr_header->display_window[3] = info.display_window[3]; - exr_header->data_window[0] = info.data_window[0]; - exr_header->data_window[1] = info.data_window[1]; - exr_header->data_window[2] = info.data_window[2]; - exr_header->data_window[3] = info.data_window[3]; - exr_header->line_order = info.line_order; - exr_header->compression_type = info.compression_type; - - exr_header->tile_size_x = info.tile_size_x; - exr_header->tile_size_y = info.tile_size_y; - exr_header->tile_level_mode = info.tile_level_mode; - exr_header->tile_rounding_mode = info.tile_rounding_mode; - - exr_header->num_channels = static_cast(info.channels.size()); - - exr_header->channels = static_cast(malloc( - sizeof(EXRChannelInfo) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { -#ifdef _MSC_VER - strncpy_s(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); -#else - strncpy(exr_header->channels[c].name, info.channels[c].name.c_str(), 255); -#endif - // manually add 
'\0' for safety. - exr_header->channels[c].name[255] = '\0'; - - exr_header->channels[c].pixel_type = info.channels[c].pixel_type; - exr_header->channels[c].p_linear = info.channels[c].p_linear; - exr_header->channels[c].x_sampling = info.channels[c].x_sampling; - exr_header->channels[c].y_sampling = info.channels[c].y_sampling; - } - - exr_header->pixel_types = static_cast( - malloc(sizeof(int) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - exr_header->pixel_types[c] = info.channels[c].pixel_type; - } - - // Initially fill with values of `pixel_types` - exr_header->requested_pixel_types = static_cast( - malloc(sizeof(int) * static_cast(exr_header->num_channels))); - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - exr_header->requested_pixel_types[c] = info.channels[c].pixel_type; - } - - exr_header->num_custom_attributes = static_cast(info.attributes.size()); - - if (exr_header->num_custom_attributes > 0) { - // TODO(syoyo): Report warning when # of attributes exceeds - // `TINYEXR_MAX_CUSTOM_ATTRIBUTES` - if (exr_header->num_custom_attributes > TINYEXR_MAX_CUSTOM_ATTRIBUTES) { - exr_header->num_custom_attributes = TINYEXR_MAX_CUSTOM_ATTRIBUTES; - } - - exr_header->custom_attributes = static_cast(malloc( - sizeof(EXRAttribute) * size_t(exr_header->num_custom_attributes))); - - for (size_t i = 0; i < info.attributes.size(); i++) { - memcpy(exr_header->custom_attributes[i].name, info.attributes[i].name, - 256); - memcpy(exr_header->custom_attributes[i].type, info.attributes[i].type, - 256); - exr_header->custom_attributes[i].size = info.attributes[i].size; - // Just copy poiner - exr_header->custom_attributes[i].value = info.attributes[i].value; - } - - } else { - exr_header->custom_attributes = NULL; - } - - exr_header->header_len = info.header_len; -} - -static int DecodeChunk(EXRImage *exr_image, const EXRHeader *exr_header, - const std::vector &offsets, - const unsigned 
char *head, const size_t size, - std::string *err) { - int num_channels = exr_header->num_channels; - - int num_scanline_blocks = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanline_blocks = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanline_blocks = 16; - } - - int data_width = exr_header->data_window[2] - exr_header->data_window[0] + 1; - int data_height = exr_header->data_window[3] - exr_header->data_window[1] + 1; - - if ((data_width < 0) || (data_height < 0)) { - if (err) { - std::stringstream ss; - ss << "Invalid data width or data height: " << data_width << ", " - << data_height << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Do not allow too large data_width and data_height. header invalid? - { - const int threshold = 1024 * 8192; // heuristics - if ((data_width > threshold) || (data_height > threshold)) { - if (err) { - std::stringstream ss; - ss << "data_with or data_height too large. data_width: " << data_width - << ", " - << "data_height = " << data_height << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - } - - size_t num_blocks = offsets.size(); - - std::vector channel_offset_list; - int pixel_data_size = 0; - size_t channel_offset = 0; - if (!tinyexr::ComputeChannelLayout(&channel_offset_list, &pixel_data_size, - &channel_offset, num_channels, - exr_header->channels)) { - if (err) { - (*err) += "Failed to compute channel layout.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - bool invalid_data = false; // TODO(LTE): Use atomic lock for MT safety. 
- - if (exr_header->tiled) { - // value check - if (exr_header->tile_size_x < 0) { - if (err) { - std::stringstream ss; - ss << "Invalid tile size x : " << exr_header->tile_size_x << "\n"; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - - if (exr_header->tile_size_y < 0) { - if (err) { - std::stringstream ss; - ss << "Invalid tile size y : " << exr_header->tile_size_y << "\n"; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_HEADER; - } - - size_t num_tiles = offsets.size(); // = # of blocks - - exr_image->tiles = static_cast( - calloc(sizeof(EXRTile), static_cast(num_tiles))); - - for (size_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) { - // Allocate memory for each tile. - exr_image->tiles[tile_idx].images = tinyexr::AllocateImage( - num_channels, exr_header->channels, exr_header->requested_pixel_types, - exr_header->tile_size_x, exr_header->tile_size_y); - - // 16 byte: tile coordinates - // 4 byte : data size - // ~ : data(uncompressed or compressed) - if (offsets[tile_idx] + sizeof(int) * 5 > size) { - if (err) { - (*err) += "Insufficient data size.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - size_t data_size = size_t(size - (offsets[tile_idx] + sizeof(int) * 5)); - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[tile_idx]); - - int tile_coordinates[4]; - memcpy(tile_coordinates, data_ptr, sizeof(int) * 4); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[0])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[1])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[2])); - tinyexr::swap4(reinterpret_cast(&tile_coordinates[3])); - - // @todo{ LoD } - if (tile_coordinates[2] != 0) { - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } - if (tile_coordinates[3] != 0) { - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } - - int data_len; - memcpy(&data_len, data_ptr + 16, - sizeof(int)); // 16 = sizeof(tile_coordinates) - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (data_len < 4 || 
size_t(data_len) > data_size) { - if (err) { - (*err) += "Insufficient data length.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Move to data addr: 20 = 16 + 4; - data_ptr += 20; - - tinyexr::DecodeTiledPixelData( - exr_image->tiles[tile_idx].images, - &(exr_image->tiles[tile_idx].width), - &(exr_image->tiles[tile_idx].height), - exr_header->requested_pixel_types, data_ptr, - static_cast(data_len), exr_header->compression_type, - exr_header->line_order, data_width, data_height, tile_coordinates[0], - tile_coordinates[1], exr_header->tile_size_x, exr_header->tile_size_y, - static_cast(pixel_data_size), - static_cast(exr_header->num_custom_attributes), - exr_header->custom_attributes, - static_cast(exr_header->num_channels), exr_header->channels, - channel_offset_list); - - exr_image->tiles[tile_idx].offset_x = tile_coordinates[0]; - exr_image->tiles[tile_idx].offset_y = tile_coordinates[1]; - exr_image->tiles[tile_idx].level_x = tile_coordinates[2]; - exr_image->tiles[tile_idx].level_y = tile_coordinates[3]; - - exr_image->num_tiles = static_cast(num_tiles); - } - } else { // scanline format - - // Don't allow too large image(256GB * pixel_data_size or more). Workaround - // for #104. - size_t total_data_len = - size_t(data_width) * size_t(data_height) * size_t(num_channels); - const bool total_data_len_overflown = sizeof(void*) == 8 ? 
(total_data_len >= 0x4000000000) : false; - if ((total_data_len == 0) || total_data_len_overflown ) { - if (err) { - std::stringstream ss; - ss << "Image data size is zero or too large: width = " << data_width - << ", height = " << data_height << ", channels = " << num_channels - << std::endl; - (*err) += ss.str(); - } - return TINYEXR_ERROR_INVALID_DATA; - } - - exr_image->images = tinyexr::AllocateImage( - num_channels, exr_header->channels, exr_header->requested_pixel_types, - data_width, data_height); - -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int y = 0; y < static_cast(num_blocks); y++) { - size_t y_idx = static_cast(y); - - if (offsets[y_idx] + sizeof(int) * 2 > size) { - invalid_data = true; - } else { - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(uncompressed or compressed) - size_t data_size = size_t(size - (offsets[y_idx] + sizeof(int) * 2)); - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[y_idx]); - - int line_no; - memcpy(&line_no, data_ptr, sizeof(int)); - int data_len; - memcpy(&data_len, data_ptr + 4, sizeof(int)); - tinyexr::swap4(reinterpret_cast(&line_no)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - if (size_t(data_len) > data_size) { - invalid_data = true; - - } else if ((line_no > (2 << 20)) || (line_no < -(2 << 20))) { - // Too large value. Assume this is invalid - // 2**20 = 1048576 = heuristic value. - invalid_data = true; - } else if (data_len == 0) { - // TODO(syoyo): May be ok to raise the threshold for example `data_len - // < 4` - invalid_data = true; - } else { - // line_no may be negative. 
- int end_line_no = (std::min)(line_no + num_scanline_blocks, - (exr_header->data_window[3] + 1)); - - int num_lines = end_line_no - line_no; - - if (num_lines <= 0) { - invalid_data = true; - } else { - // Move to data addr: 8 = 4 + 4; - data_ptr += 8; - - // Adjust line_no with data_window.bmin.y - - // overflow check - tinyexr_int64 lno = static_cast(line_no) - static_cast(exr_header->data_window[1]); - if (lno > std::numeric_limits::max()) { - line_no = -1; // invalid - } else if (lno < -std::numeric_limits::max()) { - line_no = -1; // invalid - } else { - line_no -= exr_header->data_window[1]; - } - - if (line_no < 0) { - invalid_data = true; - } else { - if (!tinyexr::DecodePixelData( - exr_image->images, exr_header->requested_pixel_types, - data_ptr, static_cast(data_len), - exr_header->compression_type, exr_header->line_order, - data_width, data_height, data_width, y, line_no, - num_lines, static_cast(pixel_data_size), - static_cast(exr_header->num_custom_attributes), - exr_header->custom_attributes, - static_cast(exr_header->num_channels), - exr_header->channels, channel_offset_list)) { - invalid_data = true; - } - } - } - } - } - } // omp parallel - } - - if (invalid_data) { - if (err) { - std::stringstream ss; - (*err) += "Invalid data found when decoding pixels.\n"; - } - return TINYEXR_ERROR_INVALID_DATA; - } - - // Overwrite `pixel_type` with `requested_pixel_type`. 
- { - for (int c = 0; c < exr_header->num_channels; c++) { - exr_header->pixel_types[c] = exr_header->requested_pixel_types[c]; - } - } - - { - exr_image->num_channels = num_channels; - - exr_image->width = data_width; - exr_image->height = data_height; - } - - return TINYEXR_SUCCESS; -} - -static bool ReconstructLineOffsets( - std::vector *offsets, size_t n, - const unsigned char *head, const unsigned char *marker, const size_t size) { - assert(head < marker); - assert(offsets->size() == n); - - for (size_t i = 0; i < n; i++) { - size_t offset = static_cast(marker - head); - // Offset should not exceed whole EXR file/data size. - if ((offset + sizeof(tinyexr::tinyexr_uint64)) >= size) { - return false; - } - - int y; - unsigned int data_len; - - memcpy(&y, marker, sizeof(int)); - memcpy(&data_len, marker + 4, sizeof(unsigned int)); - - if (data_len >= size) { - return false; - } - - tinyexr::swap4(reinterpret_cast(&y)); - tinyexr::swap4(reinterpret_cast(&data_len)); - - (*offsets)[i] = offset; - - marker += data_len + 8; // 8 = 4 bytes(y) + 4 bytes(data_len) - } - - return true; -} - -static int DecodeEXRImage(EXRImage *exr_image, const EXRHeader *exr_header, - const unsigned char *head, - const unsigned char *marker, const size_t size, - const char **err) { - if (exr_image == NULL || exr_header == NULL || head == NULL || - marker == NULL || (size <= tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage("Invalid argument for DecodeEXRImage().", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - int num_scanline_blocks = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanline_blocks = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanline_blocks = 16; - } - - int data_width = exr_header->data_window[2] - exr_header->data_window[0]; - if (data_width >= std::numeric_limits::max()) { - // 
Issue 63 - tinyexr::SetErrorMessage("Invalid data width value", err); - return TINYEXR_ERROR_INVALID_DATA; - } - data_width++; - - int data_height = exr_header->data_window[3] - exr_header->data_window[1]; - if (data_height >= std::numeric_limits::max()) { - tinyexr::SetErrorMessage("Invalid data height value", err); - return TINYEXR_ERROR_INVALID_DATA; - } - data_height++; - - if ((data_width < 0) || (data_height < 0)) { - tinyexr::SetErrorMessage("data width or data height is negative.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - // Do not allow too large data_width and data_height. header invalid? - { - const int threshold = 1024 * 8192; // heuristics - if (data_width > threshold) { - tinyexr::SetErrorMessage("data width too large.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - if (data_height > threshold) { - tinyexr::SetErrorMessage("data height too large.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - - // Read offset tables. - size_t num_blocks = 0; - - if (exr_header->chunk_count > 0) { - // Use `chunkCount` attribute. 
- num_blocks = static_cast(exr_header->chunk_count); - } else if (exr_header->tiled) { - // @todo { LoD } - size_t num_x_tiles = static_cast(data_width) / - static_cast(exr_header->tile_size_x); - if (num_x_tiles * static_cast(exr_header->tile_size_x) < - static_cast(data_width)) { - num_x_tiles++; - } - size_t num_y_tiles = static_cast(data_height) / - static_cast(exr_header->tile_size_y); - if (num_y_tiles * static_cast(exr_header->tile_size_y) < - static_cast(data_height)) { - num_y_tiles++; - } - - num_blocks = num_x_tiles * num_y_tiles; - } else { - num_blocks = static_cast(data_height) / - static_cast(num_scanline_blocks); - if (num_blocks * static_cast(num_scanline_blocks) < - static_cast(data_height)) { - num_blocks++; - } - } - - std::vector offsets(num_blocks); - - for (size_t y = 0; y < num_blocks; y++) { - tinyexr::tinyexr_uint64 offset; - // Issue #81 - if ((marker + sizeof(tinyexr_uint64)) >= (head + size)) { - tinyexr::SetErrorMessage("Insufficient data size in offset table.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - memcpy(&offset, marker, sizeof(tinyexr::tinyexr_uint64)); - tinyexr::swap8(&offset); - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset value in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - marker += sizeof(tinyexr::tinyexr_uint64); // = 8 - offsets[y] = offset; - } - - // If line offsets are invalid, we try to reconstruct it. - // See OpenEXR/IlmImf/ImfScanLineInputFile.cpp::readLineOffsets() for details. - for (size_t y = 0; y < num_blocks; y++) { - if (offsets[y] <= 0) { - // TODO(syoyo) Report as warning? - // if (err) { - // stringstream ss; - // ss << "Incomplete lineOffsets." 
<< std::endl; - // (*err) += ss.str(); - //} - bool ret = - ReconstructLineOffsets(&offsets, num_blocks, head, marker, size); - if (ret) { - // OK - break; - } else { - tinyexr::SetErrorMessage( - "Cannot reconstruct lineOffset table in DecodeEXRImage.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - } - - { - std::string e; - int ret = DecodeChunk(exr_image, exr_header, offsets, head, size, &e); - - if (ret != TINYEXR_SUCCESS) { - if (!e.empty()) { - tinyexr::SetErrorMessage(e, err); - } - - // release memory(if exists) - if ((exr_header->num_channels > 0) && exr_image && exr_image->images) { - for (size_t c = 0; c < size_t(exr_header->num_channels); c++) { - if (exr_image->images[c]) { - free(exr_image->images[c]); - exr_image->images[c] = NULL; - } - } - free(exr_image->images); - exr_image->images = NULL; - } - } - - return ret; - } -} - -} // namespace tinyexr - -int LoadEXR(float **out_rgba, int *width, int *height, const char *filename, - const char **err) { - if (out_rgba == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXR()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRVersion exr_version; - EXRImage exr_image; - EXRHeader exr_header; - InitEXRHeader(&exr_header); - InitEXRImage(&exr_image); - - { - int ret = ParseEXRVersionFromFile(&exr_version, filename); - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage("Invalid EXR header.", err); - return ret; - } - - if (exr_version.multipart || exr_version.non_image) { - tinyexr::SetErrorMessage( - "Loading multipart or DeepImage is not supported in LoadEXR() API", - err); - return TINYEXR_ERROR_INVALID_DATA; // @fixme. - } - } - - { - int ret = ParseEXRHeaderFromFile(&exr_header, &exr_version, filename, err); - if (ret != TINYEXR_SUCCESS) { - FreeEXRHeader(&exr_header); - return ret; - } - } - - // Read HALF channel as FLOAT. 
- for (int i = 0; i < exr_header.num_channels; i++) { - if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { - exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; - } - } - - { - int ret = LoadEXRImageFromFile(&exr_image, &exr_header, filename, err); - if (ret != TINYEXR_SUCCESS) { - FreeEXRHeader(&exr_header); - return ret; - } - } - - // RGBA - int idxR = -1; - int idxG = -1; - int idxB = -1; - int idxA = -1; - for (int c = 0; c < exr_header.num_channels; c++) { - if (strcmp(exr_header.channels[c].name, "R") == 0) { - idxR = c; - } else if (strcmp(exr_header.channels[c].name, "G") == 0) { - idxG = c; - } else if (strcmp(exr_header.channels[c].name, "B") == 0) { - idxB = c; - } else if (strcmp(exr_header.channels[c].name, "A") == 0) { - idxA = c; - } - } - - if (exr_header.num_channels == 1) { - // Grayscale channel only. - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[0][srcIdx]; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - const float val = reinterpret_cast(exr_image.images)[0][i]; - (*out_rgba)[4 * i + 0] = val; - (*out_rgba)[4 * i + 1] = val; - (*out_rgba)[4 * i + 2] = val; - (*out_rgba)[4 * i + 3] = val; - } - } - } else { - // Assume RGB(A) - - if (idxR == -1) { - tinyexr::SetErrorMessage("R channel not found", err); - - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxG == -1) { - tinyexr::SetErrorMessage("G channel not found", err); - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxB == -1) { - tinyexr::SetErrorMessage("B channel not found", err); - // @todo { free exr_image } - FreeEXRHeader(&exr_header); - return TINYEXR_ERROR_INVALID_DATA; - } - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[idxR][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[idxG][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[idxB][srcIdx]; - if (idxA != -1) { - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[idxA][srcIdx]; - } else { - (*out_rgba)[4 * idx + 3] = 1.0; - } - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - (*out_rgba)[4 * i + 0] = - reinterpret_cast(exr_image.images)[idxR][i]; - (*out_rgba)[4 * i + 1] = - reinterpret_cast(exr_image.images)[idxG][i]; - (*out_rgba)[4 * i + 2] = - reinterpret_cast(exr_image.images)[idxB][i]; - if (idxA != -1) { - (*out_rgba)[4 * i + 3] = - reinterpret_cast(exr_image.images)[idxA][i]; - } else { - (*out_rgba)[4 * i + 3] = 1.0; - } - } - } - } - - (*width) = exr_image.width; - (*height) = exr_image.height; - - FreeEXRHeader(&exr_header); - FreeEXRImage(&exr_image); - - return TINYEXR_SUCCESS; -} - -int IsEXR(const char *filename) { - EXRVersion exr_version; - - int ret = ParseEXRVersionFromFile(&exr_version, filename); - if (ret != TINYEXR_SUCCESS) { - return TINYEXR_ERROR_INVALID_HEADER; - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRHeaderFromMemory(EXRHeader *exr_header, const EXRVersion *version, - const unsigned char *memory, size_t size, - const char **err) { - if (memory == NULL || exr_header == NULL) { - tinyexr::SetErrorMessage( - "Invalid argument. 
`memory` or `exr_header` argument is null in " - "ParseEXRHeaderFromMemory()", - err); - - // Invalid argument - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - tinyexr::SetErrorMessage("Insufficient header/data size.\n", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory + tinyexr::kEXRVersionSize; - size_t marker_size = size - tinyexr::kEXRVersionSize; - - tinyexr::HeaderInfo info; - info.clear(); - - std::string err_str; - int ret = ParseEXRHeader(&info, NULL, version, &err_str, marker, marker_size); - - if (ret != TINYEXR_SUCCESS) { - if (err && !err_str.empty()) { - tinyexr::SetErrorMessage(err_str, err); - } - } - - ConvertHeader(exr_header, info); - - // transfoer `tiled` from version. - exr_header->tiled = version->tiled; - - return ret; -} - -int LoadEXRFromMemory(float **out_rgba, int *width, int *height, - const unsigned char *memory, size_t size, - const char **err) { - if (out_rgba == NULL || memory == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRFromMemory", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRVersion exr_version; - EXRImage exr_image; - EXRHeader exr_header; - - InitEXRHeader(&exr_header); - - int ret = ParseEXRVersionFromMemory(&exr_version, memory, size); - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage("Failed to parse EXR version", err); - return ret; - } - - ret = ParseEXRHeaderFromMemory(&exr_header, &exr_version, memory, size, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - // Read HALF channel as FLOAT. 
- for (int i = 0; i < exr_header.num_channels; i++) { - if (exr_header.pixel_types[i] == TINYEXR_PIXELTYPE_HALF) { - exr_header.requested_pixel_types[i] = TINYEXR_PIXELTYPE_FLOAT; - } - } - - InitEXRImage(&exr_image); - ret = LoadEXRImageFromMemory(&exr_image, &exr_header, memory, size, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - // RGBA - int idxR = -1; - int idxG = -1; - int idxB = -1; - int idxA = -1; - for (int c = 0; c < exr_header.num_channels; c++) { - if (strcmp(exr_header.channels[c].name, "R") == 0) { - idxR = c; - } else if (strcmp(exr_header.channels[c].name, "G") == 0) { - idxG = c; - } else if (strcmp(exr_header.channels[c].name, "B") == 0) { - idxB = c; - } else if (strcmp(exr_header.channels[c].name, "A") == 0) { - idxA = c; - } - } - - // TODO(syoyo): Refactor removing same code as used in LoadEXR(). - if (exr_header.num_channels == 1) { - // Grayscale channel only. - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) { - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[0][srcIdx]; - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[0][srcIdx]; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - const float val = reinterpret_cast(exr_image.images)[0][i]; - (*out_rgba)[4 * i + 0] = val; - (*out_rgba)[4 * i + 1] = val; - (*out_rgba)[4 * i + 2] = val; - (*out_rgba)[4 * i + 3] = val; - } - } - - } else { - // TODO(syoyo): Support non RGBA image. - - if (idxR == -1) { - tinyexr::SetErrorMessage("R channel not found", err); - - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxG == -1) { - tinyexr::SetErrorMessage("G channel not found", err); - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - if (idxB == -1) { - tinyexr::SetErrorMessage("B channel not found", err); - // @todo { free exr_image } - return TINYEXR_ERROR_INVALID_DATA; - } - - (*out_rgba) = reinterpret_cast( - malloc(4 * sizeof(float) * static_cast(exr_image.width) * - static_cast(exr_image.height))); - - if (exr_header.tiled) { - for (int it = 0; it < exr_image.num_tiles; it++) { - for (int j = 0; j < exr_header.tile_size_y; j++) - for (int i = 0; i < exr_header.tile_size_x; i++) { - const int ii = - exr_image.tiles[it].offset_x * exr_header.tile_size_x + i; - const int jj = - exr_image.tiles[it].offset_y * exr_header.tile_size_y + j; - const int idx = ii + jj * exr_image.width; - - // out of region check. 
- if (ii >= exr_image.width) { - continue; - } - if (jj >= exr_image.height) { - continue; - } - const int srcIdx = i + j * exr_header.tile_size_x; - unsigned char **src = exr_image.tiles[it].images; - (*out_rgba)[4 * idx + 0] = - reinterpret_cast(src)[idxR][srcIdx]; - (*out_rgba)[4 * idx + 1] = - reinterpret_cast(src)[idxG][srcIdx]; - (*out_rgba)[4 * idx + 2] = - reinterpret_cast(src)[idxB][srcIdx]; - if (idxA != -1) { - (*out_rgba)[4 * idx + 3] = - reinterpret_cast(src)[idxA][srcIdx]; - } else { - (*out_rgba)[4 * idx + 3] = 1.0; - } - } - } - } else { - for (int i = 0; i < exr_image.width * exr_image.height; i++) { - (*out_rgba)[4 * i + 0] = - reinterpret_cast(exr_image.images)[idxR][i]; - (*out_rgba)[4 * i + 1] = - reinterpret_cast(exr_image.images)[idxG][i]; - (*out_rgba)[4 * i + 2] = - reinterpret_cast(exr_image.images)[idxB][i]; - if (idxA != -1) { - (*out_rgba)[4 * i + 3] = - reinterpret_cast(exr_image.images)[idxA][i]; - } else { - (*out_rgba)[4 * i + 3] = 1.0; - } - } - } - } - - (*width) = exr_image.width; - (*height) = exr_image.height; - - FreeEXRHeader(&exr_header); - FreeEXRImage(&exr_image); - - return TINYEXR_SUCCESS; -} - -int LoadEXRImageFromFile(EXRImage *exr_image, const EXRHeader *exr_header, - const char *filename, const char **err) { - if (exr_image == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (filesize < 16) { - tinyexr::SetErrorMessage("File size too short " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - - std::vector buf(filesize); // @todo { 
use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - (void)ret; - } - - return LoadEXRImageFromMemory(exr_image, exr_header, &buf.at(0), filesize, - err); -} - -int LoadEXRImageFromMemory(EXRImage *exr_image, const EXRHeader *exr_header, - const unsigned char *memory, const size_t size, - const char **err) { - if (exr_image == NULL || memory == NULL || - (size < tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage("Invalid argument for LoadEXRImageFromMemory", - err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (exr_header->header_len == 0) { - tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - const unsigned char *head = memory; - const unsigned char *marker = reinterpret_cast( - memory + exr_header->header_len + - 8); // +8 for magic number + version header. - return tinyexr::DecodeEXRImage(exr_image, exr_header, head, marker, size, - err); -} - -size_t SaveEXRImageToMemory(const EXRImage *exr_image, - const EXRHeader *exr_header, - unsigned char **memory_out, const char **err) { - if (exr_image == NULL || memory_out == NULL || - exr_header->compression_type < 0) { - tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToMemory", err); - return 0; - } - -#if !TINYEXR_USE_PIZ - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - tinyexr::SetErrorMessage("PIZ compression is not supported in this build", - err); - return 0; - } -#endif - -#if !TINYEXR_USE_ZFP - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - tinyexr::SetErrorMessage("ZFP compression is not supported in this build", - err); - return 0; - } -#endif - -#if TINYEXR_USE_ZFP - for (size_t i = 0; i < static_cast(exr_header->num_channels); i++) { - if (exr_header->requested_pixel_types[i] != TINYEXR_PIXELTYPE_FLOAT) { - tinyexr::SetErrorMessage("Pixel type must be FLOAT for ZFP compression", - err); - return 0; - } - } 
-#endif - - std::vector memory; - - // Header - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - memory.insert(memory.end(), header, header + 4); - } - - // Version, scanline. - { - char marker[] = {2, 0, 0, 0}; - /* @todo - if (exr_header->tiled) { - marker[1] |= 0x2; - } - if (exr_header->long_name) { - marker[1] |= 0x4; - } - if (exr_header->non_image) { - marker[1] |= 0x8; - } - if (exr_header->multipart) { - marker[1] |= 0x10; - } - */ - memory.insert(memory.end(), marker, marker + 4); - } - - int num_scanlines = 1; - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanlines = 16; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - num_scanlines = 32; - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - num_scanlines = 16; - } - - // Write attributes. - std::vector channels; - { - std::vector data; - - for (int c = 0; c < exr_header->num_channels; c++) { - tinyexr::ChannelInfo info; - info.p_linear = 0; - info.pixel_type = exr_header->requested_pixel_types[c]; - info.x_sampling = 1; - info.y_sampling = 1; - info.name = std::string(exr_header->channels[c].name); - channels.push_back(info); - } - - tinyexr::WriteChannelInfo(data, channels); - - tinyexr::WriteAttributeToMemory(&memory, "channels", "chlist", &data.at(0), - static_cast(data.size())); - } - - { - int comp = exr_header->compression_type; - tinyexr::swap4(reinterpret_cast(&comp)); - tinyexr::WriteAttributeToMemory( - &memory, "compression", "compression", - reinterpret_cast(&comp), 1); - } - - { - int data[4] = {0, 0, exr_image->width - 1, exr_image->height - 1}; - tinyexr::swap4(reinterpret_cast(&data[0])); - tinyexr::swap4(reinterpret_cast(&data[1])); - tinyexr::swap4(reinterpret_cast(&data[2])); - tinyexr::swap4(reinterpret_cast(&data[3])); - tinyexr::WriteAttributeToMemory( - &memory, "dataWindow", "box2i", - reinterpret_cast(data), sizeof(int) * 4); - tinyexr::WriteAttributeToMemory( - &memory, "displayWindow", 
"box2i", - reinterpret_cast(data), sizeof(int) * 4); - } - - { - unsigned char line_order = 0; // @fixme { read line_order from EXRHeader } - tinyexr::WriteAttributeToMemory(&memory, "lineOrder", "lineOrder", - &line_order, 1); - } - - { - float aspectRatio = 1.0f; - tinyexr::swap4(reinterpret_cast(&aspectRatio)); - tinyexr::WriteAttributeToMemory( - &memory, "pixelAspectRatio", "float", - reinterpret_cast(&aspectRatio), sizeof(float)); - } - - { - float center[2] = {0.0f, 0.0f}; - tinyexr::swap4(reinterpret_cast(¢er[0])); - tinyexr::swap4(reinterpret_cast(¢er[1])); - tinyexr::WriteAttributeToMemory( - &memory, "screenWindowCenter", "v2f", - reinterpret_cast(center), 2 * sizeof(float)); - } - - { - float w = static_cast(exr_image->width); - tinyexr::swap4(reinterpret_cast(&w)); - tinyexr::WriteAttributeToMemory(&memory, "screenWindowWidth", "float", - reinterpret_cast(&w), - sizeof(float)); - } - - // Custom attributes - if (exr_header->num_custom_attributes > 0) { - for (int i = 0; i < exr_header->num_custom_attributes; i++) { - tinyexr::WriteAttributeToMemory( - &memory, exr_header->custom_attributes[i].name, - exr_header->custom_attributes[i].type, - reinterpret_cast( - exr_header->custom_attributes[i].value), - exr_header->custom_attributes[i].size); - } - } - - { // end of header - unsigned char e = 0; - memory.push_back(e); - } - - int num_blocks = exr_image->height / num_scanlines; - if (num_blocks * num_scanlines < exr_image->height) { - num_blocks++; - } - - std::vector offsets(static_cast(num_blocks)); - - size_t headerSize = memory.size(); - tinyexr::tinyexr_uint64 offset = - headerSize + - static_cast(num_blocks) * - sizeof( - tinyexr::tinyexr_int64); // sizeof(header) + sizeof(offsetTable) - - std::vector > data_list( - static_cast(num_blocks)); - std::vector channel_offset_list( - static_cast(exr_header->num_channels)); - - int pixel_data_size = 0; - size_t channel_offset = 0; - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - 
channel_offset_list[c] = channel_offset; - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - pixel_data_size += sizeof(unsigned short); - channel_offset += sizeof(unsigned short); - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - pixel_data_size += sizeof(float); - channel_offset += sizeof(float); - } else if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - pixel_data_size += sizeof(unsigned int); - channel_offset += sizeof(unsigned int); - } else { - assert(0); - } - } - -#if TINYEXR_USE_ZFP - tinyexr::ZFPCompressionParam zfp_compression_param; - - // Use ZFP compression parameter from custom attributes(if such a parameter - // exists) - { - bool ret = tinyexr::FindZFPCompressionParam( - &zfp_compression_param, exr_header->custom_attributes, - exr_header->num_custom_attributes); - - if (!ret) { - // Use predefined compression parameter. - zfp_compression_param.type = 0; - zfp_compression_param.rate = 2; - } - } -#endif - -// Use signed int since some OpenMP compiler doesn't allow unsigned type for -// `parallel for` -#ifdef _OPENMP -#pragma omp parallel for -#endif - for (int i = 0; i < num_blocks; i++) { - size_t ii = static_cast(i); - int start_y = num_scanlines * i; - int endY = (std::min)(num_scanlines * (i + 1), exr_image->height); - int h = endY - start_y; - - std::vector buf( - static_cast(exr_image->width * h * pixel_data_size)); - - for (size_t c = 0; c < static_cast(exr_header->num_channels); c++) { - if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP16 h16; - h16.u = reinterpret_cast( - exr_image->images)[c][(y + start_y) 
* exr_image->width + x]; - - tinyexr::FP32 f32 = half_to_float(h16); - - tinyexr::swap4(reinterpret_cast(&f32.f)); - - // line_ptr[x] = f32.f; - tinyexr::cpy4(line_ptr + x, &(f32.f)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast( - &buf.at(static_cast(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned short val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap2(&val); - - // line_ptr[x] = val; - tinyexr::cpy2(line_ptr + x, &val); - } - } - } else { - assert(0); - } - - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_FLOAT) { - if (exr_header->requested_pixel_types[c] == TINYEXR_PIXELTYPE_HALF) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned short *line_ptr = reinterpret_cast( - &buf.at(static_cast(pixel_data_size * y * - exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - tinyexr::FP32 f32; - f32.f = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::FP16 h16; - h16 = float_to_half_full(f32); - - tinyexr::swap2(reinterpret_cast(&h16.u)); - - // line_ptr[x] = h16.u; - tinyexr::cpy2(line_ptr + x, &(h16.u)); - } - } - } else if (exr_header->requested_pixel_types[c] == - TINYEXR_PIXELTYPE_FLOAT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - float *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * - static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - float val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap4(reinterpret_cast(&val)); - - // 
line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } - } else { - assert(0); - } - } else if (exr_header->pixel_types[c] == TINYEXR_PIXELTYPE_UINT) { - for (int y = 0; y < h; y++) { - // Assume increasing Y - unsigned int *line_ptr = reinterpret_cast(&buf.at( - static_cast(pixel_data_size * y * exr_image->width) + - channel_offset_list[c] * static_cast(exr_image->width))); - for (int x = 0; x < exr_image->width; x++) { - unsigned int val = reinterpret_cast( - exr_image->images)[c][(y + start_y) * exr_image->width + x]; - - tinyexr::swap4(&val); - - // line_ptr[x] = val; - tinyexr::cpy4(line_ptr + x, &val); - } - } - } - } - - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_NONE) { - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(uncompressed) - std::vector header(8); - unsigned int data_len = static_cast(buf.size()); - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), buf.begin(), - buf.begin() + data_len); - - } else if ((exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { -#if TINYEXR_USE_MINIZ - std::vector block(tinyexr::miniz::mz_compressBound( - static_cast(buf.size()))); -#else - std::vector block( - compressBound(static_cast(buf.size()))); -#endif - tinyexr::tinyexr_uint64 outSize = block.size(); - - tinyexr::CompressZip(&block.at(0), outSize, - reinterpret_cast(&buf.at(0)), - static_cast(buf.size())); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = static_cast(outSize); // truncate - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned 
int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_RLE) { - // (buf.size() * 3) / 2 would be enough. - std::vector block((buf.size() * 3) / 2); - - tinyexr::tinyexr_uint64 outSize = block.size(); - - tinyexr::CompressRle(&block.at(0), outSize, - reinterpret_cast(&buf.at(0)), - static_cast(buf.size())); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = static_cast(outSize); // truncate - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { -#if TINYEXR_USE_PIZ - unsigned int bufLen = - 8192 + static_cast( - 2 * static_cast( - buf.size())); // @fixme { compute good bound. 
} - std::vector block(bufLen); - unsigned int outSize = static_cast(block.size()); - - CompressPiz(&block.at(0), &outSize, - reinterpret_cast(&buf.at(0)), - buf.size(), channels, exr_image->width, h); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - -#else - assert(0); -#endif - } else if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { -#if TINYEXR_USE_ZFP - std::vector block; - unsigned int outSize; - - tinyexr::CompressZfp( - &block, &outSize, reinterpret_cast(&buf.at(0)), - exr_image->width, h, exr_header->num_channels, zfp_compression_param); - - // 4 byte: scan line - // 4 byte: data size - // ~ : pixel data(compressed) - std::vector header(8); - unsigned int data_len = outSize; - memcpy(&header.at(0), &start_y, sizeof(int)); - memcpy(&header.at(4), &data_len, sizeof(unsigned int)); - - tinyexr::swap4(reinterpret_cast(&header.at(0))); - tinyexr::swap4(reinterpret_cast(&header.at(4))); - - data_list[ii].insert(data_list[ii].end(), header.begin(), header.end()); - data_list[ii].insert(data_list[ii].end(), block.begin(), - block.begin() + data_len); - -#else - assert(0); -#endif - } else { - assert(0); - } - } // omp parallel - - for (size_t i = 0; i < static_cast(num_blocks); i++) { - offsets[i] = offset; - tinyexr::swap8(reinterpret_cast(&offsets[i])); - offset += data_list[i].size(); - } - - size_t totalSize = static_cast(offset); - { - memory.insert( - memory.end(), reinterpret_cast(&offsets.at(0)), - reinterpret_cast(&offsets.at(0)) + - sizeof(tinyexr::tinyexr_uint64) * 
static_cast(num_blocks)); - } - - if (memory.size() == 0) { - tinyexr::SetErrorMessage("Output memory size is zero", err); - return 0; - } - - (*memory_out) = static_cast(malloc(totalSize)); - memcpy((*memory_out), &memory.at(0), memory.size()); - unsigned char *memory_ptr = *memory_out + memory.size(); - - for (size_t i = 0; i < static_cast(num_blocks); i++) { - memcpy(memory_ptr, &data_list[i].at(0), data_list[i].size()); - memory_ptr += data_list[i].size(); - } - - return totalSize; // OK -} - -int SaveEXRImageToFile(const EXRImage *exr_image, const EXRHeader *exr_header, - const char *filename, const char **err) { - if (exr_image == NULL || filename == NULL || - exr_header->compression_type < 0) { - tinyexr::SetErrorMessage("Invalid argument for SaveEXRImageToFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#if !TINYEXR_USE_PIZ - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_PIZ) { - tinyexr::SetErrorMessage("PIZ compression is not supported in this build", - err); - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } -#endif - -#if !TINYEXR_USE_ZFP - if (exr_header->compression_type == TINYEXR_COMPRESSIONTYPE_ZFP) { - tinyexr::SetErrorMessage("ZFP compression is not supported in this build", - err); - return TINYEXR_ERROR_UNSUPPORTED_FEATURE; - } -#endif - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "wb"); -#else - FILE *fp = fopen(filename, "wb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot write a file", err); - return TINYEXR_ERROR_CANT_WRITE_FILE; - } - - unsigned char *mem = NULL; - size_t mem_size = SaveEXRImageToMemory(exr_image, exr_header, &mem, err); - if (mem_size == 0) { - return TINYEXR_ERROR_SERIALZATION_FAILED; - } - - size_t written_size = 0; - if ((mem_size > 0) && mem) { - written_size = fwrite(mem, 1, mem_size, fp); - } - free(mem); - - fclose(fp); - - if (written_size != mem_size) { - tinyexr::SetErrorMessage("Cannot write a file", err); - return TINYEXR_ERROR_CANT_WRITE_FILE; - } - - return 
TINYEXR_SUCCESS; -} - -int LoadDeepEXR(DeepImage *deep_image, const char *filename, const char **err) { - if (deep_image == NULL) { - tinyexr::SetErrorMessage("Invalid argument for LoadDeepEXR", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _MSC_VER - FILE *fp = NULL; - errno_t errcode = fopen_s(&fp, filename, "rb"); - if ((0 != errcode) || (!fp)) { - tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), - err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } -#else - FILE *fp = fopen(filename, "rb"); - if (!fp) { - tinyexr::SetErrorMessage("Cannot read a file " + std::string(filename), - err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } -#endif - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (filesize == 0) { - fclose(fp); - tinyexr::SetErrorMessage("File size is zero : " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - (void)ret; - } - fclose(fp); - - const char *head = &buf[0]; - const char *marker = &buf[0]; - - // Header check. - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - - if (memcmp(marker, header, 4) != 0) { - tinyexr::SetErrorMessage("Invalid magic number", err); - return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; - } - marker += 4; - } - - // Version, scanline. - { - // ver 2.0, scanline, deep bit on(0x800) - // must be [2, 0, 0, 0] - if (marker[0] != 2 || marker[1] != 8 || marker[2] != 0 || marker[3] != 0) { - tinyexr::SetErrorMessage("Unsupported version or scanline", err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - marker += 4; - } - - int dx = -1; - int dy = -1; - int dw = -1; - int dh = -1; - int num_scanline_blocks = 1; // 16 for ZIP compression. 
- int compression_type = -1; - int num_channels = -1; - std::vector channels; - - // Read attributes - size_t size = filesize - tinyexr::kEXRVersionSize; - for (;;) { - if (0 == size) { - return TINYEXR_ERROR_INVALID_DATA; - } else if (marker[0] == '\0') { - marker++; - size--; - break; - } - - std::string attr_name; - std::string attr_type; - std::vector data; - size_t marker_size; - if (!tinyexr::ReadAttribute(&attr_name, &attr_type, &data, &marker_size, - marker, size)) { - std::stringstream ss; - ss << "Failed to parse attribute\n"; - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_INVALID_DATA; - } - marker += marker_size; - size -= marker_size; - - if (attr_name.compare("compression") == 0) { - compression_type = data[0]; - if (compression_type > TINYEXR_COMPRESSIONTYPE_PIZ) { - std::stringstream ss; - ss << "Unsupported compression type : " << compression_type; - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - if (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) { - num_scanline_blocks = 16; - } - - } else if (attr_name.compare("channels") == 0) { - // name: zero-terminated string, from 1 to 255 bytes long - // pixel type: int, possible values are: UINT = 0 HALF = 1 FLOAT = 2 - // pLinear: unsigned char, possible values are 0 and 1 - // reserved: three chars, should be zero - // xSampling: int - // ySampling: int - - if (!tinyexr::ReadChannelInfo(channels, data)) { - tinyexr::SetErrorMessage("Failed to parse channel info", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - num_channels = static_cast(channels.size()); - - if (num_channels < 1) { - tinyexr::SetErrorMessage("Invalid channels format", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - } else if (attr_name.compare("dataWindow") == 0) { - memcpy(&dx, &data.at(0), sizeof(int)); - memcpy(&dy, &data.at(4), sizeof(int)); - memcpy(&dw, &data.at(8), sizeof(int)); - memcpy(&dh, &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&dx)); 
- tinyexr::swap4(reinterpret_cast(&dy)); - tinyexr::swap4(reinterpret_cast(&dw)); - tinyexr::swap4(reinterpret_cast(&dh)); - - } else if (attr_name.compare("displayWindow") == 0) { - int x; - int y; - int w; - int h; - memcpy(&x, &data.at(0), sizeof(int)); - memcpy(&y, &data.at(4), sizeof(int)); - memcpy(&w, &data.at(8), sizeof(int)); - memcpy(&h, &data.at(12), sizeof(int)); - tinyexr::swap4(reinterpret_cast(&x)); - tinyexr::swap4(reinterpret_cast(&y)); - tinyexr::swap4(reinterpret_cast(&w)); - tinyexr::swap4(reinterpret_cast(&h)); - } - } - - assert(dx >= 0); - assert(dy >= 0); - assert(dw >= 0); - assert(dh >= 0); - assert(num_channels >= 1); - - int data_width = dw - dx + 1; - int data_height = dh - dy + 1; - - std::vector image( - static_cast(data_width * data_height * 4)); // 4 = RGBA - - // Read offset tables. - int num_blocks = data_height / num_scanline_blocks; - if (num_blocks * num_scanline_blocks < data_height) { - num_blocks++; - } - - std::vector offsets(static_cast(num_blocks)); - - for (size_t y = 0; y < static_cast(num_blocks); y++) { - tinyexr::tinyexr_int64 offset; - memcpy(&offset, marker, sizeof(tinyexr::tinyexr_int64)); - tinyexr::swap8(reinterpret_cast(&offset)); - marker += sizeof(tinyexr::tinyexr_int64); // = 8 - offsets[y] = offset; - } - -#if TINYEXR_USE_PIZ - if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP) || - (compression_type == TINYEXR_COMPRESSIONTYPE_PIZ)) { -#else - if ((compression_type == TINYEXR_COMPRESSIONTYPE_NONE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_RLE) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIPS) || - (compression_type == TINYEXR_COMPRESSIONTYPE_ZIP)) { -#endif - // OK - } else { - tinyexr::SetErrorMessage("Unsupported compression format", err); - return TINYEXR_ERROR_UNSUPPORTED_FORMAT; - } - - deep_image->image = static_cast( - 
malloc(sizeof(float **) * static_cast(num_channels))); - for (int c = 0; c < num_channels; c++) { - deep_image->image[c] = static_cast( - malloc(sizeof(float *) * static_cast(data_height))); - for (int y = 0; y < data_height; y++) { - } - } - - deep_image->offset_table = static_cast( - malloc(sizeof(int *) * static_cast(data_height))); - for (int y = 0; y < data_height; y++) { - deep_image->offset_table[y] = static_cast( - malloc(sizeof(int) * static_cast(data_width))); - } - - for (size_t y = 0; y < static_cast(num_blocks); y++) { - const unsigned char *data_ptr = - reinterpret_cast(head + offsets[y]); - - // int: y coordinate - // int64: packed size of pixel offset table - // int64: packed size of sample data - // int64: unpacked size of sample data - // compressed pixel offset table - // compressed sample data - int line_no; - tinyexr::tinyexr_int64 packedOffsetTableSize; - tinyexr::tinyexr_int64 packedSampleDataSize; - tinyexr::tinyexr_int64 unpackedSampleDataSize; - memcpy(&line_no, data_ptr, sizeof(int)); - memcpy(&packedOffsetTableSize, data_ptr + 4, - sizeof(tinyexr::tinyexr_int64)); - memcpy(&packedSampleDataSize, data_ptr + 12, - sizeof(tinyexr::tinyexr_int64)); - memcpy(&unpackedSampleDataSize, data_ptr + 20, - sizeof(tinyexr::tinyexr_int64)); - - tinyexr::swap4(reinterpret_cast(&line_no)); - tinyexr::swap8( - reinterpret_cast(&packedOffsetTableSize)); - tinyexr::swap8( - reinterpret_cast(&packedSampleDataSize)); - tinyexr::swap8( - reinterpret_cast(&unpackedSampleDataSize)); - - std::vector pixelOffsetTable(static_cast(data_width)); - - // decode pixel offset table. 
- { - unsigned long dstLen = - static_cast(pixelOffsetTable.size() * sizeof(int)); - if (!tinyexr::DecompressZip( - reinterpret_cast(&pixelOffsetTable.at(0)), - &dstLen, data_ptr + 28, - static_cast(packedOffsetTableSize))) { - return false; - } - - assert(dstLen == pixelOffsetTable.size() * sizeof(int)); - for (size_t i = 0; i < static_cast(data_width); i++) { - deep_image->offset_table[y][i] = pixelOffsetTable[i]; - } - } - - std::vector sample_data( - static_cast(unpackedSampleDataSize)); - - // decode sample data. - { - unsigned long dstLen = static_cast(unpackedSampleDataSize); - if (dstLen) { - if (!tinyexr::DecompressZip( - reinterpret_cast(&sample_data.at(0)), &dstLen, - data_ptr + 28 + packedOffsetTableSize, - static_cast(packedSampleDataSize))) { - return false; - } - assert(dstLen == static_cast(unpackedSampleDataSize)); - } - } - - // decode sample - int sampleSize = -1; - std::vector channel_offset_list(static_cast(num_channels)); - { - int channel_offset = 0; - for (size_t i = 0; i < static_cast(num_channels); i++) { - channel_offset_list[i] = channel_offset; - if (channels[i].pixel_type == TINYEXR_PIXELTYPE_UINT) { // UINT - channel_offset += 4; - } else if (channels[i].pixel_type == TINYEXR_PIXELTYPE_HALF) { // half - channel_offset += 2; - } else if (channels[i].pixel_type == - TINYEXR_PIXELTYPE_FLOAT) { // float - channel_offset += 4; - } else { - assert(0); - } - } - sampleSize = channel_offset; - } - assert(sampleSize >= 2); - - assert(static_cast( - pixelOffsetTable[static_cast(data_width - 1)] * - sampleSize) == sample_data.size()); - int samples_per_line = static_cast(sample_data.size()) / sampleSize; - - // - // Alloc memory - // - - // - // pixel data is stored as image[channels][pixel_samples] - // - { - tinyexr::tinyexr_uint64 data_offset = 0; - for (size_t c = 0; c < static_cast(num_channels); c++) { - deep_image->image[c][y] = static_cast( - malloc(sizeof(float) * static_cast(samples_per_line))); - - if (channels[c].pixel_type == 0) { 
// UINT - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - unsigned int ui; - unsigned int *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(int))); - tinyexr::cpy4(&ui, src_ptr); - deep_image->image[c][y][x] = static_cast(ui); // @fixme - } - data_offset += - sizeof(unsigned int) * static_cast(samples_per_line); - } else if (channels[c].pixel_type == 1) { // half - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - tinyexr::FP16 f16; - const unsigned short *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(short))); - tinyexr::cpy2(&(f16.u), src_ptr); - tinyexr::FP32 f32 = half_to_float(f16); - deep_image->image[c][y][x] = f32.f; - } - data_offset += sizeof(short) * static_cast(samples_per_line); - } else { // float - for (size_t x = 0; x < static_cast(samples_per_line); x++) { - float f; - const float *src_ptr = reinterpret_cast( - &sample_data.at(size_t(data_offset) + x * sizeof(float))); - tinyexr::cpy4(&f, src_ptr); - deep_image->image[c][y][x] = f; - } - data_offset += sizeof(float) * static_cast(samples_per_line); - } - } - } - } // y - - deep_image->width = data_width; - deep_image->height = data_height; - - deep_image->channel_names = static_cast( - malloc(sizeof(const char *) * static_cast(num_channels))); - for (size_t c = 0; c < static_cast(num_channels); c++) { -#ifdef _WIN32 - deep_image->channel_names[c] = _strdup(channels[c].name.c_str()); -#else - deep_image->channel_names[c] = strdup(channels[c].name.c_str()); -#endif - } - deep_image->num_channels = num_channels; - - return TINYEXR_SUCCESS; -} - -void InitEXRImage(EXRImage *exr_image) { - if (exr_image == NULL) { - return; - } - - exr_image->width = 0; - exr_image->height = 0; - exr_image->num_channels = 0; - - exr_image->images = NULL; - exr_image->tiles = NULL; - - exr_image->num_tiles = 0; -} - -void FreeEXRErrorMessage(const char *msg) { - if (msg) { - free(reinterpret_cast(const_cast(msg))); - } - return; -} 
- -void InitEXRHeader(EXRHeader *exr_header) { - if (exr_header == NULL) { - return; - } - - memset(exr_header, 0, sizeof(EXRHeader)); -} - -int FreeEXRHeader(EXRHeader *exr_header) { - if (exr_header == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (exr_header->channels) { - free(exr_header->channels); - } - - if (exr_header->pixel_types) { - free(exr_header->pixel_types); - } - - if (exr_header->requested_pixel_types) { - free(exr_header->requested_pixel_types); - } - - for (int i = 0; i < exr_header->num_custom_attributes; i++) { - if (exr_header->custom_attributes[i].value) { - free(exr_header->custom_attributes[i].value); - } - } - - if (exr_header->custom_attributes) { - free(exr_header->custom_attributes); - } - - return TINYEXR_SUCCESS; -} - -int FreeEXRImage(EXRImage *exr_image) { - if (exr_image == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - for (int i = 0; i < exr_image->num_channels; i++) { - if (exr_image->images && exr_image->images[i]) { - free(exr_image->images[i]); - } - } - - if (exr_image->images) { - free(exr_image->images); - } - - if (exr_image->tiles) { - for (int tid = 0; tid < exr_image->num_tiles; tid++) { - for (int i = 0; i < exr_image->num_channels; i++) { - if (exr_image->tiles[tid].images && exr_image->tiles[tid].images[i]) { - free(exr_image->tiles[tid].images[i]); - } - } - if (exr_image->tiles[tid].images) { - free(exr_image->tiles[tid].images); - } - } - free(exr_image->tiles); - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRHeaderFromFile(EXRHeader *exr_header, const EXRVersion *exr_version, - const char *filename, const char **err) { - if (exr_header == NULL || exr_version == NULL || filename == NULL) { - tinyexr::SetErrorMessage("Invalid argument for ParseEXRHeaderFromFile", - err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read 
file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - - if (ret != filesize) { - tinyexr::SetErrorMessage("fread() error on " + std::string(filename), - err); - return TINYEXR_ERROR_INVALID_FILE; - } - } - - return ParseEXRHeaderFromMemory(exr_header, exr_version, &buf.at(0), filesize, - err); -} - -int ParseEXRMultipartHeaderFromMemory(EXRHeader ***exr_headers, - int *num_headers, - const EXRVersion *exr_version, - const unsigned char *memory, size_t size, - const char **err) { - if (memory == NULL || exr_headers == NULL || num_headers == NULL || - exr_version == NULL) { - // Invalid argument - tinyexr::SetErrorMessage( - "Invalid argument for ParseEXRMultipartHeaderFromMemory", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - tinyexr::SetErrorMessage("Data size too short", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory + tinyexr::kEXRVersionSize; - size_t marker_size = size - tinyexr::kEXRVersionSize; - - std::vector infos; - - for (;;) { - tinyexr::HeaderInfo info; - info.clear(); - - std::string err_str; - bool empty_header = false; - int ret = ParseEXRHeader(&info, &empty_header, exr_version, &err_str, - marker, marker_size); - - if (ret != TINYEXR_SUCCESS) { - tinyexr::SetErrorMessage(err_str, err); - return ret; - } - - if (empty_header) { - marker += 1; // skip '\0' - break; - } - - // `chunkCount` must exist in the header. - if (info.chunk_count == 0) { - tinyexr::SetErrorMessage( - "`chunkCount' attribute is not found in the header.", err); - return TINYEXR_ERROR_INVALID_DATA; - } - - infos.push_back(info); - - // move to next header. 
- marker += info.header_len; - size -= info.header_len; - } - - // allocate memory for EXRHeader and create array of EXRHeader pointers. - (*exr_headers) = - static_cast(malloc(sizeof(EXRHeader *) * infos.size())); - for (size_t i = 0; i < infos.size(); i++) { - EXRHeader *exr_header = static_cast(malloc(sizeof(EXRHeader))); - - ConvertHeader(exr_header, infos[i]); - - // transfoer `tiled` from version. - exr_header->tiled = exr_version->tiled; - - (*exr_headers)[i] = exr_header; - } - - (*num_headers) = static_cast(infos.size()); - - return TINYEXR_SUCCESS; -} - -int ParseEXRMultipartHeaderFromFile(EXRHeader ***exr_headers, int *num_headers, - const EXRVersion *exr_version, - const char *filename, const char **err) { - if (exr_headers == NULL || num_headers == NULL || exr_version == NULL || - filename == NULL) { - tinyexr::SetErrorMessage( - "Invalid argument for ParseEXRMultipartHeaderFromFile()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - - if (ret != filesize) { - tinyexr::SetErrorMessage("`fread' error. 
file may be corrupted.", err); - return TINYEXR_ERROR_INVALID_FILE; - } - } - - return ParseEXRMultipartHeaderFromMemory( - exr_headers, num_headers, exr_version, &buf.at(0), filesize, err); -} - -int ParseEXRVersionFromMemory(EXRVersion *version, const unsigned char *memory, - size_t size) { - if (version == NULL || memory == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - if (size < tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_DATA; - } - - const unsigned char *marker = memory; - - // Header check. - { - const char header[] = {0x76, 0x2f, 0x31, 0x01}; - - if (memcmp(marker, header, 4) != 0) { - return TINYEXR_ERROR_INVALID_MAGIC_NUMBER; - } - marker += 4; - } - - version->tiled = false; - version->long_name = false; - version->non_image = false; - version->multipart = false; - - // Parse version header. - { - // must be 2 - if (marker[0] != 2) { - return TINYEXR_ERROR_INVALID_EXR_VERSION; - } - - if (version == NULL) { - return TINYEXR_SUCCESS; // May OK - } - - version->version = 2; - - if (marker[1] & 0x2) { // 9th bit - version->tiled = true; - } - if (marker[1] & 0x4) { // 10th bit - version->long_name = true; - } - if (marker[1] & 0x8) { // 11th bit - version->non_image = true; // (deep image) - } - if (marker[1] & 0x10) { // 12th bit - version->multipart = true; - } - } - - return TINYEXR_SUCCESS; -} - -int ParseEXRVersionFromFile(EXRVersion *version, const char *filename) { - if (filename == NULL) { - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t file_size; - // Compute size - fseek(fp, 0, SEEK_END); - file_size = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - if (file_size < tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_FILE; - } - - unsigned char buf[tinyexr::kEXRVersionSize]; - size_t ret = fread(&buf[0], 1, 
tinyexr::kEXRVersionSize, fp); - fclose(fp); - - if (ret != tinyexr::kEXRVersionSize) { - return TINYEXR_ERROR_INVALID_FILE; - } - - return ParseEXRVersionFromMemory(version, buf, tinyexr::kEXRVersionSize); -} - -int LoadEXRMultipartImageFromMemory(EXRImage *exr_images, - const EXRHeader **exr_headers, - unsigned int num_parts, - const unsigned char *memory, - const size_t size, const char **err) { - if (exr_images == NULL || exr_headers == NULL || num_parts == 0 || - memory == NULL || (size <= tinyexr::kEXRVersionSize)) { - tinyexr::SetErrorMessage( - "Invalid argument for LoadEXRMultipartImageFromMemory()", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - // compute total header size. - size_t total_header_size = 0; - for (unsigned int i = 0; i < num_parts; i++) { - if (exr_headers[i]->header_len == 0) { - tinyexr::SetErrorMessage("EXRHeader variable is not initialized.", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - total_header_size += exr_headers[i]->header_len; - } - - const char *marker = reinterpret_cast( - memory + total_header_size + 4 + - 4); // +8 for magic number and version header. - - marker += 1; // Skip empty header. - - // NOTE 1: - // In multipart image, There is 'part number' before chunk data. - // 4 byte : part number - // 4+ : chunk - // - // NOTE 2: - // EXR spec says 'part number' is 'unsigned long' but actually this is - // 'unsigned int(4 bytes)' in OpenEXR implementation... - // http://www.openexr.com/openexrfilelayout.pdf - - // Load chunk offset table. 
- std::vector > chunk_offset_table_list; - for (size_t i = 0; i < static_cast(num_parts); i++) { - std::vector offset_table( - static_cast(exr_headers[i]->chunk_count)); - - for (size_t c = 0; c < offset_table.size(); c++) { - tinyexr::tinyexr_uint64 offset; - memcpy(&offset, marker, 8); - tinyexr::swap8(&offset); - - if (offset >= size) { - tinyexr::SetErrorMessage("Invalid offset size in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; - } - - offset_table[c] = offset + 4; // +4 to skip 'part number' - marker += 8; - } - - chunk_offset_table_list.push_back(offset_table); - } - - // Decode image. - for (size_t i = 0; i < static_cast(num_parts); i++) { - std::vector &offset_table = - chunk_offset_table_list[i]; - - // First check 'part number' is identitical to 'i' - for (size_t c = 0; c < offset_table.size(); c++) { - const unsigned char *part_number_addr = - memory + offset_table[c] - 4; // -4 to move to 'part number' field. - unsigned int part_no; - memcpy(&part_no, part_number_addr, sizeof(unsigned int)); // 4 - tinyexr::swap4(&part_no); - - if (part_no != i) { - tinyexr::SetErrorMessage("Invalid `part number' in EXR header chunks.", - err); - return TINYEXR_ERROR_INVALID_DATA; - } - } - - std::string e; - int ret = tinyexr::DecodeChunk(&exr_images[i], exr_headers[i], offset_table, - memory, size, &e); - if (ret != TINYEXR_SUCCESS) { - if (!e.empty()) { - tinyexr::SetErrorMessage(e, err); - } - return ret; - } - } - - return TINYEXR_SUCCESS; -} - -int LoadEXRMultipartImageFromFile(EXRImage *exr_images, - const EXRHeader **exr_headers, - unsigned int num_parts, const char *filename, - const char **err) { - if (exr_images == NULL || exr_headers == NULL || num_parts == 0) { - tinyexr::SetErrorMessage( - "Invalid argument for LoadEXRMultipartImageFromFile", err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - -#ifdef _WIN32 - FILE *fp = NULL; - fopen_s(&fp, filename, "rb"); -#else - FILE *fp = fopen(filename, "rb"); -#endif - if (!fp) { - 
tinyexr::SetErrorMessage("Cannot read file " + std::string(filename), err); - return TINYEXR_ERROR_CANT_OPEN_FILE; - } - - size_t filesize; - // Compute size - fseek(fp, 0, SEEK_END); - filesize = static_cast(ftell(fp)); - fseek(fp, 0, SEEK_SET); - - std::vector buf(filesize); // @todo { use mmap } - { - size_t ret; - ret = fread(&buf[0], 1, filesize, fp); - assert(ret == filesize); - fclose(fp); - (void)ret; - } - - return LoadEXRMultipartImageFromMemory(exr_images, exr_headers, num_parts, - &buf.at(0), filesize, err); -} - -int SaveEXR(const float *data, int width, int height, int components, - const int save_as_fp16, const char *outfilename, const char **err) { - if ((components == 1) || components == 3 || components == 4) { - // OK - } else { - std::stringstream ss; - ss << "Unsupported component value : " << components << std::endl; - - tinyexr::SetErrorMessage(ss.str(), err); - return TINYEXR_ERROR_INVALID_ARGUMENT; - } - - EXRHeader header; - InitEXRHeader(&header); - - if ((width < 16) && (height < 16)) { - // No compression for small image. - header.compression_type = TINYEXR_COMPRESSIONTYPE_NONE; - } else { - header.compression_type = TINYEXR_COMPRESSIONTYPE_ZIP; - } - - EXRImage image; - InitEXRImage(&image); - - image.num_channels = components; - - std::vector images[4]; - - if (components == 1) { - images[0].resize(static_cast(width * height)); - memcpy(images[0].data(), data, sizeof(float) * size_t(width * height)); - } else { - images[0].resize(static_cast(width * height)); - images[1].resize(static_cast(width * height)); - images[2].resize(static_cast(width * height)); - images[3].resize(static_cast(width * height)); - - // Split RGB(A)RGB(A)RGB(A)... 
into R, G and B(and A) layers - for (size_t i = 0; i < static_cast(width * height); i++) { - images[0][i] = data[static_cast(components) * i + 0]; - images[1][i] = data[static_cast(components) * i + 1]; - images[2][i] = data[static_cast(components) * i + 2]; - if (components == 4) { - images[3][i] = data[static_cast(components) * i + 3]; - } - } - } - - float *image_ptr[4] = {0, 0, 0, 0}; - if (components == 4) { - image_ptr[0] = &(images[3].at(0)); // A - image_ptr[1] = &(images[2].at(0)); // B - image_ptr[2] = &(images[1].at(0)); // G - image_ptr[3] = &(images[0].at(0)); // R - } else if (components == 3) { - image_ptr[0] = &(images[2].at(0)); // B - image_ptr[1] = &(images[1].at(0)); // G - image_ptr[2] = &(images[0].at(0)); // R - } else if (components == 1) { - image_ptr[0] = &(images[0].at(0)); // A - } - - image.images = reinterpret_cast(image_ptr); - image.width = width; - image.height = height; - - header.num_channels = components; - header.channels = static_cast(malloc( - sizeof(EXRChannelInfo) * static_cast(header.num_channels))); - // Must be (A)BGR order, since most of EXR viewers expect this channel order. 
- if (components == 4) { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "A", 255); - strncpy_s(header.channels[1].name, "B", 255); - strncpy_s(header.channels[2].name, "G", 255); - strncpy_s(header.channels[3].name, "R", 255); -#else - strncpy(header.channels[0].name, "A", 255); - strncpy(header.channels[1].name, "B", 255); - strncpy(header.channels[2].name, "G", 255); - strncpy(header.channels[3].name, "R", 255); -#endif - header.channels[0].name[strlen("A")] = '\0'; - header.channels[1].name[strlen("B")] = '\0'; - header.channels[2].name[strlen("G")] = '\0'; - header.channels[3].name[strlen("R")] = '\0'; - } else if (components == 3) { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "B", 255); - strncpy_s(header.channels[1].name, "G", 255); - strncpy_s(header.channels[2].name, "R", 255); -#else - strncpy(header.channels[0].name, "B", 255); - strncpy(header.channels[1].name, "G", 255); - strncpy(header.channels[2].name, "R", 255); -#endif - header.channels[0].name[strlen("B")] = '\0'; - header.channels[1].name[strlen("G")] = '\0'; - header.channels[2].name[strlen("R")] = '\0'; - } else { -#ifdef _MSC_VER - strncpy_s(header.channels[0].name, "A", 255); -#else - strncpy(header.channels[0].name, "A", 255); -#endif - header.channels[0].name[strlen("A")] = '\0'; - } - - header.pixel_types = static_cast( - malloc(sizeof(int) * static_cast(header.num_channels))); - header.requested_pixel_types = static_cast( - malloc(sizeof(int) * static_cast(header.num_channels))); - for (int i = 0; i < header.num_channels; i++) { - header.pixel_types[i] = - TINYEXR_PIXELTYPE_FLOAT; // pixel type of input image - - if (save_as_fp16 > 0) { - header.requested_pixel_types[i] = - TINYEXR_PIXELTYPE_HALF; // save with half(fp16) pixel format - } else { - header.requested_pixel_types[i] = - TINYEXR_PIXELTYPE_FLOAT; // save with float(fp32) pixel format(i.e. 
- // no precision reduction) - } - } - - int ret = SaveEXRImageToFile(&image, &header, outfilename, err); - if (ret != TINYEXR_SUCCESS) { - return ret; - } - - free(header.channels); - free(header.pixel_types); - free(header.requested_pixel_types); - - return ret; -} - -#ifdef __clang__ -// zero-as-null-ppinter-constant -#pragma clang diagnostic pop -#endif - -#endif // TINYEXR_IMPLEMENTATION_DEIFNED -#endif // TINYEXR_IMPLEMENTATION diff --git a/zenovis/xinxinoptix/SDK/support/tinygltf/LICENSE b/zenovis/xinxinoptix/SDK/support/tinygltf/LICENSE deleted file mode 100644 index 34398adf07..0000000000 --- a/zenovis/xinxinoptix/SDK/support/tinygltf/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2017 Syoyo Fujita, Aurélien Chatelain and many contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/zenovis/xinxinoptix/SDK/support/tinygltf/json.hpp b/zenovis/xinxinoptix/SDK/support/tinygltf/json.hpp deleted file mode 100644 index c9af0bed36..0000000000 --- a/zenovis/xinxinoptix/SDK/support/tinygltf/json.hpp +++ /dev/null @@ -1,20406 +0,0 @@ -/* - __ _____ _____ _____ - __| | __| | | | JSON for Modern C++ -| | |__ | | | | | | version 3.5.0 -|_____|_____|_____|_|___| https://github.com/nlohmann/json - -Licensed under the MIT License . -SPDX-License-Identifier: MIT -Copyright (c) 2013-2018 Niels Lohmann . - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-*/ - -#ifndef NLOHMANN_JSON_HPP -#define NLOHMANN_JSON_HPP - -#define NLOHMANN_JSON_VERSION_MAJOR 3 -#define NLOHMANN_JSON_VERSION_MINOR 5 -#define NLOHMANN_JSON_VERSION_PATCH 0 - -#include // all_of, find, for_each -#include // assert -#include // and, not, or -#include // nullptr_t, ptrdiff_t, size_t -#include // hash, less -#include // initializer_list -#include // istream, ostream -#include // random_access_iterator_tag -#include // accumulate -#include // string, stoi, to_string -#include // declval, forward, move, pair, swap - -// #include -#ifndef NLOHMANN_JSON_FWD_HPP -#define NLOHMANN_JSON_FWD_HPP - -#include // int64_t, uint64_t -#include // map -#include // allocator -#include // string -#include // vector - -/*! -@brief namespace for Niels Lohmann -@see https://github.com/nlohmann -@since version 1.0.0 -*/ -namespace nlohmann -{ -/*! -@brief default JSONSerializer template argument - -This serializer ignores the template arguments and uses ADL -([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) -for serialization. -*/ -template -struct adl_serializer; - -template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer> -class basic_json; - -/*! -@brief JSON Pointer - -A JSON pointer defines a string syntax for identifying a specific value -within a JSON document. It can be used with functions `at` and -`operator[]`. Furthermore, JSON pointers are the base for JSON patches. - -@sa [RFC 6901](https://tools.ietf.org/html/rfc6901) - -@since version 2.0.0 -*/ -template -class json_pointer; - -/*! -@brief default JSON class - -This type is the default specialization of the @ref basic_json class which -uses the standard template types. 
- -@since version 1.0.0 -*/ -using json = basic_json<>; -} // namespace nlohmann - -#endif - -// #include - - -// This file contains all internal macro definitions -// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them - -// exclude unsupported compilers -#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) - #if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #endif -#endif - -// disable float-equal warnings on GCC/clang -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wfloat-equal" -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wdocumentation" -#endif - -// allow for portable deprecation warnings -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #define JSON_DEPRECATED __attribute__((deprecated)) -#elif defined(_MSC_VER) - #define JSON_DEPRECATED __declspec(deprecated) -#else - #define JSON_DEPRECATED -#endif - -// allow to disable exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) - #define JSON_INTERNAL_CATCH(exception) catch(exception) -#else - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) - #define JSON_INTERNAL_CATCH(exception) if(false) -#endif - -// override exception 
macros -#if defined(JSON_THROW_USER) - #undef JSON_THROW - #define JSON_THROW JSON_THROW_USER -#endif -#if defined(JSON_TRY_USER) - #undef JSON_TRY - #define JSON_TRY JSON_TRY_USER -#endif -#if defined(JSON_CATCH_USER) - #undef JSON_CATCH - #define JSON_CATCH JSON_CATCH_USER - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_CATCH_USER -#endif -#if defined(JSON_INTERNAL_CATCH_USER) - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER -#endif - -// manual branch prediction -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) - #define JSON_LIKELY(x) __builtin_expect(!!(x), 1) - #define JSON_UNLIKELY(x) __builtin_expect(!!(x), 0) -#else - #define JSON_LIKELY(x) x - #define JSON_UNLIKELY(x) x -#endif - -// C++ language standard detection -#if (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 -#elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) - #define JSON_HAS_CPP_14 -#endif - -/*! -@brief macro to briefly define a mapping between an enum and JSON -@def NLOHMANN_JSON_SERIALIZE_ENUM -@since version 3.4.0 -*/ -#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ - template \ - inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [e](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.first == e; \ - }); \ - j = ((it != std::end(m)) ? 
it : std::begin(m))->second; \ - } \ - template \ - inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [j](const std::pair& ej_pair) -> bool \ - { \ - return ej_pair.second == j; \ - }); \ - e = ((it != std::end(m)) ? it : std::begin(m))->first; \ - } - -// Ugly macros to avoid uglier copy-paste when specializing basic_json. They -// may be removed in the future once the class is split. - -#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ - template class ObjectType, \ - template class ArrayType, \ - class StringType, class BooleanType, class NumberIntegerType, \ - class NumberUnsignedType, class NumberFloatType, \ - template class AllocatorType, \ - template class JSONSerializer> - -#define NLOHMANN_BASIC_JSON_TPL \ - basic_json - -// #include - - -#include // not -#include // size_t -#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type - -namespace nlohmann -{ -namespace detail -{ -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -template -using uncvref_t = typename std::remove_cv::type>::type; - -// implementation of C++14 index_sequence and affiliates -// source: https://stackoverflow.com/a/32223343 -template -struct index_sequence -{ - using type = index_sequence; - using value_type = std::size_t; - static constexpr std::size_t size() noexcept - { - return sizeof...(Ints); - } -}; - -template -struct merge_and_renumber; - -template -struct merge_and_renumber, index_sequence> - : index_sequence < I1..., (sizeof...(I1) + I2)... 
> {}; - -template -struct make_index_sequence - : merge_and_renumber < typename make_index_sequence < N / 2 >::type, - typename make_index_sequence < N - N / 2 >::type > {}; - -template<> struct make_index_sequence<0> : index_sequence<> {}; -template<> struct make_index_sequence<1> : index_sequence<0> {}; - -template -using index_sequence_for = make_index_sequence; - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - -// taken from ranges-v3 -template -struct static_const -{ - static constexpr T value{}; -}; - -template -constexpr T static_const::value; -} // namespace detail -} // namespace nlohmann - -// #include - - -#include // not -#include // numeric_limits -#include // false_type, is_constructible, is_integral, is_same, true_type -#include // declval - -// #include - -// #include - - -#include // random_access_iterator_tag - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template struct make_void -{ - using type = void; -}; -template using void_t = typename make_void::type; -} // namespace detail -} // namespace nlohmann - -// #include - - -namespace nlohmann -{ -namespace detail -{ -template -struct iterator_types {}; - -template -struct iterator_types < - It, - void_t> -{ - using difference_type = typename It::difference_type; - using value_type = typename It::value_type; - using pointer = typename It::pointer; - using reference = typename It::reference; - using iterator_category = typename It::iterator_category; -}; - -// This is required as some compilers implement std::iterator_traits in a way that -// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. 
-template -struct iterator_traits -{ -}; - -template -struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types -{ -}; - -template -struct iterator_traits::value>> -{ - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using difference_type = ptrdiff_t; - using pointer = T*; - using reference = T&; -}; -} -} - -// #include - -// #include - - -#include - -// #include - - -// http://en.cppreference.com/w/cpp/experimental/is_detected -namespace nlohmann -{ -namespace detail -{ -struct nonesuch -{ - nonesuch() = delete; - ~nonesuch() = delete; - nonesuch(nonesuch const&) = delete; - void operator=(nonesuch const&) = delete; -}; - -template class Op, - class... Args> -struct detector -{ - using value_t = std::false_type; - using type = Default; -}; - -template class Op, class... Args> -struct detector>, Op, Args...> -{ - using value_t = std::true_type; - using type = Op; -}; - -template