diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 15ecbc5c3..2772de28b 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -8,7 +8,4 @@ ignore = [
     # https://github.com/TuringLang/Turing.jl/pull/2328/files
     "src/experimental/gibbs.jl",
     "test/experimental/gibbs.jl",
-    # https://github.com/TuringLang/Turing.jl/pull/1887 # Enzyme PR
-    "test/mcmc/hmc.jl",
-    "test/mcmc/sghmc.jl",
 ]
diff --git a/.github/workflows/DocsNav.yml b/.github/workflows/DocsNav.yml
index 14614d1fd..301ee7393 100644
--- a/.github/workflows/DocsNav.yml
+++ b/.github/workflows/DocsNav.yml
@@ -32,13 +32,13 @@ jobs:

           # Define the URL of the navbar to be used
           NAVBAR_URL="https://raw.githubusercontent.com/TuringLang/turinglang.github.io/main/assets/scripts/TuringNavbar.html"
-          
+
           # Update all HTML files in the current directory (gh-pages root)
           ./insert_navbar.sh . $NAVBAR_URL
-          
+
           # Remove the insert_navbar.sh file
           rm insert_navbar.sh
-          
+
           # Check if there are any changes
           if [[ -n $(git status -s) ]]; then
             git add .
diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl
index 433add6b5..bc2519d71 100644
--- a/src/mcmc/mh.jl
+++ b/src/mcmc/mh.jl
@@ -54,7 +54,7 @@ Specifying a single distribution implies the use of static MH:

 ```julia
 # Use a static proposal for s² (which happens to be the same
-# as the prior) and a static proposal for m (note that this 
+# as the prior) and a static proposal for m (note that this
 # isn't a random walk proposal).
 chain = sample(
     gdemo(1.5, 2.0),
diff --git a/test/mcmc/hmc.jl b/test/mcmc/hmc.jl
index 7404dbf43..27c055394 100644
--- a/test/mcmc/hmc.jl
+++ b/test/mcmc/hmc.jl
@@ -22,52 +22,51 @@ using Turing
     # Set a seed
     rng = StableRNG(123)
     @testset "constrained bounded" begin
-        obs = [0,1,0,1,1,1,1,1,1,1]
+        obs = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]

         @model function constrained_test(obs)
-            p ~ Beta(2,2)
-            for i = 1:length(obs)
+            p ~ Beta(2, 2)
+            for i in 1:length(obs)
                 obs[i] ~ Bernoulli(p)
             end
-            p
+            return p
         end

         chain = sample(
             rng,
             constrained_test(obs),
             HMC(1.5, 3; adtype=adbackend),# using a large step size (1.5)
-            1000)
+            1000,
+        )

-        check_numerical(chain, [:p], [10/14], atol=0.1)
+        check_numerical(chain, [:p], [10 / 14]; atol=0.1)
     end
     @testset "constrained simplex" begin
-        obs12 = [1,2,1,2,2,2,2,2,2,2]
+        obs12 = [1, 2, 1, 2, 2, 2, 2, 2, 2, 2]

         @model function constrained_simplex_test(obs12)
             ps ~ Dirichlet(2, 3)
             pd ~ Dirichlet(4, 1)
-            for i = 1:length(obs12)
+            for i in 1:length(obs12)
                 obs12[i] ~ Categorical(ps)
             end
             return ps
         end

         chain = sample(
-            rng,
-            constrained_simplex_test(obs12),
-            HMC(0.75, 2; adtype=adbackend),
-            1000)
+            rng, constrained_simplex_test(obs12), HMC(0.75, 2; adtype=adbackend), 1000
+        )

-        check_numerical(chain, ["ps[1]", "ps[2]"], [5/16, 11/16], atol=0.015)
+        check_numerical(chain, ["ps[1]", "ps[2]"], [5 / 16, 11 / 16]; atol=0.015)
     end
     @testset "hmc reverse diff" begin
         alg = HMC(0.1, 10; adtype=adbackend)
         res = sample(rng, gdemo_default, alg, 4000)
-        check_gdemo(res, rtol=0.1)
+        check_gdemo(res; rtol=0.1)
     end
     @testset "matrix support" begin
         @model function hmcmatrixsup()
-            v ~ Wishart(7, [1 0.5; 0.5 1])
+            return v ~ Wishart(7, [1 0.5; 0.5 1])
         end

         model_f = hmcmatrixsup()
@@ -75,7 +74,7 @@ using Turing
         vs = map(1:3) do _
             chain = sample(rng, model_f, HMC(0.15, 7; adtype=adbackend), n_samples)
             r = reshape(Array(group(chain, :v)), n_samples, 2, 2)
-            reshape(mean(r; dims = 1), 2, 2)
+            reshape(mean(r; dims=1), 2, 2)
         end

         @test maximum(abs, mean(vs) - (7 * [1 0.5; 0.5 1])) <= 0.5
@@ -92,10 +91,10 @@ using Turing
         M = N ÷ 4
         x1s = rand(M) * 5
         x2s = rand(M) * 5
-        xt1s = Array([[x1s[i]; x2s[i]] for i = 1:M])
-        append!(xt1s, Array([[x1s[i] - 6; x2s[i] - 6] for i = 1:M]))
-        xt0s = Array([[x1s[i]; x2s[i] - 6] for i = 1:M])
-        append!(xt0s, Array([[x1s[i] - 6; x2s[i]] for i = 1:M]))
+        xt1s = Array([[x1s[i]; x2s[i]] for i in 1:M])
+        append!(xt1s, Array([[x1s[i] - 6; x2s[i] - 6] for i in 1:M]))
+        xt0s = Array([[x1s[i]; x2s[i] - 6] for i in 1:M])
+        append!(xt0s, Array([[x1s[i] - 6; x2s[i]] for i in 1:M]))
         xs = [xt1s; xt0s]
         ts = [ones(M); ones(M); zeros(M); zeros(M)]

@@ -106,20 +105,22 @@ using Turing
         var_prior = sqrt(1.0 / alpha) # variance of the Gaussian prior

         @model function bnn(ts)
-            b1 ~ MvNormal([0. ;0.; 0.],
-                [var_prior 0. 0.; 0. var_prior 0.; 0. 0. var_prior])
-            w11 ~ MvNormal([0.; 0.], [var_prior 0.; 0. var_prior])
-            w12 ~ MvNormal([0.; 0.], [var_prior 0.; 0. var_prior])
-            w13 ~ MvNormal([0.; 0.], [var_prior 0.; 0. var_prior])
+            b1 ~ MvNormal(
+                [0.0; 0.0; 0.0], [var_prior 0.0 0.0; 0.0 var_prior 0.0; 0.0 0.0 var_prior]
+            )
+            w11 ~ MvNormal([0.0; 0.0], [var_prior 0.0; 0.0 var_prior])
+            w12 ~ MvNormal([0.0; 0.0], [var_prior 0.0; 0.0 var_prior])
+            w13 ~ MvNormal([0.0; 0.0], [var_prior 0.0; 0.0 var_prior])
             bo ~ Normal(0, var_prior)
-            wo ~ MvNormal([0.; 0; 0],
-                [var_prior 0. 0.; 0. var_prior 0.; 0. 0. var_prior])
-            for i = rand(1:N, 10)
+            wo ~ MvNormal(
+                [0.0; 0; 0], [var_prior 0.0 0.0; 0.0 var_prior 0.0; 0.0 0.0 var_prior]
+            )
+            for i in rand(1:N, 10)
                 y = nn(xs[i], b1, w11, w12, w13, bo, wo)
                 ts[i] ~ Bernoulli(y)
             end

-            b1, w11, w12, w13, bo, wo
+            return b1, w11, w12, w13, bo, wo
         end

         # Sampling
@@ -147,7 +148,7 @@ using Turing

         Random.seed!(12345) # particle samplers do not support user-provided `rng` yet
         alg3 = Gibbs(PG(20, :s), HMCDA(500, 0.8, 0.25, :m; init_ϵ=0.05, adtype=adbackend))
-        res3 = sample(rng, gdemo_default, alg3, 3000, discard_initial=1000)
+        res3 = sample(rng, gdemo_default, alg3, 3000; discard_initial=1000)
         check_gdemo(res3)
     end

@@ -191,8 +192,8 @@ using Turing
     @testset "check discard" begin
         alg = NUTS(100, 0.8; adtype=adbackend)

-        c1 = sample(rng, gdemo_default, alg, 500, discard_adapt=true)
-        c2 = sample(rng, gdemo_default, alg, 500, discard_adapt=false)
+        c1 = sample(rng, gdemo_default, alg, 500; discard_adapt=true)
+        c2 = sample(rng, gdemo_default, alg, 500; discard_adapt=false)

         @test size(c1, 1) == 500
         @test size(c2, 1) == 500
@@ -210,20 +211,20 @@ using Turing
         # https://github.com/TuringLang/DynamicPPL.jl/issues/27
         @model function mwe1(::Type{T}=Float64) where {T<:Real}
             m = Matrix{T}(undef, 2, 3)
-            m .~ MvNormal(zeros(2), I)
+            return m .~ MvNormal(zeros(2), I)
         end
         @test sample(rng, mwe1(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains

         @model function mwe2(::Type{T}=Matrix{Float64}) where {T}
             m = T(undef, 2, 3)
-            m .~ MvNormal(zeros(2), I)
+            return m .~ MvNormal(zeros(2), I)
         end
         @test sample(rng, mwe2(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains

         # https://github.com/TuringLang/Turing.jl/issues/1308
         @model function mwe3(::Type{T}=Array{Float64}) where {T}
             m = T(undef, 2, 3)
-            m .~ MvNormal(zeros(2), I)
+            return m .~ MvNormal(zeros(2), I)
         end
         @test sample(rng, mwe3(), HMC(0.2, 4; adtype=adbackend), 1_000) isa Chains
     end
@@ -241,13 +242,17 @@ using Turing
         @model function demo_hmc_prior()
             # NOTE: Used to use `InverseGamma(2, 3)` but this has infinite variance
             # which means that it's _very_ difficult to find a good tolerance in the test below:)
-            s ~ truncated(Normal(3, 1), lower=0)
-            m ~ Normal(0, sqrt(s))
+            s ~ truncated(Normal(3, 1); lower=0)
+            return m ~ Normal(0, sqrt(s))
         end

         alg = NUTS(1000, 0.8; adtype=adbackend)
-        gdemo_default_prior = DynamicPPL.contextualize(demo_hmc_prior(), DynamicPPL.PriorContext())
+        gdemo_default_prior = DynamicPPL.contextualize(
+            demo_hmc_prior(), DynamicPPL.PriorContext()
+        )
         chain = sample(gdemo_default_prior, alg, 10_000; initial_params=[3.0, 0.0])
-        check_numerical(chain, [:s, :m], [mean(truncated(Normal(3, 1); lower=0)), 0], atol=0.2)
+        check_numerical(
+            chain, [:s, :m], [mean(truncated(Normal(3, 1); lower=0)), 0]; atol=0.2
+        )
     end
     @testset "warning for difficult init params" begin
@@ -262,7 +267,7 @@ using Turing
         @test_logs (
             :warn,
             "failed to find valid initial parameters in 10 tries; consider providing explicit initial parameters using the `initial_params` keyword",
-        ) (:info,) match_mode=:any begin
+        ) (:info,) match_mode = :any begin
             sample(demo_warn_initial_params(), NUTS(; adtype=adbackend), 5)
         end
     end
@@ -271,7 +276,7 @@ using Turing
         @model function vector_of_dirichlet(::Type{TV}=Vector{Float64}) where {TV}
             xs = Vector{TV}(undef, 2)
             xs[1] ~ Dirichlet(ones(5))
-            xs[2] ~ Dirichlet(ones(5))
+            return xs[2] ~ Dirichlet(ones(5))
         end
         model = vector_of_dirichlet()
         chain = sample(model, NUTS(), 1000)
@@ -296,15 +301,10 @@ using Turing
             end
         end

-        model = buggy_model();
-        num_samples = 1_000;
+        model = buggy_model()
+        num_samples = 1_000

-        chain = sample(
-            model,
-            NUTS(),
-            num_samples;
-            initial_params=[0.5, 1.75, 1.0]
-        )
+        chain = sample(model, NUTS(), num_samples; initial_params=[0.5, 1.75, 1.0])
         chain_prior = sample(model, Prior(), num_samples)

         # Extract the `x` like this because running `generated_quantities` was how
diff --git a/test/mcmc/sghmc.jl b/test/mcmc/sghmc.jl
index 1f8179503..c1d07d2ce 100644
--- a/test/mcmc/sghmc.jl
+++ b/test/mcmc/sghmc.jl
@@ -34,7 +34,7 @@ using Turing

         alg = SGHMC(; learning_rate=0.02, momentum_decay=0.5, adtype=adbackend)
         chain = sample(rng, gdemo_default, alg, 10_000)
-        check_gdemo(chain, atol=0.1)
+        check_gdemo(chain; atol=0.1)
     end
 end

@@ -58,15 +58,15 @@
     @testset "sgld inference" begin
         rng = StableRNG(1)

-        chain = sample(rng, gdemo_default, SGLD(; stepsize = PolynomialStepsize(0.5)), 20_000)
-        check_gdemo(chain, atol = 0.2)
+        chain = sample(rng, gdemo_default, SGLD(; stepsize=PolynomialStepsize(0.5)), 20_000)
+        check_gdemo(chain; atol=0.2)

         # Weight samples by step sizes (cf section 4.2 in the paper by Welling and Teh)
         v = get(chain, [:SGLD_stepsize, :s, :m])
         s_weighted = dot(v.SGLD_stepsize, v.s) / sum(v.SGLD_stepsize)
         m_weighted = dot(v.SGLD_stepsize, v.m) / sum(v.SGLD_stepsize)

-        @test s_weighted ≈ 49/24 atol=0.2
-        @test m_weighted ≈ 7/6 atol=0.2
+        @test s_weighted ≈ 49 / 24 atol = 0.2
+        @test m_weighted ≈ 7 / 6 atol = 0.2
     end
 end