From aec45e4d3134b29a7045a319955e9cdb88df2c5c Mon Sep 17 00:00:00 2001
From: MartinuzziFrancesco
Date: Thu, 31 Oct 2024 11:57:34 +0100
Subject: [PATCH] readme cleanup, start of rhn end

---
 README.md       | 140 ++++++++++++++++++++++++++++---------------------
 src/rhn_cell.jl |  30 ++++++++++++
 2 files changed, 110 insertions(+), 60 deletions(-)

diff --git a/README.md b/README.md
index 1f4cd7b..5d3d734 100644
--- a/README.md
+++ b/README.md
@@ -41,91 +41,111 @@ using MLUtils: DataLoader
 using Statistics
 using Random
 
-# Parameters
-input_size = 1 # Each element in the sequence is a scalar
-hidden_size = 64 # Size of the hidden state in MGU
-num_classes = 2 # Binary classification
-seq_length = 10 # Length of each sequence
-batch_size = 16 # Batch size
-num_epochs = 50 # Number of epochs for training
-num_samples = 1000 # Number of samples in dataset
-
 # Create dataset
-function create_dataset(seq_length, num_samples)
-    data = randn(input_size, seq_length, num_samples)
-    labels = sum(data, dims=(1,2)) .>= 0
+function create_data(input_size, seq_length::Int, num_samples::Int)
+    data = randn(input_size, seq_length, num_samples) # (input_size, seq_length, num_samples)
+    labels = sum(data, dims=(1, 2)) .>= 0
     labels = Int.(labels)
+    labels = dropdims(labels, dims=(1))
     return data, labels
 end
 
-# Generate training data
-train_data, train_labels = create_dataset(seq_length, num_samples)
-train_loader = DataLoader((train_data, train_labels), batchsize=batch_size, shuffle=true)
+function create_dataset(input_size, seq_length, n_train::Int, n_test::Int, batch_size)
+    train_data, train_labels = create_data(input_size, seq_length, n_train)
+    train_loader = DataLoader((train_data, train_labels), batchsize=batch_size, shuffle=true)
 
-# Define the model
-model = Chain(
-    RAN(input_size => hidden_size),
-    x -> x[:, end, :], # Extract the last hidden state
-    Dense(hidden_size, num_classes)
-)
+    test_data, test_labels = create_data(input_size, seq_length, n_test)
+    test_loader = DataLoader((test_data, test_labels), batchsize=batch_size, shuffle=false)
+    return train_loader, test_loader
+end
 
-function adjust_labels(labels)
-    return labels .+ 1
+struct RecurrentModel{H,C,D}
+    h0::H
+    rnn::C
+    dense::D
 end
 
-# Define the loss function
-function loss_fn(batch_data, batch_labels)
-    # Adjust labels
-    batch_labels = adjust_labels(batch_labels)
-    # One-hot encode labels and remove any extra singleton dimensions
-    batch_labels_oh = dropdims(Flux.onehotbatch(batch_labels, 1:num_classes), dims=(2, 3))
-    # Forward pass
-    y_pred = model(batch_data)
-    # Compute loss
-    loss = Flux.logitcrossentropy(y_pred, batch_labels_oh)
-    return loss
+Flux.@layer RecurrentModel trainable=(rnn, dense)
+
+function RecurrentModel(input_size::Int, hidden_size::Int)
+    return RecurrentModel(
+        zeros(Float32, hidden_size),
+        MGU(input_size => hidden_size),
+        Dense(hidden_size => 1, sigmoid))
 end
 
+function (model::RecurrentModel)(inp)
+    state = model.rnn(inp, model.h0)
+    state = state[:, end, :]
+    output = model.dense(state)
+    return output
+end
+
-# Define the optimizer
-opt = Adam(0.01)
+function criterion(model, batch_data, batch_labels)
+    y_pred = model(batch_data)
+    loss = Flux.binarycrossentropy(y_pred, batch_labels)
+    return loss
+end
 
-# Training loop
-for epoch in 1:num_epochs
+function train_recurrent!(epoch, train_loader, opt, model, criterion)
     total_loss = 0.0
     for (batch_data, batch_labels) in train_loader
         # Compute gradients and update parameters
-        grads = gradient(() -> loss_fn(batch_data, batch_labels), Flux.params(model))
+        grads = gradient(() -> criterion(model, batch_data, batch_labels), Flux.params(model))
         Flux.Optimise.update!(opt, Flux.params(model), grads)
         # Accumulate loss
-        total_loss += loss_fn(batch_data, batch_labels)
+        total_loss += criterion(model, batch_data, batch_labels)
     end
     avg_loss = total_loss / length(train_loader)
-    println("Epoch $epoch/$num_epochs, Loss: $(round(avg_loss, digits=4))")
+    println("Epoch $epoch, Loss: $(round(avg_loss, digits=4))")
 end
 
-# Generate test data
-test_data, test_labels = create_dataset(seq_length, 200)
-test_loader = DataLoader((test_data, test_labels), batchsize=batch_size, shuffle=false)
-
-# Evaluation
-correct = 0
-total = 0
-for (batch_data, batch_labels) in test_loader
-    # Adjust labels
-    batch_labels = adjust_labels(batch_labels)
-    # Forward pass
-    y_pred = model(batch_data)
-    # Decode predictions
-    predicted = Flux.onecold(y_pred, 1:num_classes)
-    # Flatten and compare
-    correct += sum(vec(predicted) .== vec(batch_labels))
-    total += length(batch_labels)
+function test_recurrent(test_loader, model)
+    # Evaluation
+    correct = 0
+    total = 0
+    for (batch_data, batch_labels) in test_loader
+
+        # Forward pass
+        predicted = model(batch_data)
+
+        # Decode predictions: convert probabilities to class labels (0 or 1)
+        predicted_labels = vec(predicted .>= 0.5) # Threshold at 0.5 for binary classification
+
+        # Compare predicted labels to actual labels
+        correct += sum(predicted_labels .== vec(batch_labels))
+        total += length(batch_labels)
+    end
+    accuracy = correct / total
+    println("Accuracy: ", accuracy * 100, "%")
 end
 
-accuracy = 100 * correct / total
-println("Test Accuracy: $(round(accuracy, digits=2))%")
+function main(;
+    input_size = 1,   # Each element in the sequence is a scalar
+    hidden_size = 64, # Size of the hidden state
+    seq_length = 10,  # Length of each sequence
+    batch_size = 16,  # Batch size
+    num_epochs = 50,  # Number of epochs for training
+    n_train = 1000,   # Number of samples in train dataset
+    n_test = 200      # Number of samples in test dataset
+)
+    model = RecurrentModel(input_size, hidden_size)
+    # Generate train and test data
+    train_loader, test_loader = create_dataset(input_size, seq_length, n_train, n_test, batch_size)
+    # Define the optimizer
+    opt = Adam(0.001)
+
+    for epoch in 1:num_epochs
+        train_recurrent!(epoch, train_loader, opt, model, criterion)
+    end
+
+    test_recurrent(test_loader, model)
+
+end
+
+main()
 ```
diff --git a/src/rhn_cell.jl b/src/rhn_cell.jl
index 0b4c1ee..6702183 100644
--- a/src/rhn_cell.jl
+++ b/src/rhn_cell.jl
@@ -98,3 +98,33 @@ function (rhn::RHNCell)(inp, state=nothing)
 
     return current_state
 end
+
+# TODO fix implementation here
+struct RHN{M}
+    cell::M
+end
+
+Flux.@layer :expand RHN
+
+"""
+    RHN((in, out)::Pair, depth=3; kwargs...)
+"""
+function RHN((in, out)::Pair, depth=3; kwargs...)
+    cell = RHNCell(in => out, depth; kwargs...)
+    return RHN(cell)
+end
+
+function (rhn::RHN)(inp)
+    state = zeros_like(inp, size(rhn.cell.layers[2].weights, 2))
+    return rhn(inp, state)
+end
+
+function (rhn::RHN)(inp, state)
+    @assert ndims(inp) == 2 || ndims(inp) == 3
+    new_state = []
+    for inp_t in eachslice(inp, dims=2)
+        state = rhn.cell(inp_t, state)
+        new_state = vcat(new_state, [state])
+    end
+    return stack(new_state, dims=2)
+end
\ No newline at end of file
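
Note: the `RHN` wrapper added in `src/rhn_cell.jl` is still marked TODO, so the snippet below is only a usage sketch of the interface it appears to be aiming for, mirroring the MGU example in the README. The `(inp, state)` call pattern and the `RHN(in => out, depth)` constructor come from the patch itself; the concrete sizes (`input_size = 1`, `hidden_size = 64`, and so on) are illustrative assumptions, not tested behavior.

```julia
# Usage sketch only: the RHN wrapper is marked TODO in this patch, so this
# exercises the intended interface rather than a finished implementation.
using Flux
# `RHN` is assumed to be exported by this package once the wrapper is done.

input_size, hidden_size, seq_length, batch_size = 1, 64, 10, 16  # illustrative sizes

rhn = RHN(input_size => hidden_size, 3)   # depth-3 recurrent highway network
inp = rand(Float32, input_size, seq_length, batch_size)
state = zeros(Float32, hidden_size)       # explicit initial state, as in the README example

out = rhn(inp, state)                     # stacked states: (hidden_size, seq_length, batch_size)
last = out[:, end, :]                     # last hidden state, ready for a Dense head
```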