diff --git a/JGNN/src/examples/classification/LogisticRegression.java b/JGNN/src/examples/classification/LogisticRegression.java
index 54a0b8e2..83571724 100644
--- a/JGNN/src/examples/classification/LogisticRegression.java
+++ b/JGNN/src/examples/classification/LogisticRegression.java
@@ -4,6 +4,7 @@
import mklab.JGNN.adhoc.ModelBuilder;
import mklab.JGNN.adhoc.ModelTraining;
import mklab.JGNN.adhoc.datasets.Citeseer;
+import mklab.JGNN.adhoc.train.SampleClassification;
import mklab.JGNN.core.Matrix;
import mklab.JGNN.nn.Model;
import mklab.JGNN.nn.loss.Accuracy;
@@ -44,7 +45,7 @@ public static void main(String[] args) {
long tic = System.currentTimeMillis();
- Model model = new ModelTraining()
+ Model model = new SampleClassification()
.setOptimizer(new GradientDescent(0.01))
.setEpochs(600)
.setNumBatches(10)
diff --git a/JGNN/src/examples/classification/MLP.java b/JGNN/src/examples/classification/MLP.java
index 093f2fbf..55fd55f2 100644
--- a/JGNN/src/examples/classification/MLP.java
+++ b/JGNN/src/examples/classification/MLP.java
@@ -4,6 +4,7 @@
import mklab.JGNN.adhoc.ModelBuilder;
import mklab.JGNN.adhoc.ModelTraining;
import mklab.JGNN.adhoc.datasets.Citeseer;
+import mklab.JGNN.adhoc.train.SampleClassification;
import mklab.JGNN.core.Matrix;
import mklab.JGNN.nn.Model;
import mklab.JGNN.core.Slice;
@@ -11,6 +12,7 @@
import mklab.JGNN.nn.initializers.XavierNormal;
import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.BinaryCrossEntropy;
+import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;
/**
@@ -42,20 +44,24 @@ public static void main(String[] args) {
Slice nodeIds = dataset.samples().getSlice().shuffle(100);
- long tic = System.currentTimeMillis();
- Model model = new ModelTraining()
+ Slice nodes = dataset.samples().getSlice().shuffle(100);
+ ModelTraining trainer = new SampleClassification()
+ .setFeatures(dataset.features())
+ .setOutputs(dataset.labels())
+ .setTrainingSamples(nodes.range(0, 0.6))
+ .setValidationSamples(nodes.range(0.6, 0.8))
.setOptimizer(new Adam(0.01))
.setEpochs(3000)
.setPatience(300)
.setNumBatches(20)
.setParallelizedStochasticGradientDescent(true)
.setLoss(new BinaryCrossEntropy())
- .setVerbose(true)
- .setValidationLoss(new Accuracy())
- .train(new XavierNormal().apply(modelBuilder.getModel()),
- dataset.features(),
- dataset.labels(),
- nodeIds.range(0, 0.7), nodeIds.range(0.7, 0.8));
+ .setValidationLoss(new VerboseLoss(new Accuracy()));
+
+ long tic = System.currentTimeMillis();
+ Model model = modelBuilder.getModel()
+ .init(new XavierNormal())
+ .train(trainer);
long toc = System.currentTimeMillis();
double acc = 0;
diff --git a/JGNN/src/examples/graphClassification/SortPooling.java b/JGNN/src/examples/graphClassification/SortPooling.java
index 2dfa8ae5..b9f23067 100644
--- a/JGNN/src/examples/graphClassification/SortPooling.java
+++ b/JGNN/src/examples/graphClassification/SortPooling.java
@@ -3,14 +3,18 @@
import java.util.Arrays;
import mklab.JGNN.adhoc.ModelBuilder;
+import mklab.JGNN.adhoc.ModelTraining;
import mklab.JGNN.adhoc.parsers.LayeredBuilder;
+import mklab.JGNN.adhoc.train.AGFTraining;
import mklab.JGNN.core.Matrix;
import mklab.JGNN.core.Tensor;
import mklab.JGNN.core.ThreadPool;
import mklab.JGNN.nn.Loss;
import mklab.JGNN.nn.Model;
import mklab.JGNN.nn.initializers.XavierNormal;
+import mklab.JGNN.nn.loss.Accuracy;
import mklab.JGNN.nn.loss.CategoricalCrossEntropy;
+import mklab.JGNN.nn.loss.report.VerboseLoss;
import mklab.JGNN.nn.optimizers.Adam;
import mklab.JGNN.nn.optimizers.BatchOptimizer;
@@ -45,40 +49,30 @@ public static void main(String[] args){
TrajectoryData dtrain = new TrajectoryData(8000);
TrajectoryData dtest = new TrajectoryData(2000);
- Model model = builder.getModel().init(new XavierNormal());
- BatchOptimizer optimizer = new BatchOptimizer(new Adam(0.01));
- Loss loss = new CategoricalCrossEntropy();
- for(int epoch=0; epoch<600; epoch++) {
- // gradient update over all graphs
- for(int graphId=0; graphId Training epochs for the created model can be implemented
manually, by passing inputs, obtaining outputs, computing losses, and triggering backpropagation
on an optimizer. As these steps may be complicated, JGNN automates common
- training patterns with a In the example, a parameter initializer is applied on the model before training is conducted.
- This is a cold start scenario, as opposed to a warm start that continues training already
- trained parameters.
- Selecting an initializer is not part of training strategies
+ training patterns by extending a base Of data needed for training, the graph adjacency matrix and node features are already declared as constants by the
+ To finish describing the training strategy, the example selects
+ Trained models and their generating builders can be saved and loaded. The next snippet demonstrates
how raw predictions can be made too. During this process,
@@ -358,7 +366,7 @@ model
(the same instance as the first
- * argument).
- */
- public Model train(Model model, Matrix features, Matrix labels, Slice trainingSamples, Slice validationSamples) {
- // ACTUAL TRAINING
- double minLoss = Double.POSITIVE_INFINITY;
- HashMapthis
classification training instance.
+ */
+ public SampleClassification setOutputs(Matrix labels) {
+ if (this.labels != null)
+ throw new RuntimeException("Can only set labels once in a SampleClassification instance.");
+ this.labels = labels;
+ return this;
+ }
+
+ /**
+ * Sets a slice of training samples. These should be identifiers of
+ * feature/label rows; basically, they reflect which rows of these matrices
+ * should be retrieved during training. If multiple batches are set, for example
+ * with {@link #setNumBatches(int)}, then these samples are further split for
+ * each batch.
+ *
+ * @param trainingSamples The slice of training samples.
+ * @return <code>this</code> classification training instance.
+ */
+ public SampleClassification setTrainingSamples(Slice trainingSamples) {
+ if (this.trainingSamples != null)
+ throw new RuntimeException("Can only set a training sample slice once in a SampleClassification instance.");
+ this.trainingSamples = trainingSamples;
+ return this;
+ }
+
+ /**
+ * Sets a slice of validation samples. These should be identifiers of
+ * feature/label rows; basically, they reflect which rows of these matrices
+ * should be retrieved during validation.
+ *
+ * @param validationSamples The slice of validation samples.
+ * @return <code>this</code> classification training instance.
+ */
+ public SampleClassification setValidationSamples(Slice validationSamples) {
+ if (this.validationSamples != null)
+ throw new RuntimeException(
+ "Can only set a validation sample slice once in a SampleClassification instance.");
+ this.validationSamples = validationSamples;
+ return this;
+ }
+
+ @Override
+ protected void onStartEpoch(int epoch) {
+ if (stochasticGradientDescent)
+ trainingSamples.shuffle(epoch);
+ }
+
+ @Override
+ protected Listthis
Tensor instance.
*/
public Tensor setDimensionName(Tensor other) {
- assertMatching(other);
- if (dimensionName == null)
+ //assertMatching(other);
+ if (other.getDimensionName() != null)
dimensionName = other.getDimensionName();
return this;
}
diff --git a/JGNN/src/main/java/mklab/JGNN/core/matrix/DenseMatrix.java b/JGNN/src/main/java/mklab/JGNN/core/matrix/DenseMatrix.java
index 7c7b9c47..9f9e5f79 100644
--- a/JGNN/src/main/java/mklab/JGNN/core/matrix/DenseMatrix.java
+++ b/JGNN/src/main/java/mklab/JGNN/core/matrix/DenseMatrix.java
@@ -105,7 +105,7 @@ public Matrix matmul(Matrix with) {
@Override
public Matrix matmul(Matrix with, boolean transposeThis, boolean transposeWith) {
- if (with instanceof SparseMatrix)
+ if (!(with instanceof DenseMatrix) && !(with instanceof VectorizedMatrix))
return super.matmul(with, transposeThis, transposeWith);
// Determine the dimensions based on whether we transpose or not
@@ -124,8 +124,7 @@ public Matrix matmul(Matrix with, boolean transposeThis, boolean transposeWith)
// Create the resulting matrix
DenseMatrix ret = new DenseMatrix(rowsThis, colsWith);
- double[] with_tensor_values = (with instanceof VectorizedMatrix) ? ((VectorizedMatrix) with).tensor.values
- : ((DenseMatrix) with).tensor.values;
+ double[] with_tensor_values = (with instanceof VectorizedMatrix) ? ((VectorizedMatrix) with).tensor.values: ((DenseMatrix) with).tensor.values;
for (int col2 = 0; col2 < colsWith; ++col2) {
for (int row = 0; row < rowsThis; ++row) {
diff --git a/JGNN/src/main/java/mklab/JGNN/core/matrix/VectorizedMatrix.java b/JGNN/src/main/java/mklab/JGNN/core/matrix/VectorizedMatrix.java
index 8d6edf55..5c1884d8 100644
--- a/JGNN/src/main/java/mklab/JGNN/core/matrix/VectorizedMatrix.java
+++ b/JGNN/src/main/java/mklab/JGNN/core/matrix/VectorizedMatrix.java
@@ -105,7 +105,7 @@ public Matrix matmul(Matrix with) {
@Override
public Matrix matmul(Matrix with, boolean transposeThis, boolean transposeWith) {
- if (with instanceof SparseMatrix)
+ if (!(with instanceof DenseMatrix) && !(with instanceof VectorizedMatrix))
return super.matmul(with, transposeThis, transposeWith);
// Determine the dimensions based on whether we transpose or not
diff --git a/JGNN/src/main/java/mklab/JGNN/core/util/Range.java b/JGNN/src/main/java/mklab/JGNN/core/util/Range.java
index ee25c7f2..10d6f68e 100644
--- a/JGNN/src/main/java/mklab/JGNN/core/util/Range.java
+++ b/JGNN/src/main/java/mklab/JGNN/core/util/Range.java
@@ -4,33 +4,45 @@
import java.util.NoSuchElementException;
/**
- * Implements an iterator that traverses a range (similar to Python's range(min, max) method).
- * It is often used by {@link mklab.JGNN.core.Tensor} derived classes to traverse through all
- * element positions in sequential order.
+ * Implements an iterator that traverses a range [min, max) where the right side
+ * is non-inclusive. That is, this method behaves similarly to Python's
+ * range(min, max). It is often used by {@link mklab.JGNN.core.Tensor} derived
+ * classes to traverse through all element positions in sequential order.
*
* @author Emmanouil Krasanakis
*/
public class Range implements Iteratorthis
Model instance.
* @see #addOutput(NNOperation)
@@ -155,9 +177,10 @@ public Model addInput(Variable input) {
inputs.add(input);
return this;
}
-
+
/**
* Adds to the model's output the output of the provided operation.
+ *
* @param output An operation to set as an output.
* @return <code>this</code> Model instance.
* @see #addInput(Variable)
@@ -169,22 +192,24 @@ public Model addOutput(NNOperation output) {
outputs.add(output);
return this;
}
-
+
/**
- * Retrieves a list of model inputs. Editing this list affects
- * the model and is not recommended. Input order is based on
- * the chronological addition of inputs through {@link #addInput(Variable)}.
+ * Retrieves a list of model inputs. Editing this list affects the model and is
+ * not recommended. Input order is based on the chronological addition of inputs
+ * through {@link #addInput(Variable)}.
+ *
* @return A list of {@link Variable} instances.
* @see #getOutputs()
*/
public ArrayList2. Quickstart
ModelTraining
class. Instances of this class
- accept a method chain notation to set their parameters, like the number of epochs, patience
- for early stopping, the employed optimizer, and loss functions. An example is presented below,
- where Adam
optimization with learning rate 0.01 is performed, and a verbose
- variation of a validation loss prints the progress. To run a full training process,
- pass the defined strategy to a model alongside input data, corresponding output data, as well
- as training and validation slices.ModelTraining
class with training strategies
+ tailored to different data formats and predictive tasks. Find these subclasses in the
+ adhoc.train
+ Javadoc. Instances of model trainers
+ accept a method chain notation to set their parameters. Parameters usually include training and validation data
+ (these should be made first and depend on the model training class) and aspects of the training strategy like the number of epochs, patience
+ for early stopping, the employed optimizer, and loss functions. An example is presented below.FastBuilder
constructor, as node classification takes place on the same graph
+ with fully known node features. Thus, input features are a column of node identifiers, which
+ classify
method above uses to gather
+ the predictions on respective nodes. Architecture outputs are softmax approximations of the one-hot
+ encodings of respective node labels. The simplest way to handle missing labels for test data without modifying
+ the example is to leave their one-hot encodings as zeroes only.
+ Additionally, this particular training strategy accepts training and validation data slices, where slices are lists
+ of integer entries pointing to rows of inputs and outputs - find more later.Adam
optimization with learning rate 0.01, and training
+ over many epochs with early stopping. A verbose
+ loss prints every 10 epochs the progress of cross entropy and accuracy on validation data, where the
+ first of these two is used for the early stopping criterion.
+ To run a full training process, pass a strategy to a model.
+ In a cold start scenario, apply a parameter initializer first before training is conducted.
+ A warm start that resumes training from some previously trained outcomes would skip this step.
+ Selecting an initializer is not part of the training strategy
to signify its model-dependent nature; dense layers should maintain the expected
input variances in the output before the first epoch, and therefore the initializer depends
- on the type of activation functions. Moreover,
- the graph's adjacency matrix and node features are already declared as constants by the
- FastBuilder
constructor, as node classification takes place on the same graph
- with fully known node features. Architecture inputs are the node identifiers, which in the
- classify
method above are used to gather
- the predictions on respective nodes, and desired outputs are the corresponding labels from
- the dataset. Labels that are not known still need to have some value; as a convention when working
- with your own data, leave the one-hot label encoding of test nodes as zeroes. Doing so in our
- present example would not affect the outcome either.
- The last two training arguments of the train
method
- then accept training and validation data slices. Slices are effectively lists of integer entries
- pointing to rows of inputs and outputs - find more later.
+ on the type of activation functions.
ModelTraining trainer = new ModelTraining()
+
+ .init(new XavierNormal())
+ .train(trainer);Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
+Matrix inputFeatures = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input (equivalent to: nodes.samplesAsFeatures())
+ModelTraining trainer = new SampleClassification()
+ // training data
+ .setFeatures(inputFeatures)
+ .setOutputs(dataset.labels())
+ .setTrainingSamples(nodes.range(0, 0.6))
+ .setValidationSamples(nodes.range(0.6, 0.8))
+ // training strategy
.setOptimizer(new Adam(0.01))
.setEpochs(3000)
.setPatience(100)
.setLoss(new CategoricalCrossEntropy())
- .setValidationLoss(new VerboseLoss(new Accuracy()).setInterval(10)); // print validation every 10 epochs
-
-Slice nodes = dataset.samples().getSlice().shuffle(); // a permutation of node identifiers
-Matrix inputData = Tensor.fromRange(nodes.size()).asColumn(); // each node has its identifier as an input
+ .setValidationLoss(new VerboseLoss(new CategoricalCrossEntropy(), new Accuracy()).setInterval(10)); // print every 10 epochs
+
Model model = modelBuilder.getModel()
- .init(new XavierNormal())
- .train(trainer,
- inputData,
- dataset.labels(),
- nodes.range(0, 0.6), // training slice
- nodes.range(0.6, 0.8) // validation slice
- );
2. Quickstart
modelBuilder.save(Paths.get("gcn_cora.jgnn")); // needs a Path as an input
-Model loadedModel = ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel(); // loading creates an intermediate modelbuilder
+Model loadedModel = ModelBuilder.load(Paths.get("gcn_cora.jgnn")).getModel(); // loading creates a new modelbuilder from which to get the model
Matrix output = loadedModel.predict(Tensor.fromRange(0, nodes.size()).asColumn()).get(0).cast(Matrix.class);
double acc = 0;
diff --git a/tutorials/Data.md b/tutorials/Data.md
deleted file mode 100644
index f3d9fa00..00000000
--- a/tutorials/Data.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# :zap: Data creation
-
-If you have been following the tutorial, we have only used automatically downloaded datasets till now.
-In practice, you will want to use your own data. This tutorial covers typical code patterns on doing so.
-
-1. [Creating preallocated feature matrices](#creating-preallocated-feature-matrices)
-2. [Converting lists of tensors to matrices](#converting-lists-of-tensors-to-matrices)
-3. [Constructing graph adjacency matrices](#constructing-graph-adjacency-matrices)
-4. [Managing identifiers](#managing-identifiers)
-
-## Creating preallocated feature matrices
-If you know the number of nodes or data samples and features a-priori, you can create
-dense feature matrices with the following code. This uses the bare minimum memory necessary
-to construct the feature matrix. If features are dense (do not have a lot of zeroes),
-you could also consider using the `DenseMatrix` class instead of initializing a sparse
-matrix - the two classes are interoperable and have the same constructor arguments
- so that the rest of the code in the tutorials remains the same.
-
-```java
-Matrix features = new SparseMatrix(numNodes, numFeatures);
-for(long nodeId=0; nodeId rows = new ArrayList();
-try(BufferedReader reader = new BufferedReader(new FileReader(file))){
- String line = reader.readLine();
- while (line != null) {
- String[] cols = line.split(",");
- Tensor features = new SparseTensor(cols.length); // or a dense tensor
- for(int col=0;col edge : edges)
- matrix.put(edge.getKey(), edge.getValue(), 1).put(edge.getValue(), edge.getKey(), 1);
-```
-
-:bulb: Don't forget to normalize or apply the renormalization trick (self-edges) on matrices
-if these are needed by your algorithm, for instance by calling `adjacency.setMainDiagonal(1).setToSymmetricNormalization();` after matrix construction.
-
-## Managing identifiers
-The above snippets all reference node identifiers. To help you with managing these, JGNN
-provides an `IdConverter` class. You can convert hashable objects (e.g., Strings) to identifiers
-by calling `IdConverter.getOrCreateId(object)`. The same functionality is also helpful
-for one-hot encoding of class labels. If you want to search only for previously registered identifiers,
-for example to catch logical errors, you can use `IdConverter.get(object)`.
-
-For example, you can construct a label matrix of one-hot encodings for your training data per:
-
-```java
-// register the ids in data
-IdConverter nodeIds = new IdConverter();
-IdConverter classIds = new IdConverter();
-for(Entry entry : nodeLabels) {
- nodeIds.getOrCreateId(entry.getKey()); // or .get(entry.getKey()) if reusing nodeIds of feature loading
- classIds.getOrCreateId(entry.getValue());
-}
-// create the matrix
-Matrix labels = new SparseMatrix(nodeIds.size(), classIds.size());
-for(Entry entry : nodeLabels)
- labels.put(nodeIds.get(entry.getKey()), classIds.get(entry.getValue()), 1);
-```
-
-As a final remark, you can reverse-search the `IdConverter` to obtain the original object of your
-predictions by passing the identifier to `IdConverter.get(long identifier)`. For example:
-
-```java
-long nodeId = nodeIds.get("nodeName");
-Tensor prediction = labels.accessRow(nodeId);
-long predictedClassId = prediction.argmax();
-System.out.println(classIds.get(predictedClassId));
-```
-
-
-
-[NEXT: Primitives](Primitives.md)
\ No newline at end of file
diff --git a/tutorials/Debugging.md b/tutorials/Debugging.md
deleted file mode 100644
index 9d108807..00000000
--- a/tutorials/Debugging.md
+++ /dev/null
@@ -1,149 +0,0 @@
-# :zap: Debugging
-JGNN offers high-level tools for debugging base architectures.
-This tutorial covers what errors to expect, what diagnostics to run,
-and how to make sense of error messages to fix erroneous architectures.
-
-1. [Name checking](#name-checking)
-2. [Debugging execution DAGs](#debugging-execution-dags)
-3. [Debugging logical errors](#debugging-logical-errors)
-4. [Monitoring operations](#monitoring-operations)
-
-## Name checking
-When parsing operations, values should be assigned to variables before
-subsequent use. Model builders check for unused variables and raise
-respective runtime exceptions.
-
-For example, for a `FastBuilder` that tries to parse the expression
-`.layer("h{l+1}=relu(hl@matrix(features, 32, reg)+vector(32))")`,
-where we remind that the layer definition is an enhanced version of
-operation declaration, and `hl` is a typographical error of `h{l}`,
-the following exception is thrown:
-
-```java
-Exception in thread "main" java.lang.RuntimeException: Symbol hl not defined.
-```
-
-## Debugging execution DAGs
-Model builders are responsible for creating directed acyclic graphs (DAGs)
-in models they are managing (these are not to be confused with graph inputs GNNs
-are managing). During parsing, builders may create temporary variables, which
-start with the `_tmp` prefix and are followed by a number, and linking components
-to others that use them.
-
-The easiest way to understand execution DAGs is to actually look
-at them. The library provides two tools to that end: a) a `.print()`
-method for model build functional flows that prints all the parsed
-expressions and intermediate expression in the system console, and b)
-a `.getExecutionGraphDot()` method that returns a String holding the execution
-graph in *.dot* format for visualization with external tools, such
-as [GraphViz](https://dreampuf.github.io/GraphvizOnline).
-
-A second error-checking procedure consists of checking
-for model operations that do not
-eventually reach any outputs, for example one of the output operation
-outcomes defined by `.out(String)`. Avoiding this behavior is particularly
-important, as it messes with graph traversal counting during backpropagation.
-However, to accommodate complex use cases, these checks can only be manually performed
-at the very end of model building with the builder method `.assertBackwardValidity()`.
-Calling these checks early on in functional model building
-will likely throw exceptions that are not truly logical errors - the
-outputs may be declared at later functional steps. Thrown errors would look like this:
-```java
-Exception in thread "main" java.lang.RuntimeException: The component class mklab.JGNN.nn.operations.Multiply: _tmp102 = null does not lead to an output
- at mklab.JGNN.nn.ModelBuilder.assertBackwardValidity(ModelBuilder.java:504)
- at nodeClassification.APPNP.main(APPNP.java:45)
-```
-For example, this indicates that the component *_tmp102* does not lead to an output, and we should look
-at the execution tree to understand its role.
-
-
-## Debugging logical errors
-There are two main mechanisms for the identification of logically erroneous
-architectures: a) mismatched dimension size, and b) mismatched dimension names.
-Of the two, dimension sizes are easy to comprehend, since they just mean that
-operations are mathematically invalid.
-
-On the other hand, dimension names need to be determined for
-starting data, such as model inputs and parameters, and are automatically
-inferred from operations on such primitives. For in-line declaration of
-parameters in operations or layers, dimension names are copied from any hyperparameters.
-Therefore, for easier debugging,
-prefer using functional expressions that declare hyperparameters:
-
-```java
-new ModelBuilder()
- .config("features", 7)
- .config("hidden", 64)
- .var("x")
- .operation("h = x@matrix(features, hidden)");
-```
-instead of the simpler `new ModelBuilder().var(x).operation('h = x@matrix(features, hidden)')`
-
-
-Both mismatched dimensions and mismatched dimension names
-throw runtime exceptions. The beginning of their
-error console traces should start with something like this:
-```java
-java.lang.IllegalArgumentException: Mismatched matrix sizes between SparseMatrix (3327,32) 52523/106464 entries and DenseMatrix (64, classes 6)
-During the forward pass of class mklab.JGNN.nn.operations.MatMul: _tmp4 = null with the following inputs:
- class mklab.JGNN.nn.activations.Relu: h1 = SparseMatrix (3327,32) 52523/106464 entries
- class mklab.JGNN.nn.inputs.Parameter: _tmp5 = DenseMatrix (64, classes 6)
-java.lang.IllegalArgumentException: Mismatched matrix sizes between SparseMatrix (3327,32) 52523/106464 entries and DenseMatrix (64, classes 6)
- at mklab.JGNN.core.Matrix.matmul(Matrix.java:258)
- at mklab.JGNN.nn.operations.MatMul.forward(MatMul.java:21)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:180)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:170)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:170)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:170)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:170)
- at mklab.JGNN.nn.NNOperation.runPrediction(NNOperation.java:170)
- ...
-```
-
-As an example, let us try to understand what this error tells us. First,
-it notifies us of the actual problem: that the architecture encounters mismatched matrix
-sizes when trying to multiply a 3327x32 SparseMatrix with a 64x6 dense matrix.
-This is easy to understand and there are also dimension names in there;
-for this example, only *classes* is a named dimension, but if models
-and input data are well-designed more names will be in there and some
-errors will also arise from different dimension names.
-
-At any rate, understanding the exact error is easy - the inner matrix dimensions
-of matrix multiplication
-do not agree. However, we need to find the error within our architecture to
-be able to fix whatever is causing this.
-
-To do this, we continue reading and see the message
-`During the forward pass of class mklab.JGNN.nn.operations.MatMul: _tmp4 = null`.
-This tells us that the problem occurs when trying to calculate *_tmp4*
-which is currently assigned a *null* tensor as value (this is pretty normal,
-as the forward pass has not yet concluded for that variable to assume a value).
-Some more information is there to see what the operation's inputs are like - in this case
-they coincide with the multiplication's inputs, but this will not always be the case.
-
-The important point is to go back to the execution tree and see during which exact operation
-this variable is defined. There, we will undoubtedly find that some dimension had 64 instead
-of 32 elements or conversely.
-
-## Monitoring operations
-In addition to all other debugging mechanisms, JGNN presents a way to show when
-forward and backward operations of specific code components are executed and with what kinds
-of arguments.
-This can be particularly useful when testing new components in real (complex) architectures.
-
-The practice consists of calling a *monitor(...)* function within operations.
-This does not affect what expressions do and only enables printing execution tree operations
-on operation components. For example, to monitor the outcome of matrix multiplication within
-the following operation:
-
-```java
-builder.operation("h = relu(x@matrix(features, 64) + vector(64))")
-```
-
-it should be converted to:
-
-```java
-builder.operation("h = relu(monitor(x@matrix(features, 64)) + vector(64))")
-```
-
-[NEXT: Message passing GNNs](Message.md)
\ No newline at end of file
diff --git a/tutorials/GNN.md b/tutorials/GNN.md
deleted file mode 100644
index 1e9ee8f1..00000000
--- a/tutorials/GNN.md
+++ /dev/null
@@ -1,185 +0,0 @@
-# :zap: Graph neural networks for node classification
-
-Graph neural networks (GNNs) extend the concept of base [neural networks](tutorials/NN.md).
-You can already write any GNN with the base `LayeredBuilder` class for designing neural models,
-but JGNN provides some common design choices that simplify the process for node classification.
-
-1. [Initializing a GNN builder](#initializing-a-gnn-builder)
-2. [GNN concepts](#gnn-concepts)
-3. [Adding a classification layer](#adding-a-classification-layer)
-4. [Example architecture](#example-architecture)
-5. [GNN training](#gnn-training)
-
-*Full implementations can be found in the [examples](../JGNN/src/examples/nodeClassification/APPNP.java).*
-
-## Initializing a GNN builder
-The `FastBuilder` class for building GNN architectures extends the generic
-`LayeredBuilder` with common graph neural network operations.
-The only difference is that now we initialize it with a
-square matrix A, which is typically a normalization of the adjacency matrix, and a feature matrix h0
-(this is different than the symbol h{0}).
-Given that you will most likely use normal neural layers, you only need
-to remember that in symbolic parsing A will correspond to the adjacency matrix
-and that layer representations should be annotated with h{l}. We may make a more
-customizable version of the builder in the future, but these symbols will always remain
-the default. Preferably, each row of the feature matrix should correspond to the features
-of one node/sample. The normalized adjacency matrix can -and usually should-
-be sparse to save on memory.
-
-Most GNNs perform the renormalization trick by adding a self-loop
-before applying symmetric normalization on the adjacency matrix.
-Assuming no existing self-loops, the following snippet shows how to apply those
-transformations on adjacency matrices, such as ones obtained from `Dataset.graph()`.
-The snippet uses in-place arithmetic to directly alter raw matrix data:
-
-```java
-adjacency.setMainDiagonal(1).setToSymmetricNormalization();
-```
-
-Finally, you can instantiate the builder by providing the adjacency and feature
-matrices per:
-
-```java
-FastBuilder modelBuilder = new FastBuilder(adjacency, features);
-```
-
-Sending specific tensors to the builder's constructor
-does not restrict you from editing or replacing them later,
-even after architectures have been trained.
-For example, you can add node edges later by editing an element of the
-adjacency matrix per:
-
-```java
-Matrix adjacency = ((Constant)modelBuilder.get("A")).get(); // retrieves constant's value from the architecture
-adjacency.put(from, to, value);
-```
-
-
-## GNN concepts
-
-The base operation of GNNs is to propagate node representations to neighbors via graph edges,
-where they are aggregated. Aggregation typically consists of a weighted average
-per the normalized adjacency matrix edge weights, which propagates information
-while respecting spectral graph characteristics. Other types of aggregation
-can be found in the more advanced tutorial for [Message passing GNNs](Message.md).
-Spectral aggregation can be achieved with a simple matrix multiplication on the previous layer's
-node features per `.layer("h{l+1}=A @ h{l}")`. In practice, you will often want to
-add more operations on the propagation, such as passing it through a dense layer.
-For example, the original GCN architecture defines layers of the form:
-
-```java
-.layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden, reg))+vector(hidden))")
-.layer("h{l+1}=A@(h{l}@matrix(hidden, classes, reg))+vector(classes)")
-```
-
-Most architectures nowadays also perform edge dropout, which is as simple as applying dropout
-on the adjacency matrix values on each layer per:
-
-```java
-.layer("h{l+1}=dropout(A,0.5) @ h{l}")
-```
-
-Recent areas of heterogenous graph research also explicitly use the graph Laplacian,
-which you can insert into the architecture as a normal constant per `.constant("L", adjacency.negative().cast(Matrix.class).setMainDiagonal(1))`. Even more complex concepts
-can be modelled with edge attention that gathers and
-performs the dot product of edge nodes to provide new edge weights, exponentiating
-non-zero weights with *nexp* and applying row-wise L1 transformation. This yields
-an adjacency matrix weighting unique to the layer per `.operation("A{l} = L1(nexp(att(A, h{l})))")`.
-Nonetheless, it is recommended that you stay away from these kinds of complex architectures
-when learning from large graphs, as JGNN is designed to be lightweight and not fast.
-Consider using GPU GNNs if 1-2% accuracy gains matter enough to make your application
-several folds slower.
-
-
-## Adding a classification layer
-Thus far, we touched on propagation mechanisms of GNNs, which consider the features of all nodes.
-However, when moving to a node classification setting,
-training data labels are typically available only for certain nodes.
-We thus need a mechanism that can retrieve the predictions of the top neural layer for certain nodes
-and pass them through a softmax activation.
-This can already be achieved with normal neural model definitions using the gather bracket operation
-after declaring a variable of which nodes to retrieve:
-
-```java
-.var("nodes")
-.layer("h{l} = softmax(h{l})")
-.operation("ouput = h{l}[nodes]")
-```
-
-Recall that h{l} always points to the top layer when writing a new layer.
-
-
-This way, the built model takes as inputs a set of nodes, performs the forward pass of the
-architecture and then selects the provided nodes to use as outputs (and backpropagate from).
-**All** nodes are needed for training because they are made aware of each other via the
-graph's structure.
-
-To simplify how node classification architectures are defined,
-the above symbolic snippet is automatically generated and applied by calling the
-`.classify()` method of the `FastBuilder` instead.
-
-## Example architecture
-
-As an example of how to define a full GNN with symbolic parsing, let us define
-the well-known APPNP architecture. This comprises two normal dense layers and then
-propagates their predictions through the graph structure with a fixed-depth approximation
-of the personalized PageRank algorithm. To define the architecture,
-let us consider a `Dataset dataset` loaded by the library, for which we normalize the
-adjacency matrix and send everything to the GNN builder class. We let the outcome of
-the first two dense layers to be remembered as `h{0}` (this is *not* `h0`), define
-a diffusion rate constant `a` and then perform 10 times the
-personalized PageRank diffusion scheme on a graph with edge dropout 0.5. This is all achieved
-with the same `layer` and `layerRepeat` methods as neural builders.
-
-```java
-dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();
-long numClasses = dataset.labels().getCols();
-
-ModelBuilder modelBuilder = new FastBuilder(dataset.graph(), dataset.features())
- .config("reg", 0.005)
- .config("hidden", 16)
- .config("classes", numClasses)
- .layer("h{l+1}=relu(h{l}@matrix(features, hidden, reg)+vector(hidden))")
- .layer("h{l+1}=h{l}@matrix(hidden, classes)+vector(classes)")
- .rememberAs("0")
- .constant("a", 0.9)
- .layerRepeat("h{l+1} = a*(dropout(A, 0.5)@h{l})+(1-a)*h{0}", 10)
- .classify();
-```
-
-
-## GNN training
-
-GNN classification models can be backpropagated by considering a list of node indices and desired
-predictions for those nodes. However, you can also use the interfaces discussed in the
-[learning](tutorials/Learning.md) tutorial to automate the training process and control it
-in a fixed manner.
-
-Recall that training needs to call the model's method
-`.train(optimizer, features, labels, train, valid)`.
-The important question is what to consider as training inputs and outputs, given that node features
-and the graph are passed to the `FastBuilder` constructor.
-
-The answer is that the (ordered) list of all node identifiers *0,1,2,...* constitutes the training inputs
-and the corresponding labels constitute the outputs. You can create a slice of identifiers
-and use JGNN to design the training process per:
-
-```java
-Slice nodes = dataset.samples().getSlice().shuffle(100); // or nodes = new Slice(0, numNodes).shuffle(100);
-Model model = modelBuilder()
- .getModel()
- .init(...)
- .train(trainer,
- nodes.samplesAsFeatures(),
- dataset.labels(),
- nodes.range(0, trainSplit),
- nodes.range(trainSplit, validationSplit));
-
-```
-
-In the above snippet, the label matrix can have zeroes for the nodes not used for training.
-If only the first nodes have known labels, the label matrix may also have fewer rows.
-
-
-
-[NEXT: Graph neural networks for graph classification](GraphClassification.md)
\ No newline at end of file
diff --git a/tutorials/GraphClassification.md b/tutorials/GraphClassification.md
deleted file mode 100644
index 9c033aec..00000000
--- a/tutorials/GraphClassification.md
+++ /dev/null
@@ -1,190 +0,0 @@
-# :zap: Graph neural networks for graph classification
-Most neural network architectures are designed with the idea of learning to classify
-nodes or samples. However, GNNs also provide the prospect of classifying graphs
-based on their structure.
-
-1. [Organizing data](#organizing-data)
-2. [Defining the architecture](#defining-the-architecture)
-3. [Training the architecture](#training-the-architecture)
-4. [Sort pooling](#sort-pooling)
-5. [Parallelized training](#parallelized-training)
-
-*Full implementations can be found in the [examples](../JGNN/src/examples/graphClassification/SortPooling.java).*
-
-## Organizing data
-
-To define architectures for graph classification,
-we can make use of the generic `LayeredBuilder` class. The main difference compared
-to traditional neural networks is that architecture inputs do not all exhibit the
-same size (e.g. some graphs may have more nodes than others) and therefore they
-can not be organized into tensors of common dimensions.
-
-Instead, let us presume that training data are stored in the following lists:
-
-```java
-ArrayList adjacencyMatrices = new ArrayList();
-ArrayList nodeFeatures = new ArrayList();
-ArrayList graphLabels = new ArrayList();
-```
-
-## Defining the architecture
-
-The `LayeredBuilder` already introduces the input variable *h0* for sample features.
-We can use it to pass node features to the architectures, so we only need to add
-a second input storing the (sparse) adjacency matrix per `.var("A")`. We can proceed
-to define a GNN architecture, for instance as explained in previous tutorials.
-
-This time, though, we do not aim to classify nodes but the whole graph. For this reason,
-we need to pool top layer node representations, for instance by averaging them
-across all nodes per `.layer("h{l+1}=softmax(mean(h{l}, row))")`. Remember to apply
-a softmax activation for classification tasks.
-Finally, we need to set up the top layer as the built model's
-output per `.out("h{l}")`.
-
-An example architecture following these principles is the following:
-
-```java
-ModelBuilder builder = new LayeredBuilder()
- .var("A")
- .config("features", nodeLabelIds.size())
- .config("classes", graphLabelIds.size())
- .config("hidden", 16)
- .layer("h{l+1}=relu(A@(h{l}@matrix(features, hidden)))")
- .layer("h{l+1}=relu(A@(h{l}@matrix(hidden, classes)))")
- .layer("h{l+1}=softmax(mean(h{l}, row))")
- .out("h{l}");
-```
-
-## Training the architecture
-
-For the time being, training architectures like the above on prepared data should
-manually call the backpropagation for each epoch and each graph in the training
-batch. To do this, first retrieve the model and initialize its parameters:
-
-```java
-Model model = builder.getModel().init(new XavierNormal());
-```
-
-Next, define a loss function and set up a batch optimization
-strategy wrapping any base optimizer and accumulating parameter updates until
-`BatchOptimizer.updateAll()` is called later on:
-
-```java
-Loss loss = new CategoricalCrossEntropy();
-BatchOptimizer optimizer = new BatchOptimizer(new Adam(0.01));
-```
-
-Finally, training can be conducted by iterating through epochs and training samples
-and appropriately calling the `Model.train` for combinations of node features and graph
-adjacency matrix inputs, and graph label outputs.
-At the end of each batch (e.g. each epoch), do not forget
-to call the `optimizer.updateAll()` method to apply the accumulated gradients. This
-process can be realized with the following code:
-
-```java
-for(int epoch=0; epoch<300; epoch++) {
- for(int graphId=0; graphIda) `var` to define inputs
-
b) `config` to define hyperparameters
-
c) `operation` to parse symbolic operations
-
d) `out` to define output variables
-
-We can retrieve the defined model at anytime with the builder's `.getModel()` method.
-Until that point, models are incrementally constructed with functional programming.
-For this example, we define a two-layer perceptron, with a relu hidden layer and
-a row-wide softmax activation. Learnable matrices and vectors could be defined manually,
-but we automatically generate them in operation definitions. The number of
-hidden dimensions (64 right now) could also have been set as a hyperparameter.
-`@` corresponds to matrix multiplication. Details on how to write operations
-are presented in the [next tutorial](NN.md).
-
-```java
-ModelBuilder modelBuilder = new ModelBuilder()
- .config("feats", numFeatures)
- .config("labels", numClasses)
- .config("reg", 1.E-5)
- .var("x")
- .operation("h = relu(x@matrix(feats, 64, reg)+vector(64))")
- .operation("yhat = softmax(h@matrix(64, labels)+vector(labels), row)")
- .out("yhat")
- .assertBackwardValidity();
-```
-
-In addition to normal syntax checks, the last method call asserts that all operations
-are eventually used by outputs, creating an error message otherwise.
-To further check up on the architecture, let's extract its execution graph in *.dot* format
-by writing:
-
-```java
-System.out.println(modelBuilder.getExecutionGraphDot());
-```
-
-Copying-and-pasting the outputted description to [GraphvizOnline](https://dreampuf.github.io/GraphvizOnline/) creates the following visualization
-of the execution graph. A more detailed overview of potential debugging actions
-is presented later in the [debugging](Debugging.md) tutorial.
-
-![Example execution graph](graphviz.png)
-
-## Training
-To train the model, we set up 50-25-25 training-validation-test data slices.
-These basically handle shuffled sample identifiers. You can use integers instead of
-doubles in the `range` method to reference a fixed number of samples instead of fractional slice sizes.
-
-```java
-Slice samples = dataset.samples().getSlice().shuffle(); // or samples = new Slice(0, labels.getRows()).shuffle();
-Slice train = samples.range(0, 0.5);
-Slice valid = samples.range(0.5, 0.75);
-Slice test = samples.range(0.75, 1);
-```
-
-Next, we set up a learning strategy given an Adam optimizer,
-a binary cross-entropy loss, and validation loss patience of 100 epochs:
-
-
-```java
-Optimizer optimizer = new Adam(0.1);
-
-ModelTraining trainer = new ModelTraining()
- .setOptimizer(optimizer)
- .setLoss(new BinaryCrossEntropy())
- .setEpochs(3000)
- .setPatience(100);
-```
-
-Finally, we train the model under this strategy by initializing its parameters
-and calling the optimizer:
-
-```java
-model
- .init(new XavierNormal())
- .train(optimizer, features, labels, train, valid);
-```
-
-:bulb: Real-world settings can further separate rows of the test set first without using
-more memory, but we keep things simple here.
-
-
-## Testing
-We finally report training accuracy on the test set. We demonstrate how single-sample predictions can be
-made and measure the accuracy of those. To do this, we use `Matrix.accessRow` to obtain specific matrix rows from node features as tensors and `Tensor.asRow` to convert the obtained tensors into a row representation. Row representations are matrices and can pass through the model just fine.
-We use `argmax` to convert one-hot predictions to label ids.
-
-```java
-double acc = 0;
-for(Long node : test) {
- Matrix nodeFeatures = features.accessRow(node).asRow();
- Matrix nodeLabels = labels.accessRow(node).asRow();
- Tensor output = model.predict(nodeFeatures).get(0);
- acc += (output.argmax()==nodeLabels.argmax()?1:0);
-}
-System.out.println("Acc\t "+acc/testIds.size());
-```
-
-[NEXT: Neural networks](NN.md)
\ No newline at end of file
diff --git a/tutorials/Message.md b/tutorials/Message.md
deleted file mode 100644
index c4f148c2..00000000
--- a/tutorials/Message.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# :zap: Message passing GNNs
-JGNN also supports the more generic view of GNNs as message passing mechanisms
-between node neighbors. This supports complex types of relational analysis,
-such as edge attributes, but may be computationally intense. Thus, prefer
-using [simpler GNNs](GNN.md) when possible.
-
-1. [Message passing principles](#message-passing-principles)
-2. [Creating messages](#creating-messages)
-3. [Receiving messages](#receiving-messages)
-4. [Neighbor attention](#neighbor-attention)
-
-## Message passing principles
-Message passing considers a setting where each edge is responsible for appropriately
-transforming and propagating representations to node neighbors. In this tutorial
-we show how to use JGNN to implement a generic realization in which the transformation
-can depend on the specific edge. The implementation is compatible to the formulation that
-[Velickovic (2022)](https://arxiv.org/pdf/2202.11097.pdf) shows to be able to capture
-several settings of interest.
-
-## Creating messages
-In the more general sense, messages can be matrices whose rows correspond to edges
-and columns to edge features. In the simplest scenario, you can create such matrices
-by gathering the features of edge source and destination nodes by accessing
-the respective elements of a feature matrix *h{l}*. Doing so requires that you
-first obtain edge source indexes *src=from(A)* and destination indexes *dst=to(A)*
-where *A* is an adjacency matrix. Thus, you can construct edge features per:
-
-```java
-modelBuilder
- .operation("src=from(A)")
- .operation("dst=to(A)")
- .operation("message{l}=h{l}[src] | h{l}[dst]");
-```
-
-The model builder parses *|* as the horizontal concatenation expression. You can
-also construct a global independent edge feature matrix and concatenate that
-too.
-
-Given that you have constructed a message, you can continue by defining any kind
-of ad-hoc mechanism or neural processing of messages with traditional matrix
-operations (take care to define correct matrix sizes for dense transformations, e.g.,
-twice the number of columns as *H{l}* in the previous snippet).
-For any kind of *LayeredBuilder* don't forget that *message{l}* within
-operations is needed to obtain a message from the representations *H{l}*
-that is not accidentally shared with future layers.
-
-## Receiving messages
-Receiver mechanisms need to perform some kind of reduction on messages.
-For the time being, JGNN implements only summation reduction,
-given that this has the same theoretical expressive power as the alternative
-of maximum-based reduction but is easier to backpropagate through.
-
-Reduction can be performed with the following code snippet. Note that
-the sum is weighted per the values of the adjacency matrix *A*. Thus,
-perform adjacency matrix normalization only if you want such weighting
-to take place.
-
-```java
-modelBuilder
- .operation("received{l}=reduce(transformed_message{l}, A)")
-```
-
-You can finally define layers that can transform node representations
-while accounting for received messages, for example per:
-```java
-modelBuilder
- .config("2feats{l}", ...)
- .layer("h{l+1}=(h{l} | received{l})@matrix(2feats,dims) + vector(dims)")
-```
-
-where *2feats{l}* is configured to a matching number of dimensions as the sum
-of the number of columns of *h{l}* and *transformed_message{l}*.
-
-## Neighbor attention
-A common realization of message passing GNNs is via sparse-dense matrix multiplication
-to emulate neighbor attention per: *A.(hTh)* where *A* is a sparse
-adjacency matrix, *.* is the Hadamard product (element-by-element multiplication)
-and *h* a dense matrix whose rows hold respective node representations.
-JGNN implements this operation and you can include it in symbolic definitions with the
-expression `att(A, h)`. Its implementation is considerably more lightweight
-than the equivalent message passing mechanism.
-
-True neighbor attention in the style of gated attention networks can be implemented
-by exponentiating all non-zero elements of the adjacency matrix and performing row-wise
-normalization per `L1(nexp(att(A, h)))`.
diff --git a/tutorials/Models.md b/tutorials/Models.md
deleted file mode 100644
index 5f1d0a58..00000000
--- a/tutorials/Models.md
+++ /dev/null
@@ -1,142 +0,0 @@
-# Models and builders
-
-## Table of Contents
-
-1. [JGNN Expressions](#jgnn-expressions)
-2. [JGNN Models](#jgnn-models)
-3. [Symbolic Model Definition](#symbolic-model-definition)
-4. [Learning Parameters](#learning-parameters)
-5. [Neural Network Examples](#neural-network-examples)
-6. [Multithread Batch Learning](#multithread-batch-learning)
-
-## JGNN Expressions
-
-The base structure used to define machine learning operations is the `mklab.JGNN.core.NNOperation` abstract class.
-This is implemented by common mathematical operations, which are presented in the following table. Operation instances
-can be added to inputs of other operations through the `addInput(NNOperation)` method of the latter. Starting points
-of operations are variables, constants and parameters, whose differences will be discussed later.
-
-:bulb: The hassle of learning to write expressions is removed with [symbolic model definition](#symbolic-model-definition).
-You can safely skip to that segment to learn how to write machine learning models without the tedious definitions of intermediate steps explained here.
-
-|Operator| Constructor | Number of inputs |
-| --- | --- | --- |
-| + | mklab.JGNN.nn.operations.Add() | 2 |
-| * | mklab.JGNN.nn.operations.Multiply() | 2 |
-| @ | mklab.JGNN.nn.operations.MatMul() | 2 |
-| 1-x | mklab.JGNN.nn.operations.Complement() | 1 |
-| log | mklab.JGNN.nn.operations.Log() | 1 |
-| variable | mklab.JGNN.nn.operations.Variable() | 0 |
-| constant | mklab.JGNN.nn.operations.Constant(tensor) | 0 |
-| parameter | mklab.JGNN.nn.operations.Parameter(tensor) | 0 |
-| relu | mklab.JGNN.nn.activations.Relu() | 1 |
-| tanh | mklab.JGNN.nn.activations.Tanh() | 1 |
-| sigmoid | mklab.JGNN.nn.activations.Sigmoid() | 1 |
-| lrelu | mklab.JGNN.nn.activations.LRelu() | 2 |
-| prelu | mklab.JGNN.nn.activations.PRelu() | 2 |
-| dropout | mklab.JGNN.nn.activations.Dropout() | 2 |
-
-:warning: In principle, the `addInput` should be called a number of times equal to the number of operator arguments for each operator.
-It is defined for the sake of convenience, for example to initialize operators at different parts of the code than the one linking them.
-
-:warning: Detailed error checking of JGNN operations is under development.
-
-For example, the expression *y=log(2x+1)* can be constructed with the following code - a more concise way to do this is presented in
-[symbolic model definition](#symbolic-model-definition):
-
-```java
- Variable x = new Variable();
- Constant c1 = new Constant(Tensor.fromDouble(1)); // holds the constant "1"
- Constant c2 = new Constant(Tensor.fromDouble(2)); // holds the constant "2"
- NNOperation mult = new Multiply().addInput(x).addInput(c2);
- NNOperation add = new Add().addInput(mult).addInput(c1);
- NNOperation y = new Log().addInput(add);
-```
-
-## JGNN Models
-
-Constructed expressions can be organized into machine learning models. Models are implemented by the class `mklab.JGNN.core.Model`
-and defining them is as simple as marking the input variables with the method `Model addInput(Variable)` and output operations
-with the method `Model addOutput(NNOperation)`. For example, constructing a model holding the previous expression is as simple as writing
-`Model model = new Model().addInput(x).addOutput(y)`. Potential backpropagation machine learning operations are automatically handled
-by models.
-
-Running the model once to create outputs can be achieved with `Tensor Model.predict(Tensor...)` method. This takes as input one or more
-comma-separated tensors to pass into the model.
-If the number of inputs is dynamically created, an overloaded version of the same method supports an array list of input tensors
-`Tensor Model.predict(ArrayList)`.
-
-:warning: Input tensor order should be the same as the order in which variables were added to the model.
-
-Obtaining the last value of intermediate (i.e. non-output) operations *after* the run can be achieved with the `Tensor NNOperation.getPrediction()` method. To sum up with an example, running a model of the previously defined *y=log(2x+1)* for *x=2* and printing both the value of *y* (approx. 1.61) and the value inside the logarithm (5) can be achieved with the following code:
-
-```java
- Model model = new Model().addInput(x).addOutput(log);
- System.out.println(model.predict(Tensor.fromDouble(2)));
- System.out.println(add.getPrediction());
-```
-
-
-## Symbolic model definition
-JGNN supports the definition of models from high-level expressions.
-This involves using a builder pattern to
-declare input variables, constants, parameters, output
-variables, and forward assignment operations. For example, the following
-code declares a linear model.
-
-```java
-ModelBuilder modelBuilder = new ModelBuilder()
- .var("x") // first argument
- .constant("a", Tensor.fromDouble(2))
- .constant("b", Tensor.fromDouble(1))
- .operation("yhat = a*x+b")
- .out("yhat")
- .print() // comment out this line to not print the model
- ;
-System.out.println(modelBuilder.getModel().predict(Tensor.fromDouble(2)));
-```
-
-## Learning parameters
-
-Examples up to this point were limited to using constant and variable data. However, machine learning
-tasks typically introduce the notion of *parameters* as constants whose values can be learned to optimize
-certain objectives, such as making model output values as close as possible to desired ones.
-
-Parameter operations can be instantiated with the constructor `new mklab.JGNN.nn.Parameter(Tensor initialValue)`,
-where their initial values are provided. Model builder parameters need to be defined before they are used
-by operations and can be symbolically defined with the method
-`ModelBuilder ModelBuilder.param(String name, Tensor initialValue)`.
-
-Approximating ideal parameter values for a model requires three steps: a) selecting an optimization scheme responsible for
-updating parameters based on backpropagated errors, b) selecting a loss function quantifying how much model outputs deviate
-from optimal ones and c) repeatedly calling one of the model's overloaded `Model.trainSample` methods for a number of epochs.
-For the sake of simplicity, in the following example we consider a single sample before we discuss how to handle multiple ones:
-
-
-```java
-ModelBuilder modelBuilder = new ModelBuilder()
- .var("x") // first argument
- .var("y") // second argument
- .param("a", Tensor.fromDouble(1))
- .param("b", Tensor.fromDouble(0))
- .operation("yhat = a*x+b")
- .operation("error = (y-yhat)*(y-yhat)")
- .out("error")
- .print();
-Optimizer optimizer = new Adam(0.1);
-// when no output is passed to training, the output is considered to be an error
-for(int i=0;i<200;i++)
- modelBuilder.getModel().trainSample(optimizer, Arrays.asList(new DenseTensor(1,2,3,4,5), new DenseTensor(3,5,7,9,11)));
-//run the wrapped model and obtain an internal variable prediction
-System.out.println(modelBuilder.runModel(Tensor.fromDouble(2), Tensor.fromDouble(0)).get("yhat").getPrediction());
-```
-
-:bulb: Examples with multiple features should either be organized into sparse matrices or be fed one-by-one to learners
-through a [batch-learning](#multithread-batch-learning) scheme. Specifically for graph neural networks, computation
-speed benefits tremendously from organizing all node features into one matrix and simultaneously passing this through
-graph convolutional layers.
-
-## Neural Network Examples
-
-
-## Multithread Batch Learning
\ No newline at end of file
diff --git a/tutorials/NN.md b/tutorials/NN.md
deleted file mode 100644
index 465effac..00000000
--- a/tutorials/NN.md
+++ /dev/null
@@ -1,110 +0,0 @@
-# :zap: Neural networks
-For this example, we refer to the same dataset and experimentation
-methodology as in the [learning](tutorials/Learning.md) tutorial.
-But we will see how to easily create a multilayer perceptron.
-We cover the following topics:
-
-1. [Building layers](#building-layers)
-2. [Deep architectures](#deep-architectures)
-3. [Writing operations](#writing-operations)
-4. [Save and load architectures](#save-and-load-architectures)
-
-*Full implementations can be found in the [examples](../JGNN/src/examples/tutorial/NN.java).*
-
-## Building layers
-The class for building layered architectures (`LayeredBuilder`) improves base builder
-functionalities by introducing methods like `.layer(String)`. This
-is an extension of normal `.operation(String)` definitions,
-with the addition that specifically the expressions `{l}` and `{l+1}` are replaced
-by the previous and current layer identifiers respectively.
-Setting the input layer to `"h0"` lets it get parsed by subsequent calls.
-
-```java
-ModelBuilder modelBuilder = new LayeredBuilder("h0")
- .config("features", numFeatures)
- .config("classes", numClasses)
- .config("hidden", 64)
- .layer("h{l+1} = relu(h{l}@matrix(features, hidden)+vector(hidden))")
- .layer("yhat = softmax(h{l}@matrix(hidden, classes)+vector(classes), row)")
- .out("yhat");
-```
-
-## Deep architectures
-Now that we have explained how simple layers work, let's look at two more advanced
-`LayeredBuilder` methods pivotal to many deep neural networks.
-The first is `.layerRepeat(String, int)`, which just repeats
-the layer expression a set number of times without breaking the
-functional model definition pipeline. The second is `.concat(int)`,
-which horizontally concatenates a number of top layers. Concatenation
-is also possible in symbolic parsing through the `|` operation,
-but calling the method easily scales it over a large number of layers
-(e.g., across several graph convolutional layers).
-
-We now make a more advanced model using these methods:
-
-```java
-ModelBuilder modelBuilder = new LayeredBuilder()
- .config("features", numFeatures)
- .config("classes", numClasses)
- .config("hidden", 64)
- .config("2hidden", 2*64)
- .layer("h{l+1} = relu(h{l}@matrix(features, hidden)+vector(hidden))")
- .layerRepeat("h{l+1} = relu(h{l}@matrix(hidden, hidden)+vector(hidden))", 2)
- .concat(2)
- .layer("yhat = softmax(h{l}@matrix(2hidden, classes)+vector(classes), row)")
- .out("yhat");
-```
-
-## Writing operations
-This is a good point to present symbols you can use within expressions.
-Unless otherwise specified, you can replace x and y with any expression. Sometimes,
-y needs to be a constant defined either by presenting a number, calling
-`ModelBuilder.config(y, double)`, or calling `ModelBuilder.constant(y, double)` to
-set the numbers as hyperparameters.
-
-|Symbol| Type | Description |
-| --- | --- | --- |
-| x = y | Operator | Assign to variable x the outcome of executing y. |
-| x + y | Operator | Element-by-element addition. |
-| x * y | Operator | Element-by-element multiplication. |
-| x - y | Operator | Element-by-element subtraction. |
-| x @ y | Operator | Matrix multiplication. |
-| x \| y | Operator | Row-wise concatenation of x and y. |
-| x [y] | Operator | Gathers the rows with indexes y of x.|
-| transpose(x) | Function | Transposes matrix x. |
-| log(x) | Function | Apply logarithm on each tensor element. |
-| relu(x) | Function | Apply relu on each tensor element. |
-| tanh(x) | Function | Apply a tanh activation on each tensor element. |
-| sigmoid(x) | Function | Apply a sigmoid activation on each tensor element. |
-| dropout(x, y) | Function | Apply training dropout on tensor x with constant dropout rate y. |
-| lrelu(x, y) | Function | Leaky relu on tensor x with constant negative slope y. |
-| prelu(x) | Function | Leaky relu on tensor x with learnable negative slope. |
-| softmax(x, y) | Function | Apply y-wide softmax on x, where y is either row or col.|
-| sum(x, y) | Function | Apply y-wide sum reduction on x, where y is either row or col.|
-| max(x, y) | Function | Apply y-wide max reduction on x, where y is either row or col.|
-| matrix(x, y) | Function | Generate a matrix parameter with respective hyperparameter dimensions. |
-| vector(x) | Function | Generate a vector with respective hyperparameter size.|
-
-Prefer using hyperparameters (set via `.config`) for matrix and vector creation, as these transfer their names to respective
-dimensions for error checking. For `dropout,matrix,vector` you can also use the short names `drop,mat,vec`.
-
-## Save and load architectures
-Saving a model needs to be done via its builder. Saving stores the whole parameter
-state in a specified Java path per:
-
-```java
-modelBuilder.save(Paths.get("file.jgnn"));
-```
-
-A new builder (of the same type as the one that saved the model)
-can be constructed given the save Path per:
-
-```java
-modelBuilder = (LayeredBuilder)ModelBuilder.load(Paths.get("file.jgnn"));
-```
-
-You can continue working with the loaded builder, for example by adding more
-layers if needed, and you can call its `.getModel()` per normal.
-
-
-[NEXT: Graph neural networks for node classification](GNN.md)
\ No newline at end of file
diff --git a/tutorials/Neuralang.md b/tutorials/Neuralang.md
deleted file mode 100644
index 0eebac22..00000000
--- a/tutorials/Neuralang.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Neuralang
-
-This is a scripting language for defining graph and traditional
-neural network architectures. It extends JGNN's symbolic definition
-with function declarations.
-
-
-
-## Script
-
-Neuralang scripts consist of functions like the ones below.
-These define neural network components and their interactions
-using a syntax inspired by Mojo. Use a Rust highlighter to cover
-all keywords.
-
-```rust
-fn classify(nodes, h, epochs: !3000, patience: !100, lr: !0.01) {
- return softmax(h[nodes], dim: "row");
-}
-```
-
-The classify function takes two inputs: the input nodes for classification; h is the feature matrix. A softmax is returned for the specified nodes. The function's signature also has several configuration values, whose defaults are indicated by a colon (:). The same notation is used to set/overwrite configurations when calling functions, as we do for softmax to apply it row-wise. Think of configurations as keyword arguments.
-
-Exclamation marks (!) before numbers broadcast them to all subsequent function calls as new defaults for the same configurations. Broadcasted configurations are retrievable from JGNN's Neuralang model builder too, which is useful for Java integration later. Configuration values have the following priority:
-
-1. function call arguments
-2. broadcasted configurations (last value, includes configurations set by Java)
-3. function signature defaults
-
-```rust
-fn gcnlayer(A, h, hidden: 64, reg: 0.005) {
- return relu(A@h@matrix(?, hidden, reg) + vector(hidden));
-}
-```
-
-The gcnlayer function accepts the following parameters: A is the adjacency matrix; h is the input feature matrix; hidden is a configuration that defaults to 64 and specifies the number of hidden units; and reg is the regularization term that defaults to 0.005. The ? in matrix definitions lets the autosize feature of Java integration later determine the dimension size based on a test run. The function returns the activated output of the GCN layer using ReLU.
-
-```rust
-fn gcn(A, h, classes: extern) {
- h = gcnlayer(A, h);
- h = gcnlayer(A, h, hidden: classes);
- return h;
-}
-```
-
-The gcn function declares the popular Graph Convolutional Network (GCN) architecture and has as configuration the number of output classes. The function first applies a gcnlayer, and then applies another layer of the same type with the hidden units configuration set to the value of classes. Thus the output matches the number of classes, which is set as externally declared (there is no default), for example by broadcasted defaults or Java.
-
-
-## Java integration
-
-Neuralang scripts can be integrated into Java code for building and training models. Below is an example of how to do so:
-
-
-```java
-Dataset dataset = new Cora();
-dataset.graph().setMainDiagonal(1).setToSymmetricNormalization();
-
-ModelBuilder modelBuilder = new Neuralang()
- .parse(Paths.get("../architectures.nn"))
- .constant("A", dataset.graph())
- .constant("h", dataset.features())
- .var("nodes")
- .config("classes", dataset.labels().getCols())
- .config("hidden", 16)
- .out("classify(nodes, gcn(A,h))")
- .autosize(new EmptyTensor(dataset.samples().getSlice().size()));
-
-ModelTraining trainer = new ModelTraining()
- .configFrom(modelBuilder)
- .setVerbose(true)
- .setLoss(new CategoricalCrossEntropy())
- .setValidationLoss(new CategoricalCrossEntropy());
-```
-
-In the above example, a dataset (Cora) is loaded, and its graph is prepared by adding self-loops (the renormalization trick) and performing symmetric normalization. A Neuralang instance is then created; this is a ModelBuilder that can parse scripts as either file Paths or pure text. Constants like the adjacency matrix A and feature matrix h are set, along with variables (nodes) and configurations (classes, hidden). The model and its output are defined with a Neuralang statement. Finally, dimension names and sizes for each ? found in the model declaration are filled by calling autosize to perform a test run. In the example we use empty tensors to avoid unnecessary computations while determining the dimensions.
-
-A ModelTraining instance is finally configured using parameters from the ModelBuilder, utilizing the configurations found in the classification method. Don't forget to broadcast configuration values that you need to access from Java code later.
diff --git a/tutorials/Primitives.md b/tutorials/Primitives.md
deleted file mode 100644
index 020c07b3..00000000
--- a/tutorials/Primitives.md
+++ /dev/null
@@ -1,145 +0,0 @@
-# Primitives
-Primitive operations found in other tutorials suffice for most
-use cases. However, you may need to process
-neural inputs, postprocess learning outcomes, create custom
-parameters, contribute to the library with more components,
-or make derivative works based on native java vector and matrix
-arithmetics.
-
-This tutorial lists JGNN primitives and explains how to work with them.
-
-In general, JGNN provides the `mklab.JGNN.core.Tensor` abstract class
-for storing data. Vector and matrix operations use primitives
-of this or derived classes. To reduce the number of code predicates
-and improve comprehensibility, operations between two tensors are implemented
-by calling respective methods of the first one.
-
-## Table of contents
-
-1. [Tensor operations](#tensor-operations)
-2. [Vector initialization](#vector-initialization)
-3. [Matrix initialization and operations](#matrix-initialization-and-operations)
-4. [Named dimensions](#named-dimensions)
-
-## Tensor operations
-Tensor operations are performed element-by-element and can be split into
-the following categories:
-
a) *arithmetic* - combine the values of two tensors to create a new one
-
b) *in-place arithmetic* - combine the values of two tensors to alter the first one
-
c) *summary statistics* - output simple numeric values
-
d) *element access* - manipulation of specific values
-
-
-:bulb: In-place arithmetics follow the same naming conventions of base arithmetics and
-begin with a "self" prefix for pairwise operations or a "setTo" prefix to perform operators.
-
-Here we present some commonly used operations applicable to all tensors, whose functionality is inferable
-from their name and argument types. For more operations or details, please refer to the project's
-[Javadoc](https://mklab-iti.github.io/JGNN/).
-
-Operation | Type | Comments
---- | --- | ---
-`Tensor copy()` | arithmetic
-`Tensor zeroCopy()` | arithmetic | Zero copies share the same type with the tensor and comprise only zeros.
-`Tensor add(Tensor)` | arithmetic
-`Tensor subtract(Tensor)` | arithmetic
-`Tensor multiply(Tensor)` | arithmetic | Multiplication is performed element-by-element.
-`Tensor multiply(double)` | arithmetic
-`Tensor normalized()` | arithmetic | Division with L2 norm (if non-zero).
-`Tensor toProbability()` | arithmetic | Division with the sum (if non-zero).
-`Tensor setToZero()` | in-place arithmetic
-`Tensor selfAdd(Tensor)` | in-place arithmetic
-`Tensor selfSubtract(Tensor)` | in-place arithmetic
-`Tensor selfMultiply(Tensor)` | in-place arithmetic
-`Tensor selfMultiply(double)` | in-place arithmetic
-`Tensor setToRandom()` | in-place arithmetic | element selected from uniform distribution in the range [0,1]
-`Tensor setToOnes()` | in-place arithmetic
-`Tensor setToNormalized()` | in-place arithmetic | Division with L2 norm (if non-zero).
-`Tensor setToProbability()` | in-place arithmetic | Division with the sum (if non-zero).
-`double dot(Tensor)` | summary statistics
-`double norm()` | summary statistics | The L2 norm.
-`double sum()` | summary statistics
-`double max()` | summary statistics
-`double min()` | summary statistics
-`long argmax()` | summary statistics
-`long argmin()` | summary statistics
-`double toDouble()` | summary statistics | Converts tensor with exactly one element to a double (throws exception if more elements).
-`Tensor set(long position, double value)` | element access | NaN values throw exceptions. Is in-place.
-`double get(long position)` | element access
-`Iterator getNonZeroElements()` | element access | Traverses all elements for dense tensors, but skips zero elements for sparse tensors. (Guarantee: there is no non-zero element not traversed.) Returns element **positions**.
-`String describe()` | summary statistics | Description of type and dimensions.
-
-
-:bulb: To write code that accommodates both dense and sparse tensors, make sure that iterating over element indices is performed with the iterator `Iterator getNonZeroElements()`.
-
-Prefer in-place arithmetic operations when transforming tensor values or for intermediate calculation steps, as these do not allocate new memory. For example, the following code can be used for creating and normalizing a tensor of ones without using additional memory:
-
-```Java
-Tensor normalized = new DenseTensor(10).setToOnes().setToNormalized();
-```
-
-
-## Vector initialization
-
-You can initialize a dense tensor with the expression `Tensor denseTensor = new mklab.JGNN.tensor.DenseTensor(long size)` .
-If there are many zero elements expected, or if sizes go beyond the max integer limit Java imposes on array sizes (and hence a dense representation can not be stored as an array), a sparse tensor can be used per `Tensor sparseTensor = new mklab.JGNN.tensor.SparseTensor(long size)`. For example, one-hot encodings for classification problems can be generated with the following code. This creates a dense tensor with *numClasses* elements and puts at element *classId* the value 1:
-
-```java
-int classId = ...;
-int numClasses = ...;
-Tensor oneHotEncoding = new mklab.JGNN.tensor.DenseTensor(numClasses).set(classId, 1);
-```
-
-Dense tensors are serialized with their `String toString()` method and can be deserialized into new tensors with the constructor `mklab.JGNN.tensor.DenseTensor(String)`.
-
-
-## Matrix initialization and operations
-The `Matrix` class extends the concept of tensors with additional operations. Under the hood,
-Matrices linearly store elements and use computations to transform the (row,col) position of
-their elements to respective positions. The outcome of some methods inherited from tensors may
-need to be typecast back into a matrix (e.g. for all in-place operations).
-
-Operation | Type | Comments
---- | --- | ---
-`Matrix onesMask()` | arithmetic | Copy of a matrix with elements set to one.
-`Matrix transposed()` | arithmetic | There is no method for in-place transposition.
-`Matrix asTransposed()` | arithmetic | Shares data with the original.
-`Tensor getRow(long)` | arithmetic | Shares data with the original.
-`Tensor getCol(long)` | arithmetic | Shares data with the original.
-`Tensor transform(Tensor x)` | arithmetic | Outputs a dense tensor that holds the linear transformation of the given tensor (using it as a column vector) by multiplying it with the matrix.
-`Matrix matmul(Matrix with)` | arithmetic | Outputs the matrix multiplication **this \* with**. There is no in-place matrix multiplication.
-`Matrix matmul(Matrix with, boolean transposeSelf, boolean transposeWith)` | arithmetic | Does not perform memory allocation to compute transpositions.
-`Matrix external(Tensor horizontal, Tensor vertical)` | static method | External product of two tensors. Is a dense matrix.
-`Matrix symmetricNormalization()` | in-place arithmetic | The symmetrically normalized matrix.
-`Matrix setToSymmetricNormalization()` | in-place arithmetic | The symmetrically normalized matrix.
-`Matrix setMainDiagonal(double value)` | in-place arithmetic | Sets diagonal elements.
-`Matrix setDiagonal(long diagonal, double value)` | in-place arithmetic | Sets diagonal elements.
-`Matrix put(long row, long col, double value)` | element access | NaN values throw exceptions. Is in-place.
-`Iterable<Entry<Long, Long>> getNonZeroEntries()` | element access | Similar to getNonZeroElements() but iterates through (row, col) pairs.
-
-
-## Named dimensions
-In addition to other operations, there exist methods that do not affect
-tensor or matrix values but are only responsible for naming dimensions. Functionally, these
-are decorative and aim to improve debugging by throwing errors for incompatible non-null names.
-For example, adding two matrices with different dimension names will result in an error.
-Likewise, the inner dimension names during matrix multiplication should agree.
-
-Operation | Type | Comments
---- | --- | ---
-`Tensor setDimensionName(String name)` | arithmetic | For naming tensor dimensions (of the 1D space tensors lie in).
- `Tensor setRowName(String rowName)` | arithmetic | For naming what kind of information matrix rows hold (e.g. `"samples"`).
- `Tensor setColName(String colName)` | arithmetic | For naming what kind of information matrix columns hold (e.g. `"features"`).
- `Tensor setDimensionName(String rowName, String colName)` | arithmetic | A shorthand of calling `setRowName(rowName).setColName(colName)`.
-
-
-Arithmetic operations, *including* matrix multiplication and copying,
-automatically infer dimension names in the result to make sure that only compatible data types
-are compared. Dimension names can be freely changed for any Tensor *without*
-backtracking changes (even for see-through data types, such as the outcome of asTransposed()).
-
-:bulb: Matrices effectively have three dimension names: for their rows, columns, and inner
-data as long as they are treated as tensors.
-
-
-[NEXT: Debugging](Debugging.md)
diff --git a/tutorials/README.md b/tutorials/README.md
deleted file mode 100644
index 0fd88712..00000000
--- a/tutorials/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# :dart: Tutorials
-Overall, JGNN provides machine learning primitives (e.g. tensors, matrices),
-neural components, model builders that parse expressions to create components,
-model initializers, and training strategies.
-Tutorials cover the following subjects:
-
-## Introduction
-1. [Learning](Learning.md)
-2. [Neural networks](NN.md)
-3. [GNNs for node classification](GNN.md)
-3. [GNNs for graph classification](GraphClassification.md)
-4. [Data creation](Data.md)
-
-## Advanced topics
-4. [Primitives](Primitives.md)
-5. [Debugging](Debugging.md)
-6. [Message passing GNNs](Message.md)
diff --git a/tutorials/graphviz.png b/tutorials/graphviz.png
deleted file mode 100644
index 80f26462..00000000
Binary files a/tutorials/graphviz.png and /dev/null differ