46376066_William_Parker_ADNI_Classification_Merge #473

Open · wants to merge 2 commits into base: topic-recognition
Binary file added recognition/46376066_ADNI/60epoch_plot.png
53 changes: 53 additions & 0 deletions recognition/46376066_ADNI/README.md
@@ -0,0 +1,53 @@
### NOTE ON LATE PULL REQUEST AND MISSING COMMITS
Because I was unaware of git's file size limit, most of my commits included zip files that were too large to push. I spent the entirety of Saturday attempting to edit the commits to remove the files using various online tools, but none proved successful.

Because of this, the commit before this one contains the files as they were at 11:56 on Friday night, and I can demonstrate this in person.

I have also included my log history in the text file named original_git_log.txt; this may not be sufficient evidence, but I hope it will be.

If any further evidence is required, please email me.

# Perceiver Model implementation for Classification of ADNI Alzheimer's Data
### Author: William Parker
### Student Number: s4637606

## Description of Model

This model is fundamentally based on [Perceiver: General Perception with Iterative Attention](https://arxiv.org/abs/2103.03206), with the particular code and model adapted from [Image classification with Perceiver](https://keras.io/examples/vision/perceiver_image_classification/), with some variation and with the data changed to use the ADNI data set instead of CIFAR-100.

This Perceiver model aims to accurately classify whether or not an image of a brain is indicative of Alzheimer's disease. A known issue with Transformer models is that self-attention scales quadratically with input size, creating a computational bottleneck. Perceiver models address this by combining and alternating the Transformer attention mechanism with a cross-attention module, which projects the input onto a latent array of significantly smaller size than the arrays in alternative models. Another advantage of Perceiver models is that they can be applied to varying types of data (audio, video, spatial positioning, text and images), although this particular model deals only with images.
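
As a rough, framework-free illustration of why this helps (a sketch of the general idea, not code from this repository): cross-attending a small latent array of N elements against M inputs builds an N × M attention matrix, rather than the M × M matrix of full self-attention.

```python
import numpy as np

M, N, D = 4096, 256, 64          # data length, latent length, channels
data = np.random.randn(M, D)     # e.g. flattened image patches (keys/values)
latent = np.random.randn(N, D)   # small learned latent array (queries)

scores = latent @ data.T / np.sqrt(D)           # (N, M): O(N*M), not O(M*M)
weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
weights /= weights.sum(axis=-1, keepdims=True)  # softmax over the data axis
summary = weights @ data                        # (N, D) latent summary of the data
```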

Helpful diagram from the DeepMind paper:
![Diagram](modeldiagram.PNG)

### Positional Encoding
Positional encoding lets the data array preserve the positional structure of the input. The original DeepMind paper uses Fourier features, which appear to be the more principled solution and produce better results. However, due to the complexity of implementation and my limited maths background, patches were used instead. Patches are regions of each image within which positioning is retained.
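
For reference, a minimal sketch of the Fourier-feature encoding the paper describes (illustrative only; the function name and parameters here are my own, and nothing like this is used in this repository):

```python
import numpy as np

def fourier_features(pos, num_bands, max_freq):
    # Map positions in [-1, 1] to [sin(f*pi*x), cos(f*pi*x), ..., x],
    # with frequencies spaced linearly from 1 up to the Nyquist rate.
    freqs = np.linspace(1.0, max_freq / 2.0, num_bands)
    angles = np.pi * pos[..., None] * freqs        # (..., num_bands)
    return np.concatenate(
        [np.sin(angles), np.cos(angles), pos[..., None]], axis=-1
    )

rows = np.linspace(-1.0, 1.0, 128)                 # 128 row positions
feats = fourier_features(rows, num_bands=6, max_freq=128)  # shape (128, 13)
```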

## Data
The data was kindly pre-processed by the course staff. Images were input with a label of 1 (Alzheimer's) or 0 (no Alzheimer's).
A limited subset of the data (800 training and 200 test images) was used, giving an 80:20 train:test split. Adjacent images in each folder may come from the same patient, and since images were not selected at random, this may explain both the low overall accuracy of my model and the high accuracy on the predict.py sample.
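
If random selection were added, one way to avoid patient overlap between the splits would be to group files by patient before splitting. This is a hypothetical sketch: it assumes a `<patientID>_<slice>.jpeg` filename scheme, which may not match the actual ADNI filenames.

```python
import os
import random
from collections import defaultdict

def patient_level_split(image_dir, train_frac=0.8, seed=42):
    # Group images by patient so no patient's slices land in both splits.
    by_patient = defaultdict(list)
    for name in os.listdir(image_dir):
        by_patient[name.split("_")[0]].append(name)
    patients = sorted(by_patient)
    random.Random(seed).shuffle(patients)
    cut = int(train_frac * len(patients))
    train = [f for p in patients[:cut] for f in by_patient[p]]
    test = [f for p in patients[cut:] for f in by_patient[p]]
    return train, test
```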

### Example Input
An Alzheimer's-positive brain scan:
![Brain_Scan](ex_brain.jpeg)

## Usage
There are 4 files (see the example run below):
- dataset.py - processes the data
- modules.py - stores the model
- train.py - run to train the model (ensure tensorflow_addons is installed)
- predict.py - run to test accuracy on the test data
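
A typical run (assuming the scripts are executed from this directory with the dependencies below installed) would be `python train.py` followed by `python predict.py`.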

## Dependencies
- tensorflow
- tensorflow_addons
- numpy
- matplotlib

## Results

The model had a final accuracy of 58.0%, which is well below the required 80%, though it sometimes reached 65% accuracy.
![Results](60epoch_plot.png)

However, when used to predict on 10 positive test images it was 100% accurate.
![Predict](predict.PNG)
73 changes: 73 additions & 0 deletions recognition/46376066_ADNI/dataset.py
@@ -0,0 +1,73 @@
import os

import tensorflow as tf
import numpy as np

from tensorflow.keras.preprocessing.image import load_img, img_to_array

import random as rd

# Load the ADNI images from disk and convert them into normalised
# train/test tensors with binary labels (1 = AD, 0 = NC)
def process_image(data_dir):
    AD_train_images = os.listdir(data_dir + "/train/AD")
    NC_train_images = os.listdir(data_dir + "/train/NC")

    AD_test_images = os.listdir(data_dir + "/test/AD")
    NC_test_images = os.listdir(data_dir + "/test/NC")

    # Native image size is (240, 256, 3); images are resized to 128x128 below

    # Train data: 400 AD + 400 NC images, labelled and shuffled together
    train_both = []

    for image_name in AD_train_images[:400]:
        image = load_img(data_dir + "/train/AD/" + image_name, target_size=(128, 128))
        image = img_to_array(image)
        train_both.append([image, 1])

    for image_name in NC_train_images[:400]:
        image = load_img(data_dir + "/train/NC/" + image_name, target_size=(128, 128))
        image = img_to_array(image)
        train_both.append([image, 0])

    rd.shuffle(train_both)

    train_images = []
    train_labels = []

    for x in train_both:
        train_images.append(x[0])
        train_labels.append(x[1])

    # Normalise pixel values to [0, 1]
    x_train = tf.convert_to_tensor(np.array(train_images, dtype=np.uint8))
    x_train = tf.cast(x_train, tf.float16) / 255.0
    y_train = tf.convert_to_tensor(train_labels)

    # Test data: 100 AD + 100 NC images
    test_both = []

    for image_name in AD_test_images[:100]:
        image = load_img(data_dir + "/test/AD/" + image_name, target_size=(128, 128))
        image = img_to_array(image)
        test_both.append([image, 1])

    for image_name in NC_test_images[:100]:
        image = load_img(data_dir + "/test/NC/" + image_name, target_size=(128, 128))
        image = img_to_array(image)
        test_both.append([image, 0])

    rd.shuffle(test_both)

    test_images = []
    test_labels = []

    for x in test_both:
        test_images.append(x[0])
        test_labels.append(x[1])

    x_test = tf.convert_to_tensor(np.array(test_images, dtype=np.uint8))
    x_test = tf.cast(x_test, tf.float16) / 255.0
    y_test = tf.convert_to_tensor(test_labels)


    return x_train, y_train, x_test, y_test
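
# Example usage (a sketch; assumes the pre-processed ADNI data sits in an
# "ADNI" folder containing train/AD, train/NC, test/AD and test/NC):
# x_train, y_train, x_test, y_test = process_image("ADNI")
# print(x_train.shape)  # expected: (800, 128, 128, 3)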
Binary file added recognition/46376066_ADNI/ex_brain.jpeg
Binary file added recognition/46376066_ADNI/modeldiagram.PNG
215 changes: 215 additions & 0 deletions recognition/46376066_ADNI/modules.py
@@ -0,0 +1,215 @@
import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras import layers

# Perceiver Architecture

# Feed-forward network: GELU hidden layers followed by a linear
# output projection and dropout
def get_forward(hidden_units, dropout_rate):
    forward_layers = []
    for units in hidden_units[:-1]:
        forward_layers.append(layers.Dense(units, activation=tf.nn.gelu))

    forward_layers.append(layers.Dense(units=hidden_units[-1]))
    forward_layers.append(layers.Dropout(dropout_rate))

    ffn = keras.Sequential(forward_layers)

    return ffn

## Cross-Attention

def get_x_attention_mod(latent_dims,
                        data_dims,
                        proj_dims,
                        forward_units,
                        dropout_rate
                        ):
    latent_arr = layers.Input(shape=(latent_dims, proj_dims))
    data_arr = layers.Input(shape=(data_dims, proj_dims))

    # Queries come from the small latent array
    query = layers.Dense(units=proj_dims)(latent_arr)
    # Keys and values come from the (much larger) data array
    key = layers.Dense(units=proj_dims)(data_arr)
    value = layers.Dense(units=proj_dims)(data_arr)

    attention_output = layers.Attention(use_scale=True, dropout=0.1)(
        [query, key, value], return_attention_scores=False
    )

    # Residual connection back to the latent array, then normalisation
    attention_output = layers.Add()([attention_output, latent_arr])

    attention_output = layers.LayerNormalization(epsilon=1e-6)(attention_output)

    f_net = get_forward(hidden_units=forward_units, dropout_rate=dropout_rate)
    outputs = f_net(attention_output)

    outputs = layers.Add()([outputs, attention_output])

    # If something goes wrong, the input handling here is the likely problem
    model = keras.Model(inputs=[latent_arr, data_arr], outputs=outputs)

    return model


## Transformer

def get_transformer_mod(latent_dim,
                        proj_dim,
                        num_heads,
                        num_t_blocks,
                        forward_units,
                        dropout_rate,
                        ):

    inputs = layers.Input(shape=(latent_dim, proj_dim))

    x_0 = inputs

    for _ in range(num_t_blocks):

        # Pre-norm self-attention over the latent array
        x_1 = layers.LayerNormalization(epsilon=1e-6)(x_0)

        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=proj_dim, dropout=0.1
        )(x_1, x_1)

        x_2 = layers.Add()([attention_output, x_0])

        x_3 = layers.LayerNormalization(epsilon=1e-6)(x_2)

        f_net = get_forward(hidden_units=forward_units, dropout_rate=dropout_rate)
        x_3 = f_net(x_3)

        x_0 = layers.Add()([x_3, x_2])

    model = keras.Model(inputs=inputs, outputs=x_0)
    return model

## The Perceiver alternates the cross-attention module above with the latent transformer, re-using the same modules (weight sharing) on every iteration; see Perceiver.call below

## Perceiver Model

class Perceiver(keras.Model):
    def __init__(
        self,
        patch_size,
        data_dim,
        latent_dim,
        proj_dim,
        num_heads,
        num_t_blocks,
        forward_units,
        dropout_rate,
        num_iters,
        classif_units
    ):
        super(Perceiver, self).__init__()
        self.patch_size = patch_size
        self.data_dim = data_dim
        self.latent_dim = latent_dim
        self.proj_dim = proj_dim
        self.num_heads = num_heads
        self.num_t_blocks = num_t_blocks
        self.forward_units = forward_units
        self.dropout_rate = dropout_rate
        self.num_iters = num_iters
        self.classif_units = classif_units

    def build(self, input_shape):
        # Learned latent array, shared across all inputs
        self.latent_arr = self.add_weight(
            shape=(self.latent_dim, self.proj_dim),
            initializer="random_normal",
            trainable=True,
            name='latent_array'
        )

        self.patcher = PatchCreater(self.patch_size)

        self.patch_pos_encoder = PatchPosEncoder(self.data_dim, self.proj_dim)

        self.x_attention = get_x_attention_mod(
            self.latent_dim,
            self.data_dim,
            self.proj_dim,
            self.forward_units,
            self.dropout_rate
        )

        self.transformer = get_transformer_mod(
            self.latent_dim,
            self.proj_dim,
            self.num_heads,
            self.num_t_blocks,
            self.forward_units,
            self.dropout_rate
        )

        self.avg_pooling = layers.GlobalAveragePooling1D()

        self.classif_head = get_forward(
            hidden_units=self.classif_units, dropout_rate=self.dropout_rate
        )

        super(Perceiver, self).build(input_shape)

    def call(self, inputs):
        patches = self.patcher(inputs)

        pos_encode_patches = self.patch_pos_encoder(patches)

        latent_arr = tf.expand_dims(self.latent_arr, 0)
        data_arr = pos_encode_patches

        # Alternate cross-attention with the latent transformer,
        # re-using (weight-sharing) the same modules on every iteration
        for _ in range(self.num_iters):
            latent_arr = self.x_attention([latent_arr, data_arr])
            latent_arr = self.transformer(latent_arr)

        repres = self.avg_pooling(latent_arr)

        logits = self.classif_head(repres)

        return logits

# Positional Encodings

## Patch encoding (after failed, uncommitted attempts at Fourier features, patches are used for the time being)

class PatchCreater(layers.Layer):
    def __init__(self, patch_size):
        super(PatchCreater, self).__init__()
        self.patch_size = patch_size

    def call(self, images):
        batch_size = tf.shape(images)[0]
        # Slice each image into non-overlapping patch_size x patch_size patches
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID"
        )
        patch_dimens = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dimens])

        return patches


class PatchPosEncoder(layers.Layer):
    def __init__(self, patch_num, output_dim):
        super(PatchPosEncoder, self).__init__()
        self.patch_num = patch_num
        self.projection = layers.Dense(units=output_dim)
        self.pos_embed = layers.Embedding(
            input_dim=patch_num, output_dim=output_dim
        )

    def call(self, patches):
        # Project each patch and add a learned embedding of its index
        posits = tf.range(start=0, limit=self.patch_num, delta=1)
        encode = self.projection(patches) + self.pos_embed(posits)

        return encode
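
# Example instantiation (a sketch; these hyperparameter values are
# illustrative only, not necessarily the ones used in train.py).
# For 128x128 inputs with patch_size=16 there are (128 // 16) ** 2 = 64 patches.
if __name__ == "__main__":
    model = Perceiver(
        patch_size=16,
        data_dim=(128 // 16) ** 2,   # number of patches per image
        latent_dim=64,
        proj_dim=128,
        num_heads=4,
        num_t_blocks=2,
        forward_units=[128],
        dropout_rate=0.2,
        num_iters=2,
        classif_units=[128, 1]       # final unit is the logit for AD vs NC
    )
    logits = model(tf.zeros((1, 128, 128, 3)))
    print(logits.shape)              # expected: (1, 1)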

