Skip to content

Commit

Permalink
Merge pull request #1126 from lemonviv/add-hfl-example
Browse files Browse the repository at this point in the history
Add implementation for model and data processing for the hfl example
  • Loading branch information
lzjpaul authored Jan 6, 2024
2 parents d950067 + ec21358 commit 75d04de
Show file tree
Hide file tree
Showing 2 changed files with 212 additions and 0 deletions.
78 changes: 78 additions & 0 deletions examples/hfl/bank.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# https://github.com/zhengzangw/Fed-SINGA/blob/main/src/client/data/bank.py

import pandas as pd
import numpy as np
import sys
from pandas.api.types import is_numeric_dtype
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle


def encode(df):
    """One-hot encode every non-numeric column of ``df``.

    Columns for which ``is_numeric_dtype`` is true are passed through
    unchanged. Every other column is replaced by the indicator columns
    produced by ``pd.get_dummies`` with the column name as prefix
    (``<column>_<value>``). Output columns follow the order of
    ``df.columns``.

    Args:
        df: Input ``pandas.DataFrame``.

    Returns:
        A new ``pandas.DataFrame`` with the encoded columns; an empty
        ``DataFrame`` when ``df`` has no columns.
    """
    parts = [
        df[col] if is_numeric_dtype(df[col])
        else pd.get_dummies(df[col], prefix=col)
        for col in df.columns.values
    ]
    if not parts:
        # pd.concat raises ValueError on an empty list of objects.
        return pd.DataFrame()
    # Concatenate once: calling concat inside the loop re-copies every
    # previously accumulated column on each iteration.
    return pd.concat(parts, axis=1)


def load(device_id):
    """Load one client's train/validation partition of the bank dataset.

    Reads ``data/bank_train_<device_id>.csv`` and
    ``data/bank_test_<device_id>.csv``, separates the ``y`` column as the
    label, converts features to float32 and labels to int32 arrays, and
    rescales the features with ``normalize``.

    Args:
        device_id: Identifier inserted into the two CSV file names.

    Returns:
        Tuple ``(train_x, train_y, val_x, val_y, num_classes)`` where
        ``num_classes`` is always 2.
    """
    suffix = str(device_id) + ".csv"
    train_frame = pd.read_csv("data/bank_train_" + suffix, sep=',')
    test_frame = pd.read_csv("data/bank_test_" + suffix, sep=',')

    def to_arrays(frame):
        # Features are every column except the label column 'y'.
        features = np.array(frame.drop(['y'], axis=1), dtype=np.float32)
        labels = np.array(frame['y'], dtype=np.int32)
        return features, labels

    train_x, train_y = to_arrays(train_frame)
    val_x, val_y = to_arrays(test_frame)

    # The scaler is fitted on the training features only.
    train_x, val_x = normalize(train_x, val_x)
    return train_x, train_y, val_x, val_y, 2


def normalize(X_train, X_test):
    """Min-max scale both feature sets using statistics of ``X_train``.

    A ``MinMaxScaler`` is fitted on ``X_train`` only and then applied to
    both ``X_train`` and ``X_test``.

    Args:
        X_train: Training feature matrix used to fit the scaler.
        X_test: Feature matrix transformed with the fitted scaler.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled)``.
    """
    from sklearn.preprocessing import MinMaxScaler

    fitted = MinMaxScaler().fit(X_train)
    return fitted.transform(X_train), fitted.transform(X_test)


def split(num):
    """Partition the bank marketing dataset into ``num`` client shards.

    Reads ``../data/bank-additional-full.csv`` (``;``-separated), maps the
    ``y`` column to 1 for ``'yes'`` and 0 otherwise, one-hot encodes the
    non-numeric columns with ``encode``, shuffles, and makes an 80/20
    train/test split. The full splits are written to
    ``data/bank_train_.csv`` and ``data/bank_test_.csv``; client ``i`` gets
    ``data/bank_train_<i>.csv`` and ``data/bank_test_<i>.csv``.

    Every client receives ``len(split) // num`` rows, so up to ``num - 1``
    trailing rows of each split are left out of the per-client files.

    Args:
        num: Number of clients; must be at least 1.

    Raises:
        ValueError: If ``num`` is smaller than 1.
    """
    # Fail before any I/O: num == 0 would otherwise divide by zero after
    # all the loading work, and a negative num would silently write no
    # per-client files.
    if num < 1:
        raise ValueError("num must be a positive integer, got %r" % (num,))

    import os

    filepath = "../data/bank-additional-full.csv"
    df = pd.read_csv(filepath, sep=';')
    df['y'] = (df['y'] == 'yes').astype(int)
    data = shuffle(encode(df))
    train, test = train_test_split(data, test_size=0.2)

    # to_csv does not create missing directories.
    os.makedirs("data", exist_ok=True)

    train.to_csv("data/bank_train_.csv", index=False)
    test.to_csv("data/bank_test_.csv", index=False)

    train_per_client = len(train) // num
    test_per_client = len(test) // num

    print("train_per_client:", train_per_client)
    print("test_per_client:", test_per_client)
    for i in range(num):
        # Positional row slices; consecutive, non-overlapping shards.
        sub_train = train.iloc[i * train_per_client:(i + 1) * train_per_client]
        sub_test = test.iloc[i * test_per_client:(i + 1) * test_per_client]
        sub_train.to_csv("data/bank_train_" + str(i) + ".csv", index=False)
        sub_test.to_csv("data/bank_test_" + str(i) + ".csv", index=False)


if __name__ == "__main__":
    # The first command-line argument is the number of client shards.
    num_clients = int(sys.argv[1])
    split(num_clients)

134 changes: 134 additions & 0 deletions examples/hfl/mlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

import argparse

import numpy as np
from singa import device, layer, model, opt, tensor

# Maps a precision name to the matching NumPy dtype.
np_dtype = dict(float16=np.float16, float32=np.float32)

# Maps a precision name to the matching singa tensor dtype.
singa_dtype = dict(float16=tensor.float16, float32=tensor.float32)


class MLP(model.Model):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    The loss is softmax cross-entropy over the output of the second linear
    layer.

    Args:
        data_size: Accepted for interface compatibility; not used by this
            class.
        perceptron_size: Output size of the first (hidden) linear layer.
        num_classes: Output size of the second linear layer.
    """

    def __init__(self, data_size=10, perceptron_size=100, num_classes=10):
        super(MLP, self).__init__()
        self.num_classes = num_classes
        self.dimension = 2

        self.relu = layer.ReLU()
        self.linear1 = layer.Linear(perceptron_size)
        self.linear2 = layer.Linear(num_classes)
        self.softmax_cross_entropy = layer.SoftMaxCrossEntropy()

    def forward(self, inputs):
        """Return the second linear layer's output for ``inputs``."""
        y = self.linear1(inputs)
        y = self.relu(y)
        y = self.linear2(y)
        return y

    def train_one_batch(self, x, y, dist_option, spars):
        """Run one forward pass and one optimizer update.

        Args:
            x: Input batch.
            y: Target labels for ``x``.
            dist_option: Update strategy: ``"plain"`` (alias ``"fp32"``),
                ``"half"``, ``"partialUpdate"``, ``"sparseTopK"`` or
                ``"sparseThreshold"``.
            spars: Forwarded to ``backward_and_sparse_update`` for the two
                sparse strategies; ignored otherwise.

        Returns:
            Tuple ``(out, loss)``.

        Raises:
            ValueError: If ``dist_option`` is not a known strategy.
        """
        out = self.forward(x)
        loss = self.softmax_cross_entropy(out, y)

        # NOTE(review): "fp32" is treated as an alias of "plain" so callers
        # passing that name get an optimizer step - confirm intended.
        if dist_option in ("plain", "fp32"):
            self.optimizer(loss)
        elif dist_option == "half":
            self.optimizer.backward_and_update_half(loss)
        elif dist_option == "partialUpdate":
            self.optimizer.backward_and_partial_update(loss)
        elif dist_option == "sparseTopK":
            self.optimizer.backward_and_sparse_update(loss, topK=True, spars=spars)
        elif dist_option == "sparseThreshold":
            self.optimizer.backward_and_sparse_update(loss, topK=False, spars=spars)
        else:
            # Previously an unknown option silently skipped the update, so
            # the model never trained.
            raise ValueError("unknown dist_option: %r" % (dist_option,))
        return out, loss

    def set_optimizer(self, optimizer):
        """Attach the optimizer used by ``train_one_batch``."""
        self.optimizer = optimizer


def create_model(pretrained=False, **kwargs):
    """Construct an ``MLP`` model.

    Args:
        pretrained (bool): Accepted but not used by this function.
        **kwargs: Forwarded unchanged to the ``MLP`` constructor.

    Returns:
        The newly created ``MLP`` instance.
    """
    return MLP(**kwargs)


# Names exported by ``from <module> import *``.
__all__ = ["MLP", "create_model"]

if __name__ == "__main__":
    # Demo: train the MLP to classify 2-D points as lying above or below
    # the line y = 5x + 1.
    np.random.seed(0)

    parser = argparse.ArgumentParser()
    parser.add_argument("-p", choices=["float32", "float16"], default="float32", dest="precision")
    parser.add_argument(
        "-g",
        "--disable-graph",
        # A real boolean: the old string "True" only worked because any
        # non-empty string is truthy.
        default=True,
        action="store_false",
        help="disable graph",
        dest="graph",
    )
    parser.add_argument(
        "-m", "--max-epoch", default=1001, type=int, help="maximum epochs", dest="max_epoch"
    )
    args = parser.parse_args()

    # generate the boundary
    def f(x):
        return 5 * x + 1

    bd_x = np.linspace(-1.0, 1, 200)
    bd_y = f(bd_x)

    # generate the training data
    x = np.random.uniform(-1, 1, 400)
    y = f(x) + 2 * np.random.randn(len(x))

    # choose one precision
    precision = singa_dtype[args.precision]
    np_precision = np_dtype[args.precision]

    # convert training data to 2d space: label is 1 when the point lies
    # below the boundary line
    label = (f(x) > y).astype(np.int32)
    data = np.stack([x, y], axis=1).astype(np_precision)

    dev = device.create_cuda_gpu_on(0)
    sgd = opt.SGD(0.1, 0.9, 1e-5, dtype=precision)
    tx = tensor.Tensor((400, 2), dev, precision)
    ty = tensor.Tensor((400,), dev, tensor.int32)
    # Named `net` so the imported `model` module is not rebound.
    net = MLP(data_size=2, perceptron_size=3, num_classes=2)

    # attach model to graph
    net.set_optimizer(sgd)
    net.compile([tx], is_train=True, use_graph=args.graph, sequential=True)
    net.train()

    for i in range(args.max_epoch):
        tx.copy_from_numpy(data)
        ty.copy_from_numpy(label)
        # "plain" selects the ordinary optimizer step; the previous value
        # "fp32" matched no branch of train_one_batch, so no parameter
        # update was ever applied.
        out, loss = net(tx, ty, "plain", spars=None)

        if i % 100 == 0:
            print("training loss = ", tensor.to_numpy(loss)[0])

0 comments on commit 75d04de

Please sign in to comment.