Profile network

Script output:

conv:           fprop(): 0.002031 s      bprop(): 0.003639 s
act_relu:       fprop(): 0.000370 s      bprop(): 0.000574 s
pool:           fprop(): 0.000487 s      bprop(): 0.000712 s
conv:           fprop(): 0.002509 s      bprop(): 0.008041 s
act_relu:       fprop(): 0.000229 s      bprop(): 0.000283 s
pool:           fprop(): 0.000195 s      bprop(): 0.000358 s
conv:           fprop(): 0.001325 s      bprop(): 0.003920 s
act_relu:       fprop(): 0.000187 s      bprop(): 0.000223 s
pool:           fprop(): 0.000154 s      bprop(): 0.000289 s
flatten:        fprop(): 0.000029 s      bprop(): 0.000017 s
fc_drop:        fprop(): 0.001440 s      bprop(): 0.000276 s
act_relu:       fprop(): 0.000027 s      bprop(): 0.000048 s
total_duration: 0.027360 s
net._bprop(): 0.025831 s

Python source code: profile_net.py

import time
import deeppy as dp


def avg_running_time(fun, reps):
    """Return the mean wall-clock duration, in seconds, of one ``fun()`` call.

    ``fun`` is invoked ``reps`` times back to back with no arguments; its
    return value is discarded.  The elapsed time for the whole loop is then
    divided by ``reps``.

    Args:
        fun: Zero-argument callable to time.
        reps: Number of consecutive calls to average over.

    Returns:
        The average duration of a single call as a float number of seconds.

    Raises:
        ZeroDivisionError: If ``reps`` is 0.
    """
    # timeit.default_timer is the clock the standard library itself uses for
    # benchmarking (time.perf_counter on Python 3: monotonic and high
    # resolution), so the measurement is not disturbed by system clock
    # adjustments the way time.time() can be.
    from timeit import default_timer

    # NOTE(review): the previous comment here read "Memory allocation forces
    # GPU synchronization", but nothing is allocated in this function.  If
    # ``fun`` launches asynchronous GPU work, confirm that it has completed
    # before the clock is read.
    start_time = default_timer()
    for _ in range(reps):
        fun()
    return float(default_timer() - start_time) / reps


def profile(net, input, reps=50):
    """Print average forward/backward timings for a network's layers.

    The network is set up from the input shapes, switched to the 'train'
    phase, and profiled on the first batch yielded by the input.  Every layer
    except the last one in ``net.layers`` is timed on its own: ``fprop`` on
    the output of the preceding layer, then ``bprop`` called with the layer's
    own forward output.  Finally ``net._update`` is timed on the whole batch.

    Args:
        net: Network object exposing ``_setup``, ``phase``, ``layers``,
            ``bprop_until`` and ``_update``.
        input: Anything accepted by ``dp.Input.from_any``.
        reps: Number of repetitions each measurement is averaged over.
    """
    input = dp.Input.from_any(input)
    net._setup(**input.shapes)
    net.phase = 'train'
    first_batch = next(input.batches())

    layer_in = first_batch['x']
    summed = 0
    for idx, layer in enumerate(net.layers[:-1]):
        # Time the forward pass, then run it once more to keep its output.
        fwd_time = avg_running_time(lambda: layer.fprop(layer_in), reps)
        layer_out = layer.fprop(layer_in)

        # Only layers past net.bprop_until propagate gradients to their input.
        layer.bprop_to_x = idx > net.bprop_until
        bwd_time = avg_running_time(lambda: layer.bprop(layer_out), reps)

        print('%s:   \tfprop(): %.6f s \t bprop(): %.6f s'
              % (layer.name, fwd_time, bwd_time))
        summed += fwd_time + bwd_time
        layer_in = layer_out
    print('total_duration: %.6f s' % summed)

    # The printed label says net._bprop(), but what is timed is net._update().
    update_time = avg_running_time(lambda: net._update(**first_batch), reps)
    print('net._bprop(): %.6f s' % update_time)


# Fetch CIFAR10 data
# dataset.data() is unpacked into train and test arrays; only the training
# pair is used by the statements below, and `dataset` is read again later for
# its n_classes attribute.
dataset = dp.dataset.CIFAR10()
x_train, y_train, x_test, y_test = dataset.data(dp_dtypes=True)

# Prepare network inputs
# Wrap the training arrays in a SupervisedInput that is configured with
# batch_size=128; this is the object handed to profile().
batch_size = 128
train_input = dp.SupervisedInput(x_train, y_train, batch_size=batch_size)

# Setup network
def conv_layer(n_filters):
    """Build a convolution layer with ``n_filters`` 5x5 filters.

    The layer uses the 'full' border mode and weights initialised by
    ``dp.AutoFiller(gain=1.25)`` with a weight decay of 0.003.

    Args:
        n_filters: Number of filters for the layer.

    Returns:
        A ``dp.Convolution`` instance.
    """
    return dp.Convolution(
        # Forward the argument; it used to be ignored in favour of a
        # hard-coded 32, so conv_layer(64) silently built a 32-filter layer.
        n_filters=n_filters,
        filter_shape=(5, 5),
        border_mode='full',
        weights=dp.Parameter(dp.AutoFiller(gain=1.25), weight_decay=0.003),
    )

def pool_layer():
    """Build a pooling layer: 3x3 window, stride 2, 'same' border, 'max'.

    Returns:
        A ``dp.Pool`` instance.
    """
    pool_config = {
        'win_shape': (3, 3),
        'strides': (2, 2),
        'border_mode': 'same',
        'method': 'max',
    }
    return dp.Pool(**pool_config)

# Three convolution -> relu -> pool stages, followed by a flatten step and a
# fully connected head.  The stages are written as one group per line and
# concatenated, which builds the layers in the same order as a flat list.
net = dp.NeuralNetwork(
    layers=(
        [conv_layer(32), dp.Activation('relu'), pool_layer()]
        + [conv_layer(32), dp.Activation('relu'), pool_layer()]
        + [conv_layer(64), dp.Activation('relu'), pool_layer()]
        + [
            dp.Flatten(),
            dp.DropoutFullyConnected(
                n_out=64,
                weights=dp.Parameter(
                    dp.AutoFiller(gain=1.25), weight_decay=0.03
                ),
            ),
            dp.Activation('relu'),
            # The output layer has one unit per dataset class.
            dp.FullyConnected(
                n_out=dataset.n_classes,
                weights=dp.Parameter(dp.AutoFiller(gain=1.25)),
            ),
        ]
    ),
    loss=dp.SoftmaxCrossEntropy(),
)

# Run the profiler on the training input with the default repetition count.
profile(net, train_input)

Total running time of the example: 0 minutes 3.4 seconds