diff --git a/decolle/init_functions.py b/decolle/init_functions.py
index 5613eee..1672eaf 100644
--- a/decolle/init_functions.py
+++ b/decolle/init_functions.py
@@ -31,7 +31,7 @@ def init_LSUV(net, data_batch, tgt_mu=0.0, tgt_var=1.0):
             init.orthogonal_(l.base_layer.weight)
         if hasattr(l,'rec_layer'):
-            if l.base_layer.bias is not None:
+            if l.rec_layer.bias is not None:
                 l.rec_layer.bias.data *= 0
             init.orthogonal_(l.rec_layer.weight)
 
     alldone = False
@@ -53,14 +53,19 @@ def init_LSUV(net, data_batch, tgt_mu=0.0, tgt_var=1.0):
                 done=False
             else:
                 done=True
+            alldone*=done
 
-            if np.abs(m-tgt_mu+.1)>.2:
+            if np.abs(m-tgt_mu)>.2:
                 if net.LIF_layers[i].base_layer.bias is not None:
                     net.LIF_layers[i].base_layer.bias.data -= .5*(m-tgt_mu)
                 done=False
             else:
                 done=True
             alldone*=done
 
+    if alldone:
+        print("Initialization finalized:")
+        print("Layer: {0}, Variance: {1:.3}, Mean U: {2:.3}, Mean S: {3:.3}".format(i,v,m,mus))
+
 
 def init_LSUV_actrate(net, data_batch, act_rate, threshold=0., var=1.0):
diff --git a/decolle/lenet_decolle_model_fa.py b/decolle/lenet_decolle_model_fa.py
index b2ab5cb..95336c0 100644
--- a/decolle/lenet_decolle_model_fa.py
+++ b/decolle/lenet_decolle_model_fa.py
@@ -102,6 +102,7 @@ def build_conv_stack(self, Nhid, feature_height, feature_width, pool_size, kerne
                              alpha=self.alpha[i],
                              beta=self.beta[i],
                              alpharp=self.alpharp[i],
+                             wrp=self.wrp[i],
                              deltat=self.deltat,
                              do_detach= True if self.method == 'rtrl' else False)
             pool = nn.MaxPool2d(kernel_size=pool_size[i])
@@ -116,7 +117,7 @@
                 readout = nn.Identity()
             self.readout_layers.append(readout)
 
-            if self.dropout[i] < 1.0:
+            if self.dropout[i] > 0.0:
                 dropout_layer = nn.Dropout(self.dropout[i])
             else:
                 dropout_layer = nn.Identity()
@@ -131,15 +132,16 @@ def build_mlp_stack(self, Mhid, out_channels):
         output_shape = None
 
         for i in range(self.num_mlp_layers):
-            base_layer = nn.Linear(Mhid[i], out_channels)
+            base_layer = nn.Linear(Mhid[i], Mhid[i+1], self.with_bias)
             layer = self.lif_layer_type[i+self.num_conv_layers](base_layer,
                              alpha=self.alpha[i],
                              beta=self.beta[i],
                              alpharp=self.alpharp[i],
+                             wrp=self.wrp[i],
                              deltat=self.deltat,
                              do_detach=True if self.method == 'rtrl' else False)
             if self.lc_ampl is not None:
-                readout = FALinear(out_channels, out_channels)
+                readout = FALinear(Mhid[i+1], out_channels)
                 # Readout layer has random fixed weights
                 for param in readout.parameters():
                     param.requires_grad = False
@@ -147,7 +149,7 @@
             else:
                 readout = nn.Identity()
 
-            if self.dropout[i] < 1.0:
+            if self.dropout[i] > 0.0:
                 dropout_layer = nn.Dropout(self.dropout[i])
             else:
                 dropout_layer = nn.Identity()
diff --git a/scripts/parameters/params_dvsgestures_torchneuromorphic.yml b/scripts/parameters/params_dvsgestures_torchneuromorphic.yml
index dfdf128..bcf6809 100644
--- a/scripts/parameters/params_dvsgestures_torchneuromorphic.yml
+++ b/scripts/parameters/params_dvsgestures_torchneuromorphic.yml
@@ -24,7 +24,7 @@ input_shape:
 - 32
 kernel_size:
 - 7
-lc_ampl: 0.5
+lc_ampl: 2.0
 learning_rate: 3.6e-4
 learning_method: 'rtrl'
 loss: smoothL1
diff --git a/scripts/train_lenet_decolle_fa.py b/scripts/train_lenet_decolle_fa.py
index 700bad5..a394b72 100644
--- a/scripts/train_lenet_decolle_fa.py
+++ b/scripts/train_lenet_decolle_fa.py
@@ -43,12 +43,16 @@
                                   num_workers=params['num_dl_workers'])
 
 data_batch, target_batch = next(iter(gen_train))
-data_batch = torch.tensor(data_batch).to(device)
-target_batch = torch.tensor(target_batch).to(device)
+data_batch = torch.Tensor(data_batch).to(device)
+target_batch = torch.Tensor(target_batch).to(device)
 #d, t = next(iter(gen_train))
 
 input_shape = data_batch.shape[-3:]
 
+#Backward compatibility
+if 'dropout' not in params.keys():
+    params['dropout'] = [.5]
+
 ## Create Model, Optimizer and Loss
 net = LenetDECOLLEFA( out_channels=params['out_channels'],
                     Nhid=params['Nhid'],
@@ -58,13 +62,14 @@
                     input_shape=params['input_shape'],
                     alpha=params['alpha'],
                     alpharp=params['alpharp'],
+                    dropout=params['dropout'],
                     beta=params['beta'],
                     num_conv_layers=params['num_conv_layers'],
                     num_mlp_layers=params['num_mlp_layers'],
                     lc_ampl=params['lc_ampl'],
                     lif_layer_type = LIFLayer,
                     method=params['learning_method'],
-                    with_output_layer=True).to(device)
+                    with_output_layer=params['with_output_layer']).to(device)
 
 if hasattr(params['learning_rate'], '__len__'):
     from decolle.utils import MultiOpt
@@ -77,19 +82,25 @@
 reg_l = params['reg_l'] if 'reg_l' in params else None
 
-if 'loss_scope' in params and params['loss_scope']=='crbp':
-    from decolle.lenet_decolle_model import CRBPLoss
-    loss = torch.nn.SmoothL1Loss(reduction='none')
-    decolle_loss = CRBPLoss(net = net, loss_fn = loss, reg_l=reg_l)
+if 'loss_scope' in params and params['loss_scope']=='global':
+    loss = [None for i in range(len(net))]
+    if net.with_output_layer:
+        loss[-1] = cross_entropy_one_hot
+    else:
+        raise RuntimeError('bptt mode needs output layer')
+    decolle_loss = DECOLLELoss(net = net, loss_fn = loss, reg_l=reg_l)
 else:
+    print('Running local learning')
     loss = [torch.nn.SmoothL1Loss() for i in range(len(net))]
     if net.with_output_layer:
         loss[-1] = cross_entropy_one_hot
     decolle_loss = DECOLLELoss(net = net, loss_fn = loss, reg_l=reg_l)
 
 ##Initialize
-net.init_parameters(data_batch)
+net.init_parameters(data_batch[:32])
+from decolle.init_functions import init_LSUV
+init_LSUV(net, data_batch[:32], tgt_var=.5)
 
 
 ##Resume if necessary
 if args.resume_from is not None:
     print("Checkpoint directory " + checkpoint_dir)
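
Reviewer note on the build_mlp_stack fix above: with the old nn.Linear(Mhid[i], out_channels), any MLP stack deeper than one layer would fail at runtime whenever Mhid[i+1] != out_channels, because layer i+1 expects Mhid[i+1] inputs. The sketch below illustrates the corrected wiring in plain PyTorch. It is an illustration only, not the repo's code: nn.Linear stands in for FALinear and the LIF wrapper, and the widths Mhid, out_channels, and p_drop are assumed example values, not taken from the diff.

    import torch
    import torch.nn as nn

    Mhid, out_channels = [128, 64, 32], 11          # assumed example widths
    layers, readouts, drops = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
    for i in range(len(Mhid) - 1):
        # hidden widths now chain Mhid[i] -> Mhid[i+1] (was Mhid[i] -> out_channels)
        layers.append(nn.Linear(Mhid[i], Mhid[i + 1]))
        # per-layer readout with fixed random weights, sized to the layer it taps
        readout = nn.Linear(Mhid[i + 1], out_channels)
        for p in readout.parameters():
            p.requires_grad = False
        readouts.append(readout)
        # mirrors the corrected dropout guard: a real Dropout only when p > 0
        p_drop = 0.5
        drops.append(nn.Dropout(p_drop) if p_drop > 0.0 else nn.Identity())

    x = torch.randn(4, Mhid[0])                     # dummy batch of 4
    for layer, readout, drop in zip(layers, readouts, drops):
        x = drop(torch.relu(layer(x)))
        local_pred = readout(x)                     # per-layer local prediction

Every local_pred has shape (4, out_channels) regardless of the hidden width, which is what lets each layer receive its own local loss while the fixed-weight readout stays untrained, matching the feedback-alignment setup in the diff.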