Source code for psychrnn.backend.rnn

from __future__ import division
from __future__ import print_function

from abc import ABCMeta, abstractmethod

# abstract class python 2 & 3 compatible
ABC = ABCMeta('ABC', (object,), {})

import tensorflow as tf
import numpy as np

import sys
from time import time
from os import makedirs, path
from inspect import isgenerator

from psychrnn.backend.regularizations import Regularizer
from psychrnn.backend.loss_functions import LossFunction
from psychrnn.backend.initializations import WeightInitializer, GaussianSpectralRadius

tf.compat.v1.disable_eager_execution()

class RNN(ABC):
    """ The base recurrent neural network class.

    Note:
        The base RNN class is not itself a functioning RNN.
        forward_pass must be implemented to define a functioning RNN.

    Args:
       params (dict): The RNN parameters. Use your task's :func:`~psychrnn.tasks.task.Task.get_task_params` function to start building this dictionary. Optionally use a different network's :func:`get_weights` function to initialize the network with preexisting weights.

       :Dictionary Keys:
            * **name** (*str*) -- Unique name used to determine variable scope. Having different variable scopes allows multiple distinct models to be instantiated in the same TensorFlow environment. See `TensorFlow's variable_scope <https://www.tensorflow.org/api_docs/python/tf/compat/v1/variable_scope>`_ for more details.
            * **N_in** (*int*) -- The number of network inputs.
            * **N_rec** (*int*) -- The number of recurrent units in the network.
            * **N_out** (*int*) -- The number of network outputs.
            * **N_steps** (*int*) -- The number of simulation timesteps in a trial.
            * **dt** (*float*) -- The simulation timestep.
            * **tau** (*float*) -- The intrinsic time constant of neural state decay.
            * **N_batch** (*int*) -- The number of trials per training update.
            * **rec_noise** (*float, optional*) -- How much recurrent noise to add each time the new state of the network is calculated. Default: 0.0.
            * **load_weights_path** (*str, optional*) -- When given a path, loads weights from file in that path. Default: None
            * **initializer** (:class:`~psychrnn.backend.initializations.WeightInitializer` *or child object, optional*) -- Initializer to use for the network. Default: :class:`~psychrnn.backend.initializations.WeightInitializer` (:data:`params`) if :data:`params` includes :data:`W_rec` or :data:`load_weights_path` as a key, :class:`~psychrnn.backend.initializations.GaussianSpectralRadius` (:data:`params`) otherwise.
            * **W_in_train** (*bool, optional*) -- True if input weights, W_in, are trainable. Default: True
            * **W_rec_train** (*bool, optional*) -- True if recurrent weights, W_rec, are trainable. Default: True
            * **W_out_train** (*bool, optional*) -- True if output weights, W_out, are trainable. Default: True
            * **b_rec_train** (*bool, optional*) -- True if recurrent bias, b_rec, is trainable. Default: True
            * **b_out_train** (*bool, optional*) -- True if output bias, b_out, is trainable. Default: True
            * **init_state_train** (*bool, optional*) -- True if the initial state for the network, init_state, is trainable. Default: True
            * **loss_function** (*str, optional*) -- Which loss function to use. See :class:`psychrnn.backend.loss_functions.LossFunction` for details. Defaults to ``"mean_squared_error"``.

       :Other Dictionary Keys:
            * Any dictionary keys used by the regularizer will be passed onwards to :class:`psychrnn.backend.regularizations.Regularizer`. See :class:`~psychrnn.backend.regularizations.Regularizer` for key names and details.
            * Any dictionary keys used for the loss function will be passed onwards to :class:`psychrnn.backend.loss_functions.LossFunction`. See :class:`~psychrnn.backend.loss_functions.LossFunction` for key names and details.
            * If :data:`initializer` is not set, any dictionary keys used by the initializer will be passed onwards to :class:`WeightInitializer <psychrnn.backend.initializations.WeightInitializer>` if :data:`load_weights_path` is set or :data:`W_rec` is passed in. Otherwise all keys will be passed to :class:`GaussianSpectralRadius <psychrnn.backend.initializations.GaussianSpectralRadius>`.
            * If :data:`initializer` is not set and :data:`load_weights_path` is not set, the dictionary entries returned previously by :func:`get_weights` can be passed in to initialize the network. See :class:`WeightInitializer <psychrnn.backend.initializations.WeightInitializer>` for a list and explanation of possible parameters. At a minimum, :data:`W_rec` must be included as a key to make use of this option.
            * If :data:`initializer` is not set and :data:`load_weights_path` is not set, the following keys can be used to set biological connectivity constraints:

                * **input_connectivity** (*ndarray(dtype=float, shape=(* :attr:`N_rec`, :attr:`N_in` *)), optional*) -- Connectivity mask for the input layer. 1 where connected, 0 where unconnected. Default: np.ones((:attr:`N_rec`, :attr:`N_in`)).
                * **rec_connectivity** (*ndarray(dtype=float, shape=(* :attr:`N_rec`, :attr:`N_rec` *)), optional*) -- Connectivity mask for the recurrent layer. 1 where connected, 0 where unconnected. Default: np.ones((:attr:`N_rec`, :attr:`N_rec`)).
                * **output_connectivity** (*ndarray(dtype=float, shape=(* :attr:`N_out`, :attr:`N_rec` *)), optional*) -- Connectivity mask for the output layer. 1 where connected, 0 where unconnected. Default: np.ones((:attr:`N_out`, :attr:`N_rec`)).
                * **autapses** (*bool, optional*) -- If False, self connections are not allowed in N_rec, and the diagonal of :data:`rec_connectivity` will be set to 0. Default: True.
                * **dale_ratio** (*float, optional*) -- Dale's ratio, used to construct Dale_rec and Dale_out. 0 <= dale_ratio <= 1 if dale_ratio should be used. ``dale_ratio * N_rec`` recurrent units will be excitatory, the rest will be inhibitory. Default: None
                * **transfer_function** (*function, optional*) -- Transfer function to use for the network. Default: `tf.nn.relu <https://www.tensorflow.org/api_docs/python/tf/nn/relu>`_.

    Inferred Parameters:
        * **alpha** (*float*) -- The number of unit time constants per simulation timestep.

    """

    def __init__(self, params):
        self.params = params

        # --------------------------------------------
        # Unique name used to determine variable scope
        # --------------------------------------------
        try:
            self.name = params['name']
        except KeyError:
            print("You must pass a 'name' to RNN")
            raise

        # ----------------------------------
        # Network sizes (tensor dimensions)
        # ----------------------------------
        try:
            N_in = self.N_in = params['N_in']
        except KeyError:
            print("You must pass 'N_in' to RNN")
            raise
        try:
            N_rec = self.N_rec = params['N_rec']
        except KeyError:
            print("You must pass 'N_rec' to RNN")
            raise
        try:
            N_out = self.N_out = params['N_out']
        except KeyError:
            print("You must pass 'N_out' to RNN")
            raise
        try:
            N_steps = self.N_steps = params['N_steps']
        except KeyError:
            print("You must pass 'N_steps' to RNN")
            raise

        # ----------------------------------
        # Physical parameters
        # ----------------------------------
        try:
            self.dt = params['dt']
        except KeyError:
            print("You must pass 'dt' to RNN")
            raise
        try:
            self.tau = params['tau']
        except KeyError:
            print("You must pass 'tau' to RNN")
            raise
        try:
            self.tau = self.tau.astype('float32')
        except AttributeError:
            pass
        try:
            self.N_batch = params['N_batch']
        except KeyError:
            print("You must pass 'N_batch' to RNN")
            raise

        self.alpha = (1.0 * self.dt) / self.tau
        self.rec_noise = params.get('rec_noise', 0.0)

        # ----------------------------------
        # Load weights path
        # ----------------------------------
        self.load_weights_path = params.get('load_weights_path', None)

        # ------------------------------------------------
        # Define initializer for TensorFlow variables
        # ------------------------------------------------
        if self.load_weights_path is not None:
            # The transfer function is passed in here only for backwards compatibility:
            # if you load weights saved before transfer_function was added to saved weights,
            # the model will use the custom transfer function passed in.
            self.initializer = WeightInitializer(load_weights_path=self.load_weights_path,
                                                 transfer_function=params.get('transfer_function', tf.nn.relu))
        elif params.get('W_rec', None) is not None:
            self.initializer = params.get('initializer',
                                          WeightInitializer(**params))
        else:
            self.initializer = params.get('initializer',
                                          GaussianSpectralRadius(**params))

        self.dale_ratio = self.initializer.get_dale_ratio()
        self.transfer_function = self.initializer.get_transfer_function()

        # ----------------------------------
        # Trainable features
        # ----------------------------------
        self.W_in_train = params.get('W_in_train', True)
        self.W_rec_train = params.get('W_rec_train', True)
        self.W_out_train = params.get('W_out_train', True)
        self.b_rec_train = params.get('b_rec_train', True)
        self.b_out_train = params.get('b_out_train', True)
        self.init_state_train = params.get('init_state_train', True)

        # ---------------------------------------------------
        # TensorFlow input/output placeholder initializations
        # ---------------------------------------------------
        self.x = tf.compat.v1.placeholder("float", [None, N_steps, N_in])
        self.y = tf.compat.v1.placeholder("float", [None, N_steps, N_out])
        self.output_mask = tf.compat.v1.placeholder("float", [None, N_steps, N_out])

        # ---------------------------------------------------
        # Initialize variables in proper scope
        # ---------------------------------------------------
        with tf.compat.v1.variable_scope(self.name) as scope:
            # ------------------------------------------------
            # Trainable variables:
            # Initial state, weight matrices and biases
            # ------------------------------------------------
            try:
                self.init_state = tf.compat.v1.get_variable('init_state', [1, N_rec],
                                                            initializer=self.initializer.get('init_state'),
                                                            trainable=self.init_state_train)
            except ValueError as error:
                raise UserWarning("Try calling model.destruct() or changing params['name'].")

            self.init_state = tf.tile(self.init_state, [self.N_batch, 1])

            # Input weight matrix:
            self.W_in = \
                tf.compat.v1.get_variable('W_in', [N_rec, N_in],
                                          initializer=self.initializer.get('W_in'),
                                          trainable=self.W_in_train)

            # Recurrent weight matrix:
            self.W_rec = \
                tf.compat.v1.get_variable(
                    'W_rec',
                    [N_rec, N_rec],
                    initializer=self.initializer.get('W_rec'),
                    trainable=self.W_rec_train)

            # Output weight matrix:
            self.W_out = tf.compat.v1.get_variable('W_out', [N_out, N_rec],
                                                   initializer=self.initializer.get('W_out'),
                                                   trainable=self.W_out_train)

            # Recurrent bias:
            self.b_rec = tf.compat.v1.get_variable('b_rec', [N_rec],
                                                   initializer=self.initializer.get('b_rec'),
                                                   trainable=self.b_rec_train)
            # Output bias:
            self.b_out = tf.compat.v1.get_variable('b_out', [N_out],
                                                   initializer=self.initializer.get('b_out'),
                                                   trainable=self.b_out_train)

            # ------------------------------------------------
            # Non-trainable variables:
            # Overall connectivity and Dale's law matrices
            # ------------------------------------------------

            # Recurrent Dale's law weight matrix:
            self.Dale_rec = tf.compat.v1.get_variable('Dale_rec', [N_rec, N_rec],
                                                      initializer=self.initializer.get('Dale_rec'),
                                                      trainable=False)

            # Output Dale's law weight matrix:
            self.Dale_out = tf.compat.v1.get_variable('Dale_out', [N_rec, N_rec],
                                                      initializer=self.initializer.get('Dale_out'),
                                                      trainable=False)

            # Connectivity weight matrices:
            self.input_connectivity = tf.compat.v1.get_variable('input_connectivity', [N_rec, N_in],
                                                                initializer=self.initializer.get('input_connectivity'),
                                                                trainable=False)
            self.rec_connectivity = tf.compat.v1.get_variable('rec_connectivity', [N_rec, N_rec],
                                                              initializer=self.initializer.get('rec_connectivity'),
                                                              trainable=False)
            self.output_connectivity = tf.compat.v1.get_variable('output_connectivity', [N_out, N_rec],
                                                                 initializer=self.initializer.get('output_connectivity'),
                                                                 trainable=False)

        # ----------------------------------------------------
        # Flag to check if variables initialized, model built
        # ----------------------------------------------------
        self.is_initialized = False
        self.is_built = False
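    # Illustrative instantiation (a sketch, not part of the class): RNN itself is
    # abstract, so a concrete child class such as Basic must be used. The task and
    # hyperparameter values below are examples, not requirements.
    #
    #     from psychrnn.backend.models.basic import Basic
    #     from psychrnn.tasks.perceptual_discrimination import PerceptualDiscrimination
    #
    #     pd = PerceptualDiscrimination(dt=10, tau=100, T=2000, N_batch=128)
    #     params = pd.get_task_params()     # N_in, N_out, N_steps, dt, tau, N_batch, ...
    #     params['name'] = 'model'          # unique variable scope name
    #     params['N_rec'] = 50              # number of recurrent units
    #     model = Basic(params)             # builds the TensorFlow variables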
    def build(self):
        """ Build the TensorFlow network and start a TensorFlow session.

        """
        # --------------------------------------------------
        # Define the predictions
        # --------------------------------------------------
        self.predictions, self.states = self.forward_pass()

        # --------------------------------------------------
        # Define the loss (based on the predictions)
        # --------------------------------------------------
        self.loss = LossFunction(self.params).set_model_loss(self)

        # --------------------------------------------------
        # Define the regularization
        # --------------------------------------------------
        self.reg = Regularizer(self.params).set_model_regularization(self)

        # --------------------------------------------------
        # Define the total regularized loss
        # --------------------------------------------------
        self.reg_loss = self.loss + self.reg

        # --------------------------------------------------
        # Open a session
        # --------------------------------------------------
        self.sess = tf.compat.v1.Session()

        # --------------------------------------------------
        # Record successful build
        # --------------------------------------------------
        self.is_built = True

        return
    def destruct(self):
        """ Close the TensorFlow session and reset the global default graph.

        """
        # --------------------------------------------------
        # Close the session. Delete the graph.
        # --------------------------------------------------
        if self.is_built:
            self.sess.close()
        tf.compat.v1.reset_default_graph()

        return
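    # Illustrative usage (a sketch): variables live in a shared TensorFlow graph
    # keyed by params['name'], so call destruct() before re-instantiating a model
    # under the same name.
    #
    #     model = Basic(params)     # assumes the Basic child class
    #     model.build()
    #     # ... train / test ...
    #     model.destruct()          # close the session and reset the default graph
    #     model = Basic(params)     # now safe to reuse the same params['name']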
    def get_effective_W_rec(self):
        """ Get the recurrent weights used in the network, after masking by connectivity and dale_ratio.

        Returns:
            tf.Tensor(dtype=float, shape=(:attr:`N_rec`, :attr:`N_rec` ))

        """
        W_rec = self.W_rec * self.rec_connectivity
        if self.dale_ratio:
            W_rec = tf.matmul(tf.abs(W_rec), self.Dale_rec, name="in_1")
        return W_rec
    def get_effective_W_in(self):
        """ Get the input weights used in the network, after masking by connectivity and dale_ratio.

        Returns:
            tf.Tensor(dtype=float, shape=(:attr:`N_rec`, :attr:`N_in` ))

        """
        W_in = self.W_in * self.input_connectivity
        if self.dale_ratio:
            W_in = tf.abs(W_in)
        return W_in
    def get_effective_W_out(self):
        """ Get the output weights used in the network, after masking by connectivity and dale_ratio.

        Returns:
            tf.Tensor(dtype=float, shape=(:attr:`N_out`, :attr:`N_rec` ))

        """
        W_out = self.W_out * self.output_connectivity
        if self.dale_ratio:
            W_out = tf.matmul(tf.abs(W_out), self.Dale_out, name="in_2")
        return W_out
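    # The three getters above apply the same masking in TensorFlow: elementwise
    # multiplication by a connectivity mask, then (if Dale's law is active) an
    # absolute value followed by multiplication with a diagonal sign matrix, so
    # each presynaptic column becomes all-excitatory or all-inhibitory. A minimal
    # NumPy sketch for intuition only, assuming a hypothetical 10-unit network
    # with dale_ratio = 0.8:
    #
    #     import numpy as np
    #     N_rec = 10
    #     dale_vec = np.ones(N_rec)
    #     dale_vec[int(0.8 * N_rec):] = -1              # last 20% of units inhibitory
    #     Dale_rec = np.diag(dale_vec)
    #     W_rec = np.random.randn(N_rec, N_rec)
    #     rec_connectivity = np.ones((N_rec, N_rec))    # fully connected mask
    #     effective_W_rec = np.matmul(np.abs(W_rec * rec_connectivity), Dale_rec)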
    @abstractmethod
    def forward_pass(self):
        """ Run the RNN on a batch of task inputs.

        Note:
            This is an abstract function that must be defined in a child class.

        Returns:
            tuple:
            * **predictions** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*) -- Network output on inputs found in self.x within the tf network.
            * **states** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_rec` *))*) -- State variable values over the course of the trials found in self.x within the tf network.

        """
        raise UserWarning("forward_pass must be implemented in child class. See Basic for example.")
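    # A minimal sketch of what a child class might implement (loosely modeled on
    # the Basic implementation; this is not the library's exact code). The state
    # update is a leaky Euler step with recurrent noise, and the output is a
    # linear readout of the transfer function of the state.
    #
    #     def forward_pass(self):
    #         state = self.init_state
    #         rnn_outputs = []
    #         rnn_states = []
    #         for rnn_input in tf.unstack(self.x, axis=1):          # loop over N_steps
    #             state = ((1 - self.alpha) * state
    #                      + self.alpha * (tf.matmul(self.transfer_function(state),
    #                                                self.get_effective_W_rec(), transpose_b=True)
    #                                      + tf.matmul(rnn_input,
    #                                                  self.get_effective_W_in(), transpose_b=True)
    #                                      + self.b_rec)
    #                      + tf.sqrt(2.0 * self.alpha * self.rec_noise * self.rec_noise)
    #                      * tf.random.normal(tf.shape(input=state), mean=0.0, stddev=1.0))
    #             output = tf.matmul(self.transfer_function(state),
    #                                self.get_effective_W_out(), transpose_b=True) + self.b_out
    #             rnn_outputs.append(output)
    #             rnn_states.append(state)
    #         return tf.stack(rnn_outputs, axis=1), tf.stack(rnn_states, axis=1)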
    def get_weights(self):
        """ Get weights used in the network.

        Allows for rebuilding or tweaking different weights to do experiments / analyses.

        Returns:
            dict: Dictionary of rnn weights including the following keys:

            :Dictionary Keys:
                * **init_state** (*ndarray(dtype=float, shape=(1, :attr:`N_rec` *))*) -- Initial state of the network's recurrent units.
                * **W_in** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_in` *))*) -- Input weights.
                * **W_rec** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_rec` *))*) -- Recurrent weights.
                * **W_out** (*ndarray(dtype=float, shape=(:attr:`N_out`, :attr:`N_rec` *))*) -- Output weights.
                * **b_rec** (*ndarray(dtype=float, shape=(:attr:`N_rec`, *))*) -- Recurrent bias.
                * **b_out** (*ndarray(dtype=float, shape=(:attr:`N_out`, *))*) -- Output bias.
                * **Dale_rec** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_rec`*))*) -- Diagonal matrix with ones and negative ones on the diagonal. If :data:`dale_ratio` is not ``None``, indicates whether a recurrent unit is excitatory (1) or inhibitory (-1).
                * **Dale_out** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_rec`*))*) -- Diagonal matrix with ones and zeroes on the diagonal. If :data:`dale_ratio` is not ``None``, indicates whether a recurrent unit is excitatory (1) or inhibitory (0). Inhibitory neurons do not contribute to the output.
                * **input_connectivity** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_in`*))*) -- Connectivity mask for the input layer. 1 where connected, 0 where unconnected.
                * **rec_connectivity** (*ndarray(dtype=float, shape=(:attr:`N_rec`, :attr:`N_rec`*))*) -- Connectivity mask for the recurrent layer. 1 where connected, 0 where unconnected.
                * **output_connectivity** (*ndarray(dtype=float, shape=(:attr:`N_out`, :attr:`N_rec`*))*) -- Connectivity mask for the output layer. 1 where connected, 0 where unconnected.
                * **dale_ratio** (*float*) -- Dale's ratio, used to construct Dale_rec and Dale_out. Either ``None`` if Dale's law was not applied, or 0 <= dale_ratio <= 1 if it was.
                * **transfer_function** (*function*) -- Transfer function used by the network.

        Note:
            Keys returned may be different / include other keys depending on the implementation of :class:`RNN` used. A different set of keys will be included e.g. if the :class:`~psychrnn.backend.models.lstm.LSTM` implementation is used. The set of keys above is accurate and meaningful for the :class:`~psychrnn.backend.models.basic.Basic` and :class:`~psychrnn.backend.models.basic.BasicScan` implementations.

        """
        if not self.is_built:
            self.build()

        if not self.is_initialized:
            self.sess.run(tf.compat.v1.global_variables_initializer())
            self.is_initialized = True

        weights_dict = dict()

        for var in tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope=self.name):
            # avoid saving duplicates
            if var.name.endswith(':0') and var.name.startswith(self.name):
                name = var.name[len(self.name)+1:-2]
                weights_dict.update({name: var.eval(session=self.sess)})

        weights_dict.update({'W_rec': self.get_effective_W_rec().eval(session=self.sess)})
        weights_dict.update({'W_in': self.get_effective_W_in().eval(session=self.sess)})
        weights_dict.update({'W_out': self.get_effective_W_out().eval(session=self.sess)})

        weights_dict['dale_ratio'] = self.dale_ratio
        weights_dict['transfer_function'] = self.transfer_function

        return weights_dict
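    # Illustrative sketch of reusing the returned weights: because __init__ falls
    # back to WeightInitializer(**params) whenever 'W_rec' is a key, the dictionary
    # from get_weights() can seed a second network. Names below are examples.
    #
    #     weights = model.get_weights()
    #     new_params = params.copy()
    #     new_params.update(weights)
    #     new_params['name'] = 'model_copy'     # must differ from the original scope name
    #     model_copy = Basic(new_params)        # assumes the Basic child class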
    def save(self, save_path):
        """ Save the weights returned by :func:`get_weights` to :data:`save_path`.

        Arguments:
            save_path (str): Path for where to save the network weights.

        """
        weights_dict = self.get_weights()
        np.savez(save_path, **weights_dict)

        return
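    # Illustrative save / reload round trip (file and model names are hypothetical):
    # np.savez appends the '.npz' extension, and load_weights_path expects the full
    # file name including that extension.
    #
    #     model.save('./weights/trained_model')                       # writes trained_model.npz
    #     reload_params = params.copy()
    #     reload_params['name'] = 'reloaded_model'
    #     reload_params['load_weights_path'] = './weights/trained_model.npz'
    #     reloaded = Basic(reload_params)                             # assumes the Basic child class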
    def train(self, trial_batch_generator, train_params={}):
        """ Train the network.

        Arguments:
            trial_batch_generator (:class:`~psychrnn.tasks.task.Task` object or *Generator[tuple, None, None]*): the task to train on, or the task to train on's batch_generator. If a task is passed in, task.:func:`batch_generator` () will be called to get the generator for the task to train on.
            train_params (dict, optional): Dictionary of training parameters containing the following possible keys:

                :Dictionary Keys:
                    * **learning_rate** (*float, optional*) -- Sets the learning rate if the default optimizer is used. Default: .001
                    * **training_iters** (*int, optional*) -- Number of iterations to train for. Default: 50000.
                    * **loss_epoch** (*int, optional*) -- Compute and record loss every 'loss_epoch' epochs. Default: 10.
                    * **verbosity** (*bool, optional*) -- If true, prints information as training progresses. Default: True.
                    * **save_weights_path** (*str, optional*) -- Where to save the model after training. Default: None
                    * **save_training_weights_epoch** (*int, optional*) -- Save training weights every 'save_training_weights_epoch' epochs. Weights are only actually saved if :data:`training_weights_path` is set. Default: 100.
                    * **training_weights_path** (*str, optional*) -- What directory to save training weights into as training progresses. Default: None.
                    * **curriculum** (:class:`~psychrnn.backend.curriculum.Curriculum` *object, optional*) -- Curriculum to train on. If a curriculum object is provided, it overrides the trial_batch_generator argument. Default: None.
                    * **optimizer** (`tf.compat.v1.train.Optimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/Optimizer>`_ *object, optional*) -- What optimizer to use to compute gradients. Default: `tf.train.AdamOptimizer <https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdamOptimizer>`_ (learning_rate=:data:`train_params`['learning_rate'] ).
                    * **clip_grads** (*bool, optional*) -- If true, clip gradients by norm 1. Default: True
                    * **fixed_weights** (*dict, optional*) -- By default all weights are allowed to train unless :data:`fixed_weights` or :data:`W_rec_train`, :data:`W_in_train`, or :data:`W_out_train` are set. Default: None. Dictionary of weights to fix (not allow to train) with the following optional keys:

                        Fixed Weights Dictionary Keys (in the case of the :class:`~psychrnn.backend.models.basic.Basic` and :class:`~psychrnn.backend.models.basic.BasicScan` implementations)
                            * **W_in** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_in` *)), optional*) -- True for input weights that should be fixed during training.
                            * **W_rec** (*ndarray(dtype=bool, shape=(:attr:`N_rec`, :attr:`N_rec` *)), optional*) -- True for recurrent weights that should be fixed during training.
                            * **W_out** (*ndarray(dtype=bool, shape=(:attr:`N_out`, :attr:`N_rec` *)), optional*) -- True for output weights that should be fixed during training.

                        :Note:
                            In general, any key in the dictionary output by :func:`get_weights` can have a key in the fixed_weights matrix; however, fixed_weights will only meaningfully apply to trainable matrices.

                    * **performance_cutoff** (*float*) -- If :data:`performance_measure` is not ``None``, training stops as soon as performance_measure surpasses the performance_cutoff. Default: None.
                    * **performance_measure** (*function*) -- Function to calculate the performance of the network using custom criteria. Default: None.

                        :Arguments:
                            * **trial_batch** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_in` *))*): Task stimuli for :attr:`N_batch` trials.
                            * **trial_y** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Target output for the network on :attr:`N_batch` trials given the :data:`trial_batch`.
                            * **output_mask** (*ndarray(dtype=bool, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output mask for :attr:`N_batch` trials. True when the network should aim to match the target output, False when the target output can be ignored.
                            * **output** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*): Output to compute the accuracy of. ``output`` as returned by :func:`psychrnn.backend.rnn.RNN.test`.
                            * **epoch** (*int*): Current training epoch (e.g. perhaps the performance_measure is calculated differently early on vs late in training).
                            * **losses** (*list of float*): List of losses from the beginning of training until the current epoch.
                            * **verbosity** (*bool*): Passed in from :data:`train_params`.

                        :Returns:
                            *float*

                            Performance, greater when the performance is better.

        Returns:
            tuple:
            * **losses** (*list of float*) -- List of losses, computed every :data:`loss_epoch` epochs during training.
            * **training_time** (*float*) -- Time spent training.
            * **initialization_time** (*float*) -- Time spent initializing the network and preparing to train.

        """
        if not self.is_built:
            self.build()

        t0 = time()

        # --------------------------------------------------
        # Extract params
        # --------------------------------------------------
        learning_rate = train_params.get('learning_rate', .001)
        training_iters = train_params.get('training_iters', 50000)
        loss_epoch = train_params.get('loss_epoch', 10)
        verbosity = train_params.get('verbosity', True)
        save_weights_path = train_params.get('save_weights_path', None)
        save_training_weights_epoch = train_params.get('save_training_weights_epoch', 100)
        training_weights_path = train_params.get('training_weights_path', None)
        curriculum = train_params.get('curriculum', None)
        optimizer = train_params.get('optimizer',
                                     tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate))
        clip_grads = train_params.get('clip_grads', True)
        fixed_weights = train_params.get('fixed_weights', None)  # array of zeroes and ones. One indicates to pin and not train that weight.
        performance_cutoff = train_params.get('performance_cutoff', None)
        performance_measure = train_params.get('performance_measure', None)

        if (performance_cutoff is not None and performance_measure is None) or (performance_cutoff is None and performance_measure is not None):
            raise UserWarning("training will not be cutoff based on performance. Make sure both performance_measure and performance_cutoff are defined")

        if curriculum is not None:
            trial_batch_generator = curriculum.batch_generator()

        if not isgenerator(trial_batch_generator):
            trial_batch_generator = trial_batch_generator.batch_generator()

        # --------------------------------------------------
        # Make weights folder if it doesn't already exist.
        # --------------------------------------------------
        if save_weights_path is not None:
            if path.dirname(save_weights_path) != "" and not path.exists(path.dirname(save_weights_path)):
                makedirs(path.dirname(save_weights_path))

        # --------------------------------------------------
        # Make train weights folder if it doesn't already exist.
        # --------------------------------------------------
        if training_weights_path is not None:
            if path.dirname(training_weights_path) != "" and not path.exists(path.dirname(training_weights_path)):
                makedirs(path.dirname(training_weights_path))

        # --------------------------------------------------
        # Compute gradients
        # --------------------------------------------------
        grads = optimizer.compute_gradients(self.reg_loss)

        # --------------------------------------------------
        # Fixed weights
        # --------------------------------------------------
        if fixed_weights is not None:
            for i in range(len(grads)):
                (grad, var) = grads[i]
                name = var.name[len(self.name)+1:-2]
                if name in fixed_weights.keys():
                    grad = tf.multiply(grad, (1 - fixed_weights[name]))
                    grads[i] = (grad, var)

        # --------------------------------------------------
        # Clip gradients
        # --------------------------------------------------
        if clip_grads:
            grads = [(tf.clip_by_norm(grad, 1.0), var)
                     if grad is not None else (grad, var)
                     for grad, var in grads]

        # --------------------------------------------------
        # Call the optimizer and initialize variables
        # --------------------------------------------------
        optimize = optimizer.apply_gradients(grads)
        self.sess.run(tf.compat.v1.global_variables_initializer())
        self.is_initialized = True

        # --------------------------------------------------
        # Record training time for performance benchmarks
        # --------------------------------------------------
        t1 = time()

        # --------------------------------------------------
        # Training loop
        # --------------------------------------------------
        epoch = 1
        batch_size = next(trial_batch_generator)[0].shape[0]
        losses = []
        if performance_cutoff is not None:
            performance = performance_cutoff - 1

        while (epoch - 1) * batch_size < training_iters and (performance_cutoff is None or performance < performance_cutoff):

            batch_x, batch_y, output_mask, _ = next(trial_batch_generator)
            self.sess.run(optimize, feed_dict={self.x: batch_x, self.y: batch_y, self.output_mask: output_mask})

            # --------------------------------------------------
            # Output batch loss
            # --------------------------------------------------
            if epoch % loss_epoch == 0:
                reg_loss = self.sess.run(self.reg_loss,
                                         feed_dict={self.x: batch_x, self.y: batch_y, self.output_mask: output_mask})
                losses.append(reg_loss)
                if verbosity:
                    print("Iter " + str(epoch * batch_size) + ", Minibatch Loss= " +
                          "{:.6f}".format(reg_loss))

            # --------------------------------------------------
            # Allow for curriculum learning
            # --------------------------------------------------
            if curriculum is not None and epoch % curriculum.metric_epoch == 0:
                trial_batch, trial_y, output_mask, _ = next(trial_batch_generator)
                output, _ = self.test(trial_batch)
                if curriculum.metric_test(trial_batch, trial_y, output_mask, output, epoch, losses, verbosity):
                    if curriculum.stop_training:
                        break
                    trial_batch_generator = curriculum.batch_generator()

            # --------------------------------------------------
            # Save intermediary weights
            # --------------------------------------------------
            if epoch % save_training_weights_epoch == 0:
                if training_weights_path is not None:
                    self.save(training_weights_path + str(epoch))
                    if verbosity:
                        print("Training weights saved in file: %s" % training_weights_path + str(epoch))

            # ---------------------------------------------------
            # Update performance value if necessary
            # ---------------------------------------------------
            if performance_measure is not None:
                trial_batch, trial_y, output_mask, _ = next(trial_batch_generator)
                output, _ = self.test(trial_batch)
                performance = performance_measure(trial_batch, trial_y, output_mask, output, epoch, losses, verbosity)
                if verbosity:
                    print("performance: " + str(performance))

            epoch += 1

        t2 = time()
        if verbosity:
            print("Optimization finished!")

        # --------------------------------------------------
        # Save final weights
        # --------------------------------------------------
        if save_weights_path is not None:
            self.save(save_weights_path)
            if verbosity:
                print("Model saved in file: %s" % save_weights_path)

        # --------------------------------------------------
        # Return losses, training time, initialization time
        # --------------------------------------------------
        return losses, (t2 - t1), (t1 - t0)
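    # Illustrative training call (a sketch; the task instance `pd` and the
    # hyperparameter values are examples, not defaults beyond those documented above):
    #
    #     train_params = {
    #         'training_iters': 100000,
    #         'learning_rate': .001,
    #         'loss_epoch': 10,
    #         'verbosity': True,
    #         'save_weights_path': './weights/trained',
    #     }
    #     losses, train_time, init_time = model.train(pd, train_params)   # pd: a Task instance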
    def train_curric(self, train_params):
        """ Wrapper function for training with curriculum to streamline curriculum learning.

        Arguments:
            train_params (dict, optional): See :func:`train` for details.

        Returns:
            tuple: See :func:`train` for details.

        """
        # --------------------------------------------------
        # Wrapper function for training with curriculum
        # to streamline curriculum learning
        # --------------------------------------------------
        curriculum = train_params.get('curriculum', None)
        if curriculum is None:
            raise UserWarning("train_curric requires a curriculum. Please pass in a curriculum or use train instead.")

        losses, training_time, initialization_time = self.train(curriculum.get_generator_function(), train_params)

        return losses, training_time, initialization_time
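    # Illustrative sketch of curriculum training, assuming a hypothetical schedule of
    # PerceptualDiscrimination tasks of decreasing coherence (i.e. increasing difficulty):
    #
    #     from psychrnn.backend.curriculum import Curriculum
    #     tasks = [PerceptualDiscrimination(dt=10, tau=100, T=2000, N_batch=50, coherence=c)
    #              for c in [.7, .5, .3, .1]]
    #     curriculum = Curriculum(tasks)
    #     losses, train_time, init_time = model.train_curric({'curriculum': curriculum})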
    def test(self, trial_batch):
        """ Test the network on a certain task input.

        Arguments:
            trial_batch (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_in` *))*): Task stimulus to run the network on. Stimulus from :func:`psychrnn.tasks.task.Task.get_trial_batch`, or from next(:func:`psychrnn.tasks.task.Task.batch_generator` ).

        Returns:
            tuple:
            * **outputs** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_out` *))*) -- Output time series of the network for each trial in the batch.
            * **states** (*ndarray(dtype=float, shape=(* :attr:`N_batch`, :attr:`N_steps`, :attr:`N_rec` *))*) -- Activity of recurrent units during each trial.

        """
        if not self.is_built:
            self.build()

        if not self.is_initialized:
            self.sess.run(tf.compat.v1.global_variables_initializer())
            self.is_initialized = True

        # --------------------------------------------------
        # Run the forward pass on trial_batch
        # --------------------------------------------------
        outputs, states = self.sess.run([self.predictions, self.states],
                                        feed_dict={self.x: trial_batch})

        return outputs, states
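    # Illustrative sketch of testing on a fresh batch of trials (the task instance
    # `pd` is an example):
    #
    #     x, target, mask, trial_params = pd.get_trial_batch()
    #     outputs, states = model.test(x)
    #     # outputs: (N_batch, N_steps, N_out); states: (N_batch, N_steps, N_rec)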