Module nujo.autodiff.function
Expand source code
from abc import abstractmethod
from numbers import Number
from typing import Any, Dict, Iterable, List, TypeVar, Union

from numpy import ndarray

import nujo.autodiff.modes as modes
from nujo.autodiff._node import _Node
from nujo.autodiff.tensor import Tensor

# ====================================================================================================


class _FunctionMeta(type):
    def __call__(cls, *children: Union[Tensor, ndarray, List[Number], Number],
                 **kwargs):
        ''' Looks up the cache for an already defined function of the current
        type with the current `children` as inputs, and reuses it. If a
        function satisfying these requirements cannot be found, a new function
        is created and added to the cache so that it can potentially be
        reused later.
        '''

        obj = cls.__new__(cls, *children, **kwargs)

        # Only cache functions that are in the computation graph
        if modes.DIFF_ENABLED:
            key = _get_function_identifier(cls, children)
            cache = cls._func_children_lookup_cache

            if key in cache:
                return cache[key]
            else:
                cls.__init__(obj, *children, **kwargs)
                cache[key] = obj
                return obj

        # Otherwise - standard call
        cls.__init__(obj, *children, **kwargs)
        return obj

# ====================================================================================================


class Function(_Node, metaclass=_FunctionMeta):
    ''' Base class for functions

    Functions are applied to tensors. They take multiple tensors as input
    and produce only one tensor as output. They do NOT change tensors
    in-place.

    Functions are also written so that they reuse the input/output tensors
    when possible, which results in the computation graph being:
        - "Dynamically defined, statically evaluated."
    taking the best from both worlds.

    Parameters:
    -----------
     - children : varargs, the input tensors

    '''

    _func_children_lookup_cache: Dict[str, 'Function'] = {}
    ''' Cache used to look up functions that may have already been defined
    in the computation graph.

     - key : hash(FuncType) + (children's identifiers);
       use `_get_function_identifier` to obtain a key
     - value : the already defined function which can be reused

    '''

    T = TypeVar('T', Tensor, ndarray)

    def __init__(self, *children: Union[Tensor, ndarray, List[Number],
                                        Number]):
        super(Function, self).__init__(*_parse_inputs(children),
                                       name=self.__class__.__name__)

        # This output placeholder is reused when possible
        self._output_placeholder = Tensor(
            None,
            diff=any(x.diff for x in self.children) and modes.DIFF_ENABLED,
            creator=self if modes.DIFF_ENABLED else None,
            name=self._generate_tensor_name())

        if modes.DIFF_ENABLED:  # If graph building is enabled.
            # Allocate space for parent's output (output placeholder)
            for child in self.children:
                child.parents_outputs.append(self._output_placeholder)

    def __repr__(self):
        return super(Function, self).__repr__() + f'#{self.id}'

    def _generate_tensor_name(self) -> str:
        return 'Z' + self.__repr__()

    @abstractmethod
    def forward(self) -> ndarray:
        ''' Implement the forward pass of the function here.

        Use the `self.children` list to access the inputs.

        '''
        pass

    @abstractmethod
    def backward(self, idx: int, accum_grad: T) -> T:
        ''' Implement the backward pass of the function here.

        Compute the gradient of children[idx] w.r.t. the output of the
        computation graph from the accumulated gradient (the gradient of
        the output of the function w.r.t. the output of the graph).

        Parameters:
        -----------
         - idx : int, the index of the child for which to compute the
           gradient w.r.t. the output of the computation graph
         - accum_grad : T (Tensor or ndarray), the accumulated gradient in
           the graph so far; you can think of it as the gradient of the
           output of the function w.r.t. the output of the graph.
            - `accum_grad` is a Tensor if differentiation is enabled
              (`DIFF_ENABLED`) and the child has opted for differentiation
              (`diff` is True); the computations are then recorded in the
              computation graph and higher-order derivatives can be computed.
            - otherwise, `accum_grad` is an ndarray and the computations are
              not recorded; ndarrays are used since computations with them
              are more efficient.

        Returns:
        --------
         - grad : T (Tensor or ndarray), the computed gradient of
           `self.children[idx]`

        '''
        pass

    def __call__(self) -> Tensor:
        ''' Executes the cached forward pass
        '''
        # Forward pass
        self._output_placeholder.value = self.forward()
        return self._output_placeholder

# ====================================================================================================


def _parse_inputs(inputs: Iterable[Any]) -> List[Tensor]:
    ''' Parses all inputs that are not Nodes into Tensors
    '''
    return [
        x if isinstance(x, _Node) else Tensor(x, name=str(x)) for x in inputs
    ]

# ====================================================================================================


def _get_function_identifier(func_type: type, inputs: Iterable[Any]) -> str:
    ''' Returns a string identifier for the current function type and its
    inputs, used as a key in the cache.
    '''
    key = str(hash(func_type))  # Include the function type hash in the key

    # Include the inputs' (children's) identifiers in the key
    key += ''.join(('T' + str(x.id) if isinstance(x, Tensor) else 'P' + str(x)
                    for x in inputs))
    # 'T' and 'P' signatures were added in order to avoid
    # collisions between Tensor and Python values

    return key
# ====================================================================================================
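To make the caching behaviour concrete, here is a minimal sketch, not part of the module, assuming the default `modes.DIFF_ENABLED = True` and a hypothetical subclass `_Identity` that only fills in the `forward`/`backward` contract shown above. While differentiation is enabled, constructing the same function type with the same child returns the cached instance, and repeated calls reuse the same output placeholder.

import nujo.autodiff.modes as modes
from nujo.autodiff.function import Function
from nujo.autodiff.tensor import Tensor


class _Identity(Function):
    ''' Hypothetical pass-through function, used only for this illustration. '''

    def forward(self):
        # Single child; its raw value is returned unchanged.
        return self.children[0].value

    def backward(self, idx, accum_grad):
        # d(output)/d(child) = 1, so the accumulated gradient flows through.
        return accum_grad


x = Tensor(3, name='x')

assert modes.DIFF_ENABLED    # assumed default: graph building (and caching) is on
f1 = _Identity(x)
f2 = _Identity(x)            # same function type, same child -> cache hit
assert f1 is f2              # the already defined function is reused

out1, out2 = f1(), f1()      # __call__ runs the cached forward pass
assert out1 is out2          # ... and reuses the single output placeholder

The cache key is built by `_get_function_identifier` above: the hash of the function type concatenated with each child's Tensor id (or, for plain Python values, the value itself), so calls with different children create distinct functions.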
Classes
class Function (*children: Union[nujo.autodiff.tensor.Tensor, numpy.ndarray, List[numbers.Number], numbers.Number], **kwargs)
-
Base class for functions
Functions are applied to tensors. They take multiple tensors as input and produce only one tensor as output. They do NOT change tensors in-place.
Functions are also written so that they reuse the input/output tensors when possible, which results in the computation graph being "dynamically defined, statically evaluated", taking the best from both worlds.
Parameters:
- children : varargs, the input tensors
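As an illustration of the contract a subclass has to satisfy, here is a hedged sketch of a hypothetical elementwise product over two children. The class name `_Hadamard` and the use of the children's `.value` arrays in `backward` are assumptions made for this example; they are not how nujo's own `_Multiplication` is necessarily implemented.

from numpy import ndarray

from nujo.autodiff.function import Function
from nujo.autodiff.tensor import Tensor


class _Hadamard(Function):
    ''' Hypothetical elementwise product of its two children (illustration only). '''

    def forward(self) -> ndarray:
        # Inputs are read from `self.children`; nothing is modified in-place.
        return self.children[0].value * self.children[1].value

    def backward(self, idx, accum_grad):
        # d(a * b)/da = b and d(a * b)/db = a, so the gradient of child `idx`
        # is the accumulated gradient scaled by the *other* child's value.
        return accum_grad * self.children[1 - idx].value


a = Tensor([1.0, 2.0], name='a')
b = Tensor([3.0, 4.0], name='b')

prod = _Hadamard(a, b)()    # construct the function, then run its forward pass
print(prod.value)           # -> [3. 8.]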
Ancestors
- nujo.autodiff._node._Node
Subclasses
- nujo.autodiff._functions._activations._BinaryStep
- nujo.autodiff._functions._activations._LeakyReLU
- nujo.autodiff._functions._activations._ReLU
- nujo.autodiff._functions._activations._Sigmoid
- nujo.autodiff._functions._activations._Softmax
- nujo.autodiff._functions._activations._Swish
- nujo.autodiff._functions._activations._TanH
- nujo.autodiff._functions._aggregate._InnerProd
- nujo.autodiff._functions._aggregate._InnerSum
- nujo.autodiff._functions._elementary._Addition
- nujo.autodiff._functions._elementary._Logarithm
- nujo.autodiff._functions._elementary._MatrixMul
- nujo.autodiff._functions._elementary._Multiplication
- nujo.autodiff._functions._elementary._Negation
- nujo.autodiff._functions._elementary._Power
- nujo.autodiff._functions._elementary._Reciprocal
- nujo.autodiff._functions._transform._ConstPad
- nujo.autodiff._functions._transform._Im2col
- nujo.autodiff._functions._transform._Reshape
- nujo.autodiff._functions._transform._Transpose
Class variables
var T
Methods
def backward(self, idx: int, accum_grad: ~T) -> ~T
-
Implement the backward pass of the function here.
Compute the gradient of children[idx] w.r.t. the output of the computation graph from the accumulated gradient (the gradient of the output of the function w.r.t. the output of the graph).
Parameters:
- idx : int, the index of the child for which to compute the gradient w.r.t. the output of the computation graph
- accum_grad : T (Tensor or ndarray), the accumulated gradient in the graph so far; you can think of it as the gradient of the output of the function w.r.t. the output of the graph.
  - accum_grad is a Tensor if differentiation is enabled (DIFF_ENABLED) and the child has opted for differentiation (diff is True); the computations are then recorded in the computation graph and higher-order derivatives can be computed.
  - Otherwise, accum_grad is an ndarray and the computations are not recorded; ndarrays are used since computations with them are more efficient.
Returns:
- grad : T (Tensor or ndarray), the computed gradient of self.children[idx]
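Because backward only needs arithmetic on accum_grad, the same implementation can typically serve both the Tensor and the ndarray case. A short hedged sketch, assuming a hypothetical `_Triple` function (output = 3 * child) and assuming Tensor supports the `*` operator:

import numpy as np

from nujo.autodiff.function import Function
from nujo.autodiff.tensor import Tensor


class _Triple(Function):
    ''' Hypothetical: output = 3 * child (illustration only). '''

    def forward(self):
        return 3 * self.children[0].value

    def backward(self, idx, accum_grad):
        # The local derivative w.r.t. the only child is the constant 3.
        # Plain arithmetic keeps this valid whether `accum_grad` arrives as a
        # Tensor (recorded in the graph) or as an ndarray (not recorded).
        return accum_grad * 3


f = _Triple(Tensor(2.0, name='x'))
print(f.backward(0, np.ones(1)))    # ndarray path -> [3.]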
def forward(self) -> numpy.ndarray
-
Implement the forward pass of the function here.
Use the self.children list to access the inputs.