Package nujo

Expand source code
from nujo.autodiff import Function, Tensor, no_diff
from nujo.flow import Flow
from nujo.init import *
from nujo.math import *

__all__ = [
    'Function',
    'Tensor',
    'no_diff',
    'Flow',
]

__version__ = '0.3.0'

Sub-modules

nujo.autodiff

nujo's core Reverse-mode Automatic Differentiation module

nujo.flow

a chainable computation Flow

nujo.init

Tensor initializers …

nujo.math

nujo's core mathematical functionality

nujo.nn

nujo's Neural Network module …

nujo.objective

nujo's objective functions module …

nujo.optim

nujo's optimization module …

nujo.utils

nujo utils

Classes

class Flow (*args, **kwargs)

A chainable computation Flow

A Flow is just a sequence of functions (addition, multiplication, etc.) grouped in a single object (a Flow) that can be applied to a tensor.

Each nujo Flow holds a list of flow objects (a chain) that a tensor passes through when the Flow is called on that tensor.

This allows flows to be chained (connecting two or more chains together), as sketched in the example after the parameter list.

Parameters:

  • name : string, identifier of the current flow
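
For illustration, a minimal sketch of defining and chaining flows. The `Scale` and `Shift` flows below are hypothetical and exist only to show the subclass-and-chain pattern; real models would typically use the layers in nujo.nn.

import nujo as nj

# Hypothetical flows, defined only for this sketch.
class Scale(nj.Flow):
    def __init__(self, factor):
        super().__init__(name='Scale')
        self.factor = nj.Tensor(factor, name='factor')

    def forward(self, x):
        return x * self.factor

class Shift(nj.Flow):
    def __init__(self, offset):
        super().__init__(name='Shift')
        self.offset = nj.Tensor(offset, name='offset')

    def forward(self, x):
        return x + self.offset

model = Scale(2) >> Shift(1)          # chain the two flows
y = model(nj.Tensor([1., 2., 3.]))    # the tensor passes through the chain: (x * 2) + 1
print(model.name)                     # 'Scale >> Shift'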
Expand source code
class Flow(metaclass=_FlowMeta):
    ''' A chainable computation Flow

    A Flow is just a sequence of functions (addition, multiplication, etc.)
    that are grouped in a single object (Flow) and can be applied to a tensor.

    Each nujo Flow has a list of flow objects (a chain) that a tensor will pass
    through when the Flow is called on that tensor.

    This allows the chaining of flows (connecting two or more chains together).

    Parameters:
    -----------
     - name : string, identifier of the current flow

    '''
    def __init__(self, name='Flow', _chain: List['Flow'] = []):
        self.name = name
        self._chain = _chain

        if len(self._chain):  # If there is a chain
            self.name = self._generate_chain_name()

    # setup methods

    def _register_parameters(self) -> None:
        ''' Tensor parameters registration - called after Flow.__init__

        Makes all tensors bound to `self` diff enabled (sets their `diff`
        to `True`).

        Called only once, when the chain for the current flow is being created.

        '''

        for prop_name in dir(self):
            prop = getattr(self, prop_name)

            if isinstance(prop, Tensor):
                prop.diff = True

    def _generate_chain_name(self) -> str:
        return ' >> '.join(map(lambda x: x.name, self._chain))

    # parameters generators

    def parameters(self) -> Tensor:
        ''' Generator for all the parameters of the current flow
        '''

        for param in self._total_parameters():
            yield param

    def _total_parameters(self) -> Tensor:
        ''' Returns an iterable of all the parameters of the current flow

        Including those of other flows that are used in the current one
        (namely other flows bound to `self`).

        '''

        total_params = [self._current_parameters()]

        for prop_name in dir(self):
            prop = getattr(self, prop_name)

            if isinstance(prop, Flow):
                total_params.append(prop.parameters())

        return chain(*total_params)

    def _current_parameters(self) -> Tensor:
        ''' Generator for the current tensor parameters bound to `self`
        '''

        for flow in self._chain:
            for prop_name in dir(flow):
                prop = getattr(flow, prop_name)

                if isinstance(prop, Tensor):
                    yield prop

    # API methods

    def append(self, *flows: 'Flow') -> 'Flow':
        ''' Flow Append

        Connect the current chain with those of `flows` by adding them
        at the end.

        Parameters:
        -----------
         - flows : varargs, the flows to append, sequentially

        Returns:
        --------
         - flow : Flow, the total computation flow

        '''

        for flow in flows:
            for chain_section in flow:  # Iterate over the chain
                # Connect with the current chain
                self._chain.append(chain_section)

        self.name = self._generate_chain_name()  # Update the chain name
        return self

    def pop(self, idx=-1) -> 'Flow':
        ''' Flow Pop

        Removes a flow (and its chain) at a given index, defaults to
        the last one (-1).

        Parameters:
        -----------
         - idx : integer, index of the flow to remove

        Returns:
        --------
         - flow : Flow, the total computation flow

        '''

        retflow = self._chain.pop(idx)
        self.name = self._generate_chain_name()

        return retflow

    def copy(self) -> 'Flow':
        ''' Make a copy of the flow
        '''

        return deepcopy(self)

    @abstractmethod
    def forward(self, *args, **kwargs) -> Tensor:
        ''' Flow Forward

        The flow computation is defined here.

        '''

        pass

    # methods implementing the flow functionality

    def __call__(self, *args, **kwargs) -> Tensor:
        output = self[0].forward(*args, **kwargs)

        for flow in self[1:]:
            output = flow.forward(output, **kwargs)

        return output

    def __rshift__(self, other: 'Flow') -> 'Flow':
        ''' Chaining operator

        Example:
            >>> a = nj.Flow()
            >>> b = nj.Flow()
            >>> chained_flow = a >> b
            >>> result = chained_flow(...)
            >>> ...

        '''

        return Flow(_chain=[*list(self), *list(other)])

    def __getitem__(self, key: Union[int, str]) -> 'Flow':
        '''Access flows in the chain by index/name

        Example:
            >>> a = nj.Flow('A')
            >>> b = nj.Flow('B')
            >>> chained_flow = a >> b
            >>> chained_flow[0]  # a flow (chain section) can be get by index
            'A' (this is the repr for `a`)
            >>> chained_flow['A']  # can also be get by name
            'A'

        '''

        if type(key) is str:
            flow = next((x for x in self._chain if x.name == key), None)
            if flow is not None:
                return flow
            else:
                raise ValueError(f'Could not find a flow named: {key}')
        else:
            return self._chain[key]

    def __iter__(self):
        return iter(self._chain)

    def __len__(self):
        return len(self._chain)

    def __repr__(self):
        return '<|' + self.name + '>'

Subclasses

Methods

def append(self, *flows: Flow) -> Flow

Flow Append

Connect the current chain with those of flows by adding them at the end.

Parameters:

  • flows : varargs, the flows to append, sequentially

Returns:

  • flow : Flow, the total computation flow
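
A brief usage sketch (the flows `a`, `b`, `c` here are hypothetical plain flows):

    >>> a, b, c = nj.Flow('A'), nj.Flow('B'), nj.Flow('C')
    >>> chained = a >> b
    >>> chained.append(c)   # extends the chain in place and returns the flow
    <|A >> B >> C>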
Expand source code
def append(self, *flows: 'Flow') -> 'Flow':
    ''' Flow Append

    Connect the current chain with those of `flows` by adding them
    at the end.

    Parameters:
    -----------
     - flows : varargs, the flows to append, sequentially

    Returns:
    --------
     - flow : Flow, the total computation flow

    '''

    for flow in flows:
        for chain_section in flow:  # Iterate over the chain
            # Connect with the current chain
            self._chain.append(chain_section)

    self.name = self._generate_chain_name()  # Update the chain name
    return self
def copy(self) -> Flow

Make a copy of the flow

Expand source code
def copy(self) -> 'Flow':
    ''' Make a copy of the flow
    '''

    return deepcopy(self)
def forward(self, *args, **kwargs) -> Tensor

Flow Forward

The flow computation is defined here.

Expand source code
@abstractmethod
def forward(self, *args, **kwargs) -> Tensor:
    ''' Flow Forward

    The flow computation is defined here.

    '''

    pass
def parameters(self) -> Tensor

Generator for all the parameters of the current flow
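
A brief usage sketch, iterating over the parameters of a hypothetical flow `model`, e.g. to hand them to an optimizer:

    >>> for param in model.parameters():
    ...     print(param.name, param.shape)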

Expand source code
def parameters(self) -> Tensor:
    ''' Generator for all the parameters of the current flow
    '''

    for param in self._total_parameters():
        yield param
def pop(self, idx=-1) -> Flow

Flow Pop

Removes a flow (and its chain) at a given index, defaults to the last one (-1).

Parameters:

  • idx : integer, index of the flow to remove

Returns:

  • flow : Flow, the total computation flow
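
A brief usage sketch, continuing the hypothetical flows `a`, `b`, `c` from the append example above:

    >>> chained = a >> b >> c
    >>> chained.pop(0)      # remove the first flow in the chain and return it
    <|A>
    >>> chained
    <|B >> C>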
Expand source code
def pop(self, idx=-1) -> 'Flow':
    ''' Flow Pop

    Removes a flow (and its chain) at a given index, defaults to
    the last one (-1).

    Parameters:
    -----------
     - idx : integer, index of the flow to remove

    Returns:
    --------
     - flow : Flow, the total computation flow

    '''

    retflow = self._chain.pop(idx)
    self.name = self._generate_chain_name()

    return retflow
class Function (*children: Union[nujo.autodiff.tensor.Tensor, numpy.ndarray, List[numbers.Number], numbers.Number], **kwargs)

Base Class for functions

Functions are applied to tensors. They take multiple tensors as input and produce only one tensor as output. They do NOT change tensors in-place.

Functions are also written to reuse the input/output tensors when possible, which results in the computation graph being "Dynamically defined, statically evaluated", taking the best of both worlds.

Parameters:

  • children : varargs, the input tensors
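
For illustration, a minimal sketch of a function following this contract. `_Square` is hypothetical (it is not part of nujo), and the exact metaclass/caching behaviour may differ from what is shown here:

from numpy import ndarray
import nujo as nj

class _Square(nj.Function):
    ''' Hypothetical element-wise square: z = x ** 2 '''

    def forward(self) -> ndarray:
        # the single input tensor is self.children[0]
        return self.children[0].value ** 2

    def backward(self, idx, accum_grad):
        # local derivative dz/dx = 2 * x, scaled by the accumulated gradient
        return accum_grad * 2 * self.children[0].value

x = nj.Tensor([1., 2., 3.], diff=True)
z = _Square(x)()   # a Function is instantiated with its inputs, then called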
Expand source code
class Function(_Node, metaclass=_FunctionMeta):
    ''' Base Class for functions

    Functions are applied to tensors. They take multiple
    tensors as input and produce only one tensor as output.
    They do NOT change tensors in-place.

    Functions were also written so they reuse the input/output tensors
    when possible, which results in the computation graph being:
     - "Dynamically defined, statically evaluated."
    taking the best from both worlds.

    Parameters:
    -----------
     - children : varargs, the input tensors

    '''

    _func_children_lookup_cache: Dict[str, 'Function'] = {}
    ''' Cache used to lookup for functions that may have already been defined
    in the computation graph.

     - key : hash(FuncType) + (children's identifiers);
     use `_get_function_identifier` to obtain a key
     - value : the already defined function which can be reused

    '''

    T = TypeVar('T', Tensor, ndarray)

    def __init__(self, *children: Union[Tensor, ndarray, List[Number],
                                        Number]):

        super(Function, self).__init__(*_parse_inputs(children),
                                       name=self.__class__.__name__)

        # This output placeholder is reused when possible
        self._output_placeholder = Tensor(
            None,
            diff=any(x.diff for x in self.children) and modes.DIFF_ENABLED,
            creator=self if modes.DIFF_ENABLED else None,
            name=self._generate_tensor_name())

        if modes.DIFF_ENABLED:  # If graph building is enabled.
            # Allocate space for parent's output (output placeholder)
            for child in self.children:
                child.parents_outputs.append(self._output_placeholder)

    def __repr__(self):
        return super(Function, self).__repr__() + f'#{self.id}'

    def _generate_tensor_name(self) -> str:
        return 'Z' + self.__repr__()

    @abstractmethod
    def forward(self) -> ndarray:
        ''' Implement forward pass of the function here.

        Use the `self.children` list to access the inputs.

        '''

        pass

    @abstractmethod
    def backward(self, idx: int, accum_grad: T) -> T:
        ''' Implement backward pass of the function here

        Compute the gradient of children[idx] w.r.t. output of the
        computation graph from the accumulated gradient (the gradient
        of the output of the function w.r.t. the output of the graph).

        Parameters:
        -----------
        - idx : int, the index of the child for which to compute the
         gradient w.r.t. output of the computation graph
        - accum_grad : T (Tensor or ndarray), the accumulated grad in the graph
         so far, you can otherwise think of it as the gradient of the output of
         the function w.r.t. the output of the graph.

            - `accum_grad` is Tensor if differentiation is enabled
             (`DIFF_ENABLED`) and the child has opted for differentiation
             (`diff` is True), thus the computations will be recorded in the
             computation graph and higher-order derivatives could be computed.
            - otherwise, `accum_grad` is ndarray and the computations are not
             recorded; ndarrays are used since the computations with them are
             more efficient.

        Returns:
        --------
        - grad : T (Tensor or ndarray), the computed gradient of
         `self.children[idx]`

        '''

        pass

    def __call__(self) -> Tensor:
        ''' Executes cached forward pass
        '''

        # Forward pass
        self._output_placeholder.value = self.forward()
        return self._output_placeholder

Ancestors

  • nujo.autodiff._node._Node

Subclasses

  • nujo.autodiff._functions._activations._BinaryStep
  • nujo.autodiff._functions._activations._LeakyReLU
  • nujo.autodiff._functions._activations._ReLU
  • nujo.autodiff._functions._activations._Sigmoid
  • nujo.autodiff._functions._activations._Softmax
  • nujo.autodiff._functions._activations._Swish
  • nujo.autodiff._functions._activations._TanH
  • nujo.autodiff._functions._aggregate._InnerProd
  • nujo.autodiff._functions._aggregate._InnerSum
  • nujo.autodiff._functions._elementary._Addition
  • nujo.autodiff._functions._elementary._Logarithm
  • nujo.autodiff._functions._elementary._MatrixMul
  • nujo.autodiff._functions._elementary._Multiplication
  • nujo.autodiff._functions._elementary._Negation
  • nujo.autodiff._functions._elementary._Power
  • nujo.autodiff._functions._elementary._Reciprocal
  • nujo.autodiff._functions._transform._ConstPad
  • nujo.autodiff._functions._transform._Im2col
  • nujo.autodiff._functions._transform._Reshape
  • nujo.autodiff._functions._transform._Transpose

Class variables

var T

Methods

def backward(self, idx: int, accum_grad: ~T) -> ~T

Implement backward pass of the function here

Compute the gradient of children[idx] w.r.t. the output of the computation graph from the accumulated gradient (the gradient of the output of the function w.r.t. the output of the graph).

Parameters:

  • idx : int, the index of the child for which to compute the gradient w.r.t. the output of the computation graph
  • accum_grad : T (Tensor or ndarray), the accumulated gradient in the graph so far; you can think of it as the gradient of the output of the function w.r.t. the output of the graph.

    • accum_grad is a Tensor if differentiation is enabled (DIFF_ENABLED) and the child has opted for differentiation (diff is True); the computations will then be recorded in the computation graph and higher-order derivatives can be computed.
    • otherwise, accum_grad is an ndarray and the computations are not recorded; ndarrays are used since computations with them are more efficient.

Returns:

  • grad : T (Tensor or ndarray), the computed gradient of self.children[idx]
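
To make the contract concrete, a hedged sketch for an element-wise multiplication z = a * b (nujo's own implementation may differ):

    # children = [a, b], output z = a * b
    # backward(0, accum_grad) -> accum_grad * b   (since dz/da = b)
    # backward(1, accum_grad) -> accum_grad * a   (since dz/db = a)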
Expand source code
@abstractmethod
def backward(self, idx: int, accum_grad: T) -> T:
    ''' Implement backward pass of the function here

    Compute the gradient of children[idx] w.r.t. output of the
    computation graph from the accumulated gradient (the gradient
    of the output of the function w.r.t. the output of the graph).

    Parameters:
    -----------
    - idx : int, the index of the child for which to compute the
     gradient w.r.t. output of the computation graph
    - accum_grad : T (Tensor or ndarray), the accumulated grad in the graph
     so far, you can otherwise think of it as the gradient of the output of
     the function w.r.t. the output of the graph.

        - `accum_grad` is Tensor if differentiation is enabled
         (`DIFF_ENABLED`) and the child has opted for differentiation
         (`diff` is True), thus the computations will be recorded in the
         computation graph and higher-order derivatives could be computed.
        - otherwise, `accum_grad` is ndarray and the computations are not
         recorded; ndarrays are used since the computations with them are
         more efficient.

    Returns:
    --------
    - grad : T (Tensor or ndarray), the computed gradient of
     `self.children[idx]`

    '''

    pass
def forward(self) -> numpy.ndarray

Implement forward pass of the function here.

Use the self.children list to access the inputs.

Expand source code
@abstractmethod
def forward(self) -> ndarray:
    ''' Implement forward pass of the function here.

    Use the `self.children` list to access the inputs.

    '''

    pass
class Tensor (value: Union[ForwardRef('Tensor'), numpy.ndarray, List[numbers.Number], numbers.Number], diff=False, creator=None, name='Tensor')

Tensor - a multi-dimensional array

Tensors are the main units of data in nujo. They "flow" in the computation graph. :)

Tensors can be either constants or trainable weights, depending on whether gradients are computed for the given tensor.

Parameters:

  • value : value, numerical value of the tensor
  • diff : boolean, whether to compute gradients for the tensor
  • creator : nujo function that created this tensor; the only child of a tensor
  • name : string, representation of the tensor
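
For illustration, a brief end-to-end sketch (the values and names are arbitrary):

    >>> x = nj.Tensor([[1., 2.], [3., 4.]], diff=True, name='x')
    >>> w = nj.Tensor([[0.5], [0.5]], diff=True, name='w')
    >>> y = x @ w      # the computation is recorded in the graph
    >>> y.backward()   # backpropagate through the graph
    >>> x.grad         # gradient of y w.r.t. x
    >>> w.grad         # gradient of y w.r.t. w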
Expand source code
class Tensor(_Node):
    ''' Tensor - a multi-dimensional array

    Tensors are the main units of data in nujo.
    They "flow" in the computation graph. :)

    Tensors can be either constants or trainable weights,
    depending on whether gradients are computed for the given tensor.

    Parameters:
    -----------
     - value : value, numerical value of the tensor
     - diff : boolean, whether to compute gradients for the tensor
     - creator : nujo function, that created this tensor;
       the only child of a tensor
     - name : string, representation of the tensor

    '''
    def __init__(self,
                 value: Union['Tensor', ndarray, List[Number], Number],
                 diff=False,
                 creator=None,
                 name='Tensor'):

        super(Tensor, self).__init__(*_if_not_none(creator), name=name)

        self._value: ndarray = None
        self.value = value  # set value

        self.diff = diff
        self.creator = creator

        # Outputs of the functions the current tensor is input to.
        # Used for backpropagation of the gradients.
        self.parents_outputs: List['Tensor'] = []

        # Gradient of the current tensor
        self._grad: 'Tensor' = None

        # Transposed tensor cache
        self._T: 'Tensor' = None
        self._prev_value: ndarray = None

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, value: Union['Tensor', ndarray, List[Number], Number]):
        if isinstance(value, Tensor):
            self._value = value.value
        elif isinstance(value, ndarray):
            self._value = value
        else:
            self._value = array(value)

    @value.deleter
    def value(self):
        del self._value

    @property
    def grad(self) -> 'Tensor':
        if self._grad is None:
            self._grad = Tensor(empty(self._value.shape),
                                name=f'grad[{self.name}]')

        return self._grad

    # Shape and shape manipulations

    @property
    def shape(self) -> Tuple[int, ...]:
        return self._value.shape

    @property
    def T(self) -> 'Tensor':
        # Only transpose if something has changed
        if (self._value != self._prev_value).any():
            self._T = self.transpose()
            self._prev_value = self._value

        return self._T

    def transpose(self, *dims: int) -> 'Tensor':
        from nujo.autodiff._functions._transform import _Transpose
        return _Transpose(self, dims)()

    def reshape(self, *shape: int) -> 'Tensor':
        from nujo.autodiff._functions._transform import _Reshape
        return _Reshape(self, shape)()

    def squeeze(self, dim=-1) -> 'Tensor':
        if dim < 0:
            num_dims = len(self._value.shape)

            if dim < -num_dims:
                dim = num_dims
            else:
                dim += num_dims

        return self.reshape(*self._value.shape[:dim],
                            *self._value.shape[dim + 1:])

    def unsqueeze(self, dim=-1) -> 'Tensor':
        if dim < 0:
            num_dims = len(self._value.shape)

            if dim < -num_dims:
                dim = 0
            else:
                if dim == -1:
                    dim += 1
                dim += num_dims

        return self.reshape(*self._value.shape[:dim], 1,
                            *self._value.shape[dim:])

    # Gradient computation

    def _compute_grad_from(self,
                           poutput: 'Tensor') -> Union['Tensor', ndarray]:
        ''' Computes the gradient of `self` w.r.t. the output of the computation
        graph from `poutput` (using the path of computations from `poutput`)

            In other words, this functions returns:
                (dOutput / dPoutput) * (dPoutput / dSelf)

        '''

        # Find the index of the children which gradient should be computed
        # (a.k.a. find the index of `self` in `poutput.creator.children`)
        idx = next(i for i, v in enumerate(poutput.creator.children)
                   if v is self)

        if poutput._grad.diff:
            # Pass a diff enabled tensor to the backward call,
            # thus recording grad computations in the computation
            # graph, which enables higher-order differentiation.
            grad = poutput.creator.backward(idx, poutput._grad)

            # Check if `self` is scalar and needs to be averaged
            if self._value.shape != () and\
               self._value.shape[-1] == 1:

                # Record the mean in the computation graph
                from nujo.math.aggregate import mean
                grad = mean(grad, dim=-1, keepdim=True)

        else:
            # Do not leave a trace in the computation graph!
            # Use numpy arrays! :)
            grad = poutput.creator.backward(idx, poutput._grad._value)

            # Check if `self` is scalar and needs to be averaged
            if self._value.shape != () and\
               self._value.shape[-1] == 1:

                grad = grad.mean(axis=-1, keepdims=True)

        return grad

    def compute_grad(self) -> None:
        if modes.DIFF_ENABLED and self.diff:

            # Make sure grad is Tensor (`grad property call`) and init value
            if self._grad is None:
                self.zero_grad(propagate=False)

            # Top-parent grad
            if len(self.parents_outputs) == 0:
                self._grad._value += 1
                return

            for poutput in self.parents_outputs:
                curr_grad = self._compute_grad_from(poutput)

                if self._grad.diff:
                    # Record grad computations in the computation graph
                    self._grad += curr_grad
                else:
                    self._grad._value += curr_grad

    def zero_grad(self, propagate=True) -> None:
        self.grad._value.fill(0)

        if propagate:
            for poutput in self.parents_outputs:
                poutput.zero_grad()

    def backward(self, _debug=False) -> None:
        ''' It uses Breadth First Search to traverse the computation graph
        and compute the gradient for each differentiable Tensor in the graph.

        '''

        nodes_to_visit: List['Tensor'] = [self]
        if _debug:
            i = 1

        while nodes_to_visit:
            node = nodes_to_visit.pop()
            node.compute_grad()

            if _debug:
                nstr = f' [{i}]'
                node.name += nstr if nstr not in node.name else ''
                i += 1

            if node.creator:
                for child in node.creator.children:
                    # Avoid visiting the same node twice
                    if all(child is not node for node in nodes_to_visit):
                        nodes_to_visit.insert(0, child)

    # Useful methods

    def all(self) -> ndarray:
        return self._value.all()

    def any(self) -> ndarray:
        return self._value.any()

    def __getitem__(self, position: Union[int, Tuple[int, ...]]):
        return Tensor(self._value[position],
                      diff=self.diff,
                      creator=self.creator,
                      name=f'{self.name}[{position}]')

    def __setitem__(self, position: Union[int, Tuple[int, ...]],
                    value: Union['Tensor', ndarray, List[Number], Number]):

        # TODO: This is a naive implementation. Fix it.
        self._value[position] = value

    def __hash__(self):
        return self.id

    # Static evaluation operator

    def __ilshift__(
            self, other: Union['Tensor', ndarray, List[Number],
                               Number]) -> 'Tensor':
        ''' In-place assignment operator: `<<=`

        Transfering key properties from `other` to `self`.
        Essentially a shortcut for:
            >>> self.children = other.children
            >>> self.creator = other.creator
            >>> self.value = other.value
            >>> self.grad = other.grad

        '''

        self.children = getattr(other, 'children', None)
        if self.children:
            try:
                self.children.remove(self)
            except ValueError:  # self is not in children
                pass

        self.creator = getattr(other, 'creator', None)
        if self.creator:
            try:
                self.creator.children.remove(self)
            except ValueError:  # self is not in children
                pass

        self._value = getattr(other, 'value', other)

        # Transfer the gradient
        self._grad = getattr(other, 'grad', None)

        return self

    # Comparison operations

    def __lt__(self, other):
        return self._value < getattr(other, 'value', other)

    def __le__(self, other):
        return self._value <= getattr(other, 'value', other)

    def __eq__(self, other):
        return self._value == getattr(other, 'value', other)

    def __ne__(self, other):
        return self._value != getattr(other, 'value', other)

    def __gt__(self, other):
        return self._value > getattr(other, 'value', other)

    def __ge__(self, other):
        return self._value >= getattr(other, 'value', other)

    # Arithmetic operations

    def __add__(self, other):
        from nujo.autodiff._functions._elementary import _Addition
        return _Addition(self, other)()

    def __radd__(self, other):
        return self.__add__(other)

    def __neg__(self):
        from nujo.autodiff._functions._elementary import _Negation
        return _Negation(self)()

    def __sub__(self, other):
        return self.__add__(other.__neg__())

    def __rsub__(self, other):
        return self.__neg__().__add__(other)

    def __mul__(self, other):
        from nujo.autodiff._functions._elementary import _Multiplication
        return _Multiplication(self, other)()

    def __rmul__(self, other):
        return self.__mul__(other)

    def __truediv__(self, other):
        from nujo.autodiff._functions._elementary import _Reciprocal
        return self.__mul__(_Reciprocal(other)())

    def __rtruediv__(self, other):
        from nujo.autodiff._functions._elementary import _Reciprocal
        return _Reciprocal(self)().__mul__(other)

    def __pow__(self, other):
        from nujo.autodiff._functions._elementary import _Power
        return _Power(self, other)()

    def __rpow__(self, other):
        from nujo.autodiff._functions._elementary import _Power
        return _Power(other, self)()

    # More complex arithmetic operations

    def __matmul__(self, other):
        from nujo.autodiff._functions._elementary import _MatrixMul
        return _MatrixMul(self, other)()

    def __rmatmul__(self, other):
        from nujo.autodiff._functions._elementary import _MatrixMul
        return _MatrixMul(other, self)()

    # Representations

    def __str__(self):
        # TODO: Come up with a better representation
        return self.__repr__() + '\n' + '-' * 32 + '\n' + str(self._value)

Ancestors

  • nujo.autodiff._node._Node

Instance variables

var T : Tensor
Expand source code
@property
def T(self) -> 'Tensor':
    # Only transpose if something has changed
    if (self._value != self._prev_value).any():
        self._T = self.transpose()
        self._prev_value = self._value

    return self._T
var grad : Tensor
Expand source code
@property
def grad(self) -> 'Tensor':
    if self._grad is None:
        self._grad = Tensor(empty(self._value.shape),
                            name=f'grad[{self.name}]')

    return self._grad
var shape : Tuple[int, ...]
Expand source code
@property
def shape(self) -> Tuple[int, ...]:
    return self._value.shape
var value
Expand source code
@property
def value(self):
    return self._value

Methods

def all(self) -> numpy.ndarray
Expand source code
def all(self) -> ndarray:
    return self._value.all()
def any(self) -> numpy.ndarray
Expand source code
def any(self) -> ndarray:
    return self._value.any()
def backward(self) -> NoneType

It uses Breadth First Search to traverse the computation graph and compute the gradient for each differentiable Tensor in the graph.

Expand source code
def backward(self, _debug=False) -> None:
    ''' It uses Breadth First Search to traverse the computation graph
    and compute the gradient for each differentiable Tensor in the graph.

    '''

    nodes_to_visit: List['Tensor'] = [self]
    if _debug:
        i = 1

    while nodes_to_visit:
        node = nodes_to_visit.pop()
        node.compute_grad()

        if _debug:
            nstr = f' [{i}]'
            node.name += nstr if nstr not in node.name else ''
            i += 1

        if node.creator:
            for child in node.creator.children:
                # Avoid visiting the same node twice
                if all(child is not node for node in nodes_to_visit):
                    nodes_to_visit.insert(0, child)
def compute_grad(self) -> NoneType
Expand source code
def compute_grad(self) -> None:
    if modes.DIFF_ENABLED and self.diff:

        # Make sure grad is Tensor (`grad property call`) and init value
        if self._grad is None:
            self.zero_grad(propagate=False)

        # Top-parent grad
        if len(self.parents_outputs) == 0:
            self._grad._value += 1
            return

        for poutput in self.parents_outputs:
            curr_grad = self._compute_grad_from(poutput)

            if self._grad.diff:
                # Record grad computations in the computation graph
                self._grad += curr_grad
            else:
                self._grad._value += curr_grad
def reshape(self, *shape: int) -> Tensor
Expand source code
def reshape(self, *shape: int) -> 'Tensor':
    from nujo.autodiff._functions._transform import _Reshape
    return _Reshape(self, shape)()
def squeeze(self, dim=-1) -> Tensor
Expand source code
def squeeze(self, dim=-1) -> 'Tensor':
    if dim < 0:
        num_dims = len(self._value.shape)

        if dim < -num_dims:
            dim = num_dims
        else:
            dim += num_dims

    return self.reshape(*self._value.shape[:dim],
                        *self._value.shape[dim + 1:])
def transpose(self, *dims: int) -> Tensor
Expand source code
def transpose(self, *dims: int) -> 'Tensor':
    from nujo.autodiff._functions._transform import _Transpose
    return _Transpose(self, dims)()
def unsqueeze(self, dim=-1) -> Tensor
Expand source code
def unsqueeze(self, dim=-1) -> 'Tensor':
    if dim < 0:
        num_dims = len(self._value.shape)

        if dim < -num_dims:
            dim = 0
        else:
            if dim == -1:
                dim += 1
            dim += num_dims

    return self.reshape(*self._value.shape[:dim], 1,
                        *self._value.shape[dim:])
def zero_grad(self, propagate=True) -> NoneType
Expand source code
def zero_grad(self, propagate=True) -> None:
    self.grad._value.fill(0)

    if propagate:
        for poutput in self.parents_outputs:
            poutput.zero_grad()
class no_diff

No Differentiation block

Creates a block of code in which no differentiation is done, i.e. no gradients are computed for any tensor.
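
Typical usage is as a context manager, e.g. when evaluating a trained model (`model` and `x` below are hypothetical):

    >>> with nj.no_diff():
    ...     preds = model(x)   # no gradients are computed inside this block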

Expand source code
class no_diff():
    ''' No Differentiation block

    Creates a block of code where no differentiation is done,
    i.e. no gradients are computed for any tensor.

    '''
    def __enter__(self):
        global DIFF_ENABLED
        DIFF_ENABLED = False

    def __exit__(self, type, value, traceback):
        global DIFF_ENABLED
        DIFF_ENABLED = True