Package nujo
Expand source code
from nujo.autodiff import Function, Tensor, no_diff
from nujo.flow import Flow
from nujo.init import *
from nujo.math import *
__all__ = [
'Function',
'Tensor',
'no_diff',
'Flow',
]
__version__ = '0.3.0'
Sub-modules
nujo.autodiff
-
nujo's core Reverse-mode Automatic Differentiation module
nujo.flow
-
a chainable computation Flow
nujo.init
-
Tensor initializers …
nujo.math
-
nujo's core mathematical functionality
nujo.nn
-
nujo's Neural Network module …
nujo.objective
-
nujo's objective functions module …
nujo.optim
-
nujo's optimization module …
nujo.utils
-
nujo utils
Classes
class Flow (*args, **kwargs)
-
A chainable computation Flow
A Flow is just a sequence of functions (addition, multiplication, etc.) that are grouped in a single object (Flow) and can be applied on a tensor.
Each nujo Flow has a list of flow objects (a chain) that a tensor will pass through when the Flow is called on that tensor.
This allows the chaining of flows (connecting two or more chains together).
Parameters:
- name : string, identifier of the current flow
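For orientation, here is a minimal sketch of defining and chaining flows. The Scale and AddOne flows below are hypothetical, made up purely for illustration; they are not part of nujo.
import nujo as nj

class Scale(nj.Flow):
    def forward(self, x):
        return x * 2          # any tensor computation can go here

class AddOne(nj.Flow):
    def forward(self, x):
        return x + 1

model = Scale('Scale') >> AddOne('AddOne')   # chaining via the >> operator
out = model(nj.Tensor([1.0, 2.0, 3.0]))      # the tensor passes through the chain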
Expand source code
class Flow(metaclass=_FlowMeta):
    ''' A chainable computation Flow

    A Flow is just a sequence of functions (addition, multiplication, etc.)
    that are grouped in a single object (Flow) and can be applied on a tensor.

    Each nujo Flow has a list of flow objects (a chain) that a tensor will
    pass through when the Flow is called on that tensor.
    This allows the chaining of flows (connecting two or more chains together).

    Parameters:
    -----------
    - name : string, identifier of the current flow

    '''
    def __init__(self, name='Flow', _chain: List['Flow'] = []):
        self.name = name
        self._chain = _chain

        if len(self._chain):  # If there is a chain
            self.name = self._generate_chain_name()

    # setup methods

    def _register_parameters(self) -> None:
        ''' Tensor parameters registration - called after Flow.__init__

        Makes all tensors bounded to `self` diff enabled (sets their
        `diff` to `True`).

        Called only once, when the chain for the current flow is being created.
        '''
        for prop_name in dir(self):
            prop = getattr(self, prop_name)
            if isinstance(prop, Tensor):
                prop.diff = True

    def _generate_chain_name(self) -> str:
        return ' >> '.join(map(lambda x: x.name, self._chain))

    # parameters generators

    def parameters(self) -> Tensor:
        ''' Generator for all the parameters of the current flow '''
        for param in self._total_parameters():
            yield param

    def _total_parameters(self) -> Tensor:
        ''' Returns an iterable of all the parameters of the current flow

        Including those of other flows that are used in the current one
        (namely other flows bounded to `self`).
        '''
        total_params = [self._current_parameters()]

        for prop_name in dir(self):
            prop = getattr(self, prop_name)
            if isinstance(prop, Flow):
                total_params.append(prop.parameters())

        return chain(*total_params)

    def _current_parameters(self) -> Tensor:
        ''' Generator for the current tensor parameters bounded to `self` '''
        for flow in self._chain:
            for prop_name in dir(flow):
                prop = getattr(flow, prop_name)
                if isinstance(prop, Tensor):
                    yield prop

    # API methods

    def append(self, *flows: 'Flow') -> 'Flow':
        ''' Flow Append

        Connect the current chain with those of `flows` by adding them
        at the end.

        Parameters:
        -----------
        - flows : varargs, the flows to append, sequentially

        Returns:
        --------
        - flow : Flow, the total computation flow

        '''
        for flow in flows:
            for chain_section in flow:  # Iterate over the chain
                # Connect with the current chain
                self._chain.append(chain_section)

        self.name = self._generate_chain_name()  # Update the chain name
        return self

    def pop(self, idx=-1) -> 'Flow':
        ''' Flow Pop

        Removes a flow (and its chain) at a given index, defaults to
        the last one (-1).

        Parameters:
        -----------
        - idx : integer, index of the flow to remove

        Returns:
        --------
        - flow : Flow, the total computation flow

        '''
        retflow = self._chain.pop(idx)
        self.name = self._generate_chain_name()

        return retflow

    def copy(self) -> 'Flow':
        ''' Make a copy of the flow '''
        return deepcopy(self)

    @abstractmethod
    def forward(self, *args, **kwargs) -> Tensor:
        ''' Flow Forward

        The flow computation is defined here.
        '''
        pass

    # methods implementing the flow functionality

    def __call__(self, *args, **kwargs) -> Tensor:
        output = self[0].forward(*args, **kwargs)
        for flow in self[1:]:
            output = flow.forward(output, **kwargs)

        return output

    def __rshift__(self, other: 'Flow') -> 'Flow':
        ''' Chaining operator

        Example:
            >>> a = nj.Flow()
            >>> b = nj.Flow()
            >>> chained_flow = a >> b
            >>> result = chained_flow(...)
            >>> ...

        '''
        return Flow(_chain=[*list(self), *list(other)])

    def __getitem__(self, key: Union[int, str]) -> 'Flow':
        ''' Access flows in the chain by index/name

        Example:
            >>> a = nj.Flow('A')
            >>> b = nj.Flow('B')
            >>> chained_flow = a >> b
            >>> chained_flow[0]  # a flow (chain section) can be accessed by index
            'A' (this is the repr for `a`)
            >>> chained_flow['A']  # can also be accessed by name
            'A'

        '''
        if type(key) is str:
            flow = next((x for x in self._chain if x.name == key), None)
            if flow is not None:
                return flow
            else:
                raise ValueError(f'Could not find a flow named: {key}')
        else:
            return self._chain[key]

    def __iter__(self):
        return iter(self._chain)

    def __len__(self):
        return len(self._chain)

    def __repr__(self):
        return '<|' + self.name + '>'
Subclasses
- BinaryStep
- LeakyReLU
- ReLU
- Sigmoid
- Softmax
- Swish
- TanH
- ConstPad2d
- Conv2d
- Linear
- nujo.objective.loss._Loss
Methods
def append(self, *flows: Flow) -> Flow
-
Flow Append
Connect the current chain with those of flows by adding them at the end.
Parameters:
- flows : varargs, the flows to append, sequentially
Returns:
- flow : Flow, the total computation flow
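A short usage sketch; backbone, head and classifier are hypothetical flows built as in the chaining example above.
>>> pipeline = backbone >> head
>>> pipeline.append(classifier)   # extend the chain in place
>>> pipeline                      # the repr would then read something like <|Backbone >> Head >> Classifier>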
Expand source code
def append(self, *flows: 'Flow') -> 'Flow':
    ''' Flow Append

    Connect the current chain with those of `flows` by adding them
    at the end.

    Parameters:
    -----------
    - flows : varargs, the flows to append, sequentially

    Returns:
    --------
    - flow : Flow, the total computation flow

    '''
    for flow in flows:
        for chain_section in flow:  # Iterate over the chain
            # Connect with the current chain
            self._chain.append(chain_section)

    self.name = self._generate_chain_name()  # Update the chain name
    return self
def copy(self) -> Flow
-
Make a copy of the flow
Expand source code
def copy(self) -> 'Flow':
    ''' Make a copy of the flow '''
    return deepcopy(self)
def forward(self, *args, **kwargs) -> Tensor
-
Flow Forward
The flow computation is defined here.
Expand source code
@abstractmethod
def forward(self, *args, **kwargs) -> Tensor:
    ''' Flow Forward

    The flow computation is defined here.
    '''
    pass
def parameters(self) -> Tensor
-
Generator for all the parameters of the current flow
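For example, assuming model is a flow with trainable tensors bound to it (a sketch):
>>> for param in model.parameters():
...     print(param.name, param.shape)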
Expand source code
def parameters(self) -> Tensor:
    ''' Generator for all the parameters of the current flow '''
    for param in self._total_parameters():
        yield param
def pop(self, idx=-1) -> Flow
-
Flow Pop
Removes a flow (and its chain) at a given index, defaults to the last one (-1).
Parameters:
- idx : integer, index of the flow to remove
Returns:
- flow : Flow, the total computation flow
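A sketch of typical usage, assuming pipeline is a chained flow as above:
>>> last = pipeline.pop()     # remove and return the last flow in the chain
>>> first = pipeline.pop(0)   # or remove by index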
Expand source code
def pop(self, idx=-1) -> 'Flow':
    ''' Flow Pop

    Removes a flow (and its chain) at a given index, defaults to
    the last one (-1).

    Parameters:
    -----------
    - idx : integer, index of the flow to remove

    Returns:
    --------
    - flow : Flow, the total computation flow

    '''
    retflow = self._chain.pop(idx)
    self.name = self._generate_chain_name()

    return retflow
class Function (*children: Union[nujo.autodiff.tensor.Tensor, numpy.ndarray, List[numbers.Number], numbers.Number], **kwargs)
-
Base Class for functions
Functions are applied to tensors. They take multiple tensors as input and produce exactly one tensor as output. They do NOT change tensors in-place.
Functions are also written to reuse the input/output tensors when possible, which results in the computation graph being "dynamically defined, statically evaluated", taking the best from both worlds.
Parameters:
- children : varargs, the input tensors
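A rough sketch of what this looks like in practice (reprs and generated names may differ):
>>> from nujo import Tensor
>>> x = Tensor([[1.0, 2.0]], diff=True, name='x')
>>> y = x * 3 + 1      # each operator builds a Function node in the graph
>>> y.creator          # the single output tensor keeps a reference to its Function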
Expand source code
class Function(_Node, metaclass=_FunctionMeta):
    ''' Base Class for functions

    Functions are applied to tensors. They take multiple tensors as input
    and produce exactly one tensor as output. They do NOT change tensors
    in-place.

    Functions are also written to reuse the input/output tensors when
    possible, which results in the computation graph being:
    - "Dynamically defined, statically evaluated."
    taking the best from both worlds.

    Parameters:
    -----------
    - children : varargs, the input tensors

    '''

    _func_children_lookup_cache: Dict[str, 'Function'] = {}
    ''' Cache used to look up functions that may have already been defined
    in the computation graph.

    - key : hash(FuncType) + (children's identifiers);
      use `_get_function_identifier` to obtain a key
    - value : the already defined function which can be reused

    '''

    T = TypeVar('T', Tensor, ndarray)

    def __init__(self, *children: Union[Tensor, ndarray, List[Number], Number]):
        super(Function, self).__init__(*_parse_inputs(children),
                                       name=self.__class__.__name__)

        # This output placeholder is reused when possible
        self._output_placeholder = Tensor(
            None,
            diff=any(x.diff for x in self.children) and modes.DIFF_ENABLED,
            creator=self if modes.DIFF_ENABLED else None,
            name=self._generate_tensor_name())

        if modes.DIFF_ENABLED:  # If graph building is enabled.
            # Allocate space for parent's output (output placeholder)
            for child in self.children:
                child.parents_outputs.append(self._output_placeholder)

    def __repr__(self):
        return super(Function, self).__repr__() + f'#{self.id}'

    def _generate_tensor_name(self) -> str:
        return 'Z' + self.__repr__()

    @abstractmethod
    def forward(self) -> ndarray:
        ''' Implement forward pass of the function here.

        Use the `self.children` list to access the inputs.
        '''
        pass

    @abstractmethod
    def backward(self, idx: int, accum_grad: T) -> T:
        ''' Implement backward pass of the function here

        Compute the gradient of children[idx] w.r.t. the output of the
        computation graph from the accumulated gradient (the gradient of
        the output of the function w.r.t. the output of the graph).

        Parameters:
        -----------
        - idx : int, the index of the child for which to compute the
          gradient w.r.t. the output of the computation graph
        - accum_grad : T (Tensor or ndarray), the accumulated gradient in
          the graph so far; you can otherwise think of it as the gradient
          of the output of the function w.r.t. the output of the graph.
            - `accum_grad` is a Tensor if differentiation is enabled
              (`DIFF_ENABLED`) and the child has opted for differentiation
              (`diff` is True); thus the computations will be recorded in
              the computation graph and higher-order derivatives could be
              computed.
            - otherwise, `accum_grad` is an ndarray and the computations
              are not recorded; ndarrays are used since the computations
              with them are more efficient.

        Returns:
        --------
        - grad : T (Tensor or ndarray), the computed gradient of
          `self.children[idx]`

        '''
        pass

    def __call__(self) -> Tensor:
        ''' Executes cached forward pass '''

        # Forward pass
        self._output_placeholder.value = self.forward()
        return self._output_placeholder
Ancestors
- nujo.autodiff._node._Node
Subclasses
- nujo.autodiff._functions._activations._BinaryStep
- nujo.autodiff._functions._activations._LeakyReLU
- nujo.autodiff._functions._activations._ReLU
- nujo.autodiff._functions._activations._Sigmoid
- nujo.autodiff._functions._activations._Softmax
- nujo.autodiff._functions._activations._Swish
- nujo.autodiff._functions._activations._TanH
- nujo.autodiff._functions._aggregate._InnerProd
- nujo.autodiff._functions._aggregate._InnerSum
- nujo.autodiff._functions._elementary._Addition
- nujo.autodiff._functions._elementary._Logarithm
- nujo.autodiff._functions._elementary._MatrixMul
- nujo.autodiff._functions._elementary._Multiplication
- nujo.autodiff._functions._elementary._Negation
- nujo.autodiff._functions._elementary._Power
- nujo.autodiff._functions._elementary._Reciprocal
- nujo.autodiff._functions._transform._ConstPad
- nujo.autodiff._functions._transform._Im2col
- nujo.autodiff._functions._transform._Reshape
- nujo.autodiff._functions._transform._Transpose
Class variables
var T
Methods
def backward(self, idx: int, accum_grad: ~T) -> ~T
-
Implement backward pass of the function here
Compute the gradient of children[idx] w.r.t. the output of the computation graph from the accumulated gradient (the gradient of the output of the function w.r.t. the output of the graph).
Parameters:
- idx : int, the index of the child for which to compute the gradient w.r.t. the output of the computation graph
- accum_grad : T (Tensor or ndarray), the accumulated gradient in the graph so far; you can otherwise think of it as the gradient of the output of the function w.r.t. the output of the graph.
  - accum_grad is a Tensor if differentiation is enabled (DIFF_ENABLED) and the child has opted for differentiation (diff is True); in that case the computations are recorded in the computation graph and higher-order derivatives can be computed.
  - otherwise, accum_grad is an ndarray and the computations are not recorded; ndarrays are used since computations with them are more efficient.
Returns:
- grad : T (Tensor or ndarray), the computed gradient of self.children[idx]
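To make the contract concrete, here is a hypothetical two-input product written against it; this is an illustrative sketch, not nujo's actual _Multiplication implementation.
from nujo import Function

class _Mul(Function):
    def forward(self):
        a, b = self.children
        return a.value * b.value

    def backward(self, idx, accum_grad):
        a, b = self.children
        other = b if idx == 0 else a      # d(a*b)/da = b, d(a*b)/db = a
        return accum_grad * other.value   # chain rule with the accumulated gradient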
Expand source code
@abstractmethod
def backward(self, idx: int, accum_grad: T) -> T:
    ''' Implement backward pass of the function here

    Compute the gradient of children[idx] w.r.t. the output of the
    computation graph from the accumulated gradient (the gradient of the
    output of the function w.r.t. the output of the graph).

    Parameters:
    -----------
    - idx : int, the index of the child for which to compute the gradient
      w.r.t. the output of the computation graph
    - accum_grad : T (Tensor or ndarray), the accumulated gradient in the
      graph so far; you can otherwise think of it as the gradient of the
      output of the function w.r.t. the output of the graph.
        - `accum_grad` is a Tensor if differentiation is enabled
          (`DIFF_ENABLED`) and the child has opted for differentiation
          (`diff` is True); thus the computations will be recorded in the
          computation graph and higher-order derivatives could be computed.
        - otherwise, `accum_grad` is an ndarray and the computations are
          not recorded; ndarrays are used since the computations with them
          are more efficient.

    Returns:
    --------
    - grad : T (Tensor or ndarray), the computed gradient of
      `self.children[idx]`

    '''
    pass
def forward(self) -> numpy.ndarray
-
Implement forward pass of the function here.
Use the self.children list to access the inputs.
Expand source code
@abstractmethod
def forward(self) -> ndarray:
    ''' Implement forward pass of the function here.

    Use the `self.children` list to access the inputs.
    '''
    pass
class Tensor (value: Union[ForwardRef('Tensor'), numpy.ndarray, List[numbers.Number], numbers.Number], diff=False, creator=None, name='Tensor')
-
Tensor - a multi-dimensional array
Tensors are the main units of data in nujo. They "flow" in the computation graph. :)
Tensors can be either constants or trainable weights, depending on whether gradients are computed for the given tensor.
Parameters:
- value : value, numerical value of the tensor
- diff : boolean, whether to compute gradients for the tensor
- creator : nujo function that created this tensor; the only child of a tensor
- name : string, representation of the tensor
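A minimal usage sketch:
>>> from nujo import Tensor
>>> w = Tensor(2.0, diff=True, name='w')   # trainable: gradients will be computed
>>> x = Tensor(3.0, name='x')              # constant (diff is False by default)
>>> y = w * x
>>> y.backward()
>>> w.grad                                 # holds dy/dw, i.e. the value of x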
Expand source code
class Tensor(_Node):
    ''' Tensor - a multi-dimensional array

    Tensors are the main units of data in nujo.
    They "flow" in the computation graph. :)

    Tensors can be either constants or trainable weights, depending on
    whether gradients are computed for the given tensor.

    Parameters:
    -----------
    - value : value, numerical value of the tensor
    - diff : boolean, whether to compute gradients for the tensor
    - creator : nujo function that created this tensor;
      the only child of a tensor
    - name : string, representation of the tensor

    '''
    def __init__(self,
                 value: Union['Tensor', ndarray, List[Number], Number],
                 diff=False,
                 creator=None,
                 name='Tensor'):

        super(Tensor, self).__init__(*_if_not_none(creator), name=name)

        self._value: ndarray = None
        self.value = value  # set value

        self.diff = diff
        self.creator = creator

        # Outputs of the functions the current tensor is input to.
        # Used for backpropagation of the gradients.
        self.parents_outputs: List['Tensor'] = []

        # Gradient of the current tensor
        self._grad: 'Tensor' = None

        # Transposed tensor cache
        self._T: 'Tensor' = None
        self._prev_value: ndarray = None

    @property
    def value(self):
        return self._value

    @value.setter
    def value(self, value: Union['Tensor', ndarray, List[Number], Number]):
        if isinstance(value, Tensor):
            self._value = value.value
        elif isinstance(value, ndarray):
            self._value = value
        else:
            self._value = array(value)

    @value.deleter
    def value(self):
        del self._value

    @property
    def grad(self) -> 'Tensor':
        if self._grad is None:
            self._grad = Tensor(empty(self._value.shape),
                                name=f'grad[{self.name}]')

        return self._grad

    # Shape and shape manipulations

    @property
    def shape(self) -> Tuple[int, ...]:
        return self._value.shape

    @property
    def T(self) -> 'Tensor':
        # Only transpose if something has changed
        if (self._value != self._prev_value).any():
            self._T = self.transpose()
            self._prev_value = self._value

        return self._T

    def transpose(self, *dims: int) -> 'Tensor':
        from nujo.autodiff._functions._transform import _Transpose
        return _Transpose(self, dims)()

    def reshape(self, *shape: int) -> 'Tensor':
        from nujo.autodiff._functions._transform import _Reshape
        return _Reshape(self, shape)()

    def squeeze(self, dim=-1) -> 'Tensor':
        if dim < 0:
            num_dims = len(self._value.shape)

            if dim < -num_dims:
                dim = num_dims
            else:
                dim += num_dims

        return self.reshape(*self._value.shape[:dim],
                            *self._value.shape[dim + 1:])

    def unsqueeze(self, dim=-1) -> 'Tensor':
        if dim < 0:
            num_dims = len(self._value.shape)

            if dim < -num_dims:
                dim = 0
            else:
                if dim == -1:
                    dim += 1
                dim += num_dims

        return self.reshape(*self._value.shape[:dim], 1,
                            *self._value.shape[dim:])

    # Gradient computation

    def _compute_grad_from(self, poutput: 'Tensor') -> Union['Tensor', ndarray]:
        ''' Computes the gradient of `self` w.r.t. the output of the
        computation graph from `poutput` (using the path of computations
        from `poutput`)

        In other words, this function returns:
        (dOutput / dPoutput) * (dPoutput / dSelf)

        '''
        # Find the index of the child whose gradient should be computed
        # (a.k.a. find the index of `self` in `poutput.creator.children`)
        idx = next(i for i, v in enumerate(poutput.creator.children)
                   if v is self)

        if poutput._grad.diff:
            # Pass a diff enabled tensor to the backward call,
            # thus recording grad computations in the computation
            # graph, which enables higher-order differentiation.
            grad = poutput.creator.backward(idx, poutput._grad)

            # Check if `self` is scalar and needs to be averaged
            if self._value.shape != () and\
               self._value.shape[-1] == 1:

                # Record the mean in the computation graph
                from nujo.math.aggregate import mean
                grad = mean(grad, dim=-1, keepdim=True)

        else:
            # Do not leave a trace in the computation graph!
            # Use numpy arrays! :)
            grad = poutput.creator.backward(idx, poutput._grad._value)

            # Check if `self` is scalar and needs to be averaged
            if self._value.shape != () and\
               self._value.shape[-1] == 1:

                grad = grad.mean(axis=-1, keepdims=True)

        return grad

    def compute_grad(self) -> None:
        if modes.DIFF_ENABLED and self.diff:
            # Make sure grad is Tensor (`grad` property call) and init value
            if self._grad is None:
                self.zero_grad(propagate=False)

            # Top-parent grad
            if len(self.parents_outputs) == 0:
                self._grad._value += 1
                return

            for poutput in self.parents_outputs:
                curr_grad = self._compute_grad_from(poutput)

                if self._grad.diff:
                    # Record grad computations in the computation graph
                    self._grad += curr_grad
                else:
                    self._grad._value += curr_grad

    def zero_grad(self, propagate=True) -> None:
        self.grad._value.fill(0)

        if propagate:
            for poutput in self.parents_outputs:
                poutput.zero_grad()

    def backward(self, _debug=False) -> None:
        ''' It uses Breadth First Search to traverse the computation graph
        and compute the gradient for each differentiable Tensor in the graph.
        '''
        nodes_to_visit: List['Tensor'] = [self]
        if _debug:
            i = 1

        while nodes_to_visit:
            node = nodes_to_visit.pop()
            node.compute_grad()

            if _debug:
                nstr = f' [{i}]'
                node.name += nstr if nstr not in node.name else ''
                i += 1

            if node.creator:
                for child in node.creator.children:
                    # Avoid visiting the same node twice
                    if all(child is not node for node in nodes_to_visit):
                        nodes_to_visit.insert(0, child)

    # Useful methods

    def all(self) -> ndarray:
        return self._value.all()

    def any(self) -> ndarray:
        return self._value.any()

    def __getitem__(self, position: Union[int, Tuple[int, ...]]):
        return Tensor(self._value[position],
                      diff=self.diff,
                      creator=self.creator,
                      name=f'{self.name}[{position}]')

    def __setitem__(self, position: Union[int, Tuple[int, ...]],
                    value: Union['Tensor', ndarray, List[Number], Number]):
        # TODO: This is a naive implementation. Fix it.
        self._value[position] = value

    def __hash__(self):
        return self.id

    # Static evaluation operator

    def __ilshift__(
            self,
            other: Union['Tensor', ndarray, List[Number], Number]) -> 'Tensor':
        ''' In-place assignment operator: `<<=`

        Transferring key properties from `other` to `self`.

        Essentially a shortcut for:
            >>> self.children = other.children
            >>> self.creator = other.creator
            >>> self.value = other.value
            >>> self.grad = other.grad

        '''
        self.children = getattr(other, 'children', None)
        if self.children:
            try:
                self.children.remove(self)
            except ValueError:  # self is not in children
                pass

        self.creator = getattr(other, 'creator', None)
        if self.creator:
            try:
                self.creator.children.remove(self)
            except ValueError:  # self is not in children
                pass

        self._value = getattr(other, 'value', other)

        # Transfer the gradient
        self._grad = getattr(other, 'grad', None)

        return self

    # Comparison operations

    def __lt__(self, other):
        return self._value < getattr(other, 'value', other)

    def __le__(self, other):
        return self._value <= getattr(other, 'value', other)

    def __eq__(self, other):
        return self._value == getattr(other, 'value', other)

    def __ne__(self, other):
        return self._value != getattr(other, 'value', other)

    def __gt__(self, other):
        return self._value > getattr(other, 'value', other)

    def __ge__(self, other):
        return self._value >= getattr(other, 'value', other)

    # Arithmetic operations

    def __add__(self, other):
        from nujo.autodiff._functions._elementary import _Addition
        return _Addition(self, other)()

    def __radd__(self, other):
        return self.__add__(other)

    def __neg__(self):
        from nujo.autodiff._functions._elementary import _Negation
        return _Negation(self)()

    def __sub__(self, other):
        return self.__add__(other.__neg__())

    def __rsub__(self, other):
        return self.__neg__().__add__(other)

    def __mul__(self, other):
        from nujo.autodiff._functions._elementary import _Multiplication
        return _Multiplication(self, other)()

    def __rmul__(self, other):
        return self.__mul__(other)

    def __truediv__(self, other):
        from nujo.autodiff._functions._elementary import _Reciprocal
        return self.__mul__(_Reciprocal(other)())

    def __rtruediv__(self, other):
        from nujo.autodiff._functions._elementary import _Reciprocal
        return _Reciprocal(self)().__mul__(other)

    def __pow__(self, other):
        from nujo.autodiff._functions._elementary import _Power
        return _Power(self, other)()

    def __rpow__(self, other):
        from nujo.autodiff._functions._elementary import _Power
        return _Power(other, self)()

    # More complex arithmetic operations

    def __matmul__(self, other):
        from nujo.autodiff._functions._elementary import _MatrixMul
        return _MatrixMul(self, other)()

    def __rmatmul__(self, other):
        from nujo.autodiff._functions._elementary import _MatrixMul
        return _MatrixMul(other, self)()

    # Representations

    def __str__(self):
        # TODO: Come up with a better representation
        return self.__repr__() + '\n' + '-' * 32 + '\n' + str(self._value)
Ancestors
- nujo.autodiff._node._Node
Instance variables
var T : Tensor
-
Expand source code
@property
def T(self) -> 'Tensor':
    # Only transpose if something has changed
    if (self._value != self._prev_value).any():
        self._T = self.transpose()
        self._prev_value = self._value

    return self._T
var grad : Tensor
-
Expand source code
@property
def grad(self) -> 'Tensor':
    if self._grad is None:
        self._grad = Tensor(empty(self._value.shape),
                            name=f'grad[{self.name}]')

    return self._grad
var shape : Tuple[int, ...]
-
Expand source code
@property
def shape(self) -> Tuple[int, ...]:
    return self._value.shape
var value
-
Expand source code
@property
def value(self):
    return self._value
Methods
def all(self) -> numpy.ndarray
-
Expand source code
def all(self) -> ndarray:
    return self._value.all()
def any(self) -> numpy.ndarray
-
Expand source code
def any(self) -> ndarray:
    return self._value.any()
def backward(self) -> NoneType
-
It uses Breadth First Search to traverse the computation graph and compute the gradient for each differentiable Tensor in the graph.
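Gradients accumulate across calls (compute_grad adds into .grad), so a typical training step, sketched here with hypothetical loss and w tensors, zeroes them between passes:
>>> loss.backward()     # traverse the graph and accumulate gradients
>>> w.zero_grad()       # reset before the next backward pass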
Expand source code
def backward(self, _debug=False) -> None:
    ''' It uses Breadth First Search to traverse the computation graph
    and compute the gradient for each differentiable Tensor in the graph.
    '''
    nodes_to_visit: List['Tensor'] = [self]
    if _debug:
        i = 1

    while nodes_to_visit:
        node = nodes_to_visit.pop()
        node.compute_grad()

        if _debug:
            nstr = f' [{i}]'
            node.name += nstr if nstr not in node.name else ''
            i += 1

        if node.creator:
            for child in node.creator.children:
                # Avoid visiting the same node twice
                if all(child is not node for node in nodes_to_visit):
                    nodes_to_visit.insert(0, child)
def compute_grad(self) -> NoneType
-
Expand source code
def compute_grad(self) -> None:
    if modes.DIFF_ENABLED and self.diff:
        # Make sure grad is Tensor (`grad` property call) and init value
        if self._grad is None:
            self.zero_grad(propagate=False)

        # Top-parent grad
        if len(self.parents_outputs) == 0:
            self._grad._value += 1
            return

        for poutput in self.parents_outputs:
            curr_grad = self._compute_grad_from(poutput)

            if self._grad.diff:
                # Record grad computations in the computation graph
                self._grad += curr_grad
            else:
                self._grad._value += curr_grad
def reshape(self, *shape: int) -> Tensor
-
Expand source code
def reshape(self, *shape: int) -> 'Tensor':
    from nujo.autodiff._functions._transform import _Reshape
    return _Reshape(self, shape)()
def squeeze(self, dim=-1) -> Tensor
-
Expand source code
def squeeze(self, dim=-1) -> 'Tensor':
    if dim < 0:
        num_dims = len(self._value.shape)

        if dim < -num_dims:
            dim = num_dims
        else:
            dim += num_dims

    return self.reshape(*self._value.shape[:dim],
                        *self._value.shape[dim + 1:])
def transpose(self, *dims: int) -> Tensor
-
Expand source code
def transpose(self, *dims: int) -> 'Tensor':
    from nujo.autodiff._functions._transform import _Transpose
    return _Transpose(self, dims)()
def unsqueeze(self, dim=-1) -> Tensor
-
Expand source code
def unsqueeze(self, dim=-1) -> 'Tensor':
    if dim < 0:
        num_dims = len(self._value.shape)

        if dim < -num_dims:
            dim = 0
        else:
            if dim == -1:
                dim += 1
            dim += num_dims

    return self.reshape(*self._value.shape[:dim], 1,
                        *self._value.shape[dim:])
def zero_grad(self, propagate=True) -> NoneType
-
Expand source code
def zero_grad(self, propagate=True) -> None:
    self.grad._value.fill(0)

    if propagate:
        for poutput in self.parents_outputs:
            poutput.zero_grad()
class no_diff
-
No Differentiation block
Creates a block of code where no differentiation is done, i.e. no gradients are computed for any tensor.
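Typical usage as a context manager (a minimal sketch):
>>> import nujo as nj
>>> w = nj.Tensor([1.0, 2.0], diff=True)
>>> with nj.no_diff():
...     y = w * 2       # computed without recording gradients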
Expand source code
class no_diff():
    ''' No Differentiation block

    Creates a block of code where no differentiation is done,
    i.e. no gradients are computed for any tensor.
    '''
    def __enter__(self):
        global DIFF_ENABLED
        DIFF_ENABLED = False

    def __exit__(self, type, value, traceback):
        global DIFF_ENABLED
        DIFF_ENABLED = True