@@ -33,3 +33,12 @@ CAMx6.5/test_run_data/ptsrce
# JupyterLab
.ipynb_checkpoints
# Python cache
**__pycache__/
**.egg-info/
# Nati stuff
Pipfile
Pipfile.lock
#LyX 2.3 created this file. For more info see http://www.lyx.org/
\lyxformat 544
\begin_document
\begin_header
\save_transient_properties true
\origin unavailable
\textclass article
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman "default" "default"
\font_sans "default" "default"
\font_typewriter "default" "default"
\font_math "auto" "auto"
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100 100
\font_tt_scale 100 100
\use_microtype false
\use_dash_ligatures true
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize default
\use_hyperref false
\papersize default
\use_geometry false
\use_package amsmath 1
\use_package amssymb 1
\use_package cancel 1
\use_package esint 1
\use_package mathdots 1
\use_package mathtools 1
\use_package mhchem 1
\use_package stackrel 1
\use_package stmaryrd 1
\use_package undertilde 1
\cite_engine basic
\cite_engine_type default
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\justification true
\use_refstyle 1
\use_minted 0
\index Index
\shortcut idx
\color #008000
\end_index
\secnumdepth 3
\tocdepth 3
\paragraph_separation indent
\paragraph_indentation default
\is_math_indent 0
\math_numbering_side default
\quotes_style english
\dynamic_quotes 0
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\end_header
\begin_body
\begin_layout Section
Current solution
\end_layout
\begin_layout Standard
The data is modeled as
\begin_inset Formula
\[
\mathcal{N}\left(FG,\text{diag}\left(W\right)\right)
\]
\end_inset
\end_layout
\begin_layout Standard
\begin_inset Formula
\[
\text{Likelihood}
\]
\end_inset
\end_layout
\begin_layout Section
General problem
\end_layout
\begin_layout Standard
Let us consider the problem
\begin_inset Formula
\[
\dot{F},\dot{G}=\arg\min_{F\geq0,G\geq0}\|FG-X\|_{F}^{2}+\lambda\text{tr}\left(F^{T}\Delta F\right)\hspace{1em}\text{s.t. }1^{T}G=1^{T}
\]
\end_inset
We can weight the objective function as
\begin_inset Formula
\[
\dot{F},\dot{G}=\arg\min_{F\geq0,G\geq0}\|W\circ\left(FG-X\right)\|_{F}^{2}+\lambda\text{tr}\left(F^{T}\Delta F\right)\hspace{1em}\text{s.t. }1^{T}G=1^{T}
\]
\end_inset
where
\begin_inset Formula $W$
\end_inset
represents a weight matrix.
\end_layout
\begin_layout Section
Solve with respect to
\begin_inset Formula $F$
\end_inset
\end_layout
\begin_layout Standard
Let us first solve the problem with respect to
\begin_inset Formula $F$
\end_inset
given
\begin_inset Formula $G$
\end_inset
.
Our goal is to minimize the following loss
\begin_inset Formula
\[
L(F)=\|FG-X\|_{F}^{2}+\lambda\text{tr}\left(F^{T}\Delta F\right)
\]
\end_inset
The gradient is given by
\begin_inset Formula
\[
\frac{1}{2}\nabla L\left(F\right)=FGG^{T}-XG^{T}+\lambda\Delta F.
\]
\end_inset
Setting the gradient to zero yields the implicit relation
\begin_inset Formula
\[
F=\left(XG^{T}-\lambda\Delta F\right)\left(GG^{T}\right)^{-1}.
\]
\end_inset
Unfortunately, this does not satisfy the positivity constraint of
\begin_inset Formula $F.$
\end_inset
Alternatively, a gradient update becomes
\begin_inset Formula
\begin{align*}
F_{i,j}^{t+1} & =F_{i,j}^{t}-\gamma_{i,j}\nabla_{i,j}L\left(F^{t}\right)\\
& =F_{i,j}^{t}-\gamma_{i,j}\left[F^{t}GG^{T}+\lambda\Delta F^{t}\right]_{i,j}+\gamma_{i,j}\left[XG^{T}\right]_{i,j}\\
& =F_{i,j}^{t}\frac{\left[XG^{T}\right]_{i,j}}{\left[F^{t}GG^{T}+\lambda\Delta F^{t}\right]_{i,j}}
\end{align*}
\end_inset
where we set
\begin_inset Formula $\gamma_{i,j}=\frac{F_{i,j}^{t}}{\left[F^{t}GG^{T}+\lambda\Delta F^{t}\right]_{i,j}}.$
\end_inset
Note that since this is a multiplicative update, the positivity constraint
is always ensured.
Problematically, this reasoning is flawed since the entries of
\begin_inset Formula $\lambda\Delta F^{t}$
\end_inset
are not always nonnegative, so the denominator of the update can become negative!
\end_layout
\begin_layout Standard
Therefore, let us consider the auxiliary function
\begin_inset Formula $L_{a}(F,F^{t})$
\end_inset
\begin_inset Formula
\[
L_{a}(F,F^{t})=\|FG-X\|_{F}^{2}+\lambda\left[\text{tr}\left(F^{tT}\Delta F^{t}\right)+2\text{tr}\left(\left(F-F^{t}\right)^{T}\Delta F^{t}\right)+\sigma(\Delta)\|F-F^{t}\|_{F}^{2}\right]
\]
\end_inset
Here we have
\begin_inset Formula $L_{a}(F^{t},F^{t})=L(F^{t})$
\end_inset
,
\begin_inset Formula $\nabla_{F}L_{a}\left(F^{t},F^{t}\right)=\nabla_{F}L\left(F^{t}\right)$
\end_inset
and
\begin_inset Formula $L_{a}(F,F^{t})\geq L(F)$
\end_inset
.
The last inequality follows from the quadratic upper bound, valid for any symmetric matrix
\begin_inset Formula $C$
\end_inset
with largest eigenvalue
\begin_inset Formula $\sigma(C)$
\end_inset
,
\begin_inset Formula
\[
x^{T}Cx\leq x^{tT}Cx^{t}+2\left(x-x^{t}\right)^{T}Cx^{t}+\sigma(C)\|x-x^{t}\|_{2}^{2}
\]
\end_inset
and
\begin_inset Formula
\[
\|FG-X\|_{F}^{2}\leq\|F^{t}G-X\|_{F}^{2}+2\text{tr}\left(\left(F-F^{t}\right)^{T}\left(F^{t}GG^{T}-XG^{T}\right)\right)+\sigma\left(GG^{T}\right)\|F-F^{t}\|_{F}^{2}
\]
\end_inset
\end_layout
\begin_layout Section
Solve with respect to
\begin_inset Formula $G$
\end_inset
\end_layout
\begin_layout Standard
Let us now consider the update for
\begin_inset Formula $G$
\end_inset
.
Let us minimize the Lagrangian
\begin_inset Formula
\[
\mathcal{L}(G)=\frac{1}{2}\|FG-X\|_{F}^{2}+\nu^{T}\left(G^{T}1-1\right)
\]
\end_inset
while ensuring that the constraint
\begin_inset Formula $1^{T}G=1^{T}$
\end_inset
is always satisfied.
The gradient becomes
\begin_inset Formula
\[
\nabla\mathcal{L}(G)=F^{T}FG-F^{T}X+1\nu^{T}.
\]
\end_inset
A gradient update is given by
\begin_inset Formula
\begin{align*}
G_{i,j}^{t+1} & =G_{i,j}^{t}-\gamma_{i,j}\nabla\mathcal{L}(G^{t})\\
& =G_{i,j}^{t}-\gamma_{i,j}\left[F^{T}FG^{t}+1\nu^{T}\right]_{i,j}+\gamma_{i,j}\left[F^{T}X\right]_{i,j}\\
& =G_{i,j}^{t}\frac{\left[F^{T}X\right]_{i,j}}{\left[F^{T}FG^{t}+1\nu^{T}\right]_{i,j}}
\end{align*}
\end_inset
where
\begin_inset Formula $\gamma_{i,j}=\frac{G_{i,j}^{t}}{\left[F^{T}FG^{t}+1\nu^{T}\right]_{i,j}}.$
\end_inset
To satisfy the KKT conditions, we need that
\begin_inset Formula $1^{T}G^{t+1}=1^{T}.$
\end_inset
Therefore, we need to find
\begin_inset Formula $\nu$
\end_inset
such that
\begin_inset Formula
\[
1=\sum_{i}G_{i,j}^{t+1}=\sum_{i}\frac{G_{i,j}^{t}\left[F^{T}X\right]_{i,j}}{\left[F^{T}FG^{t}\right]_{i,j}+\nu_{j}}.
\]
\end_inset
We will use a dichotomy (bisection) search to find it.
\end_layout
\end_body
\end_document
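For reference, the unconstrained multiplicative updates derived above (lambda = 0, no simplex constraint) can be sketched in a few lines of numpy; the array names and sizes below are illustrative only and are not part of the repository code.

import numpy as np

rng = np.random.default_rng(0)
X = np.abs(rng.normal(size=(50, 40)))   # data, n_samples x n_features
F = np.abs(rng.normal(size=(50, 3)))    # factors, n_samples x n_components
G = np.abs(rng.normal(size=(3, 40)))    # loadings, n_components x n_features
eps = 1e-10                             # small shift to avoid divisions by zero

for _ in range(200):
    F *= (X @ G.T) / np.maximum(F @ (G @ G.T), eps)   # F <- F * (X G^T) / (F G G^T)
    G *= (F.T @ X) / np.maximum((F.T @ F) @ G, eps)   # G <- G * (F^T X) / (F^T F G)

print(np.linalg.norm(F @ G - X))        # Frobenius reconstruction error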
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted
import numpy as np
from auroraPSI.nmf_updates import multiplicative_step_F,multiplicative_step_G, Frobenius_loss, trace_xtLx, initialize_algorithms, create_laplacian_matrix
from auroraPSI.nmf_updates import dicotomy_tol, sigmaL, shift
import time
class NMFEstimator(TransformerMixin, BaseEstimator):
loss_names_ = ["Frobenius loss", "Laplacian loss"]
def __init__(self, n_components=2, init='warn', tol=1e-4, max_iter=200,
random_state=None, verbose=1, debug=False, lambda_L=0,
shift=shift, eval_print=10, force_simplex=False, dicotomy_tol=dicotomy_tol):
self.n_components = n_components
self.init = init
self.tol = tol
self.max_iter = max_iter
self.random_state = random_state
self.verbose = verbose
self.shift = shift
self.debug = debug
self.eval_print = eval_print
self.lambda_L = lambda_L
self.force_simplex = force_simplex
self.dicotomy_tol = dicotomy_tol
def _more_tags(self):
return {'requires_positive_X': True}
def _iteration(self, F, G, noG):
F = multiplicative_step_F(self.X_, F, G, safe=self.debug, lambda_L=self.lambda_L, L=self.L_, shift=self.shift)
if not noG:
G = multiplicative_step_G(self.X_, F, G, force_simplex=self.force_simplex, safe=self.debug, dicotomy_tol=self.dicotomy_tol, shift=self.shift)
return F, G
def loss(self, F, G, average=True, X = None):
if X is None :
X = self.X_
assert(X.shape == (F.shape[0],G.shape[1]))
self.FG_numel_ = F.shape[0] * G.shape[1]
loss_fro = Frobenius_loss(X, F, G, average=average)
loss = loss_fro
self.detailed_loss_ = [loss_fro, 0]
if self.lambda_L>0:
loss_tr = self.lambda_L * trace_xtLx(self.L_, F, average=average)
loss += loss_tr
self.detailed_loss_[1] = loss_tr
# if average:
# loss = loss / self.FG_numel_
return loss
def fit_transform(self, X, y=None, F=None, G=None, noG=False):
"""Learn a NMF model for the data X and returns the transformed data.
This is more efficient than calling fit followed by transform.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Data matrix to be decomposed
F : array-like of shape (n_samples, n_components)
If specified, it is used as initial guess for the solution.
G : array-like of shape (n_components, n_features)
If specified, it is used as initial guess for the solution.
Returns
-------
F,G : ndarrays
"""
self.X_ = self._validate_data(X, dtype=[np.float64, np.float32])
self.X_ = self.remove_zeros_lines(self.X_, self.shift)
self.F_, self.G_ = initialize_algorithms(X = self.X_, F= F, G = G, n_components = self.n_components, init = self.init, random_state = self.random_state, force_simplex = self.force_simplex)
if self.lambda_L > 0:
self.L_ = create_laplacian_matrix(self.F_.shape[0])
else:
self.L_ = None
algo_start = time.time()
        # If lambda_L != 0, the loss includes the Laplacian regularization term
        # Otherwise it reduces to the plain data-fitting (Frobenius) term
eval_before = np.inf
eval_init = self.loss(self.F_, self.G_)
self.n_iter_ = 0
# if self.debug:
self.losses_ = []
self.rel_ = []
self.detailed_losses_ = []
try:
while True:
                # Take one multiplicative step in F and G
old_F, old_G = self.F_.copy(), self.G_.copy()
self.F_, self.G_ = self._iteration(self.F_, self.G_, noG=noG)
eval_after = self.loss(self.F_, self.G_)
self.n_iter_ +=1
rel_F = np.max((self.F_ - old_F)/(self.F_ + self.tol*np.mean(self.F_) ))
rel_G = np.max((self.G_ - old_G)/(self.G_ + self.tol*np.mean(self.G_) ))
# store some information for assessing the convergence
# for debugging purposes
detailed_loss_ = self.detailed_loss_
self.losses_.append(eval_after)
self.detailed_losses_.append(detailed_loss_)
self.rel_.append([rel_F,rel_G])
# check convergence criterions
if self.n_iter_ >= self.max_iter:
print("exits because max_iteration was reached")
break
                # Stop when the relative change of both F and G falls below the tolerance
elif max(rel_F,rel_G) < self.tol:
print(
"exits because of relative change rel_F {} or rel_G {} < tol ".format(
rel_F,rel_G
)
)
break
elif abs((eval_before - eval_after)/eval_init) < self.tol:
print(
"exits because of relative change < tol: {}".format(
                            abs((eval_before - eval_after)/eval_init)
)
)
break
elif np.isnan(eval_after):
print("exit because of the presence of NaN")
break
elif (eval_before - eval_after) < 0:
if hasattr(self, "accelerate"):
if not self.accelerate:
print("exit because of negative decrease {}: {}, {}".format((eval_before - eval_after), eval_before, eval_after))
break
else:
print("exit because of negative decrease {}: {}, {}".format((eval_before - eval_after), eval_before, eval_after))
break
if self.verbose > 0 and np.mod(self.n_iter_, self.eval_print) == 0:
print(
f"It {self.n_iter_} / {self.max_iter}: loss {eval_after:0.3f}, {self.n_iter_/(time.time()-algo_start):0.3f} it/s",
)
pass
eval_before = eval_after
except KeyboardInterrupt:
pass
# if not(self.force_simplex):
# self.F_, self.G_ = rescaled_DH(self.F_, self.G_ )
algo_time = time.time() - algo_start
print(
f"Stopped after {self.n_iter_} iterations in {algo_time//60} minutes "
f"and {np.round(algo_time) % 60} seconds."
)
self.reconstruction_err_ = self.loss(self.F_, self.G_)
self.n_components_ = self.G_.shape[0]
self.components_ = self.G_
return self.F_
def fit(self, X, y=None, **params):
"""Learn a NMF model for the data X.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Data matrix to be decomposed
y : Ignored
Returns
-------
self
"""
self.fit_transform(X, **params)
return self
def inverse_transform(self, F):
"""Transform data back to its original space.
Parameters
----------
        F : {ndarray, sparse matrix} of shape (n_samples, n_components)
            Transformed data matrix.
        Returns
        -------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Data matrix of original shape.
"""
check_is_fitted(self)
return F @ self.G_
def get_losses(self):
        names = ["full_loss"] + self.loss_names_ + ["rel_F", "rel_G"]
dt_list = []
for elt in names :
dt_list.append((elt,"float64"))
dt = np.dtype(dt_list)
tup_list = []
for i in range(len(self.losses_)) :
tup_list.append((self.losses_[i],) + tuple(self.detailed_losses_[i]) + tuple(self.rel_[i]))
array = np.array(tup_list,dtype=dt)
return array
def remove_zeros_lines (self, X, epsilon) :
if np.all(X >= 0) :
new_X = X.copy()
sum_cols = X.sum(axis = 0)
sum_rows = X.sum(axis = 1)
new_X[:,np.where(sum_cols == 0)] = epsilon
new_X[np.where(sum_rows == 0),:] = epsilon
return new_X
else :
raise ValueError("There are negative values in X")
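A minimal usage sketch for the estimator above; the import path auroraPSI.nmf_estimator is a guess (the file path is not visible in this diff) and the data and parameter values are purely illustrative.

import numpy as np
from auroraPSI.nmf_estimator import NMFEstimator  # hypothetical module path

rng = np.random.default_rng(0)
X = np.abs(rng.normal(size=(100, 60)))   # nonnegative data matrix

est = NMFEstimator(n_components=3, init="random", max_iter=500,
                   random_state=0, lambda_L=1.0, force_simplex=True)
F = est.fit_transform(X)                 # (n_samples, n_components)
G = est.components_                      # (n_components, n_features)
print(est.reconstruction_err_, est.get_losses().dtype.names)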
import numpy as np
from sklearn.decomposition._nmf import _initialize_nmf as initialize_nmf
from scipy.sparse import lil_matrix
dicotomy_tol = 1e-6
shift = 1e-10
sigmaL = 4
def dichotomy_simplex(num, denum, tol=dicotomy_tol, maxit=40):
"""
    Function to solve the equation num/(x+denum) - 1 = 0, where x is the Lagrangian multiplier used to enforce the simplex constraint.
    The first part finds a and b such that num/(a+denum) - 1 > 0 and num/(b+denum) - 1 < 0.
    The second part applies the dichotomy (bisection) algorithm to solve the equation.
"""
    # The function has exactly one root to the right of its largest singularity (located at x = -min(denum))
# num = num.astype("float64")
# denum = denum.astype("float64")
# do some test
assert((num>=0).all())
assert((denum>=0).all())
assert((np.sum(num, axis=0)>0).all())
# Ideally we want to do this, but we have to exclude the case where num==0.
# a = np.max(num/2 - denum, axis=0)
if denum.shape[1]>1:
a = []
for n,d in zip(num.T, denum.T):
m = n>0
a.append(np.max(n[m]/2 - d[m]))
a = np.array(a)
else:
d = denum[:,0]
def max_masked(n):
m = n>0
return np.max(n[m]/2-d[m])
a = np.apply_along_axis(max_masked, 0, num)
# r = np.sum(num/denum, axis=0)
# b = np.zeros(r.shape)
# b[r>=1] = (len(num) * np.max(num, axis=0)/0.5 - np.min(denum, axis=0))[r>=1]
b = len(num) * np.max(num, axis=0)/0.5 - np.min(denum, axis=0)
def func(x):
return np.sum(num / (x + denum), axis=0) - 1
func_a = func(a)
func_b = func(b)
assert(np.sum(func_b>=0)==0)
assert(np.sum(func_a<=0)==0)
assert(np.sum(np.isnan(func_a))==0)
assert(np.sum(np.isnan(func_b))==0)
return dicotomy(a, b, func, maxit, tol)
def dicotomy(a, b, func, maxit, tol):
"""
    Dichotomy (bisection) algorithm searching for func(x) = 0.
    Inputs:
    * a: bound such that func(a) > 0
    * b: bound such that func(b) < 0
    * maxit: maximum number of iterations
    * tol: tolerance - the algorithm stops if |func(sol)| < tol
    This algorithm works for scalars or numpy arrays of any size.
"""
# Dichotomy algorithm to solve the equation
it = 0
new = (a + b)/2
func_new = func(new)
while np.max(np.abs(func_new)) > tol:
it=it+1
func_a = func(a)
# func_b = func(b)
# if f(a)*f(new) <0 then f(new) < 0 --> store in b
minus_bool = func_a * func_new <= 0
# if f(a)*f(new) > 0 then f(new) > 0 --> store in a
# plus_bool = func_a * func_new > 0
plus_bool = np.logical_not(minus_bool)
b[minus_bool] = new[minus_bool]
a[plus_bool] = new[plus_bool]
new = (a + b) / 2
func_new = func(new)
if it>=maxit:
print("Dicotomy stopped for maximum number of iterations")
break
return new
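# Illustrative example (assuming the definitions above): for a single column with
# num = [[0.5], [0.5]] and denum = [[0.1], [0.2]], dichotomy_simplex returns the root
# nu of 0.5/(nu + 0.1) + 0.5/(nu + 0.2) - 1 = 0 (nu ~ 0.85), so that the rescaled
# column num / (denum + nu) sums to one, which is exactly the simplex constraint.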
def Frobenius_loss(X, W, H, average=False):
"""
    Compute the squared Frobenius loss
    \sum_{ij} | X_{ij} - (W H)_{ij} |^2
"""
DH = W @ H
if average:
return np.mean((DH - X)**2)
else:
return np.sum((DH - X)**2)
def trace_xtLx(L, x, average=False):
if average:
return np.mean(x * (L @ x))
else:
return np.sum(x * (L @ x))
def create_laplacian_matrix(nx):
"""
    Helper method to create the Laplacian matrix used for the Laplacian regularization
    :param nx: number of points of the 1-D grid
    :return: the nx x nx Laplacian matrix
"""
assert(nx>1)
    # 1-D Laplacian: 2 on the diagonal, -1 on the first off-diagonals, corrected at the two boundary nodes
mat=lil_matrix((nx,nx),dtype=np.float32)
mat.setdiag(2)
mat.setdiag(-1,k=1)
mat.setdiag(-1,k=-1)
mat[0,0] = 1
mat[nx-1, nx-1] = 1
return mat
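# For example (illustrative only), create_laplacian_matrix(4).toarray() gives the
# path-graph Laplacian
#   [[ 1., -1.,  0.,  0.],
#    [-1.,  2., -1.,  0.],
#    [ 0., -1.,  2., -1.],
#    [ 0.,  0., -1.,  1.]]
# i.e. the 1-D second-difference operator with free boundaries.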
def multiplicative_step_G(X, F, G, force_simplex=True, safe=True, dicotomy_tol=dicotomy_tol, shift=shift):
"""
Multiplicative step in G.
"""
if safe:
# Allow for very small negative values!
assert(np.sum(G<-shift/2)==0)
assert(np.sum(F<-shift/2)==0)
G = np.maximum(G, shift)
F = np.maximum(F, shift)
FF = F.T @ F
FX = F.T @ X
num = G * FX
denum = FF @ G
if force_simplex:
nu = dichotomy_simplex(num, denum,dicotomy_tol)
else:
nu = 0
if safe:
assert np.sum(denum<0)==0
assert np.sum(num<0)==0
new_G = num/(denum+nu)
return new_G
def multiplicative_step_F(X, F, G, lambda_L=0, L=None, sigmaL=sigmaL, safe=True, shift=shift):
"""
Multiplicative step in F.
"""
if not(lambda_L==0):
if L is None:
raise ValueError("Please provide the laplacian")
if safe:
# Allow for very small negative values!
assert(np.sum(G<-shift/2)==0)
assert(np.sum(F<-shift/2)==0)
G = np.maximum(G, shift)
F = np.maximum(F, shift)
GG = G @ G.T
XG = X @ G.T
num = F * XG
denum = F @ GG
if not(lambda_L==0):
denum += np.maximum(lambda_L * (L @ F),0)
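        # Note: lambda_L * (L @ F) can have negative entries (cf. the accompanying LyX
        # derivation), so it is clipped at zero before being added to the denominator;
        # this keeps the multiplicative update nonnegative.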
if safe:
assert np.sum(denum<0)==0
assert np.sum(num<0)==0
new_F = num/denum
return new_F
def initialize_algorithms(X, F, G, n_components, init, random_state, force_simplex):
# Handle initialization
if F is None:
if G is None:
F, G = initialize_nmf(X, n_components=n_components, init=init, random_state=random_state)
if force_simplex:
scale = np.sum(G, axis=0, keepdims=True)
G = np.nan_to_num(G/scale, nan = 1.0/G.shape[0] )
else:
F = np.abs(np.linalg.lstsq(G, X.T, rcond=None)[0]).T
elif G is None:
G = np.abs(np.linalg.lstsq(F, X, rcond=None)[0])
if force_simplex:
scale = np.sum(G, axis=0, keepdims=True)
G = G/scale
return F, G
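# A small self-test sketch (illustrative only): exercises the update functions above
# on synthetic data; run this file directly to execute it.
if __name__ == "__main__":
    # Run a few regularized multiplicative updates with the simplex constraint on G.
    rng = np.random.default_rng(0)
    X_demo = np.abs(rng.normal(size=(30, 20)))
    F_demo, G_demo = initialize_algorithms(X_demo, None, None, n_components=3,
                                           init="random", random_state=0,
                                           force_simplex=True)
    L_demo = create_laplacian_matrix(F_demo.shape[0])
    for _ in range(100):
        F_demo = multiplicative_step_F(X_demo, F_demo, G_demo, lambda_L=1.0, L=L_demo)
        G_demo = multiplicative_step_G(X_demo, F_demo, G_demo, force_simplex=True)
    print("Frobenius loss:", Frobenius_loss(X_demo, F_demo, G_demo, average=True))
    print("columns of G sum to one:", np.allclose(G_demo.sum(axis=0), 1.0, atol=1e-5))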