function [xk, hist] = mgprox_l0(A0, b0, c0, lambda, L0, x_ini, tol, level, smooth, verbose)
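% MGPROX_L0  Multigrid proximal-gradient (MGProx) method for the l0-regularized
% least-squares problem
%     minimize_x  0.5*||A0*x - b0||^2 + c0'*x + lambda*||x||_0
% Inputs (as used below; A0 is treated as square, since the coarsening is built
% from size(A0,1)):
%   A0, b0, c0  -- data matrix, right-hand side, and linear term
%   lambda      -- l0 penalty weight
%   L0          -- Lipschitz constant of the smooth part, e.g. norm(A0'*A0)
%   x_ini       -- initial point
%   tol         -- relative tolerance for the stopping tests
%   level       -- number of coarse levels in the multigrid hierarchy
%   smooth      -- number of pre-/post-smoothing proximal-gradient steps per level
%   verbose     -- print early-stopping messages if nonzero
% Outputs:
%   xk   -- final iterate
%   hist -- history struct (objective F, prox-gradient residual G, gradient norm
%           dist, relDist, relObjdiff, and timing fields)
% Requires the companion routines prox_l0 and apg_l0 from the same repository.
%
% Illustrative call (parameter choices here are assumptions, not defaults):
%   n = 256; A0 = randn(n); b0 = randn(n,1); c0 = zeros(n,1);
%   L0 = norm(A0'*A0); lambda = 0.1;
%   [x, hist] = mgprox_l0(A0, b0, c0, lambda, L0, zeros(n,1), 1e-6, 3, 2, 1);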
xk0 = x_ini; xk = xk0; % previous and current iterates
[n, ~] = size(A0);
max_iter = n*1e2; % iteration cap scales with the problem size
crp = 2; % scaling used when forming coarse operators and prolongating corrections
% t = 1; t0 = 1;
objold = 0.5*((A0*xk0-b0)'*(A0*xk0-b0)) + c0'*xk0 + lambda*sum(abs(xk0) > eps);
hist.time_forward = 0; hist.time_backward = 0; hist.time_ls = 0;
hist.time_ini = 0; hist.time_apg = 0;
ini = tic;
hist.F = zeros(max_iter, 1);
hist.G = zeros(max_iter, 1);
hist.dist = zeros(max_iter, 1);
hist.relDist = zeros(max_iter, 1);
hist.relObjdiff = zeros(max_iter, 1);
% prepare the full version of non-adaptive \bar{R}_{l->l+1}
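% Each row i of Rbar{l} applies the [1 2 1] full-weighting stencil at columns
% 2i-2, 2i-1, 2i (the first row has no left neighbour), and its transpose acts
% as linear interpolation back to the finer level; if the fine dimension is odd
% a zero column is appended so the sizes match.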
Rbar = cell(level, 1); % restriction matrix
A = cell(level+1, 1); A{1} = A0; % A_{l}
L = [L0; zeros(level, 1)]; % L_{l}, Lipschitz constant
c = cell(level+1, 1); c{1} = c0;
for l = 1 : level
np = floor(n/2);
Rbar{l} = sparse([1:np,1:np,2:np], [2*(1:np)-1,2:2:n,2:2:2*np-2], [2*ones(np,1);1*ones(2*np-1,1)]);
if mod(n, 2) == 1
Rbar{l} = [Rbar{l}, sparse(np, 1)];
end
n = np;
A{l+1} = crp * A{l} * Rbar{l}';
c{l+1} = Rbar{l} * c{l};
L(l+1) = norm(A{l+1}'*A{l+1});
end
hist.time_ini = hist.time_ini + toc(ini);
% Q_inv = Q{L+1}^(-1);
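% Each outer iteration performs one V-cycle: pre-smoothing and restriction down
% the hierarchy (with the tau correction that keeps coarse and fine gradients
% consistent), an approximate coarse solve with apg_l0, then prolongation of the
% coarse correction with a backtracking line search and post-smoothing back up.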
for iter = 1 : max_iter
R = Rbar; % R_{l->l+1}
tau = cell(level+1, 1); tau{1} = 0; % tau_{l->l+1}^{k+1}
x = cell(level+1, 1); x{1} = xk; % x_{l+1}^k
y = cell(level, 1); % y_{l}^k
Axb = A0*xk-b0;
grad = A0'*Axb + c0;
hist.dist(iter) = norm(grad);
obj = 0.5*(Axb'*Axb) + c0'*xk + lambda*sum(abs(xk) > eps);
hist.F(iter) = obj;
hist.G(iter) = L0*norm(xk-prox_l0(xk-grad/L0,lambda/L0));
hist.relDist(iter) = norm(xk-xk0) / norm(xk);
hist.relObjdiff(iter) = abs(obj - objold) / max(obj, 1);
% stopping criterion
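% Four early-stopping tests are used below:
%   [c] relative prox-gradient residual G(iter)/G(1) below tol,
%   [d] a sudden blow-up of G (revert to the previous iterate and stop),
%   [a] small relative change in the iterates,
%   [b] small relative change in the objective.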
if hist.G(iter) / hist.G(1) <= tol
hist.F = hist.F(1:iter);
hist.G = hist.G(1:iter);
hist.dist = hist.dist(1:iter);
hist.relDist = hist.relDist(1:iter);
hist.relObjdiff = hist.relObjdiff(1:iter);
if verbose
fprintf('\n MGProx early stopping--iteration: %d\n', iter);
fprintf('[c] proximal first-order optimality condition satisfied\n')
end
break
end
if iter > 4
if hist.G(iter) > hist.G(iter-1) * 1e1
iter = iter - 1;
hist.F = hist.F(1:iter);
hist.G = hist.G(1:iter);
hist.dist = hist.dist(1:iter);
hist.relDist = hist.relDist(1:iter);
hist.relObjdiff = hist.relObjdiff(1:iter);
if verbose
fprintf('\n MGProx early stopping--iteration: %d\n', iter);
fprintf('[d] sudden jump in proximal first-order optimality condition\n')
end
xk = xk0;
break
end
if max(hist.relDist(iter), 0.1*hist.relObjdiff(iter)) < tol
if verbose
fprintf("\n MGProx Early Stopping--iteration: %d\n", iter);
fprintf('[a] relDist < %3.2e\n', tol);
fprintf('norm(xk-xk0)/norm(xk) = %f\n', hist.relDist(iter));
end
hist.F = hist.F(1:iter);
hist.G = hist.G(1:iter);
hist.dist = hist.dist(1:iter);
hist.relDist = hist.relDist(1:iter);
hist.relObjdiff = hist.relObjdiff(1:iter);
break
end
if max(0.5*hist.relDist(iter), 100*hist.relObjdiff(iter)) < tol
if verbose
fprintf("\n MGProx Early Stopping--iteration: %d\n", iter);
fprintf('[b] relObjdiff < %3.2e\n', 0.01*tol);
end
hist.F = hist.F(1:iter);
hist.G = hist.G(1:iter);
hist.dist = hist.dist(1:iter);
hist.relDist = hist.relDist(1:iter);
hist.relObjdiff = hist.relObjdiff(1:iter);
break;
end
end
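% Forward sweep: on each level run `smooth` proximal-gradient steps with step
% size 1/L(l), zero the columns of the restriction that correspond to zero
% entries of the smoothed iterate (adaptive restriction), restrict, and form
% tau_{l+1} = grad_{l+1}(R*y) - R*grad_l(y) so that the coarse model matches
% the fine gradient at the restricted point.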
forward = tic;
for l = 1 : level
% pre-smoothing
y{l} = x{l};
for sm = 1 : smooth
y{l} = prox_l0(y{l}-(A{l}'*(A{l}*y{l}-b0)+c{l}-tau{l})/L(l), lambda/L(l));
end
% generate the adaptive restriction operator
R{l}(:,~y{l}) = 0;
x{l+1} = R{l} * y{l};
% create the tau vector
%%% how to deal with the subdifferential of l0 norm
tau{l+1} = A{l+1}'*(A{l+1}*x{l+1}-b0)+c{l+1} - R{l}*(A{l}'*(A{l}*y{l}-b0)+c{l});
end
hist.time_forward = hist.time_forward + toc(forward);
% Solve level-L coarse problem
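% The warm start below comes from a direct linear solve; apg_l0 (assumed to be
% the accelerated proximal-gradient routine for the l0 problem in this
% repository) then refines it on the coarsest l0-regularized problem, with the
% tau correction folded into the linear term.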
apg = tic;
w = (A{level+1}'*A{level+1}) \ (A{level+1}'*b0 + tau{level+1}); % warm start from a direct linear solve
% w = rand(np, 1) / sqrt(np);
% w = zeros(n, 1);
% options = optimoptions('quadprog', 'Display', 'off', 'Algorithm', 'interior-point-convex', ...
% 'MaxIterations', 10, 'OptimalityTolerance', eps, 'StepTolerance', eps*0.01, 'LinearSolver', 'sparse');
% w = quadprog(Q{L+1}, b, [],[],[],[],zeros(n,1),[],[], options);
[w, ~] = apg_l0(A{level+1}, b0, c{level+1}-tau{level+1}, lambda, L(level+1), w, 1e-2, 0);
% [w, ~] = mgproxL(Q{L+1}, b, Ll, w, eps, L, 20, options, 0);
% [w, ~] = mgprox(Q{L+1}, b, Ll, w, eps*1e4, floor(2*log2(n)) - 1, smooth);
% w(w < eps) = 0;
hist.time_apg = hist.time_apg + toc(apg);
backward = tic;
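% Backward sweep: prolongate the coarse update crp*R{l}'*(w - x{l+1}) and
% backtrack on alpha until the level-l objective (without the tau term) does
% not increase, up to eps; a fixed step is unsafe because the l0 term is
% nonconvex and discontinuous. If no step is accepted the correction is
% skipped, and post-smoothing follows.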
for l = level : -1 : 1
% coarse correction with line search
ls = tic;
alpha = 1e1;
% grad = A{l}'*(A{l}*y{l}-b0) + c{l};
Ayb = A{l} * y{l} - b0;
gy = lambda * sum(abs(y{l}) > eps);
% cor = alpha*crp*R{l}'*(w-x{l+1});
cor0 = crp*R{l}'*(w-x{l+1});
while 1
cor = alpha * cor0;
Ax = A{l}*cor;
if 0.5*(Ax'*Ax) + (Ax'*Ayb) + cor'*c{l} + lambda*sum(abs(y{l}+cor) > eps) <= gy + eps
z = y{l} + cor;
break;
elseif alpha > eps
alpha = alpha * 0.5;
else
z = y{l};
break;
end
end
% z = y{l} + eps * crp*R{l}'*(w-x{l+1});
hist.time_ls = hist.time_ls + toc(ls);
% post-smoothing
w = z;
for sm = 1 : smooth
w = prox_l0(w-(A{l}'*(A{l}*w-b0)+c{l}-tau{l})/L(l), lambda/L(l));
end
end
hist.time_backward = hist.time_backward + toc(backward);
% update the fine variable
xk0 = xk;
xk = w;
% t0 = t; t = 0.5*(1+sqrt(1+4*t^2));
objold = obj;
end
if verbose
fprintf('ini time: %f\n', hist.time_ini);
fprintf('forward time: %f\n', hist.time_forward);
fprintf('apg time: %f\n', hist.time_apg);
fprintf('ls time: %f\n', hist.time_ls);
fprintf('backward time: %f\n', hist.time_backward);
end
end
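% ----------------------------------------------------------------------------
% Minimal sketch of the hard-thresholding proximal operator assumed above:
%   prox_l0(v, t) = argmin_x 0.5*||x - v||^2 + t*||x||_0,
% which keeps v(i) when |v(i)| > sqrt(2*t) and sets it to zero otherwise.
% This is an illustrative local function; if the repository provides its own
% prox_l0.m with a different convention, remove this sketch and use that one.
function x = prox_l0(v, t)
x = v;
x(abs(v) <= sqrt(2*t)) = 0; % hard threshold at sqrt(2*t)
end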