Backtracking Line Search Fails to Converge in Barrier SVM Implementation
5 views (last 30 days)
Show older comments
Konstantinos
on 26 Dec 2024
Answered: Konstantinos
on 26 Dec 2024
Hi MATLAB community,
I am working on an implementation of the barrier method for an SVM optimization problem. My code uses a backtracking line search to determine the step size τ\tauτ. However, the backtracking loop fails to converge, and τ eventually becomes very small (approaching zero). At this point, the loop does not exit, and the program stalls.
The part of the code that i am referring in the following:
% while (barrier_SVM_cost_function( w_new, X_augm, y, t ) > ...
% barrier_SVM_cost_function( w(:,inner_iter), X_augm, y, t ) + alpha * tau * grad' * Dw_Nt )
% fprintf('\nI backtrack...')
% tau = beta * tau;
% w_new = w(:,inner_iter) + tau * Dw_Nt;
% end
The code that i am using in the following:
clc, clear, figure(1), clf, % close all
n = 2; % problem dimension
N = 1000; % number of data points
% Define and draw the separating line a^T * x = b
a = randn(2,1); b = randn;
x1 = [-2:.1:2];
for ii=1:length(x1)
x2(ii) = 1/a(2) * ( -a(1) * x1(ii) + b );
end
plot(x1, x2, '-'), grid, hold on
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Generate and draw SEPARABLE data to classify
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('\nData generation and plot...')
[X, y] = generate_data(N, a, b);
for ii=1:N
if (y(ii) == 1)
plot(X(1,ii), X(2,ii), 'bo')
else
plot(X(1,ii), X(2,ii), 'r+')
end
end
% Augment the data matrix with -1 elements
X_augm = [-ones(1,N); X];
% Check data separablity with respect to the true a and b
if ( data_is_not_separable(X_augm, y, a, b) == 0 )
fprintf('\nData is separable... I proceed to the solution...')
pause(1)
else
fprintf('\nData is non-separable... I quit...')
return
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Solution via SVMs and CVX
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('\nSolution of SVM via CVX (primal problem)...')
cvx_begin quiet
variable w_SVM(n+1, 1);
minimize ( w_SVM' * w_SVM );
subject to
y' .* (X_augm' * w_SVM) >= 1;
cvx_end
% Draw SVM-based separating hyperplane (color: magenta)
% in the same plot with "true" separating hyperplane and data
a_est = w_SVM(2:3);
b_est = w_SVM(1);
x1 = -2:.1:2;
for ii=1:length(x1)
x2(ii) = 1/a_est(2) * ( -a_est(1) * x1(ii) + b_est );
end
figure(1), plot(x1, x2, 'm'), grid on,
xlabel('x1'), ylabel('x2'), hold on
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Solution via interior point method
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('\n Solution via interior point method...')
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialization
w_init(:,1) = [b; a]; % start from a feasible point
while ( point_is_feasible(w_init, X_augm, y) == 0 )
disp('w_init is not feasible, doubling w_init...');
w_init = 2 * w_init;
end
% *******************************************************
% % Initialization
threshold_1 = 10^(-3); % determines termination condition for inner Newton iterations
epsilon = 10^(-5); % determines final termination condition and algorithm accuracy
alpha = 0.2; beta = 0.3; % backtracking parameters
t = 10; % initialize parameter t of barrier method
mu = 10; % used for the increase of t: t = mu * t;
outer_iter = 1;
W(:,outer_iter) = w_init;
while (1) % Outer loop
outer_iter;
w(:,1) = W(:, outer_iter); % Start new optimization from the previous solution
inner_iter = 1; % initialize counter of inner iterations
while ( 1 ) % Inner loop
grad = gradient_SVM_barrier( w(:,inner_iter), X_augm, y, t ); % gradient at w
Hess = Hess_SVM_barrier( w(:,inner_iter), X_augm, y, t ); % Hessian at w
Dw_Nt = - pinv(Hess) * grad; % Newton step
l_x = sqrt( Dw_Nt' * Hess * Dw_Nt ); % Newton decrement
if (l_x^2/2 <= threshold_1)
break; end % Newton termination condition
% Update and check feasibility - if the new point is not feasible, then backtrack
tau = 1;
w_new = w(:,inner_iter) + tau * Dw_Nt;
while ( point_is_feasible(w_new, X_augm, y) == 0 )
fprintf('\nNew point is infeasible... I backtrack...')
tau = beta * tau;
w_new = w(:,inner_iter) + tau * Dw_Nt;
end
% Backtracking line search
while (barrier_SVM_cost_function( w_new, X_augm, y, t ) > ...
barrier_SVM_cost_function( w(:,inner_iter), X_augm, y, t ) + alpha * tau * grad' * Dw_Nt )
fprintf('\nI backtrack...')
tau = beta * tau;
w_new = w(:,inner_iter) + tau * Dw_Nt;
end
% Update and plot w in the same plot with w_SVM
w(:,inner_iter+1) = w_new;
% Plot current solution estimate and optimal solution computed via CVX
figure(2), plot([w_SVM w(:,inner_iter+1)]),
legend('optimal solution', 'current estimate'), pause(.001)
inner_iter = inner_iter + 1;
end
W(:, outer_iter+1) = w(:, inner_iter); % W(:, outer_iter+1): solution of optimization problem
if (N/t < epsilon)
break
end % Algorithm termination condition
outer_iter = outer_iter + 1;
t = t * mu;
end
function flag = point_is_feasible(w, X_augm, y)
% INPUTS:
% w - Current parameter vector [bias; weights]
% X_augm - Augmented data matrix, size (n+1) x N
% y - Labels vector, size 1 x N
constraints = 1 - (( w' * X_augm ) .*y ) ;
% Check if the constraints are satisfied
if all(constraints <= 0)
flag = 1; % Point is feasible
else
flag = 0; % Point is not feasible
end
end
function Hess = Hess_SVM_barrier(w, X_augm, y, t)
first_term = t * eye(size(w, 1));
d = 1 - (( w' * X_augm ) .*y );
second_term = X_augm * diag(1./(d.^2) ) * X_augm';
%Total hess
Hess = first_term + second_term;
end
function grad = gradient_SVM_barrier(w, X_augm, y, t)
first_term = t * w;
denominator = 1 - y.*(X_augm' * w);
second_term = X_augm * (y ./ denominator)';
%Total grad
grad = first_term + second_term;
end
function [X, y] = generate_data(N, a, b)
% [X, y] = generate_data(N, a, b)
% Generate n X N matrix X and 1 x N vector y
% separated by hyperplane defined by
% the n-dimensional vector a and scalar b
% Compute dimension of the data
n = length(a);
% Find a point of the hyperplane
lambda = b / norm(a)^2;
x0 = lambda * a;
mean = a;
for ii=1:N
y(1,ii) = sign(randn); % generate label
X(:,ii) = x0 + y(1,ii) * mean + .2 * randn(n,1); % generate data point
end
end
function flag = data_is_not_separable(X, y, a, b)
% a = data_is_not_separable(X, y, a, b)
N = length(y);
flag = 0;
tmp = y' .* (X' * [b; a]) > 0;
if ( sum(tmp) < N )
flag = 1;
end
end
function cost = barrier_SVM_cost_function(w_new, X_augm, y, t)
first_term = t * 0.5 * (norm(w_new)^2);
log_arg = 1 - (( w_new' * X_augm ) .* y );
second_term = -sum(log(-log_arg));
%Total cost
cost = first_term + second_term;
end
What could be causing the condition to fail repeatedly in this context?
Any insights or suggestions to improve the backtracking logic or stability of the cost function would be greatly appreciated!
Thank you in advance!
0 Comments
Accepted Answer
More Answers (0)
See Also
Categories
Find more on Deep Learning Toolbox in Help Center and File Exchange
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!