From 67087f3f5efe543e36c3e33f50d8648895a17266 Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 14:09:30 +0800
Subject: [PATCH 1/7] update svm.cpp to 3.20

---
 svm.cpp | 373 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 223 insertions(+), 150 deletions(-)

diff --git a/svm.cpp b/svm.cpp
index fb7a8d02..4744e230 100644
--- a/svm.cpp
+++ b/svm.cpp
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <limits.h>
+#include <locale.h>
 #include "svm.h"
 int libsvm_version = LIBSVM_VERSION;
 typedef float Qfloat;
@@ -73,7 +74,7 @@ class Cache
 	// return some position p where [p,len) need to be filled
 	// (p >= len if nothing needs to be filled)
 	int get_data(const int index, Qfloat **data, int len);
-	void swap_index(int i, int j);	
+	void swap_index(int i, int j);
 private:
 	int l;
 	long int size;
@@ -307,7 +308,7 @@ double Kernel::dot(const svm_node *px, const svm_node *py)
 				++py;
 			else
 				++px;
-		}			
+		}
 	}
 	return sum;
 }
@@ -336,7 +337,7 @@ double Kernel::k_function(const svm_node *x, const svm_node *y,
 				else
 				{
 					if(x->index > y->index)
-					{	
+					{
 						sum += y->value * y->value;
 						++y;
 					}
@@ -359,7 +360,7 @@ double Kernel::k_function(const svm_node *x, const svm_node *y,
 				sum += y->value * y->value;
 				++y;
 			}
-			
+
 			return exp(-param.gamma*sum);
 		}
 		case SIGMOID:
@@ -367,7 +368,7 @@ double Kernel::k_function(const svm_node *x, const svm_node *y,
 		case PRECOMPUTED:  //x: test (validation), y: SV
 			return x[(int)(y->value)].value;
 		default:
-			return 0;  // Unreachable 
+			return 0;  // Unreachable
 	}
 }
 
@@ -443,7 +444,7 @@ class Solver {
 	virtual double calculate_rho();
 	virtual void do_shrinking();
 private:
-	bool be_shrunk(int i, double Gmax1, double Gmax2);	
+	bool be_shrunk(int i, double Gmax1, double Gmax2);
 };
 
 void Solver::swap_index(int i, int j)
@@ -559,7 +560,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
 	int iter = 0;
 	int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
 	int counter = min(l,1000)+1;
-	
+
 	while(iter < max_iter)
 	{
 		// show progress and do shrinking
@@ -584,11 +585,11 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
 			else
 				counter = 1;	// do shrinking next iteration
 		}
-		
+
 		++iter;
 
 		// update alpha[i] and alpha[j], handle bounds carefully
-		
+
 		const Qfloat *Q_i = Q.get_Q(i,active_size);
 		const Qfloat *Q_j = Q.get_Q(j,active_size);
 
@@ -607,7 +608,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
 			double diff = alpha[i] - alpha[j];
 			alpha[i] += delta;
 			alpha[j] += delta;
-			
+
 			if(diff > 0)
 			{
 				if(alpha[j] < 0)
@@ -689,7 +690,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
 
 		double delta_alpha_i = alpha[i] - old_alpha_i;
 		double delta_alpha_j = alpha[j] - old_alpha_j;
-		
+
 		for(int k=0;k<active_size;k++)
 		{
 			G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
@@ -736,7 +737,7 @@ void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
 			active_size = l;
 			info("*");
 		}
-		info("\nWARNING: reaching max number of iterations");
+		fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
 	}
 
 	// calculate rho
@@ -790,7 +791,7 @@ int Solver::select_working_set(int &out_i, int &out_j)
 	// j: minimizes the decrease of obj value
 	//    (if quadratic coefficeint <= 0, replace it with tau)
 	//    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
-	
+
 	double Gmax = -INF;
 	double Gmax2 = -INF;
 	int Gmax_idx = -1;
@@ -798,7 +799,7 @@ int Solver::select_working_set(int &out_i, int &out_j)
 	double obj_diff_min = INF;
 
 	for(int t=0;t<active_size;t++)
-		if(y[t]==+1)	
+		if(y[t]==+1)
 		{
 			if(!is_upper_bound(t))
 				if(-G[t] >= Gmax)
@@ -833,7 +834,7 @@ int Solver::select_working_set(int &out_i, int &out_j)
 					Gmax2 = G[j];
 				if (grad_diff > 0)
 				{
-					double obj_diff; 
+					double obj_diff;
 					double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
 					if (quad_coef > 0)
 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
@@ -857,7 +858,7 @@ int Solver::select_working_set(int &out_i, int &out_j)
 					Gmax2 = -G[j];
 				if (grad_diff > 0)
 				{
-					double obj_diff; 
+					double obj_diff;
 					double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
 					if (quad_coef > 0)
 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
@@ -895,7 +896,7 @@ bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
 	{
 		if(y[i]==+1)
 			return(G[i] > Gmax2);
-		else	
+		else
 			return(G[i] > Gmax1);
 	}
 	else
@@ -911,27 +912,27 @@ void Solver::do_shrinking()
 	// find maximal violating pair first
 	for(i=0;i<active_size;i++)
 	{
-		if(y[i]==+1)	
+		if(y[i]==+1)
 		{
-			if(!is_upper_bound(i))	
+			if(!is_upper_bound(i))
 			{
 				if(-G[i] >= Gmax1)
 					Gmax1 = -G[i];
 			}
-			if(!is_lower_bound(i))	
+			if(!is_lower_bound(i))
 			{
 				if(G[i] >= Gmax2)
 					Gmax2 = G[i];
 			}
 		}
-		else	
+		else
 		{
-			if(!is_upper_bound(i))	
+			if(!is_upper_bound(i))
 			{
 				if(-G[i] >= Gmax2)
 					Gmax2 = -G[i];
 			}
-			if(!is_lower_bound(i))	
+			if(!is_lower_bound(i))
 			{
 				if(G[i] >= Gmax1)
 					Gmax1 = G[i];
@@ -939,7 +940,7 @@ void Solver::do_shrinking()
 		}
 	}
 
-	if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
+	if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
 	{
 		unshrink = true;
 		reconstruct_gradient();
@@ -1006,7 +1007,7 @@ double Solver::calculate_rho()
 //
 // additional constraint: e^T \alpha = constant
 //
-class Solver_NU : public Solver
+class Solver_NU: public Solver
 {
 public:
 	Solver_NU() {}
@@ -1078,14 +1079,14 @@ int Solver_NU::select_working_set(int &out_i, int &out_j)
 	{
 		if(y[j]==+1)
 		{
-			if (!is_lower_bound(j))	
+			if (!is_lower_bound(j))
 			{
 				double grad_diff=Gmaxp+G[j];
 				if (G[j] >= Gmaxp2)
 					Gmaxp2 = G[j];
 				if (grad_diff > 0)
 				{
-					double obj_diff; 
+					double obj_diff;
 					double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
 					if (quad_coef > 0)
 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
@@ -1109,7 +1110,7 @@ int Solver_NU::select_working_set(int &out_i, int &out_j)
 					Gmaxn2 = -G[j];
 				if (grad_diff > 0)
 				{
-					double obj_diff; 
+					double obj_diff;
 					double quad_coef = QD[in]+QD[j]-2*Q_in[j];
 					if (quad_coef > 0)
 						obj_diff = -(grad_diff*grad_diff)/quad_coef;
@@ -1144,14 +1145,14 @@ bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, doubl
 	{
 		if(y[i]==+1)
 			return(-G[i] > Gmax1);
-		else	
+		else
 			return(-G[i] > Gmax4);
 	}
 	else if(is_lower_bound(i))
 	{
 		if(y[i]==+1)
 			return(G[i] > Gmax2);
-		else	
+		else
 			return(G[i] > Gmax3);
 	}
 	else
@@ -1180,14 +1181,14 @@ void Solver_NU::do_shrinking()
 		if(!is_lower_bound(i))
 		{
 			if(y[i]==+1)
-			{	
+			{
 				if(G[i] > Gmax2) Gmax2 = G[i];
 			}
 			else	if(G[i] > Gmax3) Gmax3 = G[i];
 		}
 	}
 
-	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
+	if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
 	{
 		unshrink = true;
 		reconstruct_gradient();
@@ -1250,12 +1251,12 @@ double Solver_NU::calculate_rho()
 		r1 = sum_free1/nr_free1;
 	else
 		r1 = (ub1+lb1)/2;
-	
+
 	if(nr_free2 > 0)
 		r2 = sum_free2/nr_free2;
 	else
 		r2 = (ub2+lb2)/2;
-	
+
 	si->r = (r1+r2)/2;
 	return (r1-r2)/2;
 }
@@ -1264,7 +1265,7 @@ double Solver_NU::calculate_rho()
 // Q matrices for various formulations
 //
 class SVC_Q: public Kernel
-{ 
+{
 public:
 	SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
 	:Kernel(prob.l, prob.x, param)
@@ -1275,7 +1276,7 @@ class SVC_Q: public Kernel
 		for(int i=0;i<prob.l;i++)
 			QD[i] = (this->*kernel_function)(i,i);
 	}
-	
+
 	Qfloat *get_Q(int i, int len) const
 	{
 		Qfloat *data;
@@ -1324,7 +1325,7 @@ class ONE_CLASS_Q: public Kernel
 		for(int i=0;i<prob.l;i++)
 			QD[i] = (this->*kernel_function)(i,i);
 	}
-	
+
 	Qfloat *get_Q(int i, int len) const
 	{
 		Qfloat *data;
@@ -1360,7 +1361,7 @@ class ONE_CLASS_Q: public Kernel
 };
 
 class SVR_Q: public Kernel
-{ 
+{
 public:
 	SVR_Q(const svm_problem& prob, const svm_parameter& param)
 	:Kernel(prob.l, prob.x, param)
@@ -1390,7 +1391,7 @@ class SVR_Q: public Kernel
 		swap(index[i],index[j]);
 		swap(QD[i],QD[j]);
 	}
-	
+
 	Qfloat *get_Q(int i, int len) const
 	{
 		Qfloat *data;
@@ -1452,7 +1453,7 @@ class HINT_SVC_Q : public Kernel
 		cache = new Cache(l, (long int)(param.cache_size*(1<<20)));
 		isHint = new schar[l+nHint];
 		Y =  new schar[l+nHint] ;
-		
+
 		this->C = param.C ;
 		W = prob.W ;
 		loss_type = param.degree ;
@@ -1620,7 +1621,7 @@ static void solve_nu_svc(
 			y[i] = -1;
 		C[i] = prob->W[i];
 	}
-	
+
 	double nu_l = 0;
 	for(i=0;i<l;i++) nu_l += nu*C[i];
 	double sum_pos = nu_l/2;
@@ -1813,7 +1814,7 @@ static void solve_hint_svc(
 	double *linear_term = new double[l+nHint];
 	double *C2 = new double[l+nHint] ;
 	schar *y2 = new schar[l+nHint];
-	si->upper_bound = Malloc(double,prob->l+nHint); 
+	si->upper_bound = Malloc(double,prob->l+nHint);
 	for(k = 0, j = 0; k < l; ++k)
 	{
 		if(y1[k] != 0)
@@ -1822,7 +1823,7 @@ static void solve_hint_svc(
 			linear_term[k] = -1;
 			y2[k] = y1[k];
 			C2[k] = prob->W[k]*param->C;
-			
+
 		}
 		/*Modified by Macaca 20120120*/
 		else
@@ -1873,7 +1874,7 @@ static void solve_hint_svc(
 	for(int i = 0; i < l; ++i)
 		w[0] += alpha[i] * prob->x[i][0].value, w[1] += alpha[i] * prob->x[i][1].value;
 	fprintf(stderr, "w = (%lg, %lg)\n", w[0], w[1]);*/
-	
+
 	info("nu = %f\n", sum_alpha/(param->C*l));
 	delete[] alpha2;
 	delete[] linear_term;
@@ -1889,7 +1890,7 @@ static void solve_hint_svc(
 struct decision_function
 {
 	double *alpha;
-	double rho;	
+	double rho;
 };
 
 static decision_function svm_train_one(
@@ -1901,23 +1902,23 @@ static decision_function svm_train_one(
 	switch(param->svm_type)
 	{
 		case C_SVC:
-			si.upper_bound = Malloc(double,prob->l); 
+			si.upper_bound = Malloc(double,prob->l);
 			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
 			break;
 		case NU_SVC:
-			si.upper_bound = Malloc(double,prob->l); 
+			si.upper_bound = Malloc(double,prob->l);
 			solve_nu_svc(prob,param,alpha,&si);
 			break;
 		case ONE_CLASS:
-			si.upper_bound = Malloc(double,prob->l); 
+			si.upper_bound = Malloc(double,prob->l);
 			solve_one_class(prob,param,alpha,&si);
 			break;
 		case EPSILON_SVR:
-			si.upper_bound = Malloc(double,2*prob->l); 
+			si.upper_bound = Malloc(double,2*prob->l);
 			solve_epsilon_svr(prob,param,alpha,&si);
 			break;
 		case NU_SVR:
-			si.upper_bound = Malloc(double,2*prob->l); 
+			si.upper_bound = Malloc(double,2*prob->l);
 			solve_nu_svr(prob,param,alpha,&si);
 			break;
 		/*added by Macaca referenced from ferng 20111222*/
@@ -1965,7 +1966,7 @@ static decision_function svm_train_one(
 
 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
 static void sigmoid_train(
-	int l, const double *dec_values, const double *labels, 
+	int l, const double *dec_values, const double *labels,
 	double& A, double& B)
 {
 	double prior1=0, prior0 = 0;
@@ -1974,7 +1975,7 @@ static void sigmoid_train(
 	for (i=0;i<l;i++)
 		if (labels[i] > 0) prior1+=1;
 		else prior0+=1;
-	
+
 	int max_iter=100;	// Maximal number of iterations
 	double min_step=1e-10;	// Minimal step taken in line search
 	double sigma=1e-12;	// For numerically strict PD of Hessian
@@ -1984,8 +1985,8 @@ static void sigmoid_train(
 	double *t=Malloc(double,l);
 	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
 	double newA,newB,newf,d1,d2;
-	int iter; 
-	
+	int iter;
+
 	// Initial Point and Initial Fun Value
 	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
 	double fval = 0.0;
@@ -2095,7 +2096,7 @@ static void multiclass_probability(int k, double **r, double *p)
 	double **Q=Malloc(double *,k);
 	double *Qp=Malloc(double,k);
 	double pQp, eps=0.005/k;
-	
+
 	for (t=0;t<k;t++)
 	{
 		p[t]=1.0/k;  // Valid if k = 1
@@ -2131,7 +2132,7 @@ static void multiclass_probability(int k, double **r, double *p)
 				max_error=error;
 		}
 		if (max_error<eps) break;
-		
+
 		for (t=0;t<k;t++)
 		{
 			double diff=(-Qp[t]+pQp)/Q[t][t];
@@ -2179,7 +2180,7 @@ static void svm_binary_svc_probability(
 		subprob.x = Malloc(struct svm_node*,subprob.l);
 		subprob.y = Malloc(double,subprob.l);
 		subprob.W = Malloc(double,subprob.l);
-			
+
 		k=0;
 		for(j=0;j<begin;j++)
 		{
@@ -2226,23 +2227,23 @@ static void svm_binary_svc_probability(
 			struct svm_model *submodel = svm_train(&subprob,&subparam);
 			for(j=begin;j<end;j++)
 			{
-				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]])); 
+				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
 				// ensure +1 -1 order; reason not using CV subroutine
 				dec_values[perm[j]] *= submodel->label[0];
-			}		
+			}
 			svm_free_and_destroy_model(&submodel);
 			svm_destroy_param(&subparam);
 		}
 		free(subprob.x);
 		free(subprob.y);
 		free(subprob.W);
-	}		
+	}
 	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
 	free(dec_values);
 	free(perm);
 }
 
-// Return parameter of a Laplace distribution 
+// Return parameter of a Laplace distribution
 static double svm_svr_probability(
 	const svm_problem *prob, const svm_parameter *param)
 {
@@ -2258,15 +2259,15 @@ static double svm_svr_probability(
 	{
 		ymv[i]=prob->y[i]-ymv[i];
 		mae += fabs(ymv[i]);
-	}		
+	}
 	mae /= prob->l;
 	double std=sqrt(2*mae*mae);
 	int count=0;
 	mae=0;
 	for(i=0;i<prob->l;i++)
-		if (fabs(ymv[i]) > 5*std) 
+		if (fabs(ymv[i]) > 5*std)
 			count=count+1;
-		else 
+		else
 			mae+=fabs(ymv[i]);
 	mae /= (prob->l-count);
 	info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
@@ -2284,7 +2285,7 @@ static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **
 	int nr_class = 0;
 	int *label = Malloc(int,max_nr_class);
 	int *count = Malloc(int,max_nr_class);
-	int *data_label = Malloc(int,l);	
+	int *data_label = Malloc(int,l);
 	int i;
 
 	for(i=0;i<l;i++)
@@ -2314,6 +2315,24 @@ static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **
 		}
 	}
 
+	//
+	// Labels are ordered by their first occurrence in the training set.
+	// However, for two-class sets with -1/+1 labels in which -1 appears first,
+	// we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
+	//
+	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
+	{
+		swap(label[0],label[1]);
+		swap(count[0],count[1]);
+		for(i=0;i<l;i++)
+		{
+			if(data_label[i] == 0)
+				data_label[i] = 1;
+			else
+				data_label[i] = 0;
+		}
+	}
+
 	int *start = Malloc(int,nr_class);
 	start[0] = 0;
 	for(i=1;i<nr_class;i++)
@@ -2337,7 +2356,7 @@ static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **
 //
 // Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
 //
-static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob) 
+static void remove_zero_weight(svm_problem *newprob, const svm_problem *prob)
 {
 	int i;
 	int l = 0;
@@ -2368,7 +2387,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 	svm_problem newprob;
 	remove_zero_weight(&newprob, prob);
 	prob = &newprob;
-	
+
 	svm_model *model = Malloc(svm_model,1);
 	model->param = *param;
 	model->free_sv = 0;	// XXX
@@ -2384,7 +2403,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		model->probA = NULL; model->probB = NULL;
 		model->sv_coef = Malloc(double *,1);
 
-		if(param->probability && 
+		if(param->probability &&
 		   (param->svm_type == EPSILON_SVR ||
 		    param->svm_type == NU_SVR))
 		{
@@ -2403,14 +2422,16 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		model->l = nSV;
 		model->SV = Malloc(svm_node *,nSV);
 		model->sv_coef[0] = Malloc(double,nSV);
+		model->sv_indices = Malloc(int,nSV);
 		int j = 0;
 		for(i=0;i<prob->l;i++)
 			if(fabs(f.alpha[i]) > 0)
 			{
 				model->SV[j] = prob->x[i];
 				model->sv_coef[0][j] = f.alpha[i];
+				model->sv_indices[j] = i+1;
 				++j;
-			}		
+			}
 
 		free(f.alpha);
 	}
@@ -2549,9 +2570,9 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 
 		// group training data of the same class
 		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
-		if(nr_class == 1) 
+		if(nr_class == 1)
 			info("WARNING: training data in only one class. See README for details.\n");
-		
+
 		svm_node **x = Malloc(svm_node *,l);
 		double *W;
 		W = Malloc(double,l);
@@ -2569,7 +2590,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		for(i=0;i<nr_class;i++)
 			weighted_C[i] = param->C;
 		for(i=0;i<param->nr_weight;i++)
-		{	
+		{
 			int j;
 			for(j=0;j<nr_class;j++)
 				if(param->weight_label[i] == label[j])
@@ -2581,7 +2602,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		}
 
 		// train k*(k-1)/2 models
-		
+
 		bool *nonzero = Malloc(bool,l);
 		for(i=0;i<l;i++)
 			nonzero[i] = false;
@@ -2638,11 +2659,11 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		// build output
 
 		model->nr_class = nr_class;
-		
+
 		model->label = Malloc(int,nr_class);
 		for(i=0;i<nr_class;i++)
 			model->label[i] = label[i];
-		
+
 		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
 		for(i=0;i<nr_class*(nr_class-1)/2;i++)
 			model->rho[i] = f[i].rho;
@@ -2671,21 +2692,26 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 			int nSV = 0;
 			for(int j=0;j<count[i];j++)
 				if(nonzero[start[i]+j])
-				{	
+				{
 					++nSV;
 					++total_sv;
 				}
 			model->nSV[i] = nSV;
 			nz_count[i] = nSV;
 		}
-		
+
 		info("Total nSV = %d\n",total_sv);
 
 		model->l = total_sv;
 		model->SV = Malloc(svm_node *,total_sv);
+		model->sv_indices = Malloc(int,total_sv);
 		p = 0;
 		for(i=0;i<l;i++)
-			if(nonzero[i]) model->SV[p++] = x[i];
+			if(nonzero[i])
+			{
+				model->SV[p] = x[i];
+				model->sv_indices[p++] = perm[i] + 1;
+			}
 
 		int *nz_start = Malloc(int,nr_class);
 		nz_start[0] = 0;
@@ -2708,7 +2734,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 				int sj = start[j];
 				int ci = count[i];
 				int cj = count[j];
-				
+
 				int q = nz_start[i];
 				int k;
 				for(k=0;k<ci;k++)
@@ -2720,7 +2746,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 						model->sv_coef[i][q++] = f[p].alpha[ci+k];
 				++p;
 			}
-		
+
 		free(label);
 		free(probA);
 		free(probB);
@@ -2747,11 +2773,16 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
 {
 	int i;
-	int *fold_start = Malloc(int,nr_fold+1);
+	int *fold_start;
 	int l = prob->l;
 	int *perm = Malloc(int,l);
 	int nr_class;
-
+	if (nr_fold > l)
+	{
+		nr_fold = l;
+		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
+	}
+	fold_start = Malloc(int,nr_fold+1);
 	// stratified cv may not give leave-one-out rate
 	// Each class to l folds -> some folds may have zero elements
 	if((param->svm_type == C_SVC ||
@@ -2768,7 +2799,7 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		int *index = Malloc(int,l);
 		for(i=0;i<l;i++)
 			index[i]=perm[i];
-		for (c=0; c<nr_class; c++) 
+		for (c=0; c<nr_class; c++)
 			for(i=0;i<count[c];i++)
 			{
 				int j = i+rand()%(count[c]-i);
@@ -2797,9 +2828,9 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		fold_start[0]=0;
 		for (i=1;i<=nr_fold;i++)
 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
-		free(start);	
+		free(start);
 		free(label);
-		free(count);	
+		free(count);
 		free(index);
 		free(fold_count);
 	}
@@ -2835,7 +2866,7 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		int *index = Malloc(int,l);
 		for(i=0;i<l;i++)
 			index[i]=perm[i];
-		for (c=0; c<nr_class; c++) 
+		for (c=0; c<nr_class; c++)
 			for(i=0;i<count[c];i++)
 			{
 				int j = i+rand()%(count[c]-i);
@@ -2864,9 +2895,9 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		fold_start[0]=0;
 		for (i=1;i<=nr_fold;i++)
 			fold_start[i] = fold_start[i-1]+fold_count[i-1];
-		free(start);	
+		free(start);
 		free(label);
-		free(count);	
+		free(count);
 		free(index);
 		free(fold_count);
 	}
@@ -2893,7 +2924,7 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		subprob.l = l-(end-begin);
 		subprob.x = Malloc(struct svm_node*,subprob.l);
 		subprob.y = Malloc(double,subprob.l);
-			
+
 		subprob.W = Malloc(double,subprob.l);
 		k=0;
 		for(j=0;j<begin;j++)
@@ -2911,16 +2942,15 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 			++k;
 		}
 		struct svm_model *submodel = svm_train(&subprob,param);
-		if(param->probability && 
+		if(param->probability &&
 	/*modified by Macaca referenced form ferng 20111222*/
-	//	   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
 		   (param->svm_type == C_SVC || param->svm_type == NU_SVC || param->svm_type == HINT_SVC))
 	/*modified by Macaca referenced form ferng 20111222*/
 		{
 			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
 			for(j=begin;j<end;j++)
 				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
-			free(prob_estimates);			
+			free(prob_estimates);
 		}
 		else
 			for(j=begin;j<end;j++)
@@ -2929,9 +2959,9 @@ void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, i
 		free(subprob.x);
 		free(subprob.y);
 		free(subprob.W);
-	}		
+	}
 	free(fold_start);
-	free(perm);	
+	free(perm);
 }
 
 
@@ -2952,6 +2982,18 @@ void svm_get_labels(const svm_model *model, int* label)
 			label[i] = model->label[i];
 }
 
+void svm_get_sv_indices(const svm_model *model, int* indices)
+{
+	if (model->sv_indices != NULL)
+		for(int i=0;i<model->l;i++)
+			indices[i] = model->sv_indices[i];
+}
+
+int svm_get_nr_sv(const svm_model *model)
+{
+	return model->l;
+}
+
 double svm_get_svr_probability(const svm_model *model)
 {
 	if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
@@ -2988,7 +3030,7 @@ double svm_predict_values(const svm_model *model, const svm_node *x, double* dec
 	{
 		int nr_class = model->nr_class;
 		int l = model->l;
-		
+
 		double *kvalue = Malloc(double,l);
 		for(i=0;i<l;i++)
 			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
@@ -3011,7 +3053,7 @@ double svm_predict_values(const svm_model *model, const svm_node *x, double* dec
 				int sj = start[j];
 				int ci = model->nSV[i];
 				int cj = model->nSV[j];
-				
+
 				int k;
 				double *coef1 = model->sv_coef[j-1];
 				double *coef2 = model->sv_coef[i];
@@ -3058,7 +3100,7 @@ double svm_predict(const svm_model *model, const svm_node *x)
 		return model->label[(res>0)?0:1];
 	}
 	/*added by Macaca referenced from ferng 20111222*/
-	else 
+	else
 		dec_values = Malloc(double, nr_class*(nr_class-1)/2);
 	double pred_result = svm_predict_values(model, x, dec_values);
 	free(dec_values);
@@ -3069,7 +3111,7 @@ double svm_predict_probability(
 	const svm_model *model, const svm_node *x, double *prob_estimates)
 {
 	/*modeified by Macaca 20120208*/
-	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC ||  
+	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC ||
 	     model->param.svm_type == HINT_SVC ) &&
 	    model->probA!=NULL && model->probB!=NULL)
 	{
@@ -3099,17 +3141,16 @@ double svm_predict_probability(
 		for(i=0;i<nr_class;i++)
 			free(pairwise_prob[i]);
 		free(dec_values);
-		free(pairwise_prob);	     
+		free(pairwise_prob);
 		return model->label[prob_max_idx];
 	}
-	else 
+	else
 		return svm_predict(model, x);
 }
 
 static const char *svm_type_table[] =
 {
 /*editd by Macaca referenced from ferng 20111208*/
-//	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
 	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr","hint_svc",NULL
 /*editd by Macaca referenced from ferng 20111208*/
 };
@@ -3124,6 +3165,9 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
 	FILE *fp = fopen(model_file_name,"w");
 	if(fp==NULL) return -1;
 
+	char *old_locale = strdup(setlocale(LC_ALL, NULL));
+	setlocale(LC_ALL, "C");
+
 	const svm_parameter& param = model->param;
 
 	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
@@ -3142,14 +3186,14 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
 	int l = model->l;
 	fprintf(fp, "nr_class %d\n", nr_class);
 	fprintf(fp, "total_sv %d\n",l);
-	
+
 	{
 		fprintf(fp, "rho");
 		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
 			fprintf(fp," %g",model->rho[i]);
 		fprintf(fp, "\n");
 	}
-	
+
 	if(model->label)
 	{
 		fprintf(fp, "label");
@@ -3202,6 +3246,10 @@ int svm_save_model(const char *model_file_name, const svm_model *model)
 			}
 		fprintf(fp, "\n");
 	}
+
+	setlocale(LC_ALL, old_locale);
+	free(old_locale);
+
 	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
 	else return 0;
 }
@@ -3227,29 +3275,25 @@ static char* readline(FILE *input)
 	return line;
 }
 
-svm_model *svm_load_model(const char *model_file_name)
+//
+// FSCANF makes the enclosing function return false unless fscanf reads one item.
+// Its do-while block avoids the ambiguity when
+// if (...)
+//    FSCANF();
+// is used
+//
+#define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
+bool read_model_header(FILE *fp, svm_model* model)
 {
-	FILE *fp = fopen(model_file_name,"rb");
-	if(fp==NULL) return NULL;
-	
-	// read parameters
-
-	svm_model *model = Malloc(svm_model,1);
 	svm_parameter& param = model->param;
-	model->rho = NULL;
-	model->probA = NULL;
-	model->probB = NULL;
-	model->label = NULL;
-	model->nSV = NULL;
-
 	char cmd[81];
 	while(1)
 	{
-		fscanf(fp,"%80s",cmd);
+		FSCANF(fp,"%80s",cmd);
 
 		if(strcmp(cmd,"svm_type")==0)
 		{
-			fscanf(fp,"%80s",cmd);
+			FSCANF(fp,"%80s",cmd);
 			int i;
 			for(i=0;svm_type_table[i];i++)
 			{
@@ -3262,16 +3306,12 @@ svm_model *svm_load_model(const char *model_file_name)
 			if(svm_type_table[i] == NULL)
 			{
 				fprintf(stderr,"unknown svm type.\n");
-				free(model->rho);
-				free(model->label);
-				free(model->nSV);
-				free(model);
-				return NULL;
+				return false;
 			}
 		}
 		else if(strcmp(cmd,"kernel_type")==0)
-		{		
-			fscanf(fp,"%80s",cmd);
+		{
+			FSCANF(fp,"%80s",cmd);
 			int i;
 			for(i=0;kernel_type_table[i];i++)
 			{
@@ -3284,78 +3324,105 @@ svm_model *svm_load_model(const char *model_file_name)
 			if(kernel_type_table[i] == NULL)
 			{
 				fprintf(stderr,"unknown kernel function.\n");
-				free(model->rho);
-				free(model->label);
-				free(model->nSV);
-				free(model);
-				return NULL;
+				return false;
 			}
 		}
 		else if(strcmp(cmd,"degree")==0)
-			fscanf(fp,"%d",&param.degree);
+			FSCANF(fp,"%d",&param.degree);
 		else if(strcmp(cmd,"gamma")==0)
-			fscanf(fp,"%lf",&param.gamma);
+			FSCANF(fp,"%lf",&param.gamma);
 		else if(strcmp(cmd,"coef0")==0)
-			fscanf(fp,"%lf",&param.coef0);
+			FSCANF(fp,"%lf",&param.coef0);
 		else if(strcmp(cmd,"nr_class")==0)
-			fscanf(fp,"%d",&model->nr_class);
+			FSCANF(fp,"%d",&model->nr_class);
 		else if(strcmp(cmd,"total_sv")==0)
-			fscanf(fp,"%d",&model->l);
+			FSCANF(fp,"%d",&model->l);
 		else if(strcmp(cmd,"rho")==0)
 		{
 			int n = model->nr_class * (model->nr_class-1)/2;
 			model->rho = Malloc(double,n);
 			for(int i=0;i<n;i++)
-				fscanf(fp,"%lf",&model->rho[i]);
+				FSCANF(fp,"%lf",&model->rho[i]);
 		}
 		else if(strcmp(cmd,"label")==0)
 		{
 			int n = model->nr_class;
 			model->label = Malloc(int,n);
 			for(int i=0;i<n;i++)
-				fscanf(fp,"%d",&model->label[i]);
+				FSCANF(fp,"%d",&model->label[i]);
 		}
 		else if(strcmp(cmd,"probA")==0)
 		{
 			int n = model->nr_class * (model->nr_class-1)/2;
 			model->probA = Malloc(double,n);
 			for(int i=0;i<n;i++)
-				fscanf(fp,"%lf",&model->probA[i]);
+				FSCANF(fp,"%lf",&model->probA[i]);
 		}
 		else if(strcmp(cmd,"probB")==0)
 		{
 			int n = model->nr_class * (model->nr_class-1)/2;
 			model->probB = Malloc(double,n);
 			for(int i=0;i<n;i++)
-				fscanf(fp,"%lf",&model->probB[i]);
+				FSCANF(fp,"%lf",&model->probB[i]);
 		}
 		else if(strcmp(cmd,"nr_sv")==0)
 		{
 			int n = model->nr_class;
 			model->nSV = Malloc(int,n);
 			for(int i=0;i<n;i++)
-				fscanf(fp,"%d",&model->nSV[i]);
+				FSCANF(fp,"%d",&model->nSV[i]);
 		}
 		else if(strcmp(cmd,"SV")==0)
 		{
 			while(1)
 			{
 				int c = getc(fp);
-				if(c==EOF || c=='\n') break;	
+				if(c==EOF || c=='\n') break;
 			}
 			break;
 		}
 		else
 		{
 			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
-			free(model->rho);
-			free(model->label);
-			free(model->nSV);
-			free(model);
-			return NULL;
+			return false;
 		}
 	}
 
+	return true;
+
+}
+
+svm_model *svm_load_model(const char *model_file_name)
+{
+	FILE *fp = fopen(model_file_name,"rb");
+	if(fp==NULL) return NULL;
+
+	char *old_locale = strdup(setlocale(LC_ALL, NULL));
+	setlocale(LC_ALL, "C");
+
+	// read parameters
+
+	svm_model *model = Malloc(svm_model,1);
+	model->rho = NULL;
+	model->probA = NULL;
+	model->probB = NULL;
+	model->sv_indices = NULL;
+	model->label = NULL;
+	model->nSV = NULL;
+
+	// read header
+	if (!read_model_header(fp, model))
+	{
+		fprintf(stderr, "ERROR: fscanf failed to read model\n");
+		setlocale(LC_ALL, old_locale);
+		free(old_locale);
+		free(model->rho);
+		free(model->label);
+		free(model->nSV);
+		free(model);
+		return NULL;
+	}
+
 	// read sv_coef and SV
 
 	int elements = 0;
@@ -3420,6 +3487,9 @@ svm_model *svm_load_model(const char *model_file_name)
 	}
 	free(line);
 
+	setlocale(LC_ALL, old_locale);
+	free(old_locale);
+
 	if (ferror(fp) != 0 || fclose(fp) != 0)
 		return NULL;
 
@@ -3455,6 +3525,9 @@ void svm_free_model_content(svm_model* model_ptr)
 	free(model_ptr->probB);
 	model_ptr->probB= NULL;
 
+	free(model_ptr->sv_indices);
+	model_ptr->sv_indices = NULL;
+
 	free(model_ptr->nSV);
 	model_ptr->nSV = NULL;
 }
@@ -3487,9 +3560,9 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
 	   svm_type != HINT_SVC && 	// added by Macaca referneced from fenrg 20111222
 	   svm_type != NU_SVR)
 		return "unknown svm type";
-	
+
 	// kernel_type, degree
-	
+
 	int kernel_type = param->kernel_type;
 	if(kernel_type != LINEAR &&
 	   kernel_type != POLY &&
@@ -3542,7 +3615,7 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
 
 
 	// check whether nu-svc is feasible
-	
+
 	if(svm_type == NU_SVC)
 	{
 		int l = prob->l;
@@ -3575,7 +3648,7 @@ const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *pa
 				++nr_class;
 			}
 		}
-	
+
 		for(i=0;i<nr_class;i++)
 		{
 			double n1 = count[i];

From 88d443df90376d142ee6e5f455a77a98101fd6f1 Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 14:11:49 +0800
Subject: [PATCH 2/7] update svm.h and Makefile to 3.20

---
 Makefile | 8 +++++++-
 svm.h    | 6 ++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 23df9620..375265a2 100644
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,17 @@
 CXX ?= g++
 CFLAGS = -Wall -Wconversion -O3 -fPIC
 SHVER = 2
+OS = $(shell uname)
 
 all: svm-train svm-predict svm-scale
 
 lib: svm.o
-	$(CXX) -shared -dynamiclib -Wl,-soname,libhintsvm.so.$(SHVER) svm.o -o libhintsvm.so.$(SHVER)
+	if [ "$(OS)" = "Darwin" ]; then \
+		SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,libhintsvm.so.$(SHVER)"; \
+	else \
+		SHARED_LIB_FLAG="-shared -Wl,-soname,libhintsvm.so.$(SHVER)"; \
+	fi; \
+	$(CXX) $${SHARED_LIB_FLAG} svm.o -o libhintsvm.so.$(SHVER)
 
 svm-predict: svm-predict.c svm.o
 	$(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
diff --git a/svm.h b/svm.h
index 67ef82c4..fbca7049 100644
--- a/svm.h
+++ b/svm.h
@@ -1,7 +1,7 @@
 #ifndef _LIBSVM_H
 #define _LIBSVM_H
 
-#define LIBSVM_VERSION 311
+#define LIBSVM_VERSION 320
 
 #ifdef __cplusplus
 extern "C" {
@@ -24,7 +24,6 @@ struct svm_problem
 };
 
 /* edited by Macaca referenced from Macaca 20111222 */
-//enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
 enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR, HINT_SVC };	/* svm_type */
 /* edited by Macaca referenced from Macaca 20111222 */
 enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
@@ -63,6 +62,7 @@ struct svm_model
 	double *rho;		/* constants in decision functions (rho[k*(k-1)/2]) */
 	double *probA;		/* pariwise probability information */
 	double *probB;
+	int *sv_indices;        /* sv_indices[0,...,nSV-1] are values in [1,...,num_training_data] to indicate SVs in the training set */
 
 	/* for classification only */
 
@@ -83,6 +83,8 @@ struct svm_model *svm_load_model(const char *model_file_name);
 int svm_get_svm_type(const struct svm_model *model);
 int svm_get_nr_class(const struct svm_model *model);
 void svm_get_labels(const struct svm_model *model, int *label);
+void svm_get_sv_indices(const struct svm_model *model, int *sv_indices);
+int svm_get_nr_sv(const struct svm_model *model);
 double svm_get_svr_probability(const struct svm_model *model);
 
 double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);

From 2d8d190806e6bc7e078471c810dc2d6b8eb3213b Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 14:19:34 +0800
Subject: [PATCH 3/7] update svm.def, svm-scale.c, svm-train.c, svm-predict.c

---
 svm-predict.c | 34 ++++++++++++++---------
 svm-scale.c   | 74 ++++++++++++++++++++++++++++++++++++++++-----------
 svm-train.c   | 13 +++++----
 svm.def       |  2 ++
 4 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/svm-predict.c b/svm-predict.c
index edf039cf..859c9fff 100644
--- a/svm-predict.c
+++ b/svm-predict.c
@@ -5,6 +5,10 @@
 #include <errno.h>
 #include "svm.h"
 
+int print_null(const char *s,...) {return 0;}
+
+static int (*info)(const char *fmt,...) = &printf;
+
 struct svm_node *x;
 int max_nr_attr = 64;
 
@@ -17,7 +21,7 @@ static int max_line_len;
 static char* readline(FILE *input)
 {
 	int len;
-	
+
 	if(fgets(line,max_line_len,input) == NULL)
 		return NULL;
 
@@ -53,7 +57,7 @@ void predict(FILE *input, FILE *output)
 	if(predict_probability)
 	{
 		if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
-			printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
+			info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model));
 		else
 		{
 			int *labels=(int *) malloc(nr_class*sizeof(int));
@@ -139,15 +143,15 @@ void predict(FILE *input, FILE *output)
 	}
 	if (svm_type==NU_SVR || svm_type==EPSILON_SVR)
 	{
-		printf("Mean squared error = %g (regression)\n",error/total);
-		printf("Squared correlation coefficient = %g (regression)\n",
-		       ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
-		       ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
-		       );
+		info("Mean squared error = %g (regression)\n",error/total);
+		info("Squared correlation coefficient = %g (regression)\n",
+			((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/
+			((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))
+			);
 	}
 	else
-		printf("Accuracy = %g%% (%d/%d) (classification)\n",
-		       (double)correct/total*100,correct,total);
+		info("Accuracy = %g%% (%d/%d) (classification)\n",
+			(double)correct/total*100,correct,total);
 	if(predict_probability)
 		free(prob_estimates);
 }
@@ -158,6 +162,7 @@ void exit_with_help()
 	"Usage: svm-predict [options] test_file model_file output_file\n"
 	"options:\n"
 	"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0); for one-class SVM only 0 is supported\n"
+	"-q : quiet mode (no outputs)\n"
 	);
 	exit(1);
 }
@@ -166,7 +171,6 @@ int main(int argc, char **argv)
 {
 	FILE *input, *output;
 	int i;
-
 	// parse options
 	for(i=1;i<argc;i++)
 	{
@@ -177,14 +181,19 @@ int main(int argc, char **argv)
 			case 'b':
 				predict_probability = atoi(argv[i]);
 				break;
+			case 'q':
+				info = &print_null;
+				i--;
+				break;
 			default:
 				fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]);
 				exit_with_help();
 		}
 	}
+
 	if(i>=argc-2)
 		exit_with_help();
-	
+
 	input = fopen(argv[i],"r");
 	if(input == NULL)
 	{
@@ -217,8 +226,9 @@ int main(int argc, char **argv)
 	else
 	{
 		if(svm_check_probability_model(model)!=0)
-			printf("Model supports probability estimates, but disabled in prediction.\n");
+			info("Model supports probability estimates, but disabled in prediction.\n");
 	}
+
 	predict(input,output);
 	svm_free_and_destroy_model(&model);
 	free(x);
diff --git a/svm-scale.c b/svm-scale.c
index fbfc8700..73cda126 100644
--- a/svm-scale.c
+++ b/svm-scale.c
@@ -27,6 +27,7 @@ double *feature_min;
 double y_max = -DBL_MAX;
 double y_min = DBL_MAX;
 int max_index;
+int min_index;
 long int num_nonzeros = 0;
 long int new_num_nonzeros = 0;
 
@@ -36,6 +37,7 @@ long int new_num_nonzeros = 0;
 void output_target(double value);
 void output(int index, double value);
 char* readline(FILE *input);
+int clean_up(FILE *fp_restore, FILE *fp, const char *msg);
 
 int main(int argc,char **argv)
 {
@@ -104,6 +106,7 @@ int main(int argc,char **argv)
 	/* assumption: min index of attributes is 1 */
 	/* pass 1: find out max index of attributes */
 	max_index = 0;
+	min_index = 1;
 
 	if(restore_filename)
 	{
@@ -140,15 +143,21 @@ int main(int argc,char **argv)
 		while(sscanf(p,"%d:%*f",&index)==1)
 		{
 			max_index = max(max_index, index);
+			min_index = min(min_index, index);
 			SKIP_ELEMENT
 			num_nonzeros++;
-		}		
+		}
 	}
+
+	if(min_index < 1)
+		fprintf(stderr,
+			"WARNING: minimal feature index is %d, but indices should start from 1\n", min_index);
+
 	rewind(fp);
-	
+
 	feature_max = (double *)malloc((max_index+1)* sizeof(double));
 	feature_min = (double *)malloc((max_index+1)* sizeof(double));
-	
+
 	if(feature_max == NULL || feature_min == NULL)
 	{
 		fprintf(stderr,"can't allocate enough memory\n");
@@ -169,7 +178,8 @@ int main(int argc,char **argv)
 		double target;
 		double value;
 
-		sscanf(p,"%lf",&target);
+		if (sscanf(p,"%lf",&target) != 1)
+			return clean_up(fp_restore, fp, "ERROR: failed to read labels\n");
 		y_max = max(y_max,target);
 		y_min = min(y_min,target);
 		
@@ -206,30 +216,45 @@ int main(int argc,char **argv)
 		/* fp_restore rewinded in finding max_index */
 		int idx, c;
 		double fmin, fmax;
+		int next_index = 1;
 		
 		if((c = fgetc(fp_restore)) == 'y')
 		{
-			fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper);
-			fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max);
+			if(fscanf(fp_restore, "%lf %lf\n", &y_lower, &y_upper) != 2 ||
+			   fscanf(fp_restore, "%lf %lf\n", &y_min, &y_max) != 2)
+				return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n");
 			y_scaling = 1;
 		}
 		else
 			ungetc(c, fp_restore);
 
-		if (fgetc(fp_restore) == 'x') {
-			fscanf(fp_restore, "%lf %lf\n", &lower, &upper);
+		if (fgetc(fp_restore) == 'x') 
+		{
+			if(fscanf(fp_restore, "%lf %lf\n", &lower, &upper) != 2)
+				return clean_up(fp_restore, fp, "ERROR: failed to read scaling parameters\n");
 			while(fscanf(fp_restore,"%d %lf %lf\n",&idx,&fmin,&fmax)==3)
 			{
-				if(idx<=max_index)
-				{
-					feature_min[idx] = fmin;
-					feature_max[idx] = fmax;
-				}
+				for(i = next_index;i<idx;i++)
+					if(feature_min[i] != feature_max[i])
+						fprintf(stderr,
+							"WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
+							i, argv[argc-1], restore_filename);
+
+				feature_min[idx] = fmin;
+				feature_max[idx] = fmax;
+
+				next_index = idx + 1;
 			}
+			
+			for(i=next_index;i<=max_index;i++)
+				if(feature_min[i] != feature_max[i])
+					fprintf(stderr,
+						"WARNING: feature index %d appeared in file %s was not seen in the scaling factor file %s.\n",
+						i, argv[argc-1], restore_filename);
 		}
 		fclose(fp_restore);
 	}
-	
+
 	if(save_filename)
 	{
 		FILE *fp_save = fopen(save_filename,"w");
@@ -251,6 +276,11 @@ int main(int argc,char **argv)
 			if(feature_min[i]!=feature_max[i])
 				fprintf(fp_save,"%d %.16g %.16g\n",i,feature_min[i],feature_max[i]);
 		}
+
+		if(min_index < 1)
+			fprintf(stderr,
+				"WARNING: scaling factors with indices smaller than 1 are not stored to the file %s.\n", save_filename);
+
 		fclose(fp_save);
 	}
 	
@@ -262,7 +292,8 @@ int main(int argc,char **argv)
 		double target;
 		double value;
 		
-		sscanf(p,"%lf",&target);
+		if (sscanf(p,"%lf",&target) != 1)
+			return clean_up(NULL, fp, "ERROR: failed to read labels\n");
 		output_target(target);
 
 		SKIP_TARGET
@@ -351,3 +382,16 @@ void output(int index, double value)
 		new_num_nonzeros++;
 	}
 }
+
+int clean_up(FILE *fp_restore, FILE *fp, const char* msg)
+{
+	fprintf(stderr,	"%s", msg);
+	free(line);
+	free(feature_max);
+	free(feature_min);
+	fclose(fp);
+	if (fp_restore)
+		fclose(fp_restore);
+	return -1;
+}
+
diff --git a/svm-train.c b/svm-train.c
index be8700b8..16ef2702 100644
--- a/svm-train.c
+++ b/svm-train.c
@@ -14,11 +14,11 @@ void exit_with_help()
 	"Usage: svm-train [options] training_set_file [model_file]\n"
 	"options:\n"
 	"-s svm_type : set type of SVM (default 0)\n"
-	"	0 -- C-SVC\n"
-	"	1 -- nu-SVC\n"
+	"	0 -- C-SVC		(multi-class classification)\n"
+	"	1 -- nu-SVC		(multi-class classification)\n"
 	"	2 -- one-class SVM\n"
-	"	3 -- epsilon-SVR\n"
-	"	4 -- nu-SVR\n"
+	"	3 -- epsilon-SVR	(regression)\n"
+	"	4 -- nu-SVR		(regression)\n"
 	/*added by Macaca 20111222*/
 	"	5 -- hint-SVC\n"
 	/*added by Macaca 20111222*/
@@ -195,8 +195,6 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 	param.nr_weight = 0;
 	param.weight_label = NULL;
 	param.weight = NULL;
-	/*Modified by Macaca 20120120*/
-	/*Modified by Macaca 20120120*/
 	cross_validation = 0;
 
 	// parse options
@@ -298,7 +296,8 @@ void parse_command_line(int argc, char **argv, char *input_file_name, char *mode
 
 void read_problem(const char *filename)
 {
-	int elements, max_index, inst_max_index, i, j;
+	int max_index, inst_max_index, i;
+	size_t elements, j;
 	FILE *fp = fopen(filename,"r");
 	char *endptr;
 	char *idx, *val, *label;
diff --git a/svm.def b/svm.def
index e885b23f..6bd1750e 100644
--- a/svm.def
+++ b/svm.def
@@ -17,3 +17,5 @@ EXPORTS
 	svm_check_parameter	@15
 	svm_check_probability_model	@16
 	svm_set_print_string_function	@17
+	svm_get_sv_indices	@18
+	svm_get_nr_sv	@19

From 7c13efd7f8c947ff3a625fab200ad059b8ae32e7 Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 14:27:51 +0800
Subject: [PATCH 4/7] update python interface to 3.20

---
 python/hintsvm.py     | 117 +++++++++++++++++++++++++++---------------
 python/hintsvmutil.py |  85 ++++++++++++++++++------------
 2 files changed, 128 insertions(+), 74 deletions(-)

diff --git a/python/hintsvm.py b/python/hintsvm.py
index 89b18e9f..5f4830fc 100644
--- a/python/hintsvm.py
+++ b/python/hintsvm.py
@@ -2,38 +2,50 @@
 
 from ctypes import *
 from ctypes.util import find_library
+from os import path
 import sys
-import os
 
-# For unix the prefix 'lib' is not considered.
-if find_library('hintsvm'):
-	libsvm = CDLL(find_library('hintsvm'))
-elif find_library('libhintsvm'):
-	libsvm = CDLL(find_library('libhintsvm'))
-else:
+__all__ = ['libsvm', 'svm_problem', 'svm_parameter',
+           'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'C_SVC',
+           'EPSILON_SVR', 'LINEAR', 'NU_SVC', 'NU_SVR', 'ONE_CLASS',
+           'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
+           'SIGMOID', 'c_double', 'svm_model']
+
+try:
+	dirname = path.dirname(path.abspath(__file__))
 	if sys.platform == 'win32':
-		libsvm = CDLL(os.path.join(os.path.dirname(__file__),\
-				'../windows/libhintsvm.dll'))
+		libsvm = CDLL(path.join(dirname, r'..\windows\libhintsvm.dll'))
+	else:
+		libsvm = CDLL(path.join(dirname, '../libsvm.so.2'))
+except:
+# For unix the prefix 'lib' is not considered.
+	if find_library('hintsvm'):
+		libsvm = CDLL(find_library('hintsvm'))
+	elif find_library('libhintsvm'):
+		libsvm = CDLL(find_library('libhintsvm'))
 	else:
-		libsvm = CDLL(os.path.join(os.path.dirname(__file__),\
-				'../libhintsvm.so.2'))
+		raise Exception('LIBSVM library not found.')
+
+C_SVC = 0
+NU_SVC = 1
+ONE_CLASS = 2
+EPSILON_SVR = 3
+NU_SVR = 4
 
-# Construct constants
-#modify by Macaca 2013/10/9
-SVM_TYPE = ['C_SVC', 'NU_SVC', 'ONE_CLASS', 'EPSILON_SVR', 'NU_SVR', 'HINT_SVC' ]
-#modify by Macaca 2013/10/9
-KERNEL_TYPE = ['LINEAR', 'POLY', 'RBF', 'SIGMOID', 'PRECOMPUTED']
-for i, s in enumerate(SVM_TYPE): exec("%s = %d" % (s , i))
-for i, s in enumerate(KERNEL_TYPE): exec("%s = %d" % (s , i))
+LINEAR = 0
+POLY = 1
+RBF = 2
+SIGMOID = 3
+PRECOMPUTED = 4
 
 PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
-def print_null(s): 
-	return 
+def print_null(s):
+	return
 
-def genFields(names, types): 
+def genFields(names, types):
 	return list(zip(names, types))
 
-def fillprototype(f, restype, argtypes): 
+def fillprototype(f, restype, argtypes):
 	f.restype = restype
 	f.argtypes = argtypes
 
@@ -42,10 +54,15 @@ class svm_node(Structure):
 	_types = [c_int, c_double]
 	_fields_ = genFields(_names, _types)
 
-def gen_svm_nodearray(xi, feature_max=None, issparse=None):
+	def __str__(self):
+		return '%d:%g' % (self.index, self.value)
+
+def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
 	if isinstance(xi, dict):
 		index_range = xi.keys()
 	elif isinstance(xi, (list, tuple)):
+		if not isKernel:
+			xi = [0] + xi  # idx should start from 1
 		index_range = range(len(xi))
 	else:
 		raise TypeError('xi should be a dictionary, list or tuple')
@@ -53,7 +70,7 @@ def gen_svm_nodearray(xi, feature_max=None, issparse=None):
 	if feature_max:
 		assert(isinstance(feature_max, int))
 		index_range = filter(lambda j: j <= feature_max, index_range)
-	if issparse: 
+	if not isKernel:
 		index_range = filter(lambda j:xi[j] != 0, index_range)
 
 	index_range = sorted(index_range)
@@ -63,7 +80,7 @@ def gen_svm_nodearray(xi, feature_max=None, issparse=None):
 		ret[idx].index = j
 		ret[idx].value = xi[j]
 	max_idx = 0
-	if index_range: 
+	if index_range:
 		max_idx = index_range[-1]
 	return ret, max_idx
 
@@ -72,7 +89,7 @@ class svm_problem(Structure):
 	_types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node)), POINTER(c_double)]
 	_fields_ = genFields(_names, _types)
 
-	def __init__(self, W, y, x):
+	def __init__(self, W, y, x, isKernel=None):
 		if len(y) != len(x):
 			raise ValueError("len(y) != len(x)")
 		if len(W) != 0 and len(W) != len(x):
@@ -84,7 +101,7 @@ def __init__(self, W, y, x):
 		max_idx = 0
 		x_space = self.x_space = []
 		for i, xi in enumerate(x):
-			tmp_xi, tmp_idx = gen_svm_nodearray(xi)
+			tmp_xi, tmp_idx = gen_svm_nodearray(xi,isKernel=isKernel)
 			x_space += [tmp_xi]
 			max_idx = max(max_idx, tmp_idx)
 		self.n = max_idx
@@ -95,14 +112,14 @@ def __init__(self, W, y, x):
 		self.y = (c_double * l)()
 		for i, yi in enumerate(y): self.y[i] = yi
 
-		self.x = (POINTER(svm_node) * l)() 
+		self.x = (POINTER(svm_node) * l)()
 		for i, xi in enumerate(self.x_space): self.x[i] = xi
 
 class svm_parameter(Structure):
 	_names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
-			"cache_size", "eps", "C", "nr_weight", "weight_label", "weight", 
+			"cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
 			"nu", "p", "shrinking", "probability"]
-	_types = [c_int, c_int, c_int, c_double, c_double, 
+	_types = [c_int, c_int, c_int, c_double, c_double,
 			c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
 			c_double, c_double, c_int, c_int]
 	_fields_ = genFields(_names, _types)
@@ -112,11 +129,15 @@ def __init__(self, options = None):
 			options = ''
 		self.parse_options(options)
 
-	def show(self):
-		attrs = svm_parameter._names + self.__dict__.keys()
-		values = map(lambda attr: getattr(self, attr), attrs) 
+	def __str__(self):
+		s = ''
+		attrs = svm_parameter._names + list(self.__dict__.keys())
+		values = map(lambda attr: getattr(self, attr), attrs)
 		for attr, val in zip(attrs, values):
-			print(' %s: %s' % (attr, val))
+			s += (' %s: %s\n' % (attr, val))
+		s = s.strip()
+
+		return s
 
 	def set_to_default_values(self):
 		self.svm_type = C_SVC;
@@ -136,10 +157,15 @@ def set_to_default_values(self):
 		self.weight = (c_double*0)()
 		self.cross_validation = False
 		self.nr_fold = 0
-		self.print_func = None
+		self.print_func = cast(None, PRINT_STRING_FUN)
 
 	def parse_options(self, options):
-		argv = options.split()
+		if isinstance(options, list):
+			argv = options
+		elif isinstance(options, str):
+			argv = options.split()
+		else:
+			raise TypeError("arg 1 should be a list or a str.")
 		self.set_to_default_values()
 		self.print_func = cast(None, PRINT_STRING_FUN)
 		weight_label = []
@@ -204,17 +230,17 @@ def parse_options(self, options):
 		libsvm.svm_set_print_string_function(self.print_func)
 		self.weight_label = (c_int*self.nr_weight)()
 		self.weight = (c_double*self.nr_weight)()
-		for i in range(self.nr_weight): 
+		for i in range(self.nr_weight):
 			self.weight[i] = weight[i]
 			self.weight_label[i] = weight_label[i]
 
 class svm_model(Structure):
 	_names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
-			'probA', 'probB', 'label', 'nSV', 'free_sv']
+			'probA', 'probB', 'sv_indices', 'label', 'nSV', 'free_sv']
 	_types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
 			POINTER(POINTER(c_double)), POINTER(c_double),
 			POINTER(c_double), POINTER(c_double), POINTER(c_int),
-			POINTER(c_int), c_int]
+			POINTER(c_int), POINTER(c_int), c_int]
 	_fields_ = genFields(_names, _types)
 
 	def __init__(self):
@@ -240,6 +266,15 @@ def get_labels(self):
 		libsvm.svm_get_labels(self, labels)
 		return labels[:nr_class]
 
+	def get_sv_indices(self):
+		total_sv = self.get_nr_sv()
+		sv_indices = (c_int * total_sv)()
+		libsvm.svm_get_sv_indices(self, sv_indices)
+		return sv_indices[:total_sv]
+
+	def get_nr_sv(self):
+		return libsvm.svm_get_nr_sv(self)
+
 	def is_probability_model(self):
 		return (libsvm.svm_check_probability_model(self) == 1)
 
@@ -251,7 +286,7 @@ def get_SV(self):
 		result = []
 		for sparse_sv in self.SV[:self.l]:
 			row = dict()
-			
+
 			i = 0
 			while True:
 				row[sparse_sv[i].index] = sparse_sv[i].value
@@ -283,6 +318,8 @@ def toPyModel(model_ptr):
 fillprototype(libsvm.svm_get_svm_type, c_int, [POINTER(svm_model)])
 fillprototype(libsvm.svm_get_nr_class, c_int, [POINTER(svm_model)])
 fillprototype(libsvm.svm_get_labels, None, [POINTER(svm_model), POINTER(c_int)])
+fillprototype(libsvm.svm_get_sv_indices, None, [POINTER(svm_model), POINTER(c_int)])
+fillprototype(libsvm.svm_get_nr_sv, c_int, [POINTER(svm_model)])
 fillprototype(libsvm.svm_get_svr_probability, c_double, [POINTER(svm_model)])
 
 fillprototype(libsvm.svm_predict_values, c_double, [POINTER(svm_model), POINTER(svm_node), POINTER(c_double)])
diff --git a/python/hintsvmutil.py b/python/hintsvmutil.py
index 1470bc53..3a845bcf 100644
--- a/python/hintsvmutil.py
+++ b/python/hintsvmutil.py
@@ -1,6 +1,15 @@
 #!/usr/bin/env python
 
+import os
+import sys
 from hintsvm import *
+from hintsvm import __all__ as svm_all
+
+
+__all__ = ['evaluations', 'svm_load_model', 'svm_predict', 'svm_read_problem',
+           'svm_save_model', 'svm_train'] + svm_all
+
+sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
 
 def svm_read_problem(data_file_name):
 	"""
@@ -27,11 +36,11 @@ def svm_read_problem(data_file_name):
 def svm_load_model(model_file_name):
 	"""
 	svm_load_model(model_file_name) -> model
-	
+
 	Load a LIBSVM model from model_file_name and return.
 	"""
-	model = libsvm.svm_load_model(model_file_name)
-	if not model: 
+	model = libsvm.svm_load_model(model_file_name.encode())
+	if not model:
 		print("can't open model file %s" % model_file_name)
 		return None
 	model = toPyModel(model)
@@ -43,7 +52,7 @@ def svm_save_model(model_file_name, model):
 
 	Save a LIBSVM model to the file model_file_name.
 	"""
-	libsvm.svm_save_model(model_file_name, model)
+	libsvm.svm_save_model(model_file_name.encode(), model)
 
 def evaluations(ty, pv):
 	"""
@@ -57,14 +66,14 @@ def evaluations(ty, pv):
 	total_correct = total_error = 0
 	sumv = sumy = sumvv = sumyy = sumvy = 0
 	for v, y in zip(pv, ty):
-		if y == v: 
+		if y == v:
 			total_correct += 1
 		total_error += (v-y)*(v-y)
 		sumv += v
 		sumy += y
 		sumvv += v*v
 		sumyy += y*y
-		sumvy += v*y 
+		sumvy += v*y
 	l = len(ty)
 	ACC = 100.0*total_correct/l
 	MSE = total_error/l
@@ -76,22 +85,22 @@ def evaluations(ty, pv):
 
 def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
 	"""
-	svm_train(W, y, x [, 'options']) -> model | ACC | MSE 
-	svm_train(prob, [, 'options']) -> model | ACC | MSE 
-	svm_train(prob, param) -> model | ACC| MSE 
+	svm_train(W, x [, options]) -> model | ACC | MSE
+	svm_train(prob [, options]) -> model | ACC | MSE
+	svm_train(prob, param) -> model | ACC| MSE
 
 	Train an SVM model from weighted data (W, y, x) or an svm_problem prob using
-	'options' or an svm_parameter param. 
+	'options' or an svm_parameter param.
 	If '-v' is specified in 'options' (i.e., cross validation)
 	either accuracy (ACC) or mean-squared error (MSE) is returned.
-	'options':
+	options:
 	    -s svm_type : set type of SVM (default 0)
-	        0 -- C-SVC
-	        1 -- nu-SVC
+	        0 -- C-SVC		(multi-class classification)
+	        1 -- nu-SVC		(multi-class classification)
 	        2 -- one-class SVM
-	        3 -- epsilon-SVR
-	        4 -- nu-SVR
-		5 -- hint SVM
+	        3 -- epsilon-SVR	(regression)
+	        4 -- nu-SVR		(regression)
+            5 -- hint SVM
 	    -t kernel_type : set type of kernel function (default 2)
 	        0 -- linear: u'*v
 	        1 -- polynomial: (gamma*u'*v + coef0)^degree
@@ -117,8 +126,8 @@ def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
 		assert isinstance(arg2, (list, tuple))
 		assert isinstance(arg3, list)
 		W, y, x, options = arg1, arg2, arg3, arg4
-		prob = svm_problem(W, y, x)
 		param = svm_parameter(options)
+		prob = svm_problem(W, y, x, isKernel=(param.kernel_type == PRECOMPUTED))
 	elif isinstance(arg1, svm_problem):
 		prob = arg1
 		if isinstance(arg2, svm_parameter):
@@ -136,7 +145,7 @@ def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
 			if val <= 0 or val > prob.n:
 				raise ValueError('Wrong input format: sample_serial_number out of range')
 
-	if param.gamma == 0 and prob.n > 0: 
+	if param.gamma == 0 and prob.n > 0:
 		param.gamma = 1.0 / prob.n
 	libsvm.svm_set_print_string_function(param.print_func)
 	err_msg = libsvm.svm_check_parameter(prob, param)
@@ -146,7 +155,7 @@ def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
 	if param.cross_validation:
 		l, nr_fold = prob.l, param.nr_fold
 		target = (c_double * l)()
-		libsvm.svm_cross_validation(prob, param, nr_fold, target)	
+		libsvm.svm_cross_validation(prob, param, nr_fold, target)
 		ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
 		if param.svm_type in [EPSILON_SVR, NU_SVR]:
 			print("Cross Validation Mean squared error = %g" % MSE)
@@ -165,18 +174,19 @@ def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
 
 def svm_predict(y, x, m, options=""):
 	"""
-	svm_predict(y, x, m [, "options"]) -> (p_labels, p_acc, p_vals)
+	svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)
 
-	Predict data (y, x) with the SVM model m. 
-	"options": 
-	    -b probability_estimates: whether to predict probability estimates, 
+	Predict data (y, x) with the SVM model m.
+	options:
+	    -b probability_estimates: whether to predict probability estimates,
 	        0 or 1 (default 0); for one-class SVM only 0 is supported.
+	    -q : quiet mode (no outputs).
 
 	The return tuple contains
 	p_labels: a list of predicted labels
-	p_acc: a tuple including  accuracy (for classification), mean-squared 
+	p_acc: a tuple including  accuracy (for classification), mean-squared
 	       error, and squared correlation coefficient (for regression).
-	p_vals: a list of decision values or probability estimates (if '-b 1' 
+	p_vals: a list of decision values or probability estimates (if '-b 1'
 	        is specified). If k is the number of classes, for decision values,
 	        each element includes results of predicting k(k-1)/2 binary-class
 	        SVMs. For probabilities, each element contains k values indicating
@@ -184,6 +194,10 @@ def svm_predict(y, x, m, options=""):
 	        Note that the order of classes here is the same as 'model.label'
 	        field in the model structure.
 	"""
+
+	def info(s):
+		print(s)
+
 	predict_probability = 0
 	argv = options.split()
 	i = 0
@@ -191,6 +205,8 @@ def svm_predict(y, x, m, options=""):
 		if argv[i] == '-b':
 			i += 1
 			predict_probability = int(argv[i])
+		elif argv[i] == '-q':
+			info = print_null
 		else:
 			raise ValueError("Wrong options")
 		i+=1
@@ -206,31 +222,31 @@ def svm_predict(y, x, m, options=""):
 			raise ValueError("Model does not support probabiliy estimates")
 
 		if svm_type in [NU_SVR, EPSILON_SVR]:
-			print("Prob. model for test data: target value = predicted value + z,\n"
+			info("Prob. model for test data: target value = predicted value + z,\n"
 			"z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability());
 			nr_class = 0
 
 		prob_estimates = (c_double * nr_class)()
 		for xi in x:
-			xi, idx = gen_svm_nodearray(xi)
+			xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
 			label = libsvm.svm_predict_probability(m, xi, prob_estimates)
 			values = prob_estimates[:nr_class]
 			pred_labels += [label]
 			pred_values += [values]
 	else:
 		if is_prob_model:
-			print("Model supports probability estimates, but disabled in predicton.")
+			info("Model supports probability estimates, but disabled in predicton.")
 		if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVC):
 			nr_classifier = 1
 		else:
 			nr_classifier = nr_class*(nr_class-1)//2
 		dec_values = (c_double * nr_classifier)()
 		for xi in x:
-			xi, idx = gen_svm_nodearray(xi)
+			xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
 			label = libsvm.svm_predict_values(m, xi, dec_values)
-			if(nr_class == 1): 
+			if(nr_class == 1):
 				values = [1]
-			else: 
+			else:
 				values = dec_values[:nr_classifier]
 			pred_labels += [label]
 			pred_values += [values]
@@ -238,10 +254,11 @@ def svm_predict(y, x, m, options=""):
 	ACC, MSE, SCC = evaluations(y, pred_labels)
 	l = len(y)
 	if svm_type in [EPSILON_SVR, NU_SVR]:
-		print("Mean squared error = %g (regression)" % MSE)
-		print("Squared correlation coefficient = %g (regression)" % SCC)
+		info("Mean squared error = %g (regression)" % MSE)
+		info("Squared correlation coefficient = %g (regression)" % SCC)
 	else:
-		print("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
+		info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
 
 	return pred_labels, (ACC, MSE, SCC), pred_values
 
+

From 17064af4ed0ac01bb5c72c610a5f1dfb89175886 Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 16:16:08 +0800
Subject: [PATCH 5/7] update hintsvm.py

---
 python/hintsvm.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/hintsvm.py b/python/hintsvm.py
index 5f4830fc..04d55bc9 100644
--- a/python/hintsvm.py
+++ b/python/hintsvm.py
@@ -8,7 +8,7 @@
 __all__ = ['libsvm', 'svm_problem', 'svm_parameter',
            'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'C_SVC',
            'EPSILON_SVR', 'LINEAR', 'NU_SVC', 'NU_SVR', 'ONE_CLASS',
-           'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
+           'HINTSVM_SVC', 'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
            'SIGMOID', 'c_double', 'svm_model']
 
 try:
@@ -31,6 +31,7 @@
 ONE_CLASS = 2
 EPSILON_SVR = 3
 NU_SVR = 4
+HINTSVM_SVC = 5
 
 LINEAR = 0
 POLY = 1

From a44594eba5e617bb2b8cdb7421d57cee2ba0d85e Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 16:46:00 +0800
Subject: [PATCH 6/7] update hintsvm.py and retab *.py

---
 python/hintsvm.py     | 527 +++++++++++++++++++++---------------------
 python/hintsvmutil.py | 436 +++++++++++++++++-----------------
 2 files changed, 483 insertions(+), 480 deletions(-)

diff --git a/python/hintsvm.py b/python/hintsvm.py
index 04d55bc9..565229a5 100644
--- a/python/hintsvm.py
+++ b/python/hintsvm.py
@@ -5,6 +5,9 @@
 from os import path
 import sys
 
+if sys.version_info[0] >= 3:
+    xrange = range
+
 __all__ = ['libsvm', 'svm_problem', 'svm_parameter',
            'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'C_SVC',
            'EPSILON_SVR', 'LINEAR', 'NU_SVC', 'NU_SVR', 'ONE_CLASS',
@@ -12,19 +15,19 @@
            'SIGMOID', 'c_double', 'svm_model']
 
 try:
-	dirname = path.dirname(path.abspath(__file__))
-	if sys.platform == 'win32':
-		libsvm = CDLL(path.join(dirname, r'..\windows\libhintsvm.dll'))
-	else:
-		libsvm = CDLL(path.join(dirname, '../libsvm.so.2'))
+    dirname = path.dirname(path.abspath(__file__))
+    if sys.platform == 'win32':
+        libsvm = CDLL(path.join(dirname, r'..\windows\libhintsvm.dll'))
+    else:
+        libsvm = CDLL(path.join(dirname, '../libhintsvm.so.2'))
 except:
 # For unix the prefix 'lib' is not considered.
-	if find_library('hintsvm'):
-		libsvm = CDLL(find_library('hintsvm'))
-	elif find_library('libhintsvm'):
-		libsvm = CDLL(find_library('libhintsvm'))
-	else:
-		raise Exception('LIBSVM library not found.')
+    if find_library('hintsvm'):
+        libsvm = CDLL(find_library('hintsvm'))
+    elif find_library('libhintsvm'):
+        libsvm = CDLL(find_library('libhintsvm'))
+    else:
+        raise Exception('LIBSVM library not found.')
 
 C_SVC = 0
 NU_SVC = 1
@@ -41,274 +44,274 @@
 
 PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p)
 def print_null(s):
-	return
+    return
 
 def genFields(names, types):
-	return list(zip(names, types))
+    return list(zip(names, types))
 
 def fillprototype(f, restype, argtypes):
-	f.restype = restype
-	f.argtypes = argtypes
+    f.restype = restype
+    f.argtypes = argtypes
 
 class svm_node(Structure):
-	_names = ["index", "value"]
-	_types = [c_int, c_double]
-	_fields_ = genFields(_names, _types)
+    _names = ["index", "value"]
+    _types = [c_int, c_double]
+    _fields_ = genFields(_names, _types)
 
-	def __str__(self):
-		return '%d:%g' % (self.index, self.value)
+    def __str__(self):
+        return '%d:%g' % (self.index, self.value)
 
 def gen_svm_nodearray(xi, feature_max=None, isKernel=None):
-	if isinstance(xi, dict):
-		index_range = xi.keys()
-	elif isinstance(xi, (list, tuple)):
-		if not isKernel:
-			xi = [0] + xi  # idx should start from 1
-		index_range = range(len(xi))
-	else:
-		raise TypeError('xi should be a dictionary, list or tuple')
-
-	if feature_max:
-		assert(isinstance(feature_max, int))
-		index_range = filter(lambda j: j <= feature_max, index_range)
-	if not isKernel:
-		index_range = filter(lambda j:xi[j] != 0, index_range)
-
-	index_range = sorted(index_range)
-	ret = (svm_node * (len(index_range)+1))()
-	ret[-1].index = -1
-	for idx, j in enumerate(index_range):
-		ret[idx].index = j
-		ret[idx].value = xi[j]
-	max_idx = 0
-	if index_range:
-		max_idx = index_range[-1]
-	return ret, max_idx
+    if isinstance(xi, dict):
+        index_range = xi.keys()
+    elif isinstance(xi, (list, tuple)):
+        if not isKernel:
+            xi = [0] + xi  # idx should start from 1
+        index_range = range(len(xi))
+    else:
+        raise TypeError('xi should be a dictionary, list or tuple')
+
+    if feature_max:
+        assert(isinstance(feature_max, int))
+        index_range = filter(lambda j: j <= feature_max, index_range)
+    if not isKernel:
+        index_range = filter(lambda j:xi[j] != 0, index_range)
+
+    index_range = sorted(index_range)
+    ret = (svm_node * (len(index_range)+1))()
+    ret[-1].index = -1
+    for idx, j in enumerate(index_range):
+        ret[idx].index = j
+        ret[idx].value = xi[j]
+    max_idx = 0
+    if index_range:
+        max_idx = index_range[-1]
+    return ret, max_idx
 
 class svm_problem(Structure):
-	_names = ["l", "y", "x", "W"]
-	_types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node)), POINTER(c_double)]
-	_fields_ = genFields(_names, _types)
-
-	def __init__(self, W, y, x, isKernel=None):
-		if len(y) != len(x):
-			raise ValueError("len(y) != len(x)")
-		if len(W) != 0 and len(W) != len(x):
-			raise ValueError("len(W) != len(x)")
-		self.l = l = len(y)
-		if len(W) == 0:
-			W = [1] * l
-
-		max_idx = 0
-		x_space = self.x_space = []
-		for i, xi in enumerate(x):
-			tmp_xi, tmp_idx = gen_svm_nodearray(xi,isKernel=isKernel)
-			x_space += [tmp_xi]
-			max_idx = max(max_idx, tmp_idx)
-		self.n = max_idx
-
-		self.W = (c_double * l)()
-		for i, Wi in enumerate(W): self.W[i] = Wi
-
-		self.y = (c_double * l)()
-		for i, yi in enumerate(y): self.y[i] = yi
-
-		self.x = (POINTER(svm_node) * l)()
-		for i, xi in enumerate(self.x_space): self.x[i] = xi
+    _names = ["l", "y", "x", "W"]
+    _types = [c_int, POINTER(c_double), POINTER(POINTER(svm_node)), POINTER(c_double)]
+    _fields_ = genFields(_names, _types)
+
+    def __init__(self, W, y, x, isKernel=None):
+        if len(y) != len(x):
+            raise ValueError("len(y) != len(x)")
+        if len(W) != 0 and len(W) != len(x):
+            raise ValueError("len(W) != len(x)")
+        self.l = l = len(y)
+        if len(W) == 0:
+            W = [1] * l
+
+        max_idx = 0
+        x_space = self.x_space = []
+        for i, xi in enumerate(x):
+            tmp_xi, tmp_idx = gen_svm_nodearray(xi,isKernel=isKernel)
+            x_space += [tmp_xi]
+            max_idx = max(max_idx, tmp_idx)
+        self.n = max_idx
+
+        self.W = (c_double * l)()
+        for i, Wi in enumerate(W): self.W[i] = Wi
+
+        self.y = (c_double * l)()
+        for i, yi in enumerate(y): self.y[i] = yi
+
+        self.x = (POINTER(svm_node) * l)()
+        for i, xi in enumerate(self.x_space): self.x[i] = xi
 
 class svm_parameter(Structure):
-	_names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
-			"cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
-			"nu", "p", "shrinking", "probability"]
-	_types = [c_int, c_int, c_int, c_double, c_double,
-			c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
-			c_double, c_double, c_int, c_int]
-	_fields_ = genFields(_names, _types)
-
-	def __init__(self, options = None):
-		if options == None:
-			options = ''
-		self.parse_options(options)
-
-	def __str__(self):
-		s = ''
-		attrs = svm_parameter._names + list(self.__dict__.keys())
-		values = map(lambda attr: getattr(self, attr), attrs)
-		for attr, val in zip(attrs, values):
-			s += (' %s: %s\n' % (attr, val))
-		s = s.strip()
-
-		return s
-
-	def set_to_default_values(self):
-		self.svm_type = C_SVC;
-		self.kernel_type = RBF
-		self.degree = 3
-		self.gamma = 0
-		self.coef0 = 0
-		self.nu = 0.5
-		self.cache_size = 100
-		self.C = 1
-		self.eps = 0.001
-		self.p = 0.1
-		self.shrinking = 1
-		self.probability = 0
-		self.nr_weight = 0
-		self.weight_label = (c_int*0)()
-		self.weight = (c_double*0)()
-		self.cross_validation = False
-		self.nr_fold = 0
-		self.print_func = cast(None, PRINT_STRING_FUN)
-
-	def parse_options(self, options):
-		if isinstance(options, list):
-			argv = options
-		elif isinstance(options, str):
-			argv = options.split()
-		else:
-			raise TypeError("arg 1 should be a list or a str.")
-		self.set_to_default_values()
-		self.print_func = cast(None, PRINT_STRING_FUN)
-		weight_label = []
-		weight = []
-
-		i = 0
-		while i < len(argv):
-			if argv[i] == "-s":
-				i = i + 1
-				self.svm_type = int(argv[i])
-			elif argv[i] == "-t":
-				i = i + 1
-				self.kernel_type = int(argv[i])
-			elif argv[i] == "-d":
-				i = i + 1
-				self.degree = int(argv[i])
-			elif argv[i] == "-g":
-				i = i + 1
-				self.gamma = float(argv[i])
-			elif argv[i] == "-r":
-				i = i + 1
-				self.coef0 = float(argv[i])
-			elif argv[i] == "-n":
-				i = i + 1
-				self.nu = float(argv[i])
-			elif argv[i] == "-m":
-				i = i + 1
-				self.cache_size = float(argv[i])
-			elif argv[i] == "-c":
-				i = i + 1
-				self.C = float(argv[i])
-			elif argv[i] == "-e":
-				i = i + 1
-				self.eps = float(argv[i])
-			elif argv[i] == "-p":
-				i = i + 1
-				self.p = float(argv[i])
-			elif argv[i] == "-h":
-				i = i + 1
-				self.shrinking = int(argv[i])
-			elif argv[i] == "-b":
-				i = i + 1
-				self.probability = int(argv[i])
-			elif argv[i] == "-q":
-				self.print_func = PRINT_STRING_FUN(print_null)
-			elif argv[i] == "-v":
-				i = i + 1
-				self.cross_validation = 1
-				self.nr_fold = int(argv[i])
-				if self.nr_fold < 2:
-					raise ValueError("n-fold cross validation: n must >= 2")
-			elif argv[i].startswith("-w"):
-				i = i + 1
-				self.nr_weight += 1
-				nr_weight = self.nr_weight
-				weight_label += [int(argv[i-1][2:])]
-				weight += [float(argv[i])]
-			else:
-				raise ValueError("Wrong options")
-			i += 1
-
-		libsvm.svm_set_print_string_function(self.print_func)
-		self.weight_label = (c_int*self.nr_weight)()
-		self.weight = (c_double*self.nr_weight)()
-		for i in range(self.nr_weight):
-			self.weight[i] = weight[i]
-			self.weight_label[i] = weight_label[i]
+    _names = ["svm_type", "kernel_type", "degree", "gamma", "coef0",
+            "cache_size", "eps", "C", "nr_weight", "weight_label", "weight",
+            "nu", "p", "shrinking", "probability"]
+    _types = [c_int, c_int, c_int, c_double, c_double,
+            c_double, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double),
+            c_double, c_double, c_int, c_int]
+    _fields_ = genFields(_names, _types)
+
+    def __init__(self, options = None):
+        if options == None:
+            options = ''
+        self.parse_options(options)
+
+    def __str__(self):
+        s = ''
+        attrs = svm_parameter._names + list(self.__dict__.keys())
+        values = map(lambda attr: getattr(self, attr), attrs)
+        for attr, val in zip(attrs, values):
+            s += (' %s: %s\n' % (attr, val))
+        s = s.strip()
+
+        return s
+
+    def set_to_default_values(self):
+        self.svm_type = C_SVC;
+        self.kernel_type = RBF
+        self.degree = 3
+        self.gamma = 0
+        self.coef0 = 0
+        self.nu = 0.5
+        self.cache_size = 100
+        self.C = 1
+        self.eps = 0.001
+        self.p = 0.1
+        self.shrinking = 1
+        self.probability = 0
+        self.nr_weight = 0
+        self.weight_label = (c_int*0)()
+        self.weight = (c_double*0)()
+        self.cross_validation = False
+        self.nr_fold = 0
+        self.print_func = cast(None, PRINT_STRING_FUN)
+
+    def parse_options(self, options):
+        if isinstance(options, list):
+            argv = options
+        elif isinstance(options, str):
+            argv = options.split()
+        else:
+            raise TypeError("arg 1 should be a list or a str.")
+        self.set_to_default_values()
+        self.print_func = cast(None, PRINT_STRING_FUN)
+        weight_label = []
+        weight = []
+
+        i = 0
+        while i < len(argv):
+            if argv[i] == "-s":
+                i = i + 1
+                self.svm_type = int(argv[i])
+            elif argv[i] == "-t":
+                i = i + 1
+                self.kernel_type = int(argv[i])
+            elif argv[i] == "-d":
+                i = i + 1
+                self.degree = int(argv[i])
+            elif argv[i] == "-g":
+                i = i + 1
+                self.gamma = float(argv[i])
+            elif argv[i] == "-r":
+                i = i + 1
+                self.coef0 = float(argv[i])
+            elif argv[i] == "-n":
+                i = i + 1
+                self.nu = float(argv[i])
+            elif argv[i] == "-m":
+                i = i + 1
+                self.cache_size = float(argv[i])
+            elif argv[i] == "-c":
+                i = i + 1
+                self.C = float(argv[i])
+            elif argv[i] == "-e":
+                i = i + 1
+                self.eps = float(argv[i])
+            elif argv[i] == "-p":
+                i = i + 1
+                self.p = float(argv[i])
+            elif argv[i] == "-h":
+                i = i + 1
+                self.shrinking = int(argv[i])
+            elif argv[i] == "-b":
+                i = i + 1
+                self.probability = int(argv[i])
+            elif argv[i] == "-q":
+                self.print_func = PRINT_STRING_FUN(print_null)
+            elif argv[i] == "-v":
+                i = i + 1
+                self.cross_validation = 1
+                self.nr_fold = int(argv[i])
+                if self.nr_fold < 2:
+                    raise ValueError("n-fold cross validation: n must >= 2")
+            elif argv[i].startswith("-w"):
+                i = i + 1
+                self.nr_weight += 1
+                nr_weight = self.nr_weight
+                weight_label += [int(argv[i-1][2:])]
+                weight += [float(argv[i])]
+            else:
+                raise ValueError("Wrong options")
+            i += 1
+
+        libsvm.svm_set_print_string_function(self.print_func)
+        self.weight_label = (c_int*self.nr_weight)()
+        self.weight = (c_double*self.nr_weight)()
+        for i in range(self.nr_weight):
+            self.weight[i] = weight[i]
+            self.weight_label[i] = weight_label[i]
 
 class svm_model(Structure):
-	_names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
-			'probA', 'probB', 'sv_indices', 'label', 'nSV', 'free_sv']
-	_types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
-			POINTER(POINTER(c_double)), POINTER(c_double),
-			POINTER(c_double), POINTER(c_double), POINTER(c_int),
-			POINTER(c_int), POINTER(c_int), c_int]
-	_fields_ = genFields(_names, _types)
-
-	def __init__(self):
-		self.__createfrom__ = 'python'
-
-	def __del__(self):
-		# free memory created by C to avoid memory leak
-		if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
-			libsvm.svm_free_and_destroy_model(pointer(self))
-
-	def get_svm_type(self):
-		return libsvm.svm_get_svm_type(self)
-
-	def get_nr_class(self):
-		return libsvm.svm_get_nr_class(self)
-
-	def get_svr_probability(self):
-		return libsvm.svm_get_svr_probability(self)
-
-	def get_labels(self):
-		nr_class = self.get_nr_class()
-		labels = (c_int * nr_class)()
-		libsvm.svm_get_labels(self, labels)
-		return labels[:nr_class]
-
-	def get_sv_indices(self):
-		total_sv = self.get_nr_sv()
-		sv_indices = (c_int * total_sv)()
-		libsvm.svm_get_sv_indices(self, sv_indices)
-		return sv_indices[:total_sv]
-
-	def get_nr_sv(self):
-		return libsvm.svm_get_nr_sv(self)
-
-	def is_probability_model(self):
-		return (libsvm.svm_check_probability_model(self) == 1)
-
-	def get_sv_coef(self):
-		return [tuple(self.sv_coef[j][i] for j in xrange(self.nr_class - 1))
-				for i in xrange(self.l)]
-
-	def get_SV(self):
-		result = []
-		for sparse_sv in self.SV[:self.l]:
-			row = dict()
-
-			i = 0
-			while True:
-				row[sparse_sv[i].index] = sparse_sv[i].value
-				if sparse_sv[i].index == -1:
-					break
-				i += 1
-
-			result.append(row)
-		return result
+    _names = ['param', 'nr_class', 'l', 'SV', 'sv_coef', 'rho',
+            'probA', 'probB', 'sv_indices', 'label', 'nSV', 'free_sv']
+    _types = [svm_parameter, c_int, c_int, POINTER(POINTER(svm_node)),
+            POINTER(POINTER(c_double)), POINTER(c_double),
+            POINTER(c_double), POINTER(c_double), POINTER(c_int),
+            POINTER(c_int), POINTER(c_int), c_int]
+    _fields_ = genFields(_names, _types)
+
+    def __init__(self):
+        self.__createfrom__ = 'python'
+
+    def __del__(self):
+        # free memory created by C to avoid memory leak
+        if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C':
+            libsvm.svm_free_and_destroy_model(pointer(self))
+
+    def get_svm_type(self):
+        return libsvm.svm_get_svm_type(self)
+
+    def get_nr_class(self):
+        return libsvm.svm_get_nr_class(self)
+
+    def get_svr_probability(self):
+        return libsvm.svm_get_svr_probability(self)
+
+    def get_labels(self):
+        nr_class = self.get_nr_class()
+        labels = (c_int * nr_class)()
+        libsvm.svm_get_labels(self, labels)
+        return labels[:nr_class]
+
+    def get_sv_indices(self):
+        total_sv = self.get_nr_sv()
+        sv_indices = (c_int * total_sv)()
+        libsvm.svm_get_sv_indices(self, sv_indices)
+        return sv_indices[:total_sv]
+
+    def get_nr_sv(self):
+        return libsvm.svm_get_nr_sv(self)
+
+    def is_probability_model(self):
+        return (libsvm.svm_check_probability_model(self) == 1)
+
+    def get_sv_coef(self):
+        return [tuple(self.sv_coef[j][i] for j in xrange(self.nr_class - 1))
+                for i in xrange(self.l)]
+
+    def get_SV(self):
+        result = []
+        for sparse_sv in self.SV[:self.l]:
+            row = dict()
+
+            i = 0
+            while True:
+                row[sparse_sv[i].index] = sparse_sv[i].value
+                if sparse_sv[i].index == -1:
+                    break
+                i += 1
+
+            result.append(row)
+        return result
 
 def toPyModel(model_ptr):
-	"""
-	toPyModel(model_ptr) -> svm_model
-
-	Convert a ctypes POINTER(svm_model) to a Python svm_model
-	"""
-	if bool(model_ptr) == False:
-		raise ValueError("Null pointer")
-	m = model_ptr.contents
-	m.__createfrom__ = 'C'
-	return m
+    """
+    toPyModel(model_ptr) -> svm_model
+
+    Convert a ctypes POINTER(svm_model) to a Python svm_model
+    """
+    if bool(model_ptr) == False:
+        raise ValueError("Null pointer")
+    m = model_ptr.contents
+    m.__createfrom__ = 'C'
+    return m
 
 fillprototype(libsvm.svm_train, POINTER(svm_model), [POINTER(svm_problem), POINTER(svm_parameter)])
 fillprototype(libsvm.svm_cross_validation, None, [POINTER(svm_problem), POINTER(svm_parameter), c_int, POINTER(c_double)])
diff --git a/python/hintsvmutil.py b/python/hintsvmutil.py
index 3a845bcf..e8f461c1 100644
--- a/python/hintsvmutil.py
+++ b/python/hintsvmutil.py
@@ -12,253 +12,253 @@
 sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path
 
 def svm_read_problem(data_file_name):
-	"""
-	svm_read_problem(data_file_name) -> [y, x]
+    """
+    svm_read_problem(data_file_name) -> [y, x]
 
-	Read LIBSVM-format data from data_file_name and return labels y
-	and data instances x.
-	"""
-	prob_y = []
-	prob_x = []
-	for line in open(data_file_name):
-		line = line.split(None, 1)
-		# In case an instance with all zero features
-		if len(line) == 1: line += ['']
-		label, features = line
-		xi = {}
-		for e in features.split():
-			ind, val = e.split(":")
-			xi[int(ind)] = float(val)
-		prob_y += [float(label)]
-		prob_x += [xi]
-	return (prob_y, prob_x)
+    Read LIBSVM-format data from data_file_name and return labels y
+    and data instances x.
+    """
+    prob_y = []
+    prob_x = []
+    for line in open(data_file_name):
+        line = line.split(None, 1)
+        # In case an instance with all zero features
+        if len(line) == 1: line += ['']
+        label, features = line
+        xi = {}
+        for e in features.split():
+            ind, val = e.split(":")
+            xi[int(ind)] = float(val)
+        prob_y += [float(label)]
+        prob_x += [xi]
+    return (prob_y, prob_x)
 
 def svm_load_model(model_file_name):
-	"""
-	svm_load_model(model_file_name) -> model
+    """
+    svm_load_model(model_file_name) -> model
 
-	Load a LIBSVM model from model_file_name and return.
-	"""
-	model = libsvm.svm_load_model(model_file_name.encode())
-	if not model:
-		print("can't open model file %s" % model_file_name)
-		return None
-	model = toPyModel(model)
-	return model
+    Load a LIBSVM model from model_file_name and return.
+    """
+    model = libsvm.svm_load_model(model_file_name.encode())
+    if not model:
+        print("can't open model file %s" % model_file_name)
+        return None
+    model = toPyModel(model)
+    return model
 
 def svm_save_model(model_file_name, model):
-	"""
-	svm_save_model(model_file_name, model) -> None
+    """
+    svm_save_model(model_file_name, model) -> None
 
-	Save a LIBSVM model to the file model_file_name.
-	"""
-	libsvm.svm_save_model(model_file_name.encode(), model)
+    Save a LIBSVM model to the file model_file_name.
+    """
+    libsvm.svm_save_model(model_file_name.encode(), model)
 
 def evaluations(ty, pv):
-	"""
-	evaluations(ty, pv) -> (ACC, MSE, SCC)
+    """
+    evaluations(ty, pv) -> (ACC, MSE, SCC)
 
-	Calculate accuracy, mean squared error and squared correlation coefficient
-	using the true values (ty) and predicted values (pv).
-	"""
-	if len(ty) != len(pv):
-		raise ValueError("len(ty) must equal to len(pv)")
-	total_correct = total_error = 0
-	sumv = sumy = sumvv = sumyy = sumvy = 0
-	for v, y in zip(pv, ty):
-		if y == v:
-			total_correct += 1
-		total_error += (v-y)*(v-y)
-		sumv += v
-		sumy += y
-		sumvv += v*v
-		sumyy += y*y
-		sumvy += v*y
-	l = len(ty)
-	ACC = 100.0*total_correct/l
-	MSE = total_error/l
-	try:
-		SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
-	except:
-		SCC = float('nan')
-	return (ACC, MSE, SCC)
+    Calculate accuracy, mean squared error and squared correlation coefficient
+    using the true values (ty) and predicted values (pv).
+    """
+    if len(ty) != len(pv):
+        raise ValueError("len(ty) must equal to len(pv)")
+    total_correct = total_error = 0
+    sumv = sumy = sumvv = sumyy = sumvy = 0
+    for v, y in zip(pv, ty):
+        if y == v:
+            total_correct += 1
+        total_error += (v-y)*(v-y)
+        sumv += v
+        sumy += y
+        sumvv += v*v
+        sumyy += y*y
+        sumvy += v*y
+    l = len(ty)
+    ACC = 100.0*total_correct/l
+    MSE = total_error/l
+    try:
+        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
+    except:
+        SCC = float('nan')
+    return (ACC, MSE, SCC)
 
 def svm_train(arg1, arg2=None, arg3=None, arg4 = None):
-	"""
-	svm_train(W, x [, options]) -> model | ACC | MSE
-	svm_train(prob [, options]) -> model | ACC | MSE
-	svm_train(prob, param) -> model | ACC| MSE
+    """
+    svm_train(W, x [, options]) -> model | ACC | MSE
+    svm_train(prob [, options]) -> model | ACC | MSE
+    svm_train(prob, param) -> model | ACC| MSE
 
-	Train an SVM model from weighted data (W, y, x) or an svm_problem prob using
-	'options' or an svm_parameter param.
-	If '-v' is specified in 'options' (i.e., cross validation)
-	either accuracy (ACC) or mean-squared error (MSE) is returned.
-	options:
-	    -s svm_type : set type of SVM (default 0)
-	        0 -- C-SVC		(multi-class classification)
-	        1 -- nu-SVC		(multi-class classification)
-	        2 -- one-class SVM
-	        3 -- epsilon-SVR	(regression)
-	        4 -- nu-SVR		(regression)
+    Train an SVM model from weighted data (W, y, x) or an svm_problem prob using
+    'options' or an svm_parameter param.
+    If '-v' is specified in 'options' (i.e., cross validation)
+    either accuracy (ACC) or mean-squared error (MSE) is returned.
+    options:
+        -s svm_type : set type of SVM (default 0)
+            0 -- C-SVC      (multi-class classification)
+            1 -- nu-SVC     (multi-class classification)
+            2 -- one-class SVM
+            3 -- epsilon-SVR    (regression)
+            4 -- nu-SVR     (regression)
             5 -- hint SVM
-	    -t kernel_type : set type of kernel function (default 2)
-	        0 -- linear: u'*v
-	        1 -- polynomial: (gamma*u'*v + coef0)^degree
-	        2 -- radial basis function: exp(-gamma*|u-v|^2)
-	        3 -- sigmoid: tanh(gamma*u'*v + coef0)
-	        4 -- precomputed kernel (kernel values in training_set_file)
-	    -d degree : set degree in kernel function (default 3)
-	    -g gamma : set gamma in kernel function (default 1/num_features)
-	    -r coef0 : set coef0 in kernel function (default 0)
-	    -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
-	    -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
-	    -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
-	    -m cachesize : set cache memory size in MB (default 100)
-	    -e epsilon : set tolerance of termination criterion (default 0.001)
-	    -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
-	    -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
-	    -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
-	    -v n: n-fold cross validation mode
-	    -q : quiet mode (no outputs)
-	"""
-	prob, param = None, None
-	if isinstance(arg1, (list, tuple)):
-		assert isinstance(arg2, (list, tuple))
-		assert isinstance(arg3, list)
-		W, y, x, options = arg1, arg2, arg3, arg4
-		param = svm_parameter(options)
-		prob = svm_problem(W, y, x, isKernel=(param.kernel_type == PRECOMPUTED))
-	elif isinstance(arg1, svm_problem):
-		prob = arg1
-		if isinstance(arg2, svm_parameter):
-			param = arg2
-		else:
-			param = svm_parameter(arg2)
-	if prob == None or param == None:
-		raise TypeError("Wrong types for the arguments")
+        -t kernel_type : set type of kernel function (default 2)
+            0 -- linear: u'*v
+            1 -- polynomial: (gamma*u'*v + coef0)^degree
+            2 -- radial basis function: exp(-gamma*|u-v|^2)
+            3 -- sigmoid: tanh(gamma*u'*v + coef0)
+            4 -- precomputed kernel (kernel values in training_set_file)
+        -d degree : set degree in kernel function (default 3)
+        -g gamma : set gamma in kernel function (default 1/num_features)
+        -r coef0 : set coef0 in kernel function (default 0)
+        -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
+        -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
+        -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
+        -m cachesize : set cache memory size in MB (default 100)
+        -e epsilon : set tolerance of termination criterion (default 0.001)
+        -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
+        -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
+        -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
+        -v n: n-fold cross validation mode
+        -q : quiet mode (no outputs)
+    """
+    prob, param = None, None
+    if isinstance(arg1, (list, tuple)):
+        assert isinstance(arg2, (list, tuple))
+        assert isinstance(arg3, list)
+        W, y, x, options = arg1, arg2, arg3, arg4
+        param = svm_parameter(options)
+        prob = svm_problem(W, y, x, isKernel=(param.kernel_type == PRECOMPUTED))
+    elif isinstance(arg1, svm_problem):
+        prob = arg1
+        if isinstance(arg2, svm_parameter):
+            param = arg2
+        else:
+            param = svm_parameter(arg2)
+    if prob == None or param == None:
+        raise TypeError("Wrong types for the arguments")
 
-	if param.kernel_type == PRECOMPUTED:
-		for xi in prob.x_space:
-			idx, val = xi[0].index, xi[0].value
-			if xi[0].index != 0:
-				raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
-			if val <= 0 or val > prob.n:
-				raise ValueError('Wrong input format: sample_serial_number out of range')
+    if param.kernel_type == PRECOMPUTED:
+        for xi in prob.x_space:
+            idx, val = xi[0].index, xi[0].value
+            if xi[0].index != 0:
+                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
+            if val <= 0 or val > prob.n:
+                raise ValueError('Wrong input format: sample_serial_number out of range')
 
-	if param.gamma == 0 and prob.n > 0:
-		param.gamma = 1.0 / prob.n
-	libsvm.svm_set_print_string_function(param.print_func)
-	err_msg = libsvm.svm_check_parameter(prob, param)
-	if err_msg:
-		raise ValueError('Error: %s' % err_msg)
+    if param.gamma == 0 and prob.n > 0:
+        param.gamma = 1.0 / prob.n
+    libsvm.svm_set_print_string_function(param.print_func)
+    err_msg = libsvm.svm_check_parameter(prob, param)
+    if err_msg:
+        raise ValueError('Error: %s' % err_msg)
 
-	if param.cross_validation:
-		l, nr_fold = prob.l, param.nr_fold
-		target = (c_double * l)()
-		libsvm.svm_cross_validation(prob, param, nr_fold, target)
-		ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
-		if param.svm_type in [EPSILON_SVR, NU_SVR]:
-			print("Cross Validation Mean squared error = %g" % MSE)
-			print("Cross Validation Squared correlation coefficient = %g" % SCC)
-			return MSE
-		else:
-			print("Cross Validation Accuracy = %g%%" % ACC)
-			return ACC
-	else:
-		m = libsvm.svm_train(prob, param)
-		m = toPyModel(m)
+    if param.cross_validation:
+        l, nr_fold = prob.l, param.nr_fold
+        target = (c_double * l)()
+        libsvm.svm_cross_validation(prob, param, nr_fold, target)
+        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
+        if param.svm_type in [EPSILON_SVR, NU_SVR]:
+            print("Cross Validation Mean squared error = %g" % MSE)
+            print("Cross Validation Squared correlation coefficient = %g" % SCC)
+            return MSE
+        else:
+            print("Cross Validation Accuracy = %g%%" % ACC)
+            return ACC
+    else:
+        m = libsvm.svm_train(prob, param)
+        m = toPyModel(m)
 
-		# If prob is destroyed, data including SVs pointed by m can remain.
-		m.x_space = prob.x_space
-		return m
+        # If prob is destroyed, data including SVs pointed by m can remain.
+        m.x_space = prob.x_space
+        return m
 
 def svm_predict(y, x, m, options=""):
-	"""
-	svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)
+    """
+    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)
 
-	Predict data (y, x) with the SVM model m.
-	options:
-	    -b probability_estimates: whether to predict probability estimates,
-	        0 or 1 (default 0); for one-class SVM only 0 is supported.
-	    -q : quiet mode (no outputs).
+    Predict data (y, x) with the SVM model m.
+    options:
+        -b probability_estimates: whether to predict probability estimates,
+            0 or 1 (default 0); for one-class SVM only 0 is supported.
+        -q : quiet mode (no outputs).
 
-	The return tuple contains
-	p_labels: a list of predicted labels
-	p_acc: a tuple including  accuracy (for classification), mean-squared
-	       error, and squared correlation coefficient (for regression).
-	p_vals: a list of decision values or probability estimates (if '-b 1'
-	        is specified). If k is the number of classes, for decision values,
-	        each element includes results of predicting k(k-1)/2 binary-class
-	        SVMs. For probabilities, each element contains k values indicating
-	        the probability that the testing instance is in each class.
-	        Note that the order of classes here is the same as 'model.label'
-	        field in the model structure.
-	"""
+    The return tuple contains
+    p_labels: a list of predicted labels
+    p_acc: a tuple including  accuracy (for classification), mean-squared
+           error, and squared correlation coefficient (for regression).
+    p_vals: a list of decision values or probability estimates (if '-b 1'
+            is specified). If k is the number of classes, for decision values,
+            each element includes results of predicting k(k-1)/2 binary-class
+            SVMs. For probabilities, each element contains k values indicating
+            the probability that the testing instance is in each class.
+            Note that the order of classes here is the same as 'model.label'
+            field in the model structure.
+    """
 
-	def info(s):
-		print(s)
+    def info(s):
+        print(s)
 
-	predict_probability = 0
-	argv = options.split()
-	i = 0
-	while i < len(argv):
-		if argv[i] == '-b':
-			i += 1
-			predict_probability = int(argv[i])
-		elif argv[i] == '-q':
-			info = print_null
-		else:
-			raise ValueError("Wrong options")
-		i+=1
+    predict_probability = 0
+    argv = options.split()
+    i = 0
+    while i < len(argv):
+        if argv[i] == '-b':
+            i += 1
+            predict_probability = int(argv[i])
+        elif argv[i] == '-q':
+            info = print_null
+        else:
+            raise ValueError("Wrong options")
+        i+=1
 
-	svm_type = m.get_svm_type()
-	is_prob_model = m.is_probability_model()
-	nr_class = m.get_nr_class()
-	pred_labels = []
-	pred_values = []
+    svm_type = m.get_svm_type()
+    is_prob_model = m.is_probability_model()
+    nr_class = m.get_nr_class()
+    pred_labels = []
+    pred_values = []
 
-	if predict_probability:
-		if not is_prob_model:
-			raise ValueError("Model does not support probabiliy estimates")
+    if predict_probability:
+        if not is_prob_model:
+            raise ValueError("Model does not support probabiliy estimates")
 
-		if svm_type in [NU_SVR, EPSILON_SVR]:
-			info("Prob. model for test data: target value = predicted value + z,\n"
-			"z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability());
-			nr_class = 0
+        if svm_type in [NU_SVR, EPSILON_SVR]:
+            info("Prob. model for test data: target value = predicted value + z,\n"
+            "z: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g" % m.get_svr_probability());
+            nr_class = 0
 
-		prob_estimates = (c_double * nr_class)()
-		for xi in x:
-			xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
-			label = libsvm.svm_predict_probability(m, xi, prob_estimates)
-			values = prob_estimates[:nr_class]
-			pred_labels += [label]
-			pred_values += [values]
-	else:
-		if is_prob_model:
-			info("Model supports probability estimates, but disabled in predicton.")
-		if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVC):
-			nr_classifier = 1
-		else:
-			nr_classifier = nr_class*(nr_class-1)//2
-		dec_values = (c_double * nr_classifier)()
-		for xi in x:
-			xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
-			label = libsvm.svm_predict_values(m, xi, dec_values)
-			if(nr_class == 1):
-				values = [1]
-			else:
-				values = dec_values[:nr_classifier]
-			pred_labels += [label]
-			pred_values += [values]
+        prob_estimates = (c_double * nr_class)()
+        for xi in x:
+            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
+            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
+            values = prob_estimates[:nr_class]
+            pred_labels += [label]
+            pred_values += [values]
+    else:
+        if is_prob_model:
+            info("Model supports probability estimates, but disabled in predicton.")
+        if svm_type in (ONE_CLASS, EPSILON_SVR, NU_SVC):
+            nr_classifier = 1
+        else:
+            nr_classifier = nr_class*(nr_class-1)//2
+        dec_values = (c_double * nr_classifier)()
+        for xi in x:
+            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == PRECOMPUTED))
+            label = libsvm.svm_predict_values(m, xi, dec_values)
+            if(nr_class == 1):
+                values = [1]
+            else:
+                values = dec_values[:nr_classifier]
+            pred_labels += [label]
+            pred_values += [values]
 
-	ACC, MSE, SCC = evaluations(y, pred_labels)
-	l = len(y)
-	if svm_type in [EPSILON_SVR, NU_SVR]:
-		info("Mean squared error = %g (regression)" % MSE)
-		info("Squared correlation coefficient = %g (regression)" % SCC)
-	else:
-		info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
+    ACC, MSE, SCC = evaluations(y, pred_labels)
+    l = len(y)
+    if svm_type in [EPSILON_SVR, NU_SVR]:
+        info("Mean squared error = %g (regression)" % MSE)
+        info("Squared correlation coefficient = %g (regression)" % SCC)
+    else:
+        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
 
-	return pred_labels, (ACC, MSE, SCC), pred_values
+    return pred_labels, (ACC, MSE, SCC), pred_values
 
 

From f2b18261063b09cb9f4d5f761fe8125d2d8906b0 Mon Sep 17 00:00:00 2001
From: yangarbiter <yangarbiter@gmail.com>
Date: Sun, 21 Feb 2016 23:00:40 +0800
Subject: [PATCH 7/7] segfault bug fix

---
 python/hintsvm.py | 4 ++--
 svm.cpp           | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/hintsvm.py b/python/hintsvm.py
index 565229a5..31f1d054 100644
--- a/python/hintsvm.py
+++ b/python/hintsvm.py
@@ -11,7 +11,7 @@
 __all__ = ['libsvm', 'svm_problem', 'svm_parameter',
            'toPyModel', 'gen_svm_nodearray', 'print_null', 'svm_node', 'C_SVC',
            'EPSILON_SVR', 'LINEAR', 'NU_SVC', 'NU_SVR', 'ONE_CLASS',
-           'HINTSVM_SVC', 'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
+           'HINT_SVC', 'POLY', 'PRECOMPUTED', 'PRINT_STRING_FUN', 'RBF',
            'SIGMOID', 'c_double', 'svm_model']
 
 try:
@@ -34,7 +34,7 @@
 ONE_CLASS = 2
 EPSILON_SVR = 3
 NU_SVR = 4
-HINTSVM_SVC = 5
+HINT_SVC = 5
 
 LINEAR = 0
 POLY = 1
diff --git a/svm.cpp b/svm.cpp
index 4744e230..376c4daf 100644
--- a/svm.cpp
+++ b/svm.cpp
@@ -1925,6 +1925,7 @@ static decision_function svm_train_one(
 		case HINT_SVC:
 			/***put 1 to reduce the complexity***/
 			/***reallocate new si2 in solve_hint_svc***/
+			si.upper_bound = NULL;
 			solve_hint_svc(prob, param, alpha, &si);
 			break;
 		/*added by Macaca referenced from ferng 20111222*/
@@ -2515,6 +2516,7 @@ svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
 		model->rho[0] = f.rho;
 		model->probA = NULL;
 		model->probB = NULL;
+        model->sv_indices = NULL;
 
 		int total_sv = 0;
 		model->nSV = Malloc(int,nr_class);