From 30455b992c49ec1d4ce84228acb280bde2c05afd Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Thu, 27 Feb 2020 17:23:20 +0100 Subject: [PATCH 01/20] Added tests to folder "ml". --- src/secml/ml/classifiers/tests/__init__.py | 1 + .../tests/c_classifier_testcases.py | 411 ++++++++++++++++++ src/secml/ml/classifiers/tests/figs/.gitkeep | 0 .../features/normalization/tests/__init__.py | 0 .../tests/test_c_normalizer_dnn.py | 206 +++++++++ .../tests/test_c_normalizer_mean_std.py | 102 +++++ .../tests/test_c_normalizer_minmax.py | 82 ++++ .../tests/test_c_normalizer_unitnorm.py | 51 +++ .../ml/features/reduction/tests/__init__.py | 0 .../reduction/tests/test_c_reducer_lda.py | 100 +++++ .../reduction/tests/test_c_reducer_pca.py | 69 +++ src/secml/ml/features/tests/__init__.py | 1 + .../features/tests/c_preprocess_testcases.py | 102 +++++ src/secml/ml/kernel/tests/__init__.py | 1 + .../ml/kernel/tests/c_kernel_testcases.py | 198 +++++++++ .../tests/test_c_kernel_chebyshev_distance.py | 27 ++ .../kernel/tests/test_c_kernel_euclidean.py | 25 ++ .../tests/test_c_kernel_histintersect.py | 23 + .../kernel/tests/test_c_kernel_laplacian.py | 23 + .../ml/kernel/tests/test_c_kernel_linear.py | 23 + .../ml/kernel/tests/test_c_kernel_poly.py | 23 + .../ml/kernel/tests/test_c_kernel_rbf.py | 23 + src/secml/ml/model_zoo/tests/__init__.py | 0 .../ml/model_zoo/tests/_test_model-clf.gz | Bin 0 -> 174 bytes .../ml/model_zoo/tests/_test_model_clf.py | 11 + .../ml/model_zoo/tests/models_dict_test.json | 8 + .../ml/model_zoo/tests/test_model_zoo.py | 217 +++++++++ src/secml/ml/peval/tests/__init__.py | 0 .../ml/peval/tests/test_perf_evaluator.py | 249 +++++++++++ .../tests/test_perf_evaluator_multiclass.py | 80 ++++ src/secml/ml/stats/tests/__init__.py | 0 .../stats/tests/test_c_density_estimation.py | 48 ++ 32 files changed, 2104 insertions(+) create mode 100644 src/secml/ml/classifiers/tests/__init__.py create mode 100644 src/secml/ml/classifiers/tests/c_classifier_testcases.py create mode 100644 src/secml/ml/classifiers/tests/figs/.gitkeep create mode 100644 src/secml/ml/features/normalization/tests/__init__.py create mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py create mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py create mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py create mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py create mode 100644 src/secml/ml/features/reduction/tests/__init__.py create mode 100644 src/secml/ml/features/reduction/tests/test_c_reducer_lda.py create mode 100644 src/secml/ml/features/reduction/tests/test_c_reducer_pca.py create mode 100644 src/secml/ml/features/tests/__init__.py create mode 100644 src/secml/ml/features/tests/c_preprocess_testcases.py create mode 100644 src/secml/ml/kernel/tests/__init__.py create mode 100644 src/secml/ml/kernel/tests/c_kernel_testcases.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_euclidean.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_histintersect.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_laplacian.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_linear.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_poly.py create mode 100644 src/secml/ml/kernel/tests/test_c_kernel_rbf.py create mode 100644 src/secml/ml/model_zoo/tests/__init__.py create mode 100644 
src/secml/ml/model_zoo/tests/_test_model-clf.gz create mode 100644 src/secml/ml/model_zoo/tests/_test_model_clf.py create mode 100644 src/secml/ml/model_zoo/tests/models_dict_test.json create mode 100644 src/secml/ml/model_zoo/tests/test_model_zoo.py create mode 100644 src/secml/ml/peval/tests/__init__.py create mode 100644 src/secml/ml/peval/tests/test_perf_evaluator.py create mode 100644 src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py create mode 100644 src/secml/ml/stats/tests/__init__.py create mode 100644 src/secml/ml/stats/tests/test_c_density_estimation.py diff --git a/src/secml/ml/classifiers/tests/__init__.py b/src/secml/ml/classifiers/tests/__init__.py new file mode 100644 index 00000000..c3580629 --- /dev/null +++ b/src/secml/ml/classifiers/tests/__init__.py @@ -0,0 +1 @@ +from .c_classifier_testcases import CClassifierTestCases diff --git a/src/secml/ml/classifiers/tests/c_classifier_testcases.py b/src/secml/ml/classifiers/tests/c_classifier_testcases.py new file mode 100644 index 00000000..4e587c01 --- /dev/null +++ b/src/secml/ml/classifiers/tests/c_classifier_testcases.py @@ -0,0 +1,411 @@ +from secml.testing import CUnitTest + +from secml.array import CArray +from secml.data import CDataset +from secml.ml.features import CPreProcess +from secml.optim.function import CFunction +from secml.figure import CFigure +from secml.core.constants import eps + + +class CClassifierTestCases(CUnitTest): + """Unittests interface for CClassifier.""" + + def _check_df_scores(self, s, n_samples): + self.assertEqual(type(s), CArray) + self.assertTrue(s.isdense) + self.assertEqual(1, s.ndim) + self.assertEqual((n_samples,), s.shape) + self.assertEqual(float, s.dtype) + + def _check_classify_scores(self, l, s, n_samples, n_classes): + self.assertEqual(type(l), CArray) + self.assertEqual(type(s), CArray) + self.assertTrue(l.isdense) + self.assertTrue(s.isdense) + self.assertEqual(1, l.ndim) + self.assertEqual(2, s.ndim) + self.assertEqual((n_samples,), l.shape) + self.assertEqual((n_samples, n_classes), s.shape) + self.assertEqual(int, l.dtype) + self.assertEqual(float, s.dtype) + + def _test_fun(self, clf, ds): + """Test for `decision_function` and `predict` + + Parameters + ---------- + clf : CClassifier + ds : CDataset + + Returns + ------- + scores : CArray + Classifier scores computed on a single point. + + """ + self.logger.info( + "Test for decision_function() and predict() methods.") + + if ds.issparse: + self.logger.info("Testing on sparse data...") + else: + self.logger.info("Testing on dense data...") + + clf.fit(ds) + + # we have to ensure at least 2d here, since _decision_function is not + # applying this change anymore (while decision_function does). 
+ x = x_norm = ds.X.atleast_2d() + p = p_norm = ds.X[0, :].ravel().atleast_2d() + + # Transform data if preprocess is defined + if clf.preprocess is not None: + x_norm = clf.preprocess.transform(x) + p_norm = clf.preprocess.transform(p) + + # Testing decision_function on multiple points + df, df_priv = [], [] + for y in range(ds.num_classes): + df.append(clf.decision_function(x, y=y)) + df_priv.append(clf._forward(x_norm)[:, y].ravel()) + self.logger.info( + "decision_function(x, y={:}): {:}".format(y, df[y])) + self.logger.info( + "_decision_function(x_norm, y={:}): {:}".format(y, df_priv[y])) + self._check_df_scores(df_priv[y], ds.num_samples) + self._check_df_scores(df[y], ds.num_samples) + self.assertFalse((df[y] != df_priv[y]).any()) + + # Testing predict on multiple points + labels, scores = clf.predict( + x, return_decision_function=True) + self.logger.info( + "predict(x):\nlabels: {:}\nscores: {:}".format(labels, scores)) + self._check_classify_scores( + labels, scores, ds.num_samples, clf.n_classes) + + # Comparing output of decision_function and predict + for y in range(ds.num_classes): + self.assertFalse((df[y] != scores[:, y].ravel()).any()) + + # Testing decision_function on single point + df, df_priv = [], [] + for y in range(ds.num_classes): + df.append(clf.decision_function(p, y=y)) + df_priv.append(clf._forward(p_norm)[:, y].ravel()) + self.logger.info( + "decision_function(p, y={:}): {:}".format(y, df[y])) + self._check_df_scores(df[y], 1) + self.logger.info( + "_decision_function(p_norm, y={:}): {:}".format(y, df_priv[y])) + self._check_df_scores(df_priv[y], 1) + self.assertFalse((df[y] != df_priv[y]).any()) + + self.logger.info("Testing predict on single point") + + labels, scores = clf.predict( + p, return_decision_function=True) + self.logger.info( + "predict(p):\nlabels: {:}\nscores: {:}".format(labels, scores)) + self._check_classify_scores(labels, scores, 1, clf.n_classes) + + # Comparing output of decision_function and predict + for y in range(ds.num_classes): + self.assertFalse((df[y] != scores[:, y].ravel()).any()) + + return scores + + def _test_plot(self, clf, ds, levels=None): + """Plot the decision function of a classifier.""" + self.logger.info("Testing classifiers graphically") + # Preparation of the grid + fig = CFigure(width=8, height=4, fontsize=8) + clf.fit(ds) + + fig.subplot(1, 2, 1) + fig.sp.plot_ds(ds) + fig.sp.plot_decision_regions( + clf, n_grid_points=50, grid_limits=ds.get_bounds()) + fig.sp.title("Decision regions") + + fig.subplot(1, 2, 2) + fig.sp.plot_ds(ds) + fig.sp.plot_fun(clf.decision_function, grid_limits=ds.get_bounds(), + levels=levels, y=1) + fig.sp.title("Discriminant function for y=1") + + return fig + + def _test_gradient_numerical(self, clf, x, extra_classes=None, + th=1e-3, epsilon=eps, **grad_kwargs): + """Test for clf.grad_f_x comparing to numerical gradient. + + Parameters + ---------- + clf : CClassifier + x : CArray + extra_classes : None or list of int, optional + Any extra class which gradient wrt should be tested + th : float, optional + The threshold for the check with numerical gradient. + epsilon : float, optional + The epsilon to use for computing the numerical gradient. + grad_kwargs : kwargs + Any extra parameter for the gradient function. + + Returns + ------- + grads : list of CArray + A list with the gradients computed wrt each class. 
+ + """ + if 'y' in grad_kwargs: + raise ValueError("`y` cannot be passed to this unittest.") + + if extra_classes is not None: + classes = clf.classes.append(extra_classes) + else: + classes = clf.classes + + grads = [] + for c in classes: + grad_kwargs['y'] = c # Appending class to test_f_x + + # Analytical gradient + gradient = clf.grad_f_x(x, **grad_kwargs) + grads.append(gradient) + + self.assertTrue(gradient.is_vector_like) + self.assertEqual(x.size, gradient.size) + self.assertEqual(x.issparse, gradient.issparse) + + # Numerical gradient + num_gradient = CFunction( + clf.decision_function).approx_fprime(x.todense(), epsilon, y=c) + + # Compute the norm of the difference + error = (gradient - num_gradient).norm() + + self.logger.info( + "Analytic grad wrt. class {:}:\n{:}".format(c, gradient)) + self.logger.info( + "Numeric gradient wrt. class {:}:\n{:}".format( + c, num_gradient)) + + self.logger.info("norm(grad - num_grad): {:}".format(error)) + self.assertLess(error, th) + + self.assertIsSubDtype(gradient.dtype, float) + + return grads + + @staticmethod + def _create_preprocess_chain(pre_id_list, kwargs_list): + """Creates a preprocessor with other preprocessors chained + and a list of the same preprocessors (not chained)""" + chain = None + pre_list = [] + for i, pre_id in enumerate(pre_id_list): + chain = CPreProcess.create( + pre_id, preprocess=chain, **kwargs_list[i]) + pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) + + return chain, pre_list + + def _create_preprocess_test(self, ds, clf, pre_id_list, kwargs_list): + """Fit 2 clf, one with internal preprocessor chain + and another using pre-transformed data. + + Parameters + ---------- + ds : CDataset + clf : CClassifier + pre_id_list : list of str + This list should contain the class_id of each preprocessor + that should be part of the chain. + kwargs_list : list of dict + This list should contain a dictionary of extra parameter for + each preprocessor that should be part of the chain. + + Returns + ------- + pre1 : CPreProcess + The preprocessors chain. + data_pre : CArray + Data (ds.X) transformed using pre1. + clf_pre : CClassifier + The classifier with a copy the preprocessors chain inside, + trained on ds. + clf : CClassifier + The classifier without the preprocessors chain inside, + trained on data_pre. + + """ + pre1 = CPreProcess.create_chain(pre_id_list, kwargs_list) + data_pre = pre1.fit_transform(ds.X) + + pre2 = CPreProcess.create_chain(pre_id_list, kwargs_list) + clf_pre = clf.deepcopy() + clf_pre.preprocess = pre2 + + clf_pre.fit(ds) + clf.fit(CDataset(data_pre, ds.Y)) + + return pre1, data_pre, clf_pre, clf + + def _test_preprocess(self, ds, clf, pre_id_list, kwargs_list): + """Test if clf with preprocessor inside returns the same + prediction of the clf trained on pre-transformed data. + + Parameters + ---------- + ds : CDataset + clf : CClassifier + pre_id_list : list of str + This list should contain the class_id of each preprocessor + that should be part of the chain. + kwargs_list : list of dict + This list should contain a dictionary of extra parameter for + each preprocessor that should be part of the chain. 
+ + """ + pre, data_pre, clf_pre, clf = self._create_preprocess_test( + ds, clf, pre_id_list, kwargs_list) + + self.logger.info( + "Testing {:} with preprocessor inside:\n{:}".format( + clf.__class__.__name__, clf_pre)) + + y1, score1 = clf_pre.predict(ds.X, return_decision_function=True) + y2, score2 = clf.predict(data_pre, return_decision_function=True) + + self.assert_array_equal(y1, y2) + self.assert_array_almost_equal(score1, score2) + + # The number of features of the clf with preprocess inside should be + # equal to the number of dataset features (so before preprocessing) + self.assertEqual(ds.num_features, clf_pre.n_features) + + def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, + extra_classes=None, check_numerical=True, + th=1e-3, epsilon=eps, **grad_kwargs): + """Test if clf gradient with preprocessor inside is equal to the + gradient of the clf trained on pre-transformed data. + Also compare the gradient of the clf with preprocessor + inside with numerical gradient. + + Parameters + ---------- + ds : CDataset + clf : CClassifier + pre_id_list : list of str + This list should contain the class_id of each preprocessor + that should be part of the chain. + kwargs_list : list of dict + This list should contain a dictionary of extra parameter for + each preprocessor that should be part of the chain. + extra_classes : None or list of int, optional + Any extra class which gradient wrt should be tested + check_numerical : bool, optional + If True, the gradient will be compared with + the numerical approximation. + th : float, optional + The threshold for the check with numerical gradient. + epsilon : float, optional + The epsilon to use for computing the numerical gradient. + grad_kwargs : kwargs + Any extra parameter for the gradient function. + + """ + pre, data_pre, clf_pre, clf = self._create_preprocess_test( + ds, clf, pre_id_list, kwargs_list) + + self.logger.info("Testing clf gradient with preprocessor " + "inside:\n{:}".format(clf_pre)) + + if 'y' in grad_kwargs: + raise ValueError("`y` cannot be passed to this unittest.") + + if extra_classes is not None: + classes = clf.classes.append(extra_classes) + else: + classes = clf.classes + + for c in classes: + self.logger.info( + "Testing grad wrt. class {:}".format(c)) + + # Grad of clf without preprocessor inside (using transformed data) + v_pre = data_pre[0, :] + clf_grad = clf.grad_f_x(v_pre, y=c, **grad_kwargs) + + # Output of grad_f_x should be a float vector + self.assertEqual(1, clf_grad.ndim) + self.assertIsSubDtype(clf_grad.dtype, float) + + # Gradient of clf with preprocessor inside + v = ds.X[0, :] + clf_pre_grad = clf_pre.grad_f_x(v, y=c, **grad_kwargs) + + # Gradient of the preprocessor. Should be equal to the gradient + # of the clf with preprocessor inside + pre_grad = pre.gradient(v_pre, w=clf_grad) + + # As clf_grad should be a float vector, + # output of gradient should be the same + self.assertEqual(1, pre_grad.ndim) + self.assertIsSubDtype(pre_grad.dtype, float) + + self.assert_array_almost_equal(clf_pre_grad, pre_grad) + + if check_numerical is True: + # Comparison with numerical gradient + self._test_gradient_numerical( + clf_pre, ds.X[0, :], extra_classes=extra_classes, + th=th, epsilon=epsilon, **grad_kwargs) + + def _test_sparse_linear(self, ds, clf): + """Test linear classifier operations on sparse data. + + For linear classifiers, when training on sparse data, the weights + vector must be sparse. Also `grad_f_x` must return a sparse array. 
+ + Parameters + ---------- + ds : CDataset + clf : CClassifier + + """ + self.logger.info("Testing {:} operations on sparse data.".format( + clf.__class__.__name__)) + + ds_sparse = ds.tosparse() + + # Fitting on sparse data + clf.fit(ds_sparse) + + # Resulting weights vector must be sparse + self.assertTrue(clf.w.issparse) + + # Predictions on dense and sparse data + x = ds.X[0, :] + x_sparse = ds_sparse.X[0, :] + + y, s = clf.predict( + x, return_decision_function=True) + y_sparse, s_sparse = clf.predict( + x_sparse, return_decision_function=True) + + self.assert_array_equal(y, y_sparse) + self.assert_array_equal(s, s_sparse) + + # Gradient must be sparse if training data is sparse + grad = clf.grad_f_x(x_sparse, y=0) + self.assertTrue(grad.issparse) + grad = clf.grad_f_x(x, y=0) + self.assertTrue(grad.issparse) + + +if __name__ == '__main__': + CUnitTest.main() diff --git a/src/secml/ml/classifiers/tests/figs/.gitkeep b/src/secml/ml/classifiers/tests/figs/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/secml/ml/features/normalization/tests/__init__.py b/src/secml/ml/features/normalization/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py new file mode 100644 index 00000000..b7f33ff3 --- /dev/null +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py @@ -0,0 +1,206 @@ +from secml.ml.features.tests import CPreProcessTestCases + +from collections import OrderedDict + +try: + import torch + import torchvision +except ImportError: + CPreProcessTestCases.importskip("torch") + CPreProcessTestCases.importskip("torchvision") +else: + import torch + from torch import nn, optim + from torchvision import transforms + torch.manual_seed(0) + +from secml.array import CArray +from secml.ml.features.normalization import CNormalizerDNN +from secml.ml.classifiers import CClassifierPyTorch +from secml.data.loader import CDLRandom +from secml.optim.function import CFunction + + +def mlp(input_dims=100, hidden_dims=(50, 50), output_dims=10): + """Multi-layer Perceptron""" + if len(hidden_dims) < 1: + raise ValueError("at least one hidden dim should be defined") + if any(d <= 0 for d in hidden_dims): + raise ValueError("each hidden layer must have at least one neuron") + + # Input layers + layers = [ + ('linear1', torch.nn.Linear(input_dims, hidden_dims[0])), + ('relu1', torch.nn.ReLU()), + ] + # Appending additional hidden layers + for hl_i, hl_dims in enumerate(hidden_dims[1:]): + prev_hl_dims = hidden_dims[hl_i] # Dims of the previous hl + i_str = str(hl_i + 2) + layers += [ + ('linear' + i_str, torch.nn.Linear(prev_hl_dims, hl_dims)), + ('relu' + i_str, torch.nn.ReLU())] + # Output layers + layers += [ + ('linear' + str(len(hidden_dims) + 1), + torch.nn.Linear(hidden_dims[-1], output_dims))] + + # Creating the model with the list of layers + return torch.nn.Sequential(OrderedDict(layers)) + + +class TestCNormalizerPyTorch(CPreProcessTestCases): + + @classmethod + def setUpClass(cls): + cls.ds = CDLRandom(n_samples=40, n_classes=3, + n_features=20, n_informative=15, + random_state=0).load() + + model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) + loss = nn.CrossEntropyLoss() + optimizer = optim.SGD(model.parameters(), lr=1e-1) + cls.net = CClassifierPyTorch(model=model, loss=loss, + optimizer=optimizer, random_state=0, + epochs=10, pretrained=True) + cls.net.fit(cls.ds) + cls.norm = 
CNormalizerDNN(net=cls.net) + + CPreProcessTestCases.setUpClass() + + def test_normalization(self): + """Testing normalization.""" + x = self.ds.X[0, :] + + self.logger.info("Testing normalization at last layer") + + self.norm.out_layer = None + + out_norm = self.norm.transform(x) + out_net = self.net.get_layer_output(x, layer=None) + + self.logger.info("Output of normalize:\n{:}".format(out_norm)) + self.logger.info("Output of net:\n{:}".format(out_net)) + + self.assert_allclose(out_norm, out_net) + + self.norm.out_layer = 'linear1' + + self.logger.info( + "Testing normalization at layer {:}".format(self.norm.out_layer)) + + out_norm = self.norm.transform(x) + out_net = self.net.get_layer_output(x, layer=self.norm.out_layer) + + self.logger.info("Output of normalize:\n{:}".format(out_norm)) + self.logger.info("Output of net:\n{:}".format(out_net)) + + self.assert_allclose(out_norm, out_net) + + def test_chain(self): + """Test for preprocessors chain.""" + # Inner preprocessors should be passed to the pytorch clf + with self.assertRaises(ValueError): + CNormalizerDNN(net=self.net, preprocess='min-max') + + def test_gradient(self): + """Test for gradient.""" + x = self.ds.X[0, :] + + layer = None + self.norm.out_layer = layer + self.logger.info("Returning gradient for layer: {:}".format(layer)) + shape = self.norm.transform(x).shape + w = CArray.zeros(shape=shape) + w[0] = 1 + grad = self.norm.gradient(x, w=w) + + self.logger.info("Output of gradient_f_x:\n{:}".format(grad)) + + self.assertTrue(grad.is_vector_like) + self.assertEqual(x.size, grad.size) + + layer = 'linear1' + self.norm.out_layer = layer + self.logger.info("Returning output for layer: {:}".format(layer)) + out = self.net.get_layer_output(x, layer=layer) + self.logger.info("Returning gradient for layer: {:}".format(layer)) + grad = self.norm.gradient(x, w=out) + + self.logger.info("Output of grad_f_x:\n{:}".format(grad)) + + self.assertTrue(grad.is_vector_like) + self.assertEqual(x.size, grad.size) + + def test_aspreprocess(self): + """Test for normalizer used as preprocess.""" + from secml.ml.classifiers import CClassifierSVM + from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA + + model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) + loss = nn.CrossEntropyLoss() + optimizer = optim.SGD(model.parameters(), lr=1e-1) + net = CClassifierPyTorch(model=model, loss=loss, + optimizer=optimizer, random_state=0, + epochs=10, preprocess='min-max') + net.fit(self.ds) + + norm = CNormalizerDNN(net=net) + + clf = CClassifierMulticlassOVA( + classifier=CClassifierSVM, preprocess=norm) + + self.logger.info("Testing last layer") + + clf.fit(self.ds) + + y_pred, scores = clf.predict( + self.ds.X, return_decision_function=True) + self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) + self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) + self.logger.info("Scores:\n{:}".format(scores)) + + x = self.ds.X[0, :] + + self.logger.info("Testing last layer gradient") + + for c in self.ds.classes: + self.logger.info("Gradient w.r.t. 
class {:}".format(c)) + + grad = clf.grad_f_x(x, y=c) + + self.logger.info("Output of grad_f_x:\n{:}".format(grad)) + + check_grad_val = CFunction( + clf.decision_function, clf.grad_f_x).check_grad( + x, y=c, epsilon=1e-1) + self.logger.info( + "norm(grad - num_grad): %s", str(check_grad_val)) + self.assertLess(check_grad_val, 1e-3) + + self.assertTrue(grad.is_vector_like) + self.assertEqual(x.size, grad.size) + + layer = 'linear1' + norm.out_layer = layer + + self.logger.info("Testing layer {:}".format(norm.out_layer)) + + clf.fit(self.ds) + + y_pred, scores = clf.predict( + self.ds.X, return_decision_function=True) + self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) + self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) + self.logger.info("Scores:\n{:}".format(scores)) + + self.logger.info("Testing 'linear1' layer gradient") + grad = clf.grad_f_x(x, y=0) # y is required for multiclassova + self.logger.info("Output of grad_f_x:\n{:}".format(grad)) + + self.assertTrue(grad.is_vector_like) + self.assertEqual(x.size, grad.size) + + +if __name__ == '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py new file mode 100644 index 00000000..43bab3c5 --- /dev/null +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -0,0 +1,102 @@ +from secml.ml.features.tests import CPreProcessTestCases + +from sklearn.preprocessing import StandardScaler + +from secml.array import CArray +from secml.ml.features.normalization import CNormalizerMeanStd + + +class TestCNormalizerMeanStd(CPreProcessTestCases): + """Unittest for CNormalizerMeanStd""" + + def test_zscore(self): + """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" + + def sklearn_comp(array): + + self.logger.info("Original array is:\n{:}".format(array)) + + # Sklearn normalizer + target = CArray(StandardScaler().fit_transform( + array.astype(float).tondarray())) + # Our normalizer + n = CNormalizerMeanStd().fit(array) + result = n.transform(array) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + self.logger.info("Testing without std") + # Sklearn normalizer + target = CArray(StandardScaler(with_std=False).fit_transform( + array.astype(float).tondarray())) + # Our normalizer + n = CNormalizerMeanStd(with_std=False).fit(array) + result = n.transform(array) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + sklearn_comp(self.array_dense) + sklearn_comp(self.array_sparse) + sklearn_comp(self.row_dense.atleast_2d()) + sklearn_comp(self.row_sparse) + sklearn_comp(self.column_dense) + sklearn_comp(self.column_sparse) + + def test_normalizer_mean_std(self): + """Test for CNormalizerMeanStd.""" + + for (mean, std) in [(1.5, 0.1), + ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: + for array in [self.array_dense, self.array_sparse]: + + self.logger.info("Original array is:\n{:}".format(array)) + self.logger.info( + "Normalizing using mean: {:} std: {:}".format(mean, std)) + + n = CNormalizerMeanStd(mean=mean, std=std).fit(array) + out = n.transform(array) + + self.logger.info("Result is:\n{:}".format(out)) + + out_mean = out.mean(axis=0, keepdims=False) + out_std = out.std(axis=0, keepdims=False) + + 
self.logger.info("Result mean is:\n{:}".format(out_mean)) + self.logger.info("Result std is:\n{:}".format(out_std)) + + rev = n.inverse_transform(out) + + self.assert_array_almost_equal(array, rev) + + def test_chain(self): + """Test a chain of preprocessors.""" + x_chain = self._test_chain( + self.array_dense, + ['min-max', 'pca', 'mean-std'], + [{'feature_range': (-5, 5)}, {}, {}] + ) + + # Expected shape is (3, 3), as pca max n_components is 4-1 + self.assertEqual((self.array_dense.shape[0], + self.array_dense.shape[1]-1), x_chain.shape) + + def test_chain_gradient(self): + """Check gradient of a chain of preprocessors.""" + grad = self._test_chain_gradient( + self.array_dense, + ['min-max', 'mean-std'], + [{'feature_range': (-5, 5)}, {}] + ) + + # Expected shape is (n_feats, ), so (4, ) + self.assertEqual((self.array_dense.shape[1], ), grad.shape) + + +if __name__ == '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py new file mode 100644 index 00000000..324c6adc --- /dev/null +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -0,0 +1,82 @@ +from secml.ml.features.tests import CPreProcessTestCases + + +from sklearn.preprocessing import MinMaxScaler + +from secml.array import CArray +from secml.ml.features.normalization import CNormalizerMinMax + + +class TestCNormalizerLinear(CPreProcessTestCases): + """Unittest for CNormalizerLinear.""" + + def test_norm_minmax(self): + """Test for CNormalizerMinMax.""" + + def sklearn_comp(array): + + self.logger.info("Original array is:\n{:}".format(array)) + + # Sklearn normalizer (requires float dtype input) + array_sk = array.astype(float).tondarray() + sk_norm = MinMaxScaler().fit(array_sk) + + target = CArray(sk_norm.transform(array_sk)) + + # Our normalizer + our_norm = CNormalizerMinMax().fit(array) + result = our_norm.transform(array) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + # Testing out of range normalization + + self.logger.info("Testing out of range normalization") + + # Sklearn normalizer (requires float dtype input) + target = CArray(sk_norm.transform(array_sk * 2)) + + # Our normalizer + result = our_norm.transform(array * 2) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + sklearn_comp(self.array_dense) + sklearn_comp(self.array_sparse) + sklearn_comp(self.row_dense.atleast_2d()) + sklearn_comp(self.row_sparse) + sklearn_comp(self.column_dense) + sklearn_comp(self.column_sparse) + + def test_chain(self): + """Test a chain of preprocessors.""" + x_chain = self._test_chain( + self.array_dense, + ['min-max', 'pca', 'min-max'], + [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] + ) + + # Expected shape is (3, 3), as pca max n_components is 4-1 + self.assertEqual((self.array_dense.shape[0], + self.array_dense.shape[1]-1), x_chain.shape) + + def test_chain_gradient(self): + """Check gradient of a chain of preprocessors.""" + grad = self._test_chain_gradient( + self.array_dense, + ['min-max', 'mean-std', 'min-max'], + [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] + ) + + # Expected shape is (n_feats, ), so (4, ) + self.assertEqual((self.array_dense.shape[1], ), grad.shape) + + +if __name__ 
== '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py new file mode 100644 index 00000000..c17ac912 --- /dev/null +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -0,0 +1,51 @@ +from secml.ml.features.tests import CPreProcessTestCases + +from sklearn.preprocessing import Normalizer + +from secml.array import CArray +from secml.ml.features.normalization import CNormalizerUnitNorm + + +class TestCNormalizerUnitNorm(CPreProcessTestCases): + """Unittest for CNormalizerUnitNorm.""" + + def test_norm_unitnorm(self): + """Test for CNormalizerUnitNorm.""" + + def sklearn_comp(array): + + self.logger.info("Original array is:\n{:}".format(array)) + + # Sklearn normalizer (requires float dtype input) + target = CArray(Normalizer().fit_transform( + array.astype(float).get_data())) + # Our normalizer + result = CNormalizerUnitNorm().fit_transform(array) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + sklearn_comp(self.array_dense) + sklearn_comp(self.array_sparse) + sklearn_comp(self.row_dense.atleast_2d()) + sklearn_comp(self.row_sparse) + sklearn_comp(self.column_dense) + sklearn_comp(self.column_sparse) + + def test_chain(self): + """Test a chain of preprocessors.""" + x_chain = self._test_chain( + self.array_dense, + ['min-max', 'pca', 'unit-norm'], + [{'feature_range': (-5, 5)}, {}, {}] + ) + + # Expected shape is (3, 3), as pca max n_components is 4-1 + self.assertEqual((self.array_dense.shape[0], + self.array_dense.shape[1]-1), x_chain.shape) + + +if __name__ == '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/reduction/tests/__init__.py b/src/secml/ml/features/reduction/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py b/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py new file mode 100644 index 00000000..750e5afc --- /dev/null +++ b/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py @@ -0,0 +1,100 @@ +from secml.ml.features.tests import CPreProcessTestCases + +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis + +from secml.array import CArray +from secml.ml.features.reduction import CLDA +from secml.figure import CFigure + + +class TestCLda(CPreProcessTestCases): + """Unittests for CLDA.""" + + def setUp(self): + # As our test cases are not always linearly independent, + # LDA will warn about "Variables are collinear". + # We can ignore the warning in this context + self.logger.filterwarnings("ignore", "Variables are collinear.") + + super(TestCLda, self).setUp() + + def test_lda(self): + """Test for LDA. 
This compares the sklearn implementation with our method.""" + + def sklearn_comp(array, y): + self.logger.info("Original array is:\n{:}".format(array)) + + # Sklearn normalizer + sklearn_lda = LinearDiscriminantAnalysis().fit( + array.tondarray(), y.tondarray()) + target = CArray(sklearn_lda.transform(array.tondarray())) + # Our normalizer + lda = CLDA().fit(array, y) + result = lda.transform(array) + + self.logger.info("Sklearn result is:\n{:}".format(target)) + self.logger.info("Result is:\n{:}".format(result)) + + self.assert_array_almost_equal(result, target) + + # A min of 2 samples is required by LDA so we cannot test single rows + sklearn_comp(self.array_dense, CArray([0, 1, 0])) + sklearn_comp(self.array_sparse, CArray([0, 1, 0])) + sklearn_comp(self.column_dense, CArray([0, 1, 0])) + sklearn_comp(self.column_sparse, CArray([0, 1, 0])) + + def test_plot(self): + """Test for LDA. Check LDA result graphically. + + Apply LDA to the sklearn Iris dataset and compare it with + "Linear Discriminant Analysis bit by bit" by Sebastian Raschka + http://sebastianraschka.com/Articles/2014_python_lda.html + in the plot we should see approximately: + x axis: from -2 to -1 virginica, from -1 to 0 versicolor, from 1 to 2,3 setosa + y axis: from -1 to -1 virginica, from -1 to 0.5 versicolor, from -1 to 1 setosa + + """ + from sklearn.datasets import load_iris + + iris_db = load_iris() + patterns = CArray(iris_db.data) + labels = CArray(iris_db.target) + + lda = CLDA() + lda.fit(patterns, labels) + # store dataset reduced with lda + red_dts = lda.fit_transform(patterns, labels) + + fig = CFigure(width=10, markersize=8) + fig.sp.scatter(red_dts[:, 0].ravel(), + red_dts[:, 1].ravel(), + c=labels) + fig.show() + + def test_chain(self): + """Test a chain of preprocessors.""" + x_chain = self._test_chain( + self.array_dense, + ['min-max', 'mean-std', 'lda'], + [{'feature_range': (-5, 5)}, {}, {}], + y=CArray([1, 0, 1]) # LDA is supervised + ) + + # Expected shape is (3, 1), as lda max n_components is classes - 1 + self.assertEqual((self.array_dense.shape[0], 1), x_chain.shape) + + x_chain = self._test_chain( + self.array_dense, + ['mean-std', 'lda', 'min-max'], + [{}, {}, {}], + y=CArray([1, 0, 1]) # LDA is supervised + ) + + # Expected shape is (3, 1), as lda max n_components is classes - 1 + self.assertEqual((self.array_dense.shape[0], 1), x_chain.shape) + + # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) + + +if __name__ == '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py b/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py new file mode 100644 index 00000000..adf18ff5 --- /dev/null +++ b/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py @@ -0,0 +1,69 @@ +from secml.ml.features.tests import CPreProcessTestCases + +from secml.array import CArray +from secml.ml.features.reduction import CPCA +from sklearn.decomposition import PCA + + +class TestCPca(CPreProcessTestCases): + """Unittests for CPCA.""" + + def test_pca(self): + """Test for PCA. 
This compares the sklearn implementation with our method.""" + + # Few test cases involve an all-zero column, + # so PCA will trigger a 0/0 warning + self.logger.filterwarnings( + action='ignore', + message='invalid value encountered in true_divide', + category=RuntimeWarning + ) + self.logger.filterwarnings( + action='ignore', + message='invalid value encountered in divide', + category=RuntimeWarning + ) + + def sklearn_comp(array): + self.logger.info("Original array is:\n{:}".format(array)) + + # Sklearn normalizer + sklearn_pca = PCA().fit(array.tondarray()) + target = CArray(sklearn_pca.transform(array.tondarray())) + # Our normalizer + pca = CPCA().fit(array) + result = pca.transform(array) + + self.logger.info("Sklearn result is:\n{:}".format(target)) + self.logger.info("Result is:\n{:}".format(result)) + + self.assert_array_almost_equal(result, target) + + original = pca.inverse_transform(result) + + self.assert_array_almost_equal(original, array) + + sklearn_comp(self.array_dense) + sklearn_comp(self.array_sparse) + sklearn_comp(self.row_dense.atleast_2d()) + sklearn_comp(self.row_sparse) + sklearn_comp(self.column_dense) + sklearn_comp(self.column_sparse) + + def test_chain(self): + """Test a chain of preprocessors.""" + x_chain = self._test_chain( + self.array_dense, + ['min-max', 'unit-norm', 'pca'], + [{'feature_range': (-5, 5)}, {}, {}] + ) + + # Expected shape is (3, 3), as pca max n_components is 4-1 + self.assertEqual((self.array_dense.shape[0], + self.array_dense.shape[1] - 1), x_chain.shape) + + # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) + + +if __name__ == '__main__': + CPreProcessTestCases.main() diff --git a/src/secml/ml/features/tests/__init__.py b/src/secml/ml/features/tests/__init__.py new file mode 100644 index 00000000..8524a8ee --- /dev/null +++ b/src/secml/ml/features/tests/__init__.py @@ -0,0 +1 @@ +from .c_preprocess_testcases import CPreProcessTestCases diff --git a/src/secml/ml/features/tests/c_preprocess_testcases.py b/src/secml/ml/features/tests/c_preprocess_testcases.py new file mode 100644 index 00000000..c534f76a --- /dev/null +++ b/src/secml/ml/features/tests/c_preprocess_testcases.py @@ -0,0 +1,102 @@ +from secml.testing import CUnitTest + +from secml.array import CArray +from secml.ml.features import CPreProcess + + +class CPreProcessTestCases(CUnitTest): + """Unittests interface for CPreProcess.""" + + def setUp(self): + + self.array_dense = CArray([[1, 0, 0, 5], + [2, 4, 0, 0], + [3, 6, 0, 0]]) + self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True) + + self.row_dense = CArray([4, 0, 6]) + self.column_dense = self.row_dense.deepcopy().T + + self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True) + self.column_sparse = self.row_sparse.deepcopy().T + + @staticmethod + def _create_chain(pre_id_list, kwargs_list): + """Creates a preprocessor with other preprocessors chained + and a list of the same preprocessors (not chained)""" + chain = None + pre_list = [] + for i, pre_id in enumerate(pre_id_list): + chain = CPreProcess.create( + pre_id, preprocess=chain, **kwargs_list[i]) + pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) + + return chain, pre_list + + def _test_chain(self, x, pre_id_list, kwargs_list, y=None): + """Tests if preprocess chain and manual chaining yield same result.""" + chain, pre_list = self._create_chain(pre_id_list, kwargs_list) + + chain = chain.fit(x, y=y) + self.logger.info("Preprocessors chain:\n{:}".format(chain)) + + x_chain = chain.transform(x) + self.logger.info("Transformed X 
(chain):\n{:}".format(x_chain)) + + # Train the manual chain and transform + x_manual = x + for pre in pre_list: + x_manual = pre.fit_transform(x_manual, y=y) + + self.logger.info("Trasformed X (manual):\n{:}".format(x_manual)) + self.assert_allclose(x_chain, x_manual) + + # Reverting array (if available) + try: + x_chain_revert = chain.inverse_transform(x_chain) + self.logger.info("Reverted X (chain):\n{:}".format(x_chain_revert)) + self.logger.info("Original X:\n{:}".format(x)) + self.assert_array_almost_equal(x_chain_revert, x) + except NotImplementedError: + self.logger.info("inverse_transform not available") + + return x_chain + + def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): + """Tests if gradient preprocess chain and + gradient of manual chaining yield same result.""" + chain, pre_list = self._create_chain(pre_id_list, kwargs_list) + + chain = chain.fit(x, y=y) + self.logger.info("Preprocessors chain:\n{:}".format(chain)) + + v = x[1, :] + grad_chain = chain.gradient(v) + self.logger.info( + "gradient({:}) (chain):\n{:}".format(v, grad_chain)) + + # Manually compose the chain and transform + for pre in pre_list: + x = pre.fit_transform(x, y=y) + + v_list = [v] + for pre in pre_list[:-1]: + v = pre.transform(v) + v_list.append(v) + + v_list = list(reversed(v_list)) + pre_list = list(reversed(pre_list)) + + grad = None + for i, v in enumerate(v_list): + grad = pre_list[i].gradient(v, w=grad) + + self.logger.info( + "gradient({:}) (manual):\n{:}".format(v, grad)) + self.assert_allclose(grad_chain, grad) + + return grad_chain + + +if __name__ == '__main__': + CUnitTest.main() diff --git a/src/secml/ml/kernel/tests/__init__.py b/src/secml/ml/kernel/tests/__init__.py new file mode 100644 index 00000000..c236cd43 --- /dev/null +++ b/src/secml/ml/kernel/tests/__init__.py @@ -0,0 +1 @@ +from .c_kernel_testcases import CCKernelTestCases diff --git a/src/secml/ml/kernel/tests/c_kernel_testcases.py b/src/secml/ml/kernel/tests/c_kernel_testcases.py new file mode 100644 index 00000000..5a3ea56b --- /dev/null +++ b/src/secml/ml/kernel/tests/c_kernel_testcases.py @@ -0,0 +1,198 @@ +from secml.testing import CUnitTest + +from secml.array import CArray +from secml.data.loader import CDLRandom +from secml.core.type_utils import is_scalar +from secml.ml.kernel import CKernel +from secml.optim.function import CFunction + + +class CCKernelTestCases(CUnitTest): + def _set_up(self, kernel_name): + + self.d_dense = CDLRandom(n_samples=10, n_features=5, + n_redundant=0, n_informative=3, + n_clusters_per_class=1, + random_state=100).load() + + self.p1_dense = self.d_dense.X[0, :] + self.p2_dense = self.d_dense.X[1, :] + + self.d_sparse = self.d_dense.tosparse() + self.p1_sparse = self.d_sparse.X[0, :] + self.p2_sparse = self.d_sparse.X[1, :] + + self.kernel = CKernel.create(kernel_name) + + def _has_gradient(self): + try: + self.kernel.gradient(self.p1_dense, self.p2_dense) + return True + except NotImplementedError: + return False + + def _cmp_kernel(self, k_fun, a1, a2): + k = k_fun(a1, a2) + if isinstance(k, CArray): + self.logger.info("k shape with inputs {:} {:} is: {:}" + "".format(a1.shape, a2.shape, k.shape)) + self.assertEqual(k.shape, (CArray(a1).atleast_2d().shape[0], + CArray(a2).atleast_2d().shape[0])) + else: + self.assertTrue(is_scalar(k)) + + def _test_similarity_shape(self): + """Test shape of kernel.""" + self.logger.info( + "Testing shape of " + self.kernel.class_type + " kernel output.") + + x_vect = CArray.rand(shape=(1, 10)).ravel() + x_mat = CArray.rand(shape=(10, 
10)) + x_col = CArray.rand(shape=(10, 1)) + x_single = CArray.rand(shape=(1, 1)) + + self._cmp_kernel(self.kernel.k, x_vect, x_vect) + self._cmp_kernel(self.kernel.k, x_mat, x_vect) + self._cmp_kernel(self.kernel.k, x_vect, x_mat) + self._cmp_kernel(self.kernel.k, x_mat, x_mat) + self._cmp_kernel(self.kernel.k, x_col, x_col) + self._cmp_kernel(self.kernel.k, x_col, x_single) + self._cmp_kernel(self.kernel.k, x_single, x_col) + self._cmp_kernel(self.kernel.k, x_single, x_single) + + def _test_similarity_shape_sparse(self): + """Test shape of kernel.""" + self.logger.info( + "Testing shape of " + self.kernel.class_type + " kernel output.") + + x_vect = CArray.rand(shape=(1, 10)).ravel().tosparse() + x_mat = CArray.rand(shape=(10, 10)).tosparse() + x_col = CArray.rand(shape=(10, 1)).tosparse() + x_single = CArray.rand(shape=(1, 1)).tosparse() + + self._cmp_kernel(self.kernel.k, x_vect, x_vect) + self._cmp_kernel(self.kernel.k, x_mat, x_vect) + self._cmp_kernel(self.kernel.k, x_vect, x_mat) + self._cmp_kernel(self.kernel.k, x_mat, x_mat) + self._cmp_kernel(self.kernel.k, x_col, x_col) + self._cmp_kernel(self.kernel.k, x_col, x_single) + self._cmp_kernel(self.kernel.k, x_single, x_col) + self._cmp_kernel(self.kernel.k, x_single, x_single) + + def _test_gradient(self): + """Test for kernel gradients with dense points.""" + + if not self._has_gradient(): + self.logger.info( + "Gradient is not implemented for %s. " + "Skipping gradient dense tests.", self.kernel.class_type) + return + + # we invert the order of input patterns as we compute the kernel + # gradient wrt the second point but check_grad needs it as first input + def kern_f_for_test(p2, p1, kernel_func): + return kernel_func.similarity(p1, p2) + + def kern_grad_for_test(p2, p1, kernel_func): + return kernel_func.gradient(p1, p2) + + self.logger.info("Testing gradient with dense data.") + self.logger.info("Kernel type: %s", self.kernel.class_type) + + for i in range(self.d_dense.num_samples): + self.logger.info("x point: " + str(self.p2_dense)) + self.logger.info("y point: " + str(self.d_dense.X[i, :])) + + # TODO: implement centered numerical differences. + # if analytical gradient is zero, numerical estimation does not + # work, as it is using one-side estimation. We should use centered + # numerical differences to gain precision. + grad = self.kernel.gradient(self.d_dense.X[i, :], self.p2_dense) + if grad.norm() >= 1e-10: + grad_error = CFunction( + kern_f_for_test, kern_grad_for_test).check_grad( + self.p2_dense, 1e-8, self.d_dense.X[i, :], self.kernel) + self.logger.info("Gradient approx. error: {:}" + "".format(grad_error)) + self.assertTrue(grad_error < 1e-4) + + def _test_gradient_sparse(self): + """Test for kernel gradients with sparse points.""" + + if not self._has_gradient(): + self.logger.info( + "Gradient is not implemented for %s. 
" + "Skipping gradient sparse tests.", self.kernel.class_type) + return + + self.logger.info("Testing gradient with sparse data.") + self.logger.info("Kernel type: %s", self.kernel.class_type) + + k_grad = self.kernel.gradient(self.d_sparse.X, self.p2_dense) + self.logger.info( + "sparse/dense ->.isdense: {:}".format(k_grad.isdense)) + self.assertTrue(k_grad.isdense) + + k_grad = self.kernel.gradient(self.d_dense.X, self.p2_sparse) + self.logger.info( + "dense/sparse ->.issparse: {:}".format(k_grad.issparse)) + self.assertTrue(k_grad.issparse) + + k_grad = self.kernel.gradient(self.d_sparse.X, self.p2_sparse) + self.logger.info( + "sparse/sparse ->.issparse: {:}".format(k_grad.issparse)) + self.assertTrue(k_grad.issparse) + + def _test_gradient_multiple_points(self): + """Test for kernel gradients with multiple points vs single point.""" + + if not self._has_gradient(): + self.logger.info( + "Gradient is not implemented for %s. " + "Skipping multiple-point tests.", self.kernel.class_type) + return + + # check if gradient computed on multiple points is the same as + # the gradients computed on one point at a time. + data = self.d_dense.X[0:5, :] # using same no. of points and features + k1 = self.kernel.gradient(data, self.p2_dense) + k2 = CArray.zeros(shape=k1.shape) + for i in range(k2.shape[0]): + k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) + self.assertTrue((k1 - k2).ravel().norm() < 1e-4) + + data = self.d_dense.X # using different no. of points/features + k1 = self.kernel.gradient(data, self.p2_dense) + k2 = CArray.zeros(shape=k1.shape) + for i in range(k2.shape[0]): + k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) + self.assertTrue((k1 - k2).ravel().norm() < 1e-4) + + def _test_gradient_multiple_points_sparse(self): + """Test for kernel gradients with multiple points vs single point.""" + + if not self._has_gradient(): + self.logger.info( + "Gradient is not implemented for %s. " + "Skipping multiple-point tests.", self.kernel.class_type) + return + + # check if gradient computed on multiple points is the same as + # the gradients computed on one point at a time. + data = self.d_sparse.X[0:5, :] # using same no. of points and features + k1 = self.kernel.gradient(data, self.p2_dense) + k2 = CArray.zeros(shape=k1.shape) + for i in range(k2.shape[0]): + k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) + self.assertTrue((k1 - k2).ravel().norm() < 1e-4) + + data = self.d_sparse.X # using different no. 
of points/features + k1 = self.kernel.gradient(data, self.p2_dense) + k2 = CArray.zeros(shape=k1.shape) + for i in range(k2.shape[0]): + k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) + self.assertTrue((k1 - k2).ravel().norm() < 1e-4) + + +if __name__ == '__main__': + CUnitTest.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py b/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py new file mode 100644 index 00000000..7fb3adbc --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py @@ -0,0 +1,27 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelChebyshevDistance(CCKernelTestCases): + """Unit test for CKernelChebyshevDistance.""" + + def setUp(self): + self._set_up('chebyshev-dist') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + try: + self._test_similarity_shape_sparse() + except TypeError: + # computation of kernel is not supported on sparse matrices + pass + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py b/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py new file mode 100644 index 00000000..9adc2675 --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py @@ -0,0 +1,25 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelEuclidean(CCKernelTestCases): + """Unit test for CKernelEuclidean.""" + + def setUp(self): + self._set_up('euclidean') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + # TODO test when squared=True. 
but this needs to be passed to __init__ + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py b/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py new file mode 100644 index 00000000..0d938edb --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py @@ -0,0 +1,23 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelHistIntersect(CCKernelTestCases): + """Unit test for CKernelHistIntersect.""" + + def setUp(self): + self._set_up('hist-intersect') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py b/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py new file mode 100644 index 00000000..4f304e5c --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py @@ -0,0 +1,23 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelLaplacian(CCKernelTestCases): + """Unit test for CKernelLaplacian.""" + + def setUp(self): + self._set_up('laplacian') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_linear.py b/src/secml/ml/kernel/tests/test_c_kernel_linear.py new file mode 100644 index 00000000..a5e9a85d --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_linear.py @@ -0,0 +1,23 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelLinear(CCKernelTestCases): + """Unit test for CKernelLinear.""" + + def setUp(self): + self._set_up('linear') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_poly.py b/src/secml/ml/kernel/tests/test_c_kernel_poly.py new file mode 100644 index 00000000..c95ed235 --- /dev/null +++ b/src/secml/ml/kernel/tests/test_c_kernel_poly.py @@ -0,0 +1,23 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelPoly(CCKernelTestCases): + """Unit test for CKernelPoly.""" + + def setUp(self): + self._set_up('poly') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_rbf.py b/src/secml/ml/kernel/tests/test_c_kernel_rbf.py new file mode 100644 index 00000000..f5538ad2 --- /dev/null +++ 
b/src/secml/ml/kernel/tests/test_c_kernel_rbf.py @@ -0,0 +1,23 @@ +from secml.ml.kernel.tests import CCKernelTestCases + + +class TestCKernelRBF(CCKernelTestCases): + """Unit test for CKernelRBF.""" + + def setUp(self): + self._set_up('rbf') + + def test_similarity_shape(self): + """Test shape of kernel.""" + self._test_similarity_shape() + self._test_similarity_shape_sparse() + + def test_gradient(self): + self._test_gradient() + self._test_gradient_sparse() + self._test_gradient_multiple_points() + self._test_gradient_multiple_points_sparse() + + +if __name__ == '__main__': + CCKernelTestCases.main() diff --git a/src/secml/ml/model_zoo/tests/__init__.py b/src/secml/ml/model_zoo/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/secml/ml/model_zoo/tests/_test_model-clf.gz b/src/secml/ml/model_zoo/tests/_test_model-clf.gz new file mode 100644 index 0000000000000000000000000000000000000000..d60e6f509a8cef7bcb2fd341f9587338101c7cfd GIT binary patch literal 174 zcmb2|=HN*1^o?WsA77GMToRv~pOTuRo1Bxzkex7dR|6wc*7A%m23G!s Date: Sat, 14 Mar 2020 16:28:47 +0100 Subject: [PATCH 02/20] Revert "Added tests to folder "ml"." This reverts commit 30455b992c49ec1d4ce84228acb280bde2c05afd. --- src/secml/ml/classifiers/tests/__init__.py | 1 - .../tests/c_classifier_testcases.py | 411 ------------------ src/secml/ml/classifiers/tests/figs/.gitkeep | 0 .../features/normalization/tests/__init__.py | 0 .../tests/test_c_normalizer_dnn.py | 206 --------- .../tests/test_c_normalizer_mean_std.py | 102 ----- .../tests/test_c_normalizer_minmax.py | 82 ---- .../tests/test_c_normalizer_unitnorm.py | 51 --- .../ml/features/reduction/tests/__init__.py | 0 .../reduction/tests/test_c_reducer_lda.py | 100 ----- .../reduction/tests/test_c_reducer_pca.py | 69 --- src/secml/ml/features/tests/__init__.py | 1 - .../features/tests/c_preprocess_testcases.py | 102 ----- src/secml/ml/kernel/tests/__init__.py | 1 - .../ml/kernel/tests/c_kernel_testcases.py | 198 --------- .../tests/test_c_kernel_chebyshev_distance.py | 27 -- .../kernel/tests/test_c_kernel_euclidean.py | 25 -- .../tests/test_c_kernel_histintersect.py | 23 - .../kernel/tests/test_c_kernel_laplacian.py | 23 - .../ml/kernel/tests/test_c_kernel_linear.py | 23 - .../ml/kernel/tests/test_c_kernel_poly.py | 23 - .../ml/kernel/tests/test_c_kernel_rbf.py | 23 - src/secml/ml/model_zoo/tests/__init__.py | 0 .../ml/model_zoo/tests/_test_model-clf.gz | Bin 174 -> 0 bytes .../ml/model_zoo/tests/_test_model_clf.py | 11 - .../ml/model_zoo/tests/models_dict_test.json | 8 - .../ml/model_zoo/tests/test_model_zoo.py | 217 --------- src/secml/ml/peval/tests/__init__.py | 0 .../ml/peval/tests/test_perf_evaluator.py | 249 ----------- .../tests/test_perf_evaluator_multiclass.py | 80 ---- src/secml/ml/stats/tests/__init__.py | 0 .../stats/tests/test_c_density_estimation.py | 48 -- 32 files changed, 2104 deletions(-) delete mode 100644 src/secml/ml/classifiers/tests/__init__.py delete mode 100644 src/secml/ml/classifiers/tests/c_classifier_testcases.py delete mode 100644 src/secml/ml/classifiers/tests/figs/.gitkeep delete mode 100644 src/secml/ml/features/normalization/tests/__init__.py delete mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py delete mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py delete mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py delete mode 100644 src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py delete mode 100644 
src/secml/ml/features/reduction/tests/__init__.py delete mode 100644 src/secml/ml/features/reduction/tests/test_c_reducer_lda.py delete mode 100644 src/secml/ml/features/reduction/tests/test_c_reducer_pca.py delete mode 100644 src/secml/ml/features/tests/__init__.py delete mode 100644 src/secml/ml/features/tests/c_preprocess_testcases.py delete mode 100644 src/secml/ml/kernel/tests/__init__.py delete mode 100644 src/secml/ml/kernel/tests/c_kernel_testcases.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_euclidean.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_histintersect.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_laplacian.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_linear.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_poly.py delete mode 100644 src/secml/ml/kernel/tests/test_c_kernel_rbf.py delete mode 100644 src/secml/ml/model_zoo/tests/__init__.py delete mode 100644 src/secml/ml/model_zoo/tests/_test_model-clf.gz delete mode 100644 src/secml/ml/model_zoo/tests/_test_model_clf.py delete mode 100644 src/secml/ml/model_zoo/tests/models_dict_test.json delete mode 100644 src/secml/ml/model_zoo/tests/test_model_zoo.py delete mode 100644 src/secml/ml/peval/tests/__init__.py delete mode 100644 src/secml/ml/peval/tests/test_perf_evaluator.py delete mode 100644 src/secml/ml/peval/tests/test_perf_evaluator_multiclass.py delete mode 100644 src/secml/ml/stats/tests/__init__.py delete mode 100644 src/secml/ml/stats/tests/test_c_density_estimation.py diff --git a/src/secml/ml/classifiers/tests/__init__.py b/src/secml/ml/classifiers/tests/__init__.py deleted file mode 100644 index c3580629..00000000 --- a/src/secml/ml/classifiers/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .c_classifier_testcases import CClassifierTestCases diff --git a/src/secml/ml/classifiers/tests/c_classifier_testcases.py b/src/secml/ml/classifiers/tests/c_classifier_testcases.py deleted file mode 100644 index 4e587c01..00000000 --- a/src/secml/ml/classifiers/tests/c_classifier_testcases.py +++ /dev/null @@ -1,411 +0,0 @@ -from secml.testing import CUnitTest - -from secml.array import CArray -from secml.data import CDataset -from secml.ml.features import CPreProcess -from secml.optim.function import CFunction -from secml.figure import CFigure -from secml.core.constants import eps - - -class CClassifierTestCases(CUnitTest): - """Unittests interface for CClassifier.""" - - def _check_df_scores(self, s, n_samples): - self.assertEqual(type(s), CArray) - self.assertTrue(s.isdense) - self.assertEqual(1, s.ndim) - self.assertEqual((n_samples,), s.shape) - self.assertEqual(float, s.dtype) - - def _check_classify_scores(self, l, s, n_samples, n_classes): - self.assertEqual(type(l), CArray) - self.assertEqual(type(s), CArray) - self.assertTrue(l.isdense) - self.assertTrue(s.isdense) - self.assertEqual(1, l.ndim) - self.assertEqual(2, s.ndim) - self.assertEqual((n_samples,), l.shape) - self.assertEqual((n_samples, n_classes), s.shape) - self.assertEqual(int, l.dtype) - self.assertEqual(float, s.dtype) - - def _test_fun(self, clf, ds): - """Test for `decision_function` and `predict` - - Parameters - ---------- - clf : CClassifier - ds : CDataset - - Returns - ------- - scores : CArray - Classifier scores computed on a single point. 
- - """ - self.logger.info( - "Test for decision_function() and predict() methods.") - - if ds.issparse: - self.logger.info("Testing on sparse data...") - else: - self.logger.info("Testing on dense data...") - - clf.fit(ds) - - # we have to ensure at least 2d here, since _decision_function is not - # applying this change anymore (while decision_function does). - x = x_norm = ds.X.atleast_2d() - p = p_norm = ds.X[0, :].ravel().atleast_2d() - - # Transform data if preprocess is defined - if clf.preprocess is not None: - x_norm = clf.preprocess.transform(x) - p_norm = clf.preprocess.transform(p) - - # Testing decision_function on multiple points - df, df_priv = [], [] - for y in range(ds.num_classes): - df.append(clf.decision_function(x, y=y)) - df_priv.append(clf._forward(x_norm)[:, y].ravel()) - self.logger.info( - "decision_function(x, y={:}): {:}".format(y, df[y])) - self.logger.info( - "_decision_function(x_norm, y={:}): {:}".format(y, df_priv[y])) - self._check_df_scores(df_priv[y], ds.num_samples) - self._check_df_scores(df[y], ds.num_samples) - self.assertFalse((df[y] != df_priv[y]).any()) - - # Testing predict on multiple points - labels, scores = clf.predict( - x, return_decision_function=True) - self.logger.info( - "predict(x):\nlabels: {:}\nscores: {:}".format(labels, scores)) - self._check_classify_scores( - labels, scores, ds.num_samples, clf.n_classes) - - # Comparing output of decision_function and predict - for y in range(ds.num_classes): - self.assertFalse((df[y] != scores[:, y].ravel()).any()) - - # Testing decision_function on single point - df, df_priv = [], [] - for y in range(ds.num_classes): - df.append(clf.decision_function(p, y=y)) - df_priv.append(clf._forward(p_norm)[:, y].ravel()) - self.logger.info( - "decision_function(p, y={:}): {:}".format(y, df[y])) - self._check_df_scores(df[y], 1) - self.logger.info( - "_decision_function(p_norm, y={:}): {:}".format(y, df_priv[y])) - self._check_df_scores(df_priv[y], 1) - self.assertFalse((df[y] != df_priv[y]).any()) - - self.logger.info("Testing predict on single point") - - labels, scores = clf.predict( - p, return_decision_function=True) - self.logger.info( - "predict(p):\nlabels: {:}\nscores: {:}".format(labels, scores)) - self._check_classify_scores(labels, scores, 1, clf.n_classes) - - # Comparing output of decision_function and predict - for y in range(ds.num_classes): - self.assertFalse((df[y] != scores[:, y].ravel()).any()) - - return scores - - def _test_plot(self, clf, ds, levels=None): - """Plot the decision function of a classifier.""" - self.logger.info("Testing classifiers graphically") - # Preparation of the grid - fig = CFigure(width=8, height=4, fontsize=8) - clf.fit(ds) - - fig.subplot(1, 2, 1) - fig.sp.plot_ds(ds) - fig.sp.plot_decision_regions( - clf, n_grid_points=50, grid_limits=ds.get_bounds()) - fig.sp.title("Decision regions") - - fig.subplot(1, 2, 2) - fig.sp.plot_ds(ds) - fig.sp.plot_fun(clf.decision_function, grid_limits=ds.get_bounds(), - levels=levels, y=1) - fig.sp.title("Discriminant function for y=1") - - return fig - - def _test_gradient_numerical(self, clf, x, extra_classes=None, - th=1e-3, epsilon=eps, **grad_kwargs): - """Test for clf.grad_f_x comparing to numerical gradient. - - Parameters - ---------- - clf : CClassifier - x : CArray - extra_classes : None or list of int, optional - Any extra class which gradient wrt should be tested - th : float, optional - The threshold for the check with numerical gradient. 
- epsilon : float, optional - The epsilon to use for computing the numerical gradient. - grad_kwargs : kwargs - Any extra parameter for the gradient function. - - Returns - ------- - grads : list of CArray - A list with the gradients computed wrt each class. - - """ - if 'y' in grad_kwargs: - raise ValueError("`y` cannot be passed to this unittest.") - - if extra_classes is not None: - classes = clf.classes.append(extra_classes) - else: - classes = clf.classes - - grads = [] - for c in classes: - grad_kwargs['y'] = c # Appending class to test_f_x - - # Analytical gradient - gradient = clf.grad_f_x(x, **grad_kwargs) - grads.append(gradient) - - self.assertTrue(gradient.is_vector_like) - self.assertEqual(x.size, gradient.size) - self.assertEqual(x.issparse, gradient.issparse) - - # Numerical gradient - num_gradient = CFunction( - clf.decision_function).approx_fprime(x.todense(), epsilon, y=c) - - # Compute the norm of the difference - error = (gradient - num_gradient).norm() - - self.logger.info( - "Analytic grad wrt. class {:}:\n{:}".format(c, gradient)) - self.logger.info( - "Numeric gradient wrt. class {:}:\n{:}".format( - c, num_gradient)) - - self.logger.info("norm(grad - num_grad): {:}".format(error)) - self.assertLess(error, th) - - self.assertIsSubDtype(gradient.dtype, float) - - return grads - - @staticmethod - def _create_preprocess_chain(pre_id_list, kwargs_list): - """Creates a preprocessor with other preprocessors chained - and a list of the same preprocessors (not chained)""" - chain = None - pre_list = [] - for i, pre_id in enumerate(pre_id_list): - chain = CPreProcess.create( - pre_id, preprocess=chain, **kwargs_list[i]) - pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) - - return chain, pre_list - - def _create_preprocess_test(self, ds, clf, pre_id_list, kwargs_list): - """Fit 2 clf, one with internal preprocessor chain - and another using pre-transformed data. - - Parameters - ---------- - ds : CDataset - clf : CClassifier - pre_id_list : list of str - This list should contain the class_id of each preprocessor - that should be part of the chain. - kwargs_list : list of dict - This list should contain a dictionary of extra parameter for - each preprocessor that should be part of the chain. - - Returns - ------- - pre1 : CPreProcess - The preprocessors chain. - data_pre : CArray - Data (ds.X) transformed using pre1. - clf_pre : CClassifier - The classifier with a copy the preprocessors chain inside, - trained on ds. - clf : CClassifier - The classifier without the preprocessors chain inside, - trained on data_pre. - - """ - pre1 = CPreProcess.create_chain(pre_id_list, kwargs_list) - data_pre = pre1.fit_transform(ds.X) - - pre2 = CPreProcess.create_chain(pre_id_list, kwargs_list) - clf_pre = clf.deepcopy() - clf_pre.preprocess = pre2 - - clf_pre.fit(ds) - clf.fit(CDataset(data_pre, ds.Y)) - - return pre1, data_pre, clf_pre, clf - - def _test_preprocess(self, ds, clf, pre_id_list, kwargs_list): - """Test if clf with preprocessor inside returns the same - prediction of the clf trained on pre-transformed data. - - Parameters - ---------- - ds : CDataset - clf : CClassifier - pre_id_list : list of str - This list should contain the class_id of each preprocessor - that should be part of the chain. - kwargs_list : list of dict - This list should contain a dictionary of extra parameter for - each preprocessor that should be part of the chain. 
- - """ - pre, data_pre, clf_pre, clf = self._create_preprocess_test( - ds, clf, pre_id_list, kwargs_list) - - self.logger.info( - "Testing {:} with preprocessor inside:\n{:}".format( - clf.__class__.__name__, clf_pre)) - - y1, score1 = clf_pre.predict(ds.X, return_decision_function=True) - y2, score2 = clf.predict(data_pre, return_decision_function=True) - - self.assert_array_equal(y1, y2) - self.assert_array_almost_equal(score1, score2) - - # The number of features of the clf with preprocess inside should be - # equal to the number of dataset features (so before preprocessing) - self.assertEqual(ds.num_features, clf_pre.n_features) - - def _test_preprocess_grad(self, ds, clf, pre_id_list, kwargs_list, - extra_classes=None, check_numerical=True, - th=1e-3, epsilon=eps, **grad_kwargs): - """Test if clf gradient with preprocessor inside is equal to the - gradient of the clf trained on pre-transformed data. - Also compare the gradient of the clf with preprocessor - inside with numerical gradient. - - Parameters - ---------- - ds : CDataset - clf : CClassifier - pre_id_list : list of str - This list should contain the class_id of each preprocessor - that should be part of the chain. - kwargs_list : list of dict - This list should contain a dictionary of extra parameter for - each preprocessor that should be part of the chain. - extra_classes : None or list of int, optional - Any extra class which gradient wrt should be tested - check_numerical : bool, optional - If True, the gradient will be compared with - the numerical approximation. - th : float, optional - The threshold for the check with numerical gradient. - epsilon : float, optional - The epsilon to use for computing the numerical gradient. - grad_kwargs : kwargs - Any extra parameter for the gradient function. - - """ - pre, data_pre, clf_pre, clf = self._create_preprocess_test( - ds, clf, pre_id_list, kwargs_list) - - self.logger.info("Testing clf gradient with preprocessor " - "inside:\n{:}".format(clf_pre)) - - if 'y' in grad_kwargs: - raise ValueError("`y` cannot be passed to this unittest.") - - if extra_classes is not None: - classes = clf.classes.append(extra_classes) - else: - classes = clf.classes - - for c in classes: - self.logger.info( - "Testing grad wrt. class {:}".format(c)) - - # Grad of clf without preprocessor inside (using transformed data) - v_pre = data_pre[0, :] - clf_grad = clf.grad_f_x(v_pre, y=c, **grad_kwargs) - - # Output of grad_f_x should be a float vector - self.assertEqual(1, clf_grad.ndim) - self.assertIsSubDtype(clf_grad.dtype, float) - - # Gradient of clf with preprocessor inside - v = ds.X[0, :] - clf_pre_grad = clf_pre.grad_f_x(v, y=c, **grad_kwargs) - - # Gradient of the preprocessor. Should be equal to the gradient - # of the clf with preprocessor inside - pre_grad = pre.gradient(v_pre, w=clf_grad) - - # As clf_grad should be a float vector, - # output of gradient should be the same - self.assertEqual(1, pre_grad.ndim) - self.assertIsSubDtype(pre_grad.dtype, float) - - self.assert_array_almost_equal(clf_pre_grad, pre_grad) - - if check_numerical is True: - # Comparison with numerical gradient - self._test_gradient_numerical( - clf_pre, ds.X[0, :], extra_classes=extra_classes, - th=th, epsilon=epsilon, **grad_kwargs) - - def _test_sparse_linear(self, ds, clf): - """Test linear classifier operations on sparse data. - - For linear classifiers, when training on sparse data, the weights - vector must be sparse. Also `grad_f_x` must return a sparse array. 
- - Parameters - ---------- - ds : CDataset - clf : CClassifier - - """ - self.logger.info("Testing {:} operations on sparse data.".format( - clf.__class__.__name__)) - - ds_sparse = ds.tosparse() - - # Fitting on sparse data - clf.fit(ds_sparse) - - # Resulting weights vector must be sparse - self.assertTrue(clf.w.issparse) - - # Predictions on dense and sparse data - x = ds.X[0, :] - x_sparse = ds_sparse.X[0, :] - - y, s = clf.predict( - x, return_decision_function=True) - y_sparse, s_sparse = clf.predict( - x_sparse, return_decision_function=True) - - self.assert_array_equal(y, y_sparse) - self.assert_array_equal(s, s_sparse) - - # Gradient must be sparse if training data is sparse - grad = clf.grad_f_x(x_sparse, y=0) - self.assertTrue(grad.issparse) - grad = clf.grad_f_x(x, y=0) - self.assertTrue(grad.issparse) - - -if __name__ == '__main__': - CUnitTest.main() diff --git a/src/secml/ml/classifiers/tests/figs/.gitkeep b/src/secml/ml/classifiers/tests/figs/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/secml/ml/features/normalization/tests/__init__.py b/src/secml/ml/features/normalization/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py deleted file mode 100644 index b7f33ff3..00000000 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_dnn.py +++ /dev/null @@ -1,206 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - -from collections import OrderedDict - -try: - import torch - import torchvision -except ImportError: - CPreProcessTestCases.importskip("torch") - CPreProcessTestCases.importskip("torchvision") -else: - import torch - from torch import nn, optim - from torchvision import transforms - torch.manual_seed(0) - -from secml.array import CArray -from secml.ml.features.normalization import CNormalizerDNN -from secml.ml.classifiers import CClassifierPyTorch -from secml.data.loader import CDLRandom -from secml.optim.function import CFunction - - -def mlp(input_dims=100, hidden_dims=(50, 50), output_dims=10): - """Multi-layer Perceptron""" - if len(hidden_dims) < 1: - raise ValueError("at least one hidden dim should be defined") - if any(d <= 0 for d in hidden_dims): - raise ValueError("each hidden layer must have at least one neuron") - - # Input layers - layers = [ - ('linear1', torch.nn.Linear(input_dims, hidden_dims[0])), - ('relu1', torch.nn.ReLU()), - ] - # Appending additional hidden layers - for hl_i, hl_dims in enumerate(hidden_dims[1:]): - prev_hl_dims = hidden_dims[hl_i] # Dims of the previous hl - i_str = str(hl_i + 2) - layers += [ - ('linear' + i_str, torch.nn.Linear(prev_hl_dims, hl_dims)), - ('relu' + i_str, torch.nn.ReLU())] - # Output layers - layers += [ - ('linear' + str(len(hidden_dims) + 1), - torch.nn.Linear(hidden_dims[-1], output_dims))] - - # Creating the model with the list of layers - return torch.nn.Sequential(OrderedDict(layers)) - - -class TestCNormalizerPyTorch(CPreProcessTestCases): - - @classmethod - def setUpClass(cls): - cls.ds = CDLRandom(n_samples=40, n_classes=3, - n_features=20, n_informative=15, - random_state=0).load() - - model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) - loss = nn.CrossEntropyLoss() - optimizer = optim.SGD(model.parameters(), lr=1e-1) - cls.net = CClassifierPyTorch(model=model, loss=loss, - optimizer=optimizer, random_state=0, - epochs=10, pretrained=True) - cls.net.fit(cls.ds) - cls.norm 
= CNormalizerDNN(net=cls.net) - - CPreProcessTestCases.setUpClass() - - def test_normalization(self): - """Testing normalization.""" - x = self.ds.X[0, :] - - self.logger.info("Testing normalization at last layer") - - self.norm.out_layer = None - - out_norm = self.norm.transform(x) - out_net = self.net.get_layer_output(x, layer=None) - - self.logger.info("Output of normalize:\n{:}".format(out_norm)) - self.logger.info("Output of net:\n{:}".format(out_net)) - - self.assert_allclose(out_norm, out_net) - - self.norm.out_layer = 'linear1' - - self.logger.info( - "Testing normalization at layer {:}".format(self.norm.out_layer)) - - out_norm = self.norm.transform(x) - out_net = self.net.get_layer_output(x, layer=self.norm.out_layer) - - self.logger.info("Output of normalize:\n{:}".format(out_norm)) - self.logger.info("Output of net:\n{:}".format(out_net)) - - self.assert_allclose(out_norm, out_net) - - def test_chain(self): - """Test for preprocessors chain.""" - # Inner preprocessors should be passed to the pytorch clf - with self.assertRaises(ValueError): - CNormalizerDNN(net=self.net, preprocess='min-max') - - def test_gradient(self): - """Test for gradient.""" - x = self.ds.X[0, :] - - layer = None - self.norm.out_layer = layer - self.logger.info("Returning gradient for layer: {:}".format(layer)) - shape = self.norm.transform(x).shape - w = CArray.zeros(shape=shape) - w[0] = 1 - grad = self.norm.gradient(x, w=w) - - self.logger.info("Output of gradient_f_x:\n{:}".format(grad)) - - self.assertTrue(grad.is_vector_like) - self.assertEqual(x.size, grad.size) - - layer = 'linear1' - self.norm.out_layer = layer - self.logger.info("Returning output for layer: {:}".format(layer)) - out = self.net.get_layer_output(x, layer=layer) - self.logger.info("Returning gradient for layer: {:}".format(layer)) - grad = self.norm.gradient(x, w=out) - - self.logger.info("Output of grad_f_x:\n{:}".format(grad)) - - self.assertTrue(grad.is_vector_like) - self.assertEqual(x.size, grad.size) - - def test_aspreprocess(self): - """Test for normalizer used as preprocess.""" - from secml.ml.classifiers import CClassifierSVM - from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA - - model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3) - loss = nn.CrossEntropyLoss() - optimizer = optim.SGD(model.parameters(), lr=1e-1) - net = CClassifierPyTorch(model=model, loss=loss, - optimizer=optimizer, random_state=0, - epochs=10, preprocess='min-max') - net.fit(self.ds) - - norm = CNormalizerDNN(net=net) - - clf = CClassifierMulticlassOVA( - classifier=CClassifierSVM, preprocess=norm) - - self.logger.info("Testing last layer") - - clf.fit(self.ds) - - y_pred, scores = clf.predict( - self.ds.X, return_decision_function=True) - self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) - self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) - self.logger.info("Scores:\n{:}".format(scores)) - - x = self.ds.X[0, :] - - self.logger.info("Testing last layer gradient") - - for c in self.ds.classes: - self.logger.info("Gradient w.r.t. 
class {:}".format(c)) - - grad = clf.grad_f_x(x, y=c) - - self.logger.info("Output of grad_f_x:\n{:}".format(grad)) - - check_grad_val = CFunction( - clf.decision_function, clf.grad_f_x).check_grad( - x, y=c, epsilon=1e-1) - self.logger.info( - "norm(grad - num_grad): %s", str(check_grad_val)) - self.assertLess(check_grad_val, 1e-3) - - self.assertTrue(grad.is_vector_like) - self.assertEqual(x.size, grad.size) - - layer = 'linear1' - norm.out_layer = layer - - self.logger.info("Testing layer {:}".format(norm.out_layer)) - - clf.fit(self.ds) - - y_pred, scores = clf.predict( - self.ds.X, return_decision_function=True) - self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist())) - self.logger.info("Predictions:\n{:}".format(y_pred.tolist())) - self.logger.info("Scores:\n{:}".format(scores)) - - self.logger.info("Testing 'linear1' layer gradient") - grad = clf.grad_f_x(x, y=0) # y is required for multiclassova - self.logger.info("Output of grad_f_x:\n{:}".format(grad)) - - self.assertTrue(grad.is_vector_like) - self.assertEqual(x.size, grad.size) - - -if __name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py deleted file mode 100644 index 43bab3c5..00000000 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ /dev/null @@ -1,102 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - -from sklearn.preprocessing import StandardScaler - -from secml.array import CArray -from secml.ml.features.normalization import CNormalizerMeanStd - - -class TestCNormalizerMeanStd(CPreProcessTestCases): - """Unittest for CNormalizerMeanStd""" - - def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" - - def sklearn_comp(array): - - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer - target = CArray(StandardScaler().fit_transform( - array.astype(float).tondarray())) - # Our normalizer - n = CNormalizerMeanStd().fit(array) - result = n.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - self.logger.info("Testing without std") - # Sklearn normalizer - target = CArray(StandardScaler(with_std=False).fit_transform( - array.astype(float).tondarray())) - # Our normalizer - n = CNormalizerMeanStd(with_std=False).fit(array) - result = n.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) - - def test_normalizer_mean_std(self): - """Test for CNormalizerMeanStd.""" - - for (mean, std) in [(1.5, 0.1), - ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: - for array in [self.array_dense, self.array_sparse]: - - self.logger.info("Original array is:\n{:}".format(array)) - self.logger.info( - "Normalizing using mean: {:} std: {:}".format(mean, std)) - - n = CNormalizerMeanStd(mean=mean, std=std).fit(array) - out = n.transform(array) - - self.logger.info("Result is:\n{:}".format(out)) - - out_mean = out.mean(axis=0, keepdims=False) - out_std = out.std(axis=0, keepdims=False) - - 
self.logger.info("Result mean is:\n{:}".format(out_mean)) - self.logger.info("Result std is:\n{:}".format(out_std)) - - rev = n.inverse_transform(out) - - self.assert_array_almost_equal(array, rev) - - def test_chain(self): - """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'mean-std'], - [{'feature_range': (-5, 5)}, {}, {}] - ) - - # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1]-1), x_chain.shape) - - def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" - grad = self._test_chain_gradient( - self.array_dense, - ['min-max', 'mean-std'], - [{'feature_range': (-5, 5)}, {}] - ) - - # Expected shape is (n_feats, ), so (4, ) - self.assertEqual((self.array_dense.shape[1], ), grad.shape) - - -if __name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py deleted file mode 100644 index 324c6adc..00000000 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ /dev/null @@ -1,82 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - - -from sklearn.preprocessing import MinMaxScaler - -from secml.array import CArray -from secml.ml.features.normalization import CNormalizerMinMax - - -class TestCNormalizerLinear(CPreProcessTestCases): - """Unittest for CNormalizerLinear.""" - - def test_norm_minmax(self): - """Test for CNormalizerMinMax.""" - - def sklearn_comp(array): - - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer (requires float dtype input) - array_sk = array.astype(float).tondarray() - sk_norm = MinMaxScaler().fit(array_sk) - - target = CArray(sk_norm.transform(array_sk)) - - # Our normalizer - our_norm = CNormalizerMinMax().fit(array) - result = our_norm.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - # Testing out of range normalization - - self.logger.info("Testing out of range normalization") - - # Sklearn normalizer (requires float dtype input) - target = CArray(sk_norm.transform(array_sk * 2)) - - # Our normalizer - result = our_norm.transform(array * 2) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) - - def test_chain(self): - """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] - ) - - # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1]-1), x_chain.shape) - - def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" - grad = self._test_chain_gradient( - self.array_dense, - ['min-max', 'mean-std', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] - ) - - # Expected shape is (n_feats, ), so (4, ) - self.assertEqual((self.array_dense.shape[1], ), grad.shape) - - -if 
__name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py deleted file mode 100644 index c17ac912..00000000 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ /dev/null @@ -1,51 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - -from sklearn.preprocessing import Normalizer - -from secml.array import CArray -from secml.ml.features.normalization import CNormalizerUnitNorm - - -class TestCNormalizerUnitNorm(CPreProcessTestCases): - """Unittest for CNormalizerUnitNorm.""" - - def test_norm_unitnorm(self): - """Test for CNormalizerUnitNorm.""" - - def sklearn_comp(array): - - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer (requires float dtype input) - target = CArray(Normalizer().fit_transform( - array.astype(float).get_data())) - # Our normalizer - result = CNormalizerUnitNorm().fit_transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) - - def test_chain(self): - """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'unit-norm'], - [{'feature_range': (-5, 5)}, {}, {}] - ) - - # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1]-1), x_chain.shape) - - -if __name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/reduction/tests/__init__.py b/src/secml/ml/features/reduction/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py b/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py deleted file mode 100644 index 750e5afc..00000000 --- a/src/secml/ml/features/reduction/tests/test_c_reducer_lda.py +++ /dev/null @@ -1,100 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis - -from secml.array import CArray -from secml.ml.features.reduction import CLDA -from secml.figure import CFigure - - -class TestCLda(CPreProcessTestCases): - """Unittests for CLDA.""" - - def setUp(self): - # As our test cases are not always linearly independent, - # LDA will warn about "Variables are collinear". - # We can ignore the warning in this context - self.logger.filterwarnings("ignore", "Variables are collinear.") - - super(TestCLda, self).setUp() - - def test_lda(self): - """Test for LDA. 
This compares sklearn equivalent to our method.""" - - def sklearn_comp(array, y): - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer - sklearn_lda = LinearDiscriminantAnalysis().fit( - array.tondarray(), y.tondarray()) - target = CArray(sklearn_lda.transform(array.tondarray())) - # Our normalizer - lda = CLDA().fit(array, y) - result = lda.transform(array) - - self.logger.info("Sklearn result is:\n{:}".format(target)) - self.logger.info("Result is:\n{:}".format(result)) - - self.assert_array_almost_equal(result, target) - - # A min of 2 samples is required by LDA so we cannot test single rows - sklearn_comp(self.array_dense, CArray([0, 1, 0])) - sklearn_comp(self.array_sparse, CArray([0, 1, 0])) - sklearn_comp(self.column_dense, CArray([0, 1, 0])) - sklearn_comp(self.column_sparse, CArray([0, 1, 0])) - - def test_plot(self): - """Test for LDA. Check LDA Result Graphically. - - Apply Lda to Sklearn Iris Dataset and compare it with - "Linear Discriminant Analysis bit by bit" by Sebastian Raschka - http://sebastianraschka.com/Articles/2014_python_lda.html - into the plot we must see approximatively: - x axes: from -2 to -1 virginica, from -1 to 0 versicolor, from 1 to 2,3 setosa - y axes: from -1 to -1 virginica, from -1 to 0.5 versicolor, from -1 to 1 setosa - - """ - from sklearn.datasets import load_iris - - iris_db = load_iris() - patterns = CArray(iris_db.data) - labels = CArray(iris_db.target) - - lda = CLDA() - lda.fit(patterns, labels) - # store dataset reduced with pca - red_dts = lda.fit_transform(patterns, labels) - - fig = CFigure(width=10, markersize=8) - fig.sp.scatter(red_dts[:, 0].ravel(), - red_dts[:, 1].ravel(), - c=labels) - fig.show() - - def test_chain(self): - """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'mean-std', 'lda'], - [{'feature_range': (-5, 5)}, {}, {}], - y=CArray([1, 0, 1]) # LDA is supervised - ) - - # Expected shape is (3, 1), as lda max n_components is classes - 1 - self.assertEqual((self.array_dense.shape[0], 1), x_chain.shape) - - x_chain = self._test_chain( - self.array_dense, - ['mean-std', 'lda', 'min-max'], - [{}, {}, {}], - y=CArray([1, 0, 1]) # LDA is supervised - ) - - # Expected shape is (3, 1), as lda max n_components is classes - 1 - self.assertEqual((self.array_dense.shape[0], 1), x_chain.shape) - - # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) - - -if __name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py b/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py deleted file mode 100644 index adf18ff5..00000000 --- a/src/secml/ml/features/reduction/tests/test_c_reducer_pca.py +++ /dev/null @@ -1,69 +0,0 @@ -from secml.ml.features.tests import CPreProcessTestCases - -from secml.array import CArray -from secml.ml.features.reduction import CPCA -from sklearn.decomposition import PCA - - -class TestCPca(CPreProcessTestCases): - """Unittests for CPCA.""" - - def test_pca(self): - """Test for PCA. 
This compares sklearn equivalent to our method.""" - - # Few test cases involve an all-zero column, - # so PCA will trigger a 0/0 warning - self.logger.filterwarnings( - action='ignore', - message='invalid value encountered in true_divide', - category=RuntimeWarning - ) - self.logger.filterwarnings( - action='ignore', - message='invalid value encountered in divide', - category=RuntimeWarning - ) - - def sklearn_comp(array): - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer - sklearn_pca = PCA().fit(array.tondarray()) - target = CArray(sklearn_pca.transform(array.tondarray())) - # Our normalizer - pca = CPCA().fit(array) - result = pca.transform(array) - - self.logger.info("Sklearn result is:\n{:}".format(target)) - self.logger.info("Result is:\n{:}".format(result)) - - self.assert_array_almost_equal(result, target) - - original = pca.inverse_transform(result) - - self.assert_array_almost_equal(original, array) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) - - def test_chain(self): - """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'unit-norm', 'pca'], - [{'feature_range': (-5, 5)}, {}, {}] - ) - - # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) - - # TODO: ADD TEST FOR GRADIENT (WHEN IMPLEMENTED) - - -if __name__ == '__main__': - CPreProcessTestCases.main() diff --git a/src/secml/ml/features/tests/__init__.py b/src/secml/ml/features/tests/__init__.py deleted file mode 100644 index 8524a8ee..00000000 --- a/src/secml/ml/features/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .c_preprocess_testcases import CPreProcessTestCases diff --git a/src/secml/ml/features/tests/c_preprocess_testcases.py b/src/secml/ml/features/tests/c_preprocess_testcases.py deleted file mode 100644 index c534f76a..00000000 --- a/src/secml/ml/features/tests/c_preprocess_testcases.py +++ /dev/null @@ -1,102 +0,0 @@ -from secml.testing import CUnitTest - -from secml.array import CArray -from secml.ml.features import CPreProcess - - -class CPreProcessTestCases(CUnitTest): - """Unittests interface for CPreProcess.""" - - def setUp(self): - - self.array_dense = CArray([[1, 0, 0, 5], - [2, 4, 0, 0], - [3, 6, 0, 0]]) - self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True) - - self.row_dense = CArray([4, 0, 6]) - self.column_dense = self.row_dense.deepcopy().T - - self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True) - self.column_sparse = self.row_sparse.deepcopy().T - - @staticmethod - def _create_chain(pre_id_list, kwargs_list): - """Creates a preprocessor with other preprocessors chained - and a list of the same preprocessors (not chained)""" - chain = None - pre_list = [] - for i, pre_id in enumerate(pre_id_list): - chain = CPreProcess.create( - pre_id, preprocess=chain, **kwargs_list[i]) - pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) - - return chain, pre_list - - def _test_chain(self, x, pre_id_list, kwargs_list, y=None): - """Tests if preprocess chain and manual chaining yield same result.""" - chain, pre_list = self._create_chain(pre_id_list, kwargs_list) - - chain = chain.fit(x, y=y) - self.logger.info("Preprocessors chain:\n{:}".format(chain)) - - x_chain = chain.transform(x) - self.logger.info("Trasformed X 
(chain):\n{:}".format(x_chain)) - - # Train the manual chain and transform - x_manual = x - for pre in pre_list: - x_manual = pre.fit_transform(x_manual, y=y) - - self.logger.info("Trasformed X (manual):\n{:}".format(x_manual)) - self.assert_allclose(x_chain, x_manual) - - # Reverting array (if available) - try: - x_chain_revert = chain.inverse_transform(x_chain) - self.logger.info("Reverted X (chain):\n{:}".format(x_chain_revert)) - self.logger.info("Original X:\n{:}".format(x)) - self.assert_array_almost_equal(x_chain_revert, x) - except NotImplementedError: - self.logger.info("inverse_transform not available") - - return x_chain - - def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): - """Tests if gradient preprocess chain and - gradient of manual chaining yield same result.""" - chain, pre_list = self._create_chain(pre_id_list, kwargs_list) - - chain = chain.fit(x, y=y) - self.logger.info("Preprocessors chain:\n{:}".format(chain)) - - v = x[1, :] - grad_chain = chain.gradient(v) - self.logger.info( - "gradient({:}) (chain):\n{:}".format(v, grad_chain)) - - # Manually compose the chain and transform - for pre in pre_list: - x = pre.fit_transform(x, y=y) - - v_list = [v] - for pre in pre_list[:-1]: - v = pre.transform(v) - v_list.append(v) - - v_list = list(reversed(v_list)) - pre_list = list(reversed(pre_list)) - - grad = None - for i, v in enumerate(v_list): - grad = pre_list[i].gradient(v, w=grad) - - self.logger.info( - "gradient({:}) (manual):\n{:}".format(v, grad)) - self.assert_allclose(grad_chain, grad) - - return grad_chain - - -if __name__ == '__main__': - CUnitTest.main() diff --git a/src/secml/ml/kernel/tests/__init__.py b/src/secml/ml/kernel/tests/__init__.py deleted file mode 100644 index c236cd43..00000000 --- a/src/secml/ml/kernel/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .c_kernel_testcases import CCKernelTestCases diff --git a/src/secml/ml/kernel/tests/c_kernel_testcases.py b/src/secml/ml/kernel/tests/c_kernel_testcases.py deleted file mode 100644 index 5a3ea56b..00000000 --- a/src/secml/ml/kernel/tests/c_kernel_testcases.py +++ /dev/null @@ -1,198 +0,0 @@ -from secml.testing import CUnitTest - -from secml.array import CArray -from secml.data.loader import CDLRandom -from secml.core.type_utils import is_scalar -from secml.ml.kernel import CKernel -from secml.optim.function import CFunction - - -class CCKernelTestCases(CUnitTest): - def _set_up(self, kernel_name): - - self.d_dense = CDLRandom(n_samples=10, n_features=5, - n_redundant=0, n_informative=3, - n_clusters_per_class=1, - random_state=100).load() - - self.p1_dense = self.d_dense.X[0, :] - self.p2_dense = self.d_dense.X[1, :] - - self.d_sparse = self.d_dense.tosparse() - self.p1_sparse = self.d_sparse.X[0, :] - self.p2_sparse = self.d_sparse.X[1, :] - - self.kernel = CKernel.create(kernel_name) - - def _has_gradient(self): - try: - self.kernel.gradient(self.p1_dense, self.p2_dense) - return True - except NotImplementedError: - return False - - def _cmp_kernel(self, k_fun, a1, a2): - k = k_fun(a1, a2) - if isinstance(k, CArray): - self.logger.info("k shape with inputs {:} {:} is: {:}" - "".format(a1.shape, a2.shape, k.shape)) - self.assertEqual(k.shape, (CArray(a1).atleast_2d().shape[0], - CArray(a2).atleast_2d().shape[0])) - else: - self.assertTrue(is_scalar(k)) - - def _test_similarity_shape(self): - """Test shape of kernel.""" - self.logger.info( - "Testing shape of " + self.kernel.class_type + " kernel output.") - - x_vect = CArray.rand(shape=(1, 10)).ravel() - x_mat = 
CArray.rand(shape=(10, 10)) - x_col = CArray.rand(shape=(10, 1)) - x_single = CArray.rand(shape=(1, 1)) - - self._cmp_kernel(self.kernel.k, x_vect, x_vect) - self._cmp_kernel(self.kernel.k, x_mat, x_vect) - self._cmp_kernel(self.kernel.k, x_vect, x_mat) - self._cmp_kernel(self.kernel.k, x_mat, x_mat) - self._cmp_kernel(self.kernel.k, x_col, x_col) - self._cmp_kernel(self.kernel.k, x_col, x_single) - self._cmp_kernel(self.kernel.k, x_single, x_col) - self._cmp_kernel(self.kernel.k, x_single, x_single) - - def _test_similarity_shape_sparse(self): - """Test shape of kernel.""" - self.logger.info( - "Testing shape of " + self.kernel.class_type + " kernel output.") - - x_vect = CArray.rand(shape=(1, 10)).ravel().tosparse() - x_mat = CArray.rand(shape=(10, 10)).tosparse() - x_col = CArray.rand(shape=(10, 1)).tosparse() - x_single = CArray.rand(shape=(1, 1)).tosparse() - - self._cmp_kernel(self.kernel.k, x_vect, x_vect) - self._cmp_kernel(self.kernel.k, x_mat, x_vect) - self._cmp_kernel(self.kernel.k, x_vect, x_mat) - self._cmp_kernel(self.kernel.k, x_mat, x_mat) - self._cmp_kernel(self.kernel.k, x_col, x_col) - self._cmp_kernel(self.kernel.k, x_col, x_single) - self._cmp_kernel(self.kernel.k, x_single, x_col) - self._cmp_kernel(self.kernel.k, x_single, x_single) - - def _test_gradient(self): - """Test for kernel gradients with dense points.""" - - if not self._has_gradient(): - self.logger.info( - "Gradient is not implemented for %s. " - "Skipping gradient dense tests.", self.kernel.class_type) - return - - # we invert the order of input patterns as we compute the kernel - # gradient wrt the second point but check_grad needs it as first input - def kern_f_for_test(p2, p1, kernel_func): - return kernel_func.similarity(p1, p2) - - def kern_grad_for_test(p2, p1, kernel_func): - return kernel_func.gradient(p1, p2) - - self.logger.info("Testing gradient with dense data.") - self.logger.info("Kernel type: %s", self.kernel.class_type) - - for i in range(self.d_dense.num_samples): - self.logger.info("x point: " + str(self.p2_dense)) - self.logger.info("y point: " + str(self.d_dense.X[i, :])) - - # TODO: implement centered numerical differences. - # if analytical gradient is zero, numerical estimation does not - # work, as it is using one-side estimation. We should use centered - # numerical differences to gain precision. - grad = self.kernel.gradient(self.d_dense.X[i, :], self.p2_dense) - if grad.norm() >= 1e-10: - grad_error = CFunction( - kern_f_for_test, kern_grad_for_test).check_grad( - self.p2_dense, 1e-8, self.d_dense.X[i, :], self.kernel) - self.logger.info("Gradient approx. error: {:}" - "".format(grad_error)) - self.assertTrue(grad_error < 1e-4) - - def _test_gradient_sparse(self): - """Test for kernel gradients with sparse points.""" - - if not self._has_gradient(): - self.logger.info( - "Gradient is not implemented for %s. 
" - "Skipping gradient sparse tests.", self.kernel.class_type) - return - - self.logger.info("Testing gradient with sparse data.") - self.logger.info("Kernel type: %s", self.kernel.class_type) - - k_grad = self.kernel.gradient(self.d_sparse.X, self.p2_dense) - self.logger.info( - "sparse/dense ->.isdense: {:}".format(k_grad.isdense)) - self.assertTrue(k_grad.isdense) - - k_grad = self.kernel.gradient(self.d_dense.X, self.p2_sparse) - self.logger.info( - "dense/sparse ->.issparse: {:}".format(k_grad.issparse)) - self.assertTrue(k_grad.issparse) - - k_grad = self.kernel.gradient(self.d_sparse.X, self.p2_sparse) - self.logger.info( - "sparse/sparse ->.issparse: {:}".format(k_grad.issparse)) - self.assertTrue(k_grad.issparse) - - def _test_gradient_multiple_points(self): - """Test for kernel gradients with multiple points vs single point.""" - - if not self._has_gradient(): - self.logger.info( - "Gradient is not implemented for %s. " - "Skipping multiple-point tests.", self.kernel.class_type) - return - - # check if gradient computed on multiple points is the same as - # the gradients computed on one point at a time. - data = self.d_dense.X[0:5, :] # using same no. of points and features - k1 = self.kernel.gradient(data, self.p2_dense) - k2 = CArray.zeros(shape=k1.shape) - for i in range(k2.shape[0]): - k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) - self.assertTrue((k1 - k2).ravel().norm() < 1e-4) - - data = self.d_dense.X # using different no. of points/features - k1 = self.kernel.gradient(data, self.p2_dense) - k2 = CArray.zeros(shape=k1.shape) - for i in range(k2.shape[0]): - k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) - self.assertTrue((k1 - k2).ravel().norm() < 1e-4) - - def _test_gradient_multiple_points_sparse(self): - """Test for kernel gradients with multiple points vs single point.""" - - if not self._has_gradient(): - self.logger.info( - "Gradient is not implemented for %s. " - "Skipping multiple-point tests.", self.kernel.class_type) - return - - # check if gradient computed on multiple points is the same as - # the gradients computed on one point at a time. - data = self.d_sparse.X[0:5, :] # using same no. of points and features - k1 = self.kernel.gradient(data, self.p2_dense) - k2 = CArray.zeros(shape=k1.shape) - for i in range(k2.shape[0]): - k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) - self.assertTrue((k1 - k2).ravel().norm() < 1e-4) - - data = self.d_sparse.X # using different no. 
of points/features - k1 = self.kernel.gradient(data, self.p2_dense) - k2 = CArray.zeros(shape=k1.shape) - for i in range(k2.shape[0]): - k2[i, :] = self.kernel.gradient(data[i, :], self.p2_dense) - self.assertTrue((k1 - k2).ravel().norm() < 1e-4) - - -if __name__ == '__main__': - CUnitTest.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py b/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py deleted file mode 100644 index 7fb3adbc..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_chebyshev_distance.py +++ /dev/null @@ -1,27 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelChebyshevDistance(CCKernelTestCases): - """Unit test for CKernelChebyshevDistance.""" - - def setUp(self): - self._set_up('chebyshev-dist') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - try: - self._test_similarity_shape_sparse() - except TypeError: - # computation of kernel is not supported on sparse matrices - pass - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points_sparse() - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py b/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py deleted file mode 100644 index 9adc2675..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_euclidean.py +++ /dev/null @@ -1,25 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelEuclidean(CCKernelTestCases): - """Unit test for CKernelHamming.""" - - def setUp(self): - self._set_up('euclidean') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - self._test_similarity_shape_sparse() - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points() - - # TODO test when squared=True. 
but this needs to be passed to __init__ - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py b/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py deleted file mode 100644 index 0d938edb..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_histintersect.py +++ /dev/null @@ -1,23 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelHistIntersect(CCKernelTestCases): - """Unit test for CKernelHistIntersect.""" - - def setUp(self): - self._set_up('hist-intersect') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - self._test_similarity_shape_sparse() - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points_sparse() - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py b/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py deleted file mode 100644 index 4f304e5c..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_laplacian.py +++ /dev/null @@ -1,23 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelLaplacian(CCKernelTestCases): - """Unit test for CKernelLaplacian.""" - - def setUp(self): - self._set_up('laplacian') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - self._test_similarity_shape_sparse() - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points_sparse() - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_linear.py b/src/secml/ml/kernel/tests/test_c_kernel_linear.py deleted file mode 100644 index a5e9a85d..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_linear.py +++ /dev/null @@ -1,23 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelLinear(CCKernelTestCases): - """Unit test for CKernelLinear.""" - - def setUp(self): - self._set_up('linear') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - self._test_similarity_shape_sparse() - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points_sparse() - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_poly.py b/src/secml/ml/kernel/tests/test_c_kernel_poly.py deleted file mode 100644 index c95ed235..00000000 --- a/src/secml/ml/kernel/tests/test_c_kernel_poly.py +++ /dev/null @@ -1,23 +0,0 @@ -from secml.ml.kernel.tests import CCKernelTestCases - - -class TestCKernelPoly(CCKernelTestCases): - """Unit test for CKernelPoly.""" - - def setUp(self): - self._set_up('poly') - - def test_similarity_shape(self): - """Test shape of kernel.""" - self._test_similarity_shape() - self._test_similarity_shape_sparse() - - def test_gradient(self): - self._test_gradient() - self._test_gradient_sparse() - self._test_gradient_multiple_points() - self._test_gradient_multiple_points_sparse() - - -if __name__ == '__main__': - CCKernelTestCases.main() diff --git a/src/secml/ml/kernel/tests/test_c_kernel_rbf.py b/src/secml/ml/kernel/tests/test_c_kernel_rbf.py deleted file mode 100644 index f5538ad2..00000000 --- 
a/src/secml/ml/kernel/tests/test_c_kernel_rbf.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from secml.ml.kernel.tests import CCKernelTestCases
-
-
-class TestCKernelRBF(CCKernelTestCases):
-    """Unit test for CKernelRBF."""
-
-    def setUp(self):
-        self._set_up('rbf')
-
-    def test_similarity_shape(self):
-        """Test shape of kernel."""
-        self._test_similarity_shape()
-        self._test_similarity_shape_sparse()
-
-    def test_gradient(self):
-        self._test_gradient()
-        self._test_gradient_sparse()
-        self._test_gradient_multiple_points()
-        self._test_gradient_multiple_points_sparse()
-
-
-if __name__ == '__main__':
-    CCKernelTestCases.main()
diff --git a/src/secml/ml/model_zoo/tests/__init__.py b/src/secml/ml/model_zoo/tests/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/secml/ml/model_zoo/tests/_test_model-clf.gz b/src/secml/ml/model_zoo/tests/_test_model-clf.gz
deleted file mode 100644
index d60e6f509a8cef7bcb2fd341f9587338101c7cfd..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 174
zcmb2|=HN*1^o?WsA77GMToRv~pOTuRo1Bxzkex7dR|6wc*7A%m23G!s

Date: Tue, 17 Mar 2020 16:46:58 +0100
Subject: [PATCH 03/20] Added CNormalizerTestCases, refactor of the tests of
 the normalizers

---
 .../features/normalization/tests/__init__.py  |   1 +
 .../tests/c_normalizer_testcases.py           | 134 ++++++++++++++++++
 .../tests/test_c_normalizer_mean_std.py       |  75 +++-------
 .../tests/test_c_normalizer_minmax.py         |  80 +++--------
 .../tests/test_c_normalizer_unitnorm.py       |  15 +-
 5 files changed, 180 insertions(+), 125 deletions(-)
 create mode 100644 src/secml/ml/features/normalization/tests/c_normalizer_testcases.py

diff --git a/src/secml/ml/features/normalization/tests/__init__.py b/src/secml/ml/features/normalization/tests/__init__.py
index e69de29b..2ed319b0 100644
--- a/src/secml/ml/features/normalization/tests/__init__.py
+++ b/src/secml/ml/features/normalization/tests/__init__.py
@@ -0,0 +1 @@
+from .c_normalizer_testcases import CNormalizerTestCases
diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py
new file mode 100644
index 00000000..f828867b
--- /dev/null
+++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py
@@ -0,0 +1,134 @@
+from secml.testing import CUnitTest
+
+from secml.array import CArray
+from secml.ml.features import CPreProcess
+
+
+class CNormalizerTestCases(CUnitTest):
+    """Unittests interface for the normalizers."""
+
+    def sklearn_comp(self, array, norm_sklearn, norm):
+        """Check that our normalizer returns a result almost equal
+        to the one of the equivalent sklearn normalizer."""
+        self.logger.info("Original array is:\n{:}".format(array))
+
+        # Sklearn normalizer
+        target = CArray(norm_sklearn.fit_transform(
+            array.astype(float).tondarray()))
+        # Our normalizer
+        n = norm.fit(array)
+        result = n.transform(array)
+
+        self.logger.info("Correct result is:\n{:}".format(target))
+        self.logger.info("Our result is:\n{:}".format(result))
+
+        self.assert_array_almost_equal(target, result)
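+
+    # A minimal usage sketch of `sklearn_comp` (illustrative only): a
+    # concrete subclass compares one of the fixture arrays defined in
+    # `setUp` below against the matching scikit-learn scaler, e.g., for
+    # the min-max normalizer:
+    #
+    #     from sklearn.preprocessing import MinMaxScaler
+    #     from secml.ml.features.normalization import CNormalizerMinMax
+    #     self.sklearn_comp(
+    #         self.array_dense, MinMaxScaler(), CNormalizerMinMax())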
+
+    def setup_x_chain(self, name, norm_kwargs=None):
+        """Tests a chain of preprocessors ending with the given normalizer
+        and checks the shape of the transformed data.
+
+        `norm_kwargs` are extra parameters for the final normalizer."""
+        if norm_kwargs is None:
+            norm_kwargs = {}
+        x_chain = self._test_chain(
+            self.array_dense,
+            ['min-max', 'pca', name],
+            [{'feature_range': (-5, 5)}, {}, norm_kwargs]
+        )
+        # Expected shape is (3, 3), as pca max n_components is 4-1
+        self.assertEqual((self.array_dense.shape[0],
+                          self.array_dense.shape[1] - 1), x_chain.shape)
+
+    def setup_grad(self, names, kwargs_list):
+        """Tests the gradient of a chain of preprocessors
+        and checks its shape."""
+        grad = self._test_chain_gradient(
+            self.array_dense, names, kwargs_list)
+        # Expected shape is (n_feats, ), so (4, )
+        self.assertEqual((self.array_dense.shape[1],), grad.shape)
+
+    def setUp(self):
+
+        self.array_dense = CArray([[1, 0, 0, 5],
+                                   [2, 4, 0, 0],
+                                   [3, 6, 0, 0]])
+        self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True)
+
+        self.row_dense = CArray([4, 0, 6])
+        self.column_dense = self.row_dense.deepcopy().T
+
+        self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True)
+        self.column_sparse = self.row_sparse.deepcopy().T
+
+    @staticmethod
+    def _create_chain(pre_id_list, kwargs_list):
+        """Creates a preprocessor with other preprocessors chained
+        and a list of the same preprocessors (not chained)."""
+        chain = None
+        pre_list = []
+        for i, pre_id in enumerate(pre_id_list):
+            chain = CPreProcess.create(
+                pre_id, preprocess=chain, **kwargs_list[i])
+            pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i]))
+
+        return chain, pre_list
+
+    def _test_chain(self, x, pre_id_list, kwargs_list, y=None):
+        """Tests if preprocess chain and manual chaining yield same result."""
+        chain, pre_list = self._create_chain(pre_id_list, kwargs_list)
+
+        chain = chain.fit(x, y=y)
+        self.logger.info("Preprocessors chain:\n{:}".format(chain))
+
+        x_chain = chain.transform(x)
+        self.logger.info("Transformed X (chain):\n{:}".format(x_chain))
+
+        # Train the manual chain and transform
+        x_manual = x
+        for pre in pre_list:
+            x_manual = pre.fit_transform(x_manual, y=y)
+
+        self.logger.info("Transformed X (manual):\n{:}".format(x_manual))
+        self.assert_allclose(x_chain, x_manual)
+
+        # Reverting array (if available)
+        try:
+            x_chain_revert = chain.inverse_transform(x_chain)
+            self.logger.info("Reverted X (chain):\n{:}".format(x_chain_revert))
+            self.logger.info("Original X:\n{:}".format(x))
+            self.assert_array_almost_equal(x_chain_revert, x)
+        except NotImplementedError:
+            self.logger.info("inverse_transform not available")
+
+        return x_chain
+
+    def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None):
+        """Tests if gradient preprocess chain and
+        gradient of manual chaining yield same result."""
+        chain, pre_list = self._create_chain(pre_id_list, kwargs_list)
+
+        chain = chain.fit(x, y=y)
+        self.logger.info("Preprocessors chain:\n{:}".format(chain))
+
+        v = x[1, :]
+        grad_chain = chain.gradient(v)
+        self.logger.info(
+            "gradient({:}) (chain):\n{:}".format(v, grad_chain))
+
+        # Manually compose the chain and transform
+        for pre in pre_list:
+            x = pre.fit_transform(x, y=y)
+
+        v_list = [v]
+        for pre in pre_list[:-1]:
+            v = pre.transform(v)
+            v_list.append(v)
+
+        v_list = list(reversed(v_list))
+        pre_list = list(reversed(pre_list))
+
+        grad = None
+        for i, v in enumerate(v_list):
+            grad = pre_list[i].gradient(v, w=grad)
+
+        self.logger.info(
+            "gradient({:}) (manual):\n{:}".format(v, grad))
+        self.assert_allclose(grad_chain, grad)
+
+        return grad_chain
+
+
+if __name__ == '__main__':
+    CUnitTest.main()
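+
+# Intended usage in a subclass (an illustrative sketch; see the refactored
+# test_c_normalizer_mean_std.py below for the real calls):
+#
+#     def test_chain(self):
+#         # chain is min-max -> pca -> <normalizer under test>
+#         self.setup_x_chain('mean-std')
+#
+#     def test_chain_gradient(self):
+#         self.setup_grad(['min-max', 'mean-std'],
+#                         [{'feature_range': (-5, 5)}, {}])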
secml.array import CArray from secml.ml.features.normalization import CNormalizerMeanStd -class TestCNormalizerMeanStd(CPreProcessTestCases): +class TestCNormalizerMeanStd(CNormalizerTestCases): """Unittest for CNormalizerMeanStd""" def test_zscore(self): """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" - def sklearn_comp(array): - - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer - target = CArray(StandardScaler().fit_transform( - array.astype(float).tondarray())) - # Our normalizer - n = CNormalizerMeanStd().fit(array) - result = n.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - self.logger.info("Testing without std") - # Sklearn normalizer - target = CArray(StandardScaler(with_std=False).fit_transform( - array.astype(float).tondarray())) - # Our normalizer - n = CNormalizerMeanStd(with_std=False).fit(array) - result = n.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) + self.sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) + self.sklearn_comp(self.array_sparse, StandardScaler(), CNormalizerMeanStd()) + self.sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), CNormalizerMeanStd()) + self.sklearn_comp(self.row_sparse, StandardScaler(), CNormalizerMeanStd()) + self.sklearn_comp(self.column_dense, StandardScaler(), CNormalizerMeanStd()) + self.sklearn_comp(self.column_sparse, StandardScaler(), CNormalizerMeanStd()) + + self.sklearn_comp(self.array_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self.sklearn_comp(self.array_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self.sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self.sklearn_comp(self.row_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self.sklearn_comp(self.column_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self.sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) def test_normalizer_mean_std(self): """Test for CNormalizerMeanStd.""" @@ -76,27 +54,16 @@ class TestCNormalizerMeanStd(CPreProcessTestCases): def test_chain(self): """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'mean-std'], - [{'feature_range': (-5, 5)}, {}, {}] - ) - + self.setup_x_chain('mean-std') # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1]-1), x_chain.shape) def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - grad = self._test_chain_gradient( - self.array_dense, - ['min-max', 'mean-std'], - [{'feature_range': (-5, 5)}, {}] - ) - + names = ['min-max', 'mean-std'] + feature_ranges = [{'feature_range': (-5, 5)}, {}] # Expected shape is (n_feats, ), so (4, ) - self.assertEqual((self.array_dense.shape[1], ), grad.shape) + self.setup_grad(names, feature_ranges) if __name__ == '__main__': 
- CPreProcessTestCases.main() + CNormalizerTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index c79f8e59..66dbe7c8 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -1,81 +1,41 @@ -from secml.ml.features.tests import CPreProcessTestCases +from secml.ml.features.normalization.tests import CNormalizerTestCases from sklearn.preprocessing import MinMaxScaler -from secml.array import CArray from secml.ml.features.normalization import CNormalizerMinMax -class TestCNormalizerLinear(CPreProcessTestCases): +class TestCNormalizerLinear(CNormalizerTestCases): """Unittest for CNormalizerLinear.""" def test_norm_minmax(self): """Test for CNormalizerMinMax.""" - - def sklearn_comp(array): - - self.logger.info("Original array is:\n{:}".format(array)) - - # Sklearn normalizer (requires float dtype input) - array_sk = array.astype(float).tondarray() - sk_norm = MinMaxScaler().fit(array_sk) - - target = CArray(sk_norm.transform(array_sk)) - - # Our normalizer - our_norm = CNormalizerMinMax().fit(array) - result = our_norm.transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - # Testing out of range normalization - - self.logger.info("Testing out of range normalization") - - # Sklearn normalizer (requires float dtype input) - target = CArray(sk_norm.transform(array_sk * 2)) - - # Our normalizer - result = our_norm.transform(array * 2) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - sklearn_comp(self.array_dense) - sklearn_comp(self.array_sparse) - sklearn_comp(self.row_dense.atleast_2d()) - sklearn_comp(self.row_sparse) - sklearn_comp(self.column_dense) - sklearn_comp(self.column_sparse) + self.sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self.sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] - ) - + feature_range = {'feature_range': (0, 1)} + self.setup_x_chain('min-max', feature_range) # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1]-1), x_chain.shape) def test_chain_gradient(self): """Check gradient of a chain of 
preprocessors.""" - grad = self._test_chain_gradient( - self.array_dense, - ['min-max', 'mean-std', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] - ) - # Expected shape is (n_feats, ), so (4, ) - self.assertEqual((self.array_dense.shape[1], ), grad.shape) + names = ['min-max', 'mean-std', 'min-max'] + feature_ranges = [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] + self.setup_grad(names, feature_ranges) if __name__ == '__main__': - CPreProcessTestCases.main() + CNormalizerTestCases.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index f97bc09f..a6c34af6 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -1,4 +1,4 @@ -from secml.ml.features.tests import CPreProcessTestCases +from secml.ml.features.normalization.tests import CNormalizerTestCases from sklearn.preprocessing import Normalizer @@ -7,7 +7,7 @@ from secml.ml.features.normalization import CNormalizerUnitNorm from secml.optim.function import CFunction -class TestCNormalizerUnitNorm(CPreProcessTestCases): +class TestCNormalizerUnitNorm(CNormalizerTestCases): """Unittest for CNormalizerUnitNorm.""" def test_norm_unitnorm(self): @@ -41,15 +41,8 @@ class TestCNormalizerUnitNorm(CPreProcessTestCases): def test_chain(self): """Test a chain of preprocessors.""" - x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', 'unit-norm'], - [{'feature_range': (-5, 5)}, {}, {}] - ) - + self.setup_x_chain('unit-norm') # Expected shape is (3, 3), as pca max n_components is 4-1 - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) def _test_gradient(self): """Check the normalizer gradient.""" @@ -108,4 +101,4 @@ class TestCNormalizerUnitNorm(CPreProcessTestCases): if __name__ == '__main__': - CPreProcessTestCases.main() + CNormalizerTestCases.main() -- GitLab From 81c9bc3374315633b1c89a56323b58dcbfa3a420 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Tue, 17 Mar 2020 17:18:30 +0100 Subject: [PATCH 04/20] Refactor of some names in normalizers' tests --- .../tests/c_normalizer_testcases.py | 3 +- .../tests/test_c_normalizer_mean_std.py | 29 +++++++++---------- .../tests/test_c_normalizer_minmax.py | 24 +++++++-------- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index f828867b..33aac68e 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -5,10 +5,9 @@ from secml.ml.features import CPreProcess class CNormalizerTestCases(CUnitTest): - """Unittests interface for CPreProcess.""" - def sklearn_comp(self, array, norm_sklearn, norm): + def _sklearn_comp(self, array, norm_sklearn, norm): """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer""" self.logger.info("Original array is:\n{:}".format(array)) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index 24fb5cbf..e6fd6d58 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ 
b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -11,20 +11,20 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): def test_zscore(self): """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" - self.sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) - self.sklearn_comp(self.array_sparse, StandardScaler(), CNormalizerMeanStd()) - self.sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), CNormalizerMeanStd()) - self.sklearn_comp(self.row_sparse, StandardScaler(), CNormalizerMeanStd()) - self.sklearn_comp(self.column_dense, StandardScaler(), CNormalizerMeanStd()) - self.sklearn_comp(self.column_sparse, StandardScaler(), CNormalizerMeanStd()) - - self.sklearn_comp(self.array_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self.sklearn_comp(self.array_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self.sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self.sklearn_comp(self.row_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self.sklearn_comp(self.column_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self.sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.array_sparse, StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.row_sparse, StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.column_dense, StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.column_sparse, StandardScaler(), CNormalizerMeanStd()) + + self._sklearn_comp(self.array_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.array_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.row_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.column_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) def test_normalizer_mean_std(self): """Test for CNormalizerMeanStd.""" @@ -32,7 +32,6 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): for (mean, std) in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: - self.logger.info("Original array is:\n{:}".format(array)) self.logger.info( "Normalizing using mean: {:} std: {:}".format(mean, std)) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index 66dbe7c8..e35a137e 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -10,18 +10,18 @@ class TestCNormalizerLinear(CNormalizerTestCases): def test_norm_minmax(self): """Test for CNormalizerMinMax.""" - self.sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.array_sparse, MinMaxScaler(), 
CNormalizerMinMax()) - self.sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self.sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors.""" -- GitLab From daea15e2b1975fc5c95cfd653d75ecbbfc1cbf75 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Tue, 17 Mar 2020 18:14:38 +0100 Subject: [PATCH 05/20] Refactor of _test_chain and _test_chain_grad --- .../tests/c_normalizer_testcases.py | 26 +++---------------- .../tests/test_c_normalizer_mean_std.py | 10 +++---- .../tests/test_c_normalizer_minmax.py | 25 +++++++++--------- .../tests/test_c_normalizer_unitnorm.py | 8 +++--- 4 files changed, 24 insertions(+), 45 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 33aac68e..6246b3f6 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -1,5 +1,4 @@ from secml.testing import CUnitTest - from secml.array import CArray from secml.ml.features import CPreProcess @@ -9,35 +8,15 @@ class CNormalizerTestCases(CUnitTest): def _sklearn_comp(self, array, norm_sklearn, norm): """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer""" - self.logger.info("Original array is:\n{:}".format(array)) target = CArray(norm_sklearn.fit_transform(array.astype(float).tondarray())) # Our normalizer n = norm.fit(array) result = n.transform(array) - self.logger.info("Correct result is:\n{:}".format(target)) self.logger.info("Our result is:\n{:}".format(result)) - self.assert_array_almost_equal(target, result) - def setup_x_chain(self, name, feature_range=None): - """Arranges a setup for x_chain depending on the normalizer and tests a chain of preprocessors""" - if feature_range is None: - feature_range = {} 
- x_chain = self._test_chain( - self.array_dense, - ['min-max', 'pca', name], - [{'feature_range': (-5, 5)}, {}, feature_range] - ) - self.assertEqual((self.array_dense.shape[0], - self.array_dense.shape[1] - 1), x_chain.shape) - - def setup_grad(self, names, feature_ranges): - """Arranges a setup for the gradient of a chain of preprocessors and tests it""" - grad = self._test_chain_gradient(self.array_dense, names, feature_ranges) - self.assertEqual((self.array_dense.shape[1],), grad.shape) - def setUp(self): self.array_dense = CArray([[1, 0, 0, 5], @@ -91,6 +70,9 @@ class CNormalizerTestCases(CUnitTest): except NotImplementedError: self.logger.info("inverse_transform not available") + self.assertEqual((self.array_dense.shape[0], + self.array_dense.shape[1] - 1), x_chain.shape) + return x_chain def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): @@ -121,10 +103,10 @@ class CNormalizerTestCases(CUnitTest): grad = None for i, v in enumerate(v_list): grad = pre_list[i].gradient(v, w=grad) - self.logger.info( "gradient({:}) (manual):\n{:}".format(v, grad)) self.assert_allclose(grad_chain, grad) + self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) return grad_chain diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index e6fd6d58..d096ca19 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -1,7 +1,5 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases - from sklearn.preprocessing import StandardScaler - from secml.ml.features.normalization import CNormalizerMeanStd @@ -53,15 +51,15 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): def test_chain(self): """Test a chain of preprocessors.""" - self.setup_x_chain('mean-std') + self._test_chain(self.array_dense, + ['min-max', 'pca', 'mean-std'], + [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" - names = ['min-max', 'mean-std'] - feature_ranges = [{'feature_range': (-5, 5)}, {}] # Expected shape is (n_feats, ), so (4, ) - self.setup_grad(names, feature_ranges) + self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], [{'feature_range': (-5, 5)}, {}]) if __name__ == '__main__': diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index e35a137e..f810335f 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -1,7 +1,5 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases - from sklearn.preprocessing import MinMaxScaler - from secml.ml.features.normalization import CNormalizerMinMax @@ -16,25 +14,26 @@ class TestCNormalizerLinear(CNormalizerTestCases): self._sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), 
CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_dense * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d() * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_dense * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors.""" - feature_range = {'feature_range': (0, 1)} - self.setup_x_chain('min-max', feature_range) + self._test_chain(self.array_dense, + ['min-max', 'pca', 'min-max'], + [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): """Check gradient of a chain of preprocessors.""" # Expected shape is (n_feats, ), so (4, ) - names = ['min-max', 'mean-std', 'min-max'] - feature_ranges = [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}] - self.setup_grad(names, feature_ranges) + self._test_chain_gradient(self.array_dense, + ['min-max', 'mean-std', 'min-max'], + [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) if __name__ == '__main__': diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index a6c34af6..99a2bdc1 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -1,7 +1,5 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases - from sklearn.preprocessing import Normalizer - from secml.array import CArray from secml.ml.features.normalization import CNormalizerUnitNorm from secml.optim.function import CFunction @@ -21,7 +19,7 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): # Sklearn normalizer (requires float dtype input) target = CArray(Normalizer(norm=norm_type).fit_transform( - array.astype(float).get_data())) + array.astype(float).get_data())) # Create our normalizer result = CNormalizerUnitNorm(norm=norm_type).fit_transform(array) @@ -41,7 +39,9 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): def test_chain(self): """Test a chain of preprocessors.""" - self.setup_x_chain('unit-norm') + self._test_chain(self.array_dense, + ['min-max', 'pca', 'unit-norm'], + [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def _test_gradient(self): -- GitLab From 458dc631e824dda347bcea2133ca75b20bfbe08a Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Tue, 17 Mar 2020 18:20:31 +0100 Subject: [PATCH 06/20] Refactor of _test_chain and _test_chain_grad --- .../normalization/tests/test_c_normalizer_minmax.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index f810335f..0aed52f0 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -14,12 
+14,12 @@ class TestCNormalizerLinear(CNormalizerTestCases): self._sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_dense * 2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d() * 2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_dense * 2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse * 2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors.""" -- GitLab From 0ab961d7d54a09927216f601f0704f3e8566d4a1 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Tue, 17 Mar 2020 18:37:09 +0100 Subject: [PATCH 07/20] Refactor of _test_chain and _test_chain_grad --- .../tests/c_normalizer_testcases.py | 15 ++++++++++----- .../tests/test_c_normalizer_mean_std.py | 18 ++++++++++-------- .../tests/test_c_normalizer_minmax.py | 13 ++++++++----- .../tests/test_c_normalizer_unitnorm.py | 13 ++++++++----- 4 files changed, 36 insertions(+), 23 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 6246b3f6..243583ad 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -4,10 +4,12 @@ from secml.ml.features import CPreProcess class CNormalizerTestCases(CUnitTest): - """Unittests interface for CPreProcess.""" + """Unittests interface for CPreProcess. + """ def _sklearn_comp(self, array, norm_sklearn, norm): - """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer""" + """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer + """ self.logger.info("Original array is:\n{:}".format(array)) target = CArray(norm_sklearn.fit_transform(array.astype(float).tondarray())) # Our normalizer @@ -33,7 +35,8 @@ class CNormalizerTestCases(CUnitTest): @staticmethod def _create_chain(pre_id_list, kwargs_list): """Creates a preprocessor with other preprocessors chained - and a list of the same preprocessors (not chained)""" + and a list of the same preprocessors (not chained) + """ chain = None pre_list = [] for i, pre_id in enumerate(pre_id_list): @@ -44,7 +47,8 @@ class CNormalizerTestCases(CUnitTest): return chain, pre_list def _test_chain(self, x, pre_id_list, kwargs_list, y=None): - """Tests if preprocess chain and manual chaining yield same result.""" + """Tests if preprocess chain and manual chaining yield same result. 
+ """ chain, pre_list = self._create_chain(pre_id_list, kwargs_list) chain = chain.fit(x, y=y) @@ -77,7 +81,8 @@ class CNormalizerTestCases(CUnitTest): def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): """Tests if gradient preprocess chain and - gradient of manual chaining yield same result.""" + gradient of manual chaining yield same result. + """ chain, pre_list = self._create_chain(pre_id_list, kwargs_list) chain = chain.fit(x, y=y) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index d096ca19..cbfb579f 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -4,11 +4,11 @@ from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): - """Unittest for CNormalizerMeanStd""" - + """Unittest for CNormalizerMeanStd + """ def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" - + """Test for CNormalizerMeanStd to obtain zero mean and unit variance + """ self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) self._sklearn_comp(self.array_sparse, StandardScaler(), CNormalizerMeanStd()) self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), CNormalizerMeanStd()) @@ -25,8 +25,8 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): self._sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) def test_normalizer_mean_std(self): - """Test for CNormalizerMeanStd.""" - + """Test for CNormalizerMeanStd. + """ for (mean, std) in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: @@ -50,14 +50,16 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): self.assert_array_almost_equal(array, rev) def test_chain(self): - """Test a chain of preprocessors.""" + """Test a chain of preprocessors. + """ self._test_chain(self.array_dense, ['min-max', 'pca', 'mean-std'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" + """Check gradient of a chain of preprocessors. + """ # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], [{'feature_range': (-5, 5)}, {}]) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index 0aed52f0..02979eea 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -4,10 +4,11 @@ from secml.ml.features.normalization import CNormalizerMinMax class TestCNormalizerLinear(CNormalizerTestCases): - """Unittest for CNormalizerLinear.""" - + """Unittest for CNormalizerLinear. + """ def test_norm_minmax(self): - """Test for CNormalizerMinMax.""" + """Test for CNormalizerMinMax. 
+ """ self._sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) @@ -22,14 +23,16 @@ class TestCNormalizerLinear(CNormalizerTestCases): self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): - """Test a chain of preprocessors.""" + """Test a chain of preprocessors. + """ self._test_chain(self.array_dense, ['min-max', 'pca', 'min-max'], [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" + """Check gradient of a chain of preprocessors. + """ # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std', 'min-max'], diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 99a2bdc1..2332ed0f 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -6,11 +6,12 @@ from secml.optim.function import CFunction class TestCNormalizerUnitNorm(CNormalizerTestCases): - """Unittest for CNormalizerUnitNorm.""" + """Unittest for CNormalizerUnitNorm. + """ def test_norm_unitnorm(self): - """Test for CNormalizerUnitNorm.""" - + """Test for CNormalizerUnitNorm. + """ norm_type_lst = ["l1", "l2", "max"] def sklearn_comp(array, norm_type): @@ -38,14 +39,16 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): sklearn_comp(self.column_sparse, norm_type) def test_chain(self): - """Test a chain of preprocessors.""" + """Test a chain of preprocessors. + """ self._test_chain(self.array_dense, ['min-max', 'pca', 'unit-norm'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def _test_gradient(self): - """Check the normalizer gradient.""" + """Check the normalizer gradient. 
+ """ norm_type_lst = ["l1", "l2", "max"] -- GitLab From 6bba73135ef9aadf05484a223dc5852eefcf2904 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 12:17:14 +0100 Subject: [PATCH 08/20] Refactor of test_norm_unitnorm --- .../tests/c_normalizer_testcases.py | 2 +- .../tests/test_c_normalizer_unitnorm.py | 33 +++++++------------ 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 243583ad..22c15813 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -11,7 +11,7 @@ class CNormalizerTestCases(CUnitTest): """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer """ self.logger.info("Original array is:\n{:}".format(array)) - target = CArray(norm_sklearn.fit_transform(array.astype(float).tondarray())) + target = CArray(norm_sklearn.fit_transform(array.tondarray())) # Our normalizer n = norm.fit(array) result = n.transform(array) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 2332ed0f..47e79a31 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -8,35 +8,24 @@ from secml.optim.function import CFunction class TestCNormalizerUnitNorm(CNormalizerTestCases): """Unittest for CNormalizerUnitNorm. """ + def _sklearn_comp(self, array, norm_sklearn, norm, norm_type=None): + self.logger.info("Norm type: {:}".format(norm_type)) + norm_sklearn = norm_sklearn(norm=norm_type) + norm = norm(norm=norm_type) + super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, norm) def test_norm_unitnorm(self): """Test for CNormalizerUnitNorm. """ norm_type_lst = ["l1", "l2", "max"] - def sklearn_comp(array, norm_type): - self.logger.info("Norm type: {:}".format(norm_type)) - self.logger.info("Original array is: {:}".format(array)) - - # Sklearn normalizer (requires float dtype input) - target = CArray(Normalizer(norm=norm_type).fit_transform( - array.astype(float).get_data())) - - # Create our normalizer - result = CNormalizerUnitNorm(norm=norm_type).fit_transform(array) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - for norm_type in norm_type_lst: - sklearn_comp(self.array_dense, norm_type) - sklearn_comp(self.array_sparse, norm_type) - sklearn_comp(self.row_dense.atleast_2d(), norm_type) - sklearn_comp(self.row_sparse, norm_type) - sklearn_comp(self.column_dense, norm_type) - sklearn_comp(self.column_sparse, norm_type) + self._sklearn_comp(self.array_dense, Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.array_sparse, Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.row_dense.atleast_2d(), Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.row_sparse, Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.column_dense, Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.column_sparse, Normalizer, CNormalizerUnitNorm, norm_type) def test_chain(self): """Test a chain of preprocessors. 
-- GitLab From 9c4d655b307724ad9210f6d464346e42cc839657 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 12:38:15 +0100 Subject: [PATCH 09/20] Refactor of test_norm_unitnorm --- .../normalization/tests/test_c_normalizer_mean_std.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index cbfb579f..58f5e5ef 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -61,7 +61,9 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): """Check gradient of a chain of preprocessors. """ # Expected shape is (n_feats, ), so (4, ) - self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], [{'feature_range': (-5, 5)}, {}]) + self._test_chain_gradient(self.array_dense, + ['min-max', 'mean-std'], + [{'feature_range': (-5, 5)}, {}]) if __name__ == '__main__': -- GitLab From 62aaabf22d035e58394bc1e4c4f3ea7cbf278978 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 14:26:25 +0100 Subject: [PATCH 10/20] Refactor of _create_chain --- src/secml/ml/features/c_preprocess.py | 16 ++++++++++++++++ .../tests/c_normalizer_testcases.py | 7 +++++++ 2 files changed, 23 insertions(+) diff --git a/src/secml/ml/features/c_preprocess.py b/src/secml/ml/features/c_preprocess.py index 07bff995..2e4ec48b 100644 --- a/src/secml/ml/features/c_preprocess.py +++ b/src/secml/ml/features/c_preprocess.py @@ -29,6 +29,22 @@ class CPreProcess(CModule, metaclass=ABCMeta): else CPreProcess.create(preprocess) CModule.__init__(self, preprocess=preprocess) + # Remove this method to return to the standard version + # This is a possible solution to _create_chain in NormalizerTestcases: + @staticmethod + def _create_chain2(pre_id_list, kwargs_list): + """Creates a preprocessor with other preprocessors chained + and a list of the same preprocessors (not chained) + """ + chain = None + pre_list = [] + for i, pre_id in enumerate(pre_id_list): + chain = CPreProcess.create( + pre_id, preprocess=chain, **kwargs_list[i]) + pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) + + return chain, pre_list + @staticmethod def create_chain(class_items, kwargs_list): """Creates a chain of preprocessors. diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 22c15813..a77e98ce 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -32,8 +32,15 @@ class CNormalizerTestCases(CUnitTest): self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True) self.column_sparse = self.row_sparse.deepcopy().T + # _create_chain issue: added method... 
+ # This is a possible solution: @staticmethod def _create_chain(pre_id_list, kwargs_list): + return CPreProcess._create_chain2(pre_id_list, kwargs_list) + + # remove the x in chain'x' to return to the standard version + @staticmethod + def _create_chainx(pre_id_list, kwargs_list): """Creates a preprocessor with other preprocessors chained and a list of the same preprocessors (not chained) """ -- GitLab From bd69a3187e44e88c4e0496ca74162bd60ca1a0b2 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 18:31:17 +0100 Subject: [PATCH 11/20] Revert "Refactor of _create_chain" This reverts commit 62aaabf22d035e58394bc1e4c4f3ea7cbf278978. --- src/secml/ml/features/c_preprocess.py | 16 ---------------- .../tests/c_normalizer_testcases.py | 7 ------- 2 files changed, 23 deletions(-) diff --git a/src/secml/ml/features/c_preprocess.py b/src/secml/ml/features/c_preprocess.py index 2e4ec48b..07bff995 100644 --- a/src/secml/ml/features/c_preprocess.py +++ b/src/secml/ml/features/c_preprocess.py @@ -29,22 +29,6 @@ class CPreProcess(CModule, metaclass=ABCMeta): else CPreProcess.create(preprocess) CModule.__init__(self, preprocess=preprocess) - # Remove this method to return to the standard version - # This is a possible solution to _create_chain in NormalizerTestcases: - @staticmethod - def _create_chain2(pre_id_list, kwargs_list): - """Creates a preprocessor with other preprocessors chained - and a list of the same preprocessors (not chained) - """ - chain = None - pre_list = [] - for i, pre_id in enumerate(pre_id_list): - chain = CPreProcess.create( - pre_id, preprocess=chain, **kwargs_list[i]) - pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) - - return chain, pre_list - @staticmethod def create_chain(class_items, kwargs_list): """Creates a chain of preprocessors. diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index a77e98ce..22c15813 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -32,15 +32,8 @@ class CNormalizerTestCases(CUnitTest): self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True) self.column_sparse = self.row_sparse.deepcopy().T - # _create_chain issue: added method... 
- # This is a possible solution: @staticmethod def _create_chain(pre_id_list, kwargs_list): - return CPreProcess._create_chain2(pre_id_list, kwargs_list) - - # remove the x in chain'x' to return to the standard version - @staticmethod - def _create_chainx(pre_id_list, kwargs_list): """Creates a preprocessor with other preprocessors chained and a list of the same preprocessors (not chained) """ -- GitLab From 008ce7f3cf4a39413a9df49bded40c9370f279d3 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 19:16:39 +0100 Subject: [PATCH 12/20] Refactor of Unitnorm: get_data() --- .../features/normalization/tests/c_normalizer_testcases.py | 7 +++++-- .../normalization/tests/test_c_normalizer_unitnorm.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 22c15813..87fd84a7 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -7,11 +7,14 @@ class CNormalizerTestCases(CUnitTest): """Unittests interface for CPreProcess. """ - def _sklearn_comp(self, array, norm_sklearn, norm): + def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer """ self.logger.info("Original array is:\n{:}".format(array)) - target = CArray(norm_sklearn.fit_transform(array.tondarray())) + if sparse: + target = CArray(norm_sklearn.fit_transform(array.get_data())) + else: + target = CArray(norm_sklearn.fit_transform(array.tondarray())) # Our normalizer n = norm.fit(array) result = n.transform(array) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 47e79a31..12e5560c 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -12,7 +12,7 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): self.logger.info("Norm type: {:}".format(norm_type)) norm_sklearn = norm_sklearn(norm=norm_type) norm = norm(norm=norm_type) - super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, norm) + super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, norm, sparse=True) def test_norm_unitnorm(self): """Test for CNormalizerUnitNorm. 
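
A note on the sparse flag added in PATCH 12: sklearn's Normalizer can consume scipy sparse input directly, while the scalers used by the other tests are fed a dense ndarray. A minimal sketch of the dispatch, assuming CArray.get_data() returns the wrapped ndarray or scipy.sparse matrix and CArray.tondarray() always densifies:

    def _to_sklearn_input(array, sparse=False):
        # Normalizer accepts scipy.sparse matrices natively, so hand the
        # wrapped data over as-is; StandardScaler/MinMaxScaler in these
        # tests receive a dense ndarray instead.
        return array.get_data() if sparse else array.tondarray()
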
-- GitLab From 424fe7268d213841114a67297c519a0a8675db3d Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 19:46:04 +0100 Subject: [PATCH 13/20] Refactor of CNormalizerTestcases: restore dependency between CNormalizerTestCases and CPreprocessTestcases --- .../tests/c_normalizer_testcases.py | 90 +------------------ 1 file changed, 4 insertions(+), 86 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 87fd84a7..6a416e30 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -1,9 +1,9 @@ from secml.testing import CUnitTest from secml.array import CArray -from secml.ml.features import CPreProcess +from secml.ml.features.tests import CPreProcessTestCases -class CNormalizerTestCases(CUnitTest): +class CNormalizerTestCases(CPreProcessTestCases): """Unittests interface for CPreProcess. """ @@ -22,102 +22,20 @@ class CNormalizerTestCases(CUnitTest): self.logger.info("Our result is:\n{:}".format(result)) self.assert_array_almost_equal(target, result) - def setUp(self): - - self.array_dense = CArray([[1, 0, 0, 5], - [2, 4, 0, 0], - [3, 6, 0, 0]]) - self.array_sparse = CArray(self.array_dense.deepcopy(), tosparse=True) - - self.row_dense = CArray([4, 0, 6]) - self.column_dense = self.row_dense.deepcopy().T - - self.row_sparse = CArray(self.row_dense.deepcopy(), tosparse=True) - self.column_sparse = self.row_sparse.deepcopy().T - - @staticmethod - def _create_chain(pre_id_list, kwargs_list): - """Creates a preprocessor with other preprocessors chained - and a list of the same preprocessors (not chained) - """ - chain = None - pre_list = [] - for i, pre_id in enumerate(pre_id_list): - chain = CPreProcess.create( - pre_id, preprocess=chain, **kwargs_list[i]) - pre_list.append(CPreProcess.create(pre_id, **kwargs_list[i])) - - return chain, pre_list - def _test_chain(self, x, pre_id_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result. """ - chain, pre_list = self._create_chain(pre_id_list, kwargs_list) - - chain = chain.fit(x, y=y) - self.logger.info("Preprocessors chain:\n{:}".format(chain)) - - x_chain = chain.transform(x) - self.logger.info("Trasformed X (chain):\n{:}".format(x_chain)) - - # Train the manual chain and transform - x_manual = x - for pre in pre_list: - x_manual = pre.fit_transform(x_manual, y=y) - - self.logger.info("Trasformed X (manual):\n{:}".format(x_manual)) - self.assert_allclose(x_chain, x_manual) - - # Reverting array (if available) - try: - x_chain_revert = chain.inverse_transform(x_chain) - self.logger.info("Reverted X (chain):\n{:}".format(x_chain_revert)) - self.logger.info("Original X:\n{:}".format(x)) - self.assert_array_almost_equal(x_chain_revert, x) - except NotImplementedError: - self.logger.info("inverse_transform not available") - + x_chain = super(CNormalizerTestCases, self)._test_chain(x, pre_id_list, kwargs_list, y=None) self.assertEqual((self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape) - return x_chain - def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): """Tests if gradient preprocess chain and gradient of manual chaining yield same result. 
""" - chain, pre_list = self._create_chain(pre_id_list, kwargs_list) - - chain = chain.fit(x, y=y) - self.logger.info("Preprocessors chain:\n{:}".format(chain)) - - v = x[1, :] - grad_chain = chain.gradient(v) - self.logger.info( - "gradient({:}) (chain):\n{:}".format(v, grad_chain)) - - # Manually compose the chain and transform - for pre in pre_list: - x = pre.fit_transform(x, y=y) - - v_list = [v] - for pre in pre_list[:-1]: - v = pre.transform(v) - v_list.append(v) - - v_list = list(reversed(v_list)) - pre_list = list(reversed(pre_list)) - - grad = None - for i, v in enumerate(v_list): - grad = pre_list[i].gradient(v, w=grad) - self.logger.info( - "gradient({:}) (manual):\n{:}".format(v, grad)) - self.assert_allclose(grad_chain, grad) + grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient(x, pre_id_list, kwargs_list, y=None) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) - return grad_chain - if __name__ == '__main__': CUnitTest.main() -- GitLab From d5d5a928d2f3790648c541102cc1af9940feb13d Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Wed, 18 Mar 2020 23:34:03 +0100 Subject: [PATCH 14/20] Refactor of Normalizers'tests: PEP8 update --- .../tests/c_normalizer_testcases.py | 9 ++-- .../tests/test_c_normalizer_mean_std.py | 37 ++++++++++------ .../tests/test_c_normalizer_minmax.py | 42 ++++++++++++------- .../tests/test_c_normalizer_unitnorm.py | 21 ++++++---- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 6a416e30..8b73c1fa 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -8,7 +8,8 @@ class CNormalizerTestCases(CPreProcessTestCases): """ def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): - """Check if the result given by the sklearn normalizer is almost equal to the one given by our normalizer + """Check if the result given by the sklearn normalizer is almost + equal to the one given by our normalizer """ self.logger.info("Original array is:\n{:}".format(array)) if sparse: @@ -25,7 +26,8 @@ class CNormalizerTestCases(CPreProcessTestCases): def _test_chain(self, x, pre_id_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result. """ - x_chain = super(CNormalizerTestCases, self)._test_chain(x, pre_id_list, kwargs_list, y=None) + x_chain = super(CNormalizerTestCases, self)._test_chain( + x, pre_id_list, kwargs_list, y=None) self.assertEqual((self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape) @@ -33,7 +35,8 @@ class CNormalizerTestCases(CPreProcessTestCases): """Tests if gradient preprocess chain and gradient of manual chaining yield same result. 
""" - grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient(x, pre_id_list, kwargs_list, y=None) + grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient( + x, pre_id_list, kwargs_list, y=None) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index 58f5e5ef..5404751f 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -6,23 +6,36 @@ from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): """Unittest for CNormalizerMeanStd """ + def test_zscore(self): """Test for CNormalizerMeanStd to obtain zero mean and unit variance """ - self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) - self._sklearn_comp(self.array_sparse, StandardScaler(), CNormalizerMeanStd()) - self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), CNormalizerMeanStd()) - self._sklearn_comp(self.row_sparse, StandardScaler(), CNormalizerMeanStd()) - self._sklearn_comp(self.column_dense, StandardScaler(), CNormalizerMeanStd()) - self._sklearn_comp(self.column_sparse, StandardScaler(), CNormalizerMeanStd()) + self._sklearn_comp(self.array_dense, StandardScaler(), + CNormalizerMeanStd()) + self._sklearn_comp(self.array_sparse, StandardScaler(), + CNormalizerMeanStd()) + self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), + CNormalizerMeanStd()) + self._sklearn_comp(self.row_sparse, StandardScaler(), + CNormalizerMeanStd()) + self._sklearn_comp(self.column_dense, StandardScaler(), + CNormalizerMeanStd()) + self._sklearn_comp(self.column_sparse, StandardScaler(), + CNormalizerMeanStd()) - self._sklearn_comp(self.array_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.array_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(with_std=False), + self._sklearn_comp(self.array_dense, StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.array_sparse, StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.row_dense.atleast_2d(), + StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.row_sparse, StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.column_dense, StandardScaler(with_std=False), + CNormalizerMeanStd(with_std=False)) + self._sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.row_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.column_dense, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.column_sparse, StandardScaler(with_std=False), CNormalizerMeanStd(with_std=False)) def test_normalizer_mean_std(self): """Test for CNormalizerMeanStd. 
diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index 02979eea..e3e28914 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -9,25 +9,38 @@ class TestCNormalizerLinear(CNormalizerTestCases): def test_norm_minmax(self): """Test for CNormalizerMinMax. """ - self._sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_dense, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_dense*2, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp(self.array_dense, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.column_dense, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.array_dense*2, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.column_dense*2, MinMaxScaler(), + CNormalizerMinMax()) + self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), + CNormalizerMinMax()) def test_chain(self): """Test a chain of preprocessors. 
""" self._test_chain(self.array_dense, ['min-max', 'pca', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) + [{'feature_range': (-5, 5)}, {}, + {'feature_range': (0, 1)}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): @@ -36,7 +49,8 @@ class TestCNormalizerLinear(CNormalizerTestCases): # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std', 'min-max'], - [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) + [{'feature_range': (-5, 5)}, {}, + {'feature_range': (0, 1)}]) if __name__ == '__main__': diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 12e5560c..187c1348 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -12,7 +12,8 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): self.logger.info("Norm type: {:}".format(norm_type)) norm_sklearn = norm_sklearn(norm=norm_type) norm = norm(norm=norm_type) - super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, norm, sparse=True) + super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, + norm, sparse=True) def test_norm_unitnorm(self): """Test for CNormalizerUnitNorm. @@ -20,12 +21,18 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): norm_type_lst = ["l1", "l2", "max"] for norm_type in norm_type_lst: - self._sklearn_comp(self.array_dense, Normalizer, CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.array_sparse, Normalizer, CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.row_dense.atleast_2d(), Normalizer, CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.row_sparse, Normalizer, CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.column_dense, Normalizer, CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.column_sparse, Normalizer, CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.array_dense, Normalizer, + CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.array_sparse, Normalizer, + CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.row_dense.atleast_2d(), Normalizer, + CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.row_sparse, Normalizer, + CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.column_dense, Normalizer, + CNormalizerUnitNorm, norm_type) + self._sklearn_comp(self.column_sparse, Normalizer, + CNormalizerUnitNorm, norm_type) def test_chain(self): """Test a chain of preprocessors. 
-- GitLab From 863145d52031f5dd4484b85e78d6c61fe7c24fa2 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Thu, 19 Mar 2020 15:01:38 +0100 Subject: [PATCH 15/20] Refactor of Normalizers'tests: docstrings and parameter y update --- .../tests/c_normalizer_testcases.py | 10 ++++------ .../tests/test_c_normalizer_mean_std.py | 15 +++++---------- .../tests/test_c_normalizer_minmax.py | 14 +++++--------- .../tests/test_c_normalizer_unitnorm.py | 18 +++++++++--------- 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 8b73c1fa..7fde5169 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -4,8 +4,7 @@ from secml.ml.features.tests import CPreProcessTestCases class CNormalizerTestCases(CPreProcessTestCases): - """Unittests interface for CPreProcess. - """ + """Unittests interface for Normalizers.""" def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): """Check if the result given by the sklearn normalizer is almost @@ -24,10 +23,9 @@ class CNormalizerTestCases(CPreProcessTestCases): self.assert_array_almost_equal(target, result) def _test_chain(self, x, pre_id_list, kwargs_list, y=None): - """Tests if preprocess chain and manual chaining yield same result. - """ + """Tests if preprocess chain and manual chaining yield same result.""" x_chain = super(CNormalizerTestCases, self)._test_chain( - x, pre_id_list, kwargs_list, y=None) + x, pre_id_list, kwargs_list, y) self.assertEqual((self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape) @@ -36,7 +34,7 @@ class CNormalizerTestCases(CPreProcessTestCases): gradient of manual chaining yield same result. """ grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient( - x, pre_id_list, kwargs_list, y=None) + x, pre_id_list, kwargs_list, y) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index 5404751f..7dc16127 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -4,12 +4,10 @@ from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): - """Unittest for CNormalizerMeanStd - """ + """Unittest for CNormalizerMeanStd""" def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance - """ + """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) self._sklearn_comp(self.array_sparse, StandardScaler(), @@ -38,8 +36,7 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): CNormalizerMeanStd(with_std=False)) def test_normalizer_mean_std(self): - """Test for CNormalizerMeanStd. - """ + """Test for CNormalizerMeanStd.""" for (mean, std) in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: @@ -63,16 +60,14 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): self.assert_array_almost_equal(array, rev) def test_chain(self): - """Test a chain of preprocessors. 
- """ + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'mean-std'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors. - """ + """Check gradient of a chain of preprocessors.""" # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index e3e28914..6c64e383 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -3,12 +3,10 @@ from sklearn.preprocessing import MinMaxScaler from secml.ml.features.normalization import CNormalizerMinMax -class TestCNormalizerLinear(CNormalizerTestCases): - """Unittest for CNormalizerLinear. - """ +class TestCNormalizerMinMax(CNormalizerTestCases): + """Unittest for CNormalizerMinMax.""" def test_norm_minmax(self): - """Test for CNormalizerMinMax. - """ + """Test for CNormalizerMinMax.""" self._sklearn_comp(self.array_dense, MinMaxScaler(), CNormalizerMinMax()) self._sklearn_comp(self.array_sparse, MinMaxScaler(), @@ -35,8 +33,7 @@ class TestCNormalizerLinear(CNormalizerTestCases): CNormalizerMinMax()) def test_chain(self): - """Test a chain of preprocessors. - """ + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'min-max'], [{'feature_range': (-5, 5)}, {}, @@ -44,8 +41,7 @@ class TestCNormalizerLinear(CNormalizerTestCases): # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors. - """ + """Check gradient of a chain of preprocessors.""" # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std', 'min-max'], diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 187c1348..aef0c1fc 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -6,18 +6,20 @@ from secml.optim.function import CFunction class TestCNormalizerUnitNorm(CNormalizerTestCases): - """Unittest for CNormalizerUnitNorm. - """ + """Unittest for CNormalizerUnitNorm.""" + def _sklearn_comp(self, array, norm_sklearn, norm, norm_type=None): + """Check if the result given by the sklearn normalizer is almost + equal to the one given by our normalizer + """ self.logger.info("Norm type: {:}".format(norm_type)) norm_sklearn = norm_sklearn(norm=norm_type) norm = norm(norm=norm_type) super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, - norm, sparse=True) + norm, True) def test_norm_unitnorm(self): - """Test for CNormalizerUnitNorm. - """ + """Test for CNormalizerUnitNorm.""" norm_type_lst = ["l1", "l2", "max"] for norm_type in norm_type_lst: @@ -35,16 +37,14 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): CNormalizerUnitNorm, norm_type) def test_chain(self): - """Test a chain of preprocessors. 
- """ + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'unit-norm'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def _test_gradient(self): - """Check the normalizer gradient. - """ + """Check the normalizer gradient.""" norm_type_lst = ["l1", "l2", "max"] -- GitLab From 4c548e2ff64310fd2a62db377b6fc886976fd73a Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Thu, 19 Mar 2020 16:42:09 +0100 Subject: [PATCH 16/20] Refactor of Normalizers'tests: docstrings, parameter y and test out of range normalization for CNormalizerMinMax update --- .../tests/c_normalizer_testcases.py | 9 +++- .../tests/test_c_normalizer_mean_std.py | 10 ++-- .../tests/test_c_normalizer_minmax.py | 48 +++++++++++++------ .../tests/test_c_normalizer_unitnorm.py | 14 +++--- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 7fde5169..4ab16029 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -7,8 +7,7 @@ class CNormalizerTestCases(CPreProcessTestCases): """Unittests interface for Normalizers.""" def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): - """Check if the result given by the sklearn normalizer is almost - equal to the one given by our normalizer + """Tests if the sklearn normalizer and our normalizer yield same result. """ self.logger.info("Original array is:\n{:}".format(array)) if sparse: @@ -22,6 +21,8 @@ class CNormalizerTestCases(CPreProcessTestCases): self.logger.info("Our result is:\n{:}".format(result)) self.assert_array_almost_equal(target, result) + return target, result + def _test_chain(self, x, pre_id_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result.""" x_chain = super(CNormalizerTestCases, self)._test_chain( @@ -29,6 +30,8 @@ class CNormalizerTestCases(CPreProcessTestCases): self.assertEqual((self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape) + return x_chain + def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): """Tests if gradient preprocess chain and gradient of manual chaining yield same result. 
@@ -37,6 +40,8 @@ class CNormalizerTestCases(CPreProcessTestCases): x, pre_id_list, kwargs_list, y) self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) + return grad_chain + if __name__ == '__main__': CUnitTest.main() diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index 7dc16127..aaec5204 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -4,10 +4,10 @@ from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): - """Unittest for CNormalizerMeanStd""" + """Unittest for CNormalizerMeanStd.""" def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance""" + """Test for CNormalizerMeanStd to obtain zero mean and unit variance.""" self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) self._sklearn_comp(self.array_sparse, StandardScaler(), @@ -60,14 +60,16 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): self.assert_array_almost_equal(array, rev) def test_chain(self): - """Test a chain of preprocessors.""" + """Tests a chain of preprocessors related to CNormalizerMeanStd.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'mean-std'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" + """Tests the gradient of a chain of preprocessors + related to CNormalizerMeanStd. + """ # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index 6c64e383..fa0b9268 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -1,10 +1,40 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases from sklearn.preprocessing import MinMaxScaler from secml.ml.features.normalization import CNormalizerMinMax +from secml.array import CArray class TestCNormalizerMinMax(CNormalizerTestCases): """Unittest for CNormalizerMinMax.""" + + def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): + """Tests if the sklearn normalizer (MinMaxScaler) and + our normalizer (CNormalizerMinMax) yield same result. 
+ """ + super(TestCNormalizerMinMax, self)._sklearn_comp( + array, norm_sklearn, norm, sparse) + + array_sk = array.tondarray() + sk_norm = norm_sklearn.fit(array_sk) + our_norm = norm.fit(array) + + # Testing out of range normalization + + self.logger.info("Testing out of range normalization") + + # Sklearn normalizer (requires float dtype input) + target = CArray(sk_norm.transform(array_sk * 2)) + + # Our normalizer + result = our_norm.transform(array * 2) + + self.logger.info("Correct result is:\n{:}".format(target)) + self.logger.info("Our result is:\n{:}".format(result)) + + self.assert_array_almost_equal(target, result) + + return target, result + def test_norm_minmax(self): """Test for CNormalizerMinMax.""" self._sklearn_comp(self.array_dense, MinMaxScaler(), @@ -19,21 +49,9 @@ class TestCNormalizerMinMax(CNormalizerTestCases): CNormalizerMinMax()) self._sklearn_comp(self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) - self._sklearn_comp(self.array_dense*2, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse*2, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d()*2, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse*2, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.column_dense*2, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse*2, MinMaxScaler(), - CNormalizerMinMax()) def test_chain(self): - """Test a chain of preprocessors.""" + """Tests a chain of preprocessors related to CNormalizerMinMax.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'min-max'], [{'feature_range': (-5, 5)}, {}, @@ -41,7 +59,9 @@ class TestCNormalizerMinMax(CNormalizerTestCases): # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Check gradient of a chain of preprocessors.""" + """Tests the gradient of a chain of preprocessors + related to CNormalizerMinMax. + """ # Expected shape is (n_feats, ), so (4, ) self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std', 'min-max'], diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index aef0c1fc..a5052f48 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -9,14 +9,16 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): """Unittest for CNormalizerUnitNorm.""" def _sklearn_comp(self, array, norm_sklearn, norm, norm_type=None): - """Check if the result given by the sklearn normalizer is almost - equal to the one given by our normalizer + """Tests if the sklearn normalizer (Normalizer) and our normalizer + (CNormalizerUnitNorm) yield same result. 
""" self.logger.info("Norm type: {:}".format(norm_type)) norm_sklearn = norm_sklearn(norm=norm_type) norm = norm(norm=norm_type) - super(TestCNormalizerUnitNorm, self)._sklearn_comp(array, norm_sklearn, - norm, True) + target, result = super(TestCNormalizerUnitNorm, self)._sklearn_comp( + array, norm_sklearn, norm, True) + + return target, result def test_norm_unitnorm(self): """Test for CNormalizerUnitNorm.""" @@ -37,14 +39,14 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): CNormalizerUnitNorm, norm_type) def test_chain(self): - """Test a chain of preprocessors.""" + """Test a chain of preprocessors related to CNormalizerUnitNorm.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'unit-norm'], [{'feature_range': (-5, 5)}, {}, {}]) # Expected shape is (3, 3), as pca max n_components is 4-1 def _test_gradient(self): - """Check the normalizer gradient.""" + """Check the normalizer gradient related to CNormalizerUnitNorm.""" norm_type_lst = ["l1", "l2", "max"] -- GitLab From da0bedb4a89843ce4f3a43eecc35a5defe77847f Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Thu, 19 Mar 2020 16:44:08 +0100 Subject: [PATCH 17/20] Refactor of Normalizers'tests: docstrings, parameter y and test out of range normalization for CNormalizerMinMax update --- .../features/normalization/tests/test_c_normalizer_mean_std.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index aaec5204..da9bfdad 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -7,7 +7,8 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): """Unittest for CNormalizerMeanStd.""" def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance.""" + """Test for CNormalizerMeanStd to obtain zero mean and unit variance. 
+ """ self._sklearn_comp(self.array_dense, StandardScaler(), CNormalizerMeanStd()) self._sklearn_comp(self.array_sparse, StandardScaler(), -- GitLab From fe35349aaad22751abade5be87e64de11e5239e7 Mon Sep 17 00:00:00 2001 From: Giovanni Pau Date: Thu, 19 Mar 2020 18:08:02 +0100 Subject: [PATCH 18/20] Refactor of Normalizers'tests: test out of range normalization for CNormalizerMinMax update 2.0 --- .../normalization/tests/c_normalizer_testcases.py | 14 +++++++++++--- .../tests/test_c_normalizer_minmax.py | 9 +++------ .../tests/test_c_normalizer_unitnorm.py | 5 +++-- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 4ab16029..750e723b 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -11,9 +11,14 @@ class CNormalizerTestCases(CPreProcessTestCases): """ self.logger.info("Original array is:\n{:}".format(array)) if sparse: - target = CArray(norm_sklearn.fit_transform(array.get_data())) + array_sk = array.get_data() else: - target = CArray(norm_sklearn.fit_transform(array.tondarray())) + array_sk = array.tondarray() + + # Sklearn normalizer + sk_norm = norm_sklearn.fit(array_sk) + target = CArray(sk_norm.transform(array_sk)) + # Our normalizer n = norm.fit(array) result = n.transform(array) @@ -21,7 +26,10 @@ class CNormalizerTestCases(CPreProcessTestCases): self.logger.info("Our result is:\n{:}".format(result)) self.assert_array_almost_equal(target, result) - return target, result + array_sk = array.tondarray() + sk_norm = norm_sklearn.fit(array_sk) + + return target, result, sk_norm, n, array_sk def _test_chain(self, x, pre_id_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result.""" diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index fa0b9268..f4683216 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -11,13 +11,10 @@ class TestCNormalizerMinMax(CNormalizerTestCases): """Tests if the sklearn normalizer (MinMaxScaler) and our normalizer (CNormalizerMinMax) yield same result. 
""" - super(TestCNormalizerMinMax, self)._sklearn_comp( + target, result, sk_norm, our_norm, array_sk = super( + TestCNormalizerMinMax, self)._sklearn_comp( array, norm_sklearn, norm, sparse) - array_sk = array.tondarray() - sk_norm = norm_sklearn.fit(array_sk) - our_norm = norm.fit(array) - # Testing out of range normalization self.logger.info("Testing out of range normalization") @@ -33,7 +30,7 @@ class TestCNormalizerMinMax(CNormalizerTestCases): self.assert_array_almost_equal(target, result) - return target, result + return target, result, sk_norm, our_norm, array_sk def test_norm_minmax(self): """Test for CNormalizerMinMax.""" diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index a5052f48..0c8875f8 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -15,10 +15,11 @@ class TestCNormalizerUnitNorm(CNormalizerTestCases): self.logger.info("Norm type: {:}".format(norm_type)) norm_sklearn = norm_sklearn(norm=norm_type) norm = norm(norm=norm_type) - target, result = super(TestCNormalizerUnitNorm, self)._sklearn_comp( + target, result, sk_norm, our_norm, array_sk = super( + TestCNormalizerUnitNorm, self)._sklearn_comp( array, norm_sklearn, norm, True) - return target, result + return target, result, sk_norm, our_norm, array_sk def test_norm_unitnorm(self): """Test for CNormalizerUnitNorm.""" -- GitLab From 2579f6978eba65cf10a8280527192baf24f52534 Mon Sep 17 00:00:00 2001 From: Marco Melis Date: Wed, 1 Apr 2020 16:42:54 +0200 Subject: [PATCH 19/20] Use a for loop in test_transform instead of duplicating the calls. --- .../tests/test_c_normalizer_mean_std.py | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index da9bfdad..06079b5c 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -6,35 +6,30 @@ from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): """Unittest for CNormalizerMeanStd.""" - def test_zscore(self): - """Test for CNormalizerMeanStd to obtain zero mean and unit variance. 
- """ - self._sklearn_comp(self.array_dense, StandardScaler(), - CNormalizerMeanStd()) - self._sklearn_comp(self.array_sparse, StandardScaler(), - CNormalizerMeanStd()) - self._sklearn_comp(self.row_dense.atleast_2d(), StandardScaler(), - CNormalizerMeanStd()) - self._sklearn_comp(self.row_sparse, StandardScaler(), - CNormalizerMeanStd()) - self._sklearn_comp(self.column_dense, StandardScaler(), - CNormalizerMeanStd()) - self._sklearn_comp(self.column_sparse, StandardScaler(), - CNormalizerMeanStd()) - - self._sklearn_comp(self.array_dense, StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.array_sparse, StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.row_dense.atleast_2d(), - StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.row_sparse, StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.column_dense, StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) - self._sklearn_comp(self.column_sparse, StandardScaler(with_std=False), - CNormalizerMeanStd(with_std=False)) + def test_transform(self): + """Test for `.transform()` method.""" + for with_std in (True, False): + + self.logger.info("Testing using std? {:}".format(with_std)) + + self._sklearn_comp(self.array_dense, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp(self.array_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp(self.row_dense.atleast_2d(), + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp(self.row_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp(self.column_dense, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) + self._sklearn_comp(self.column_sparse, + StandardScaler(with_std=with_std), + CNormalizerMeanStd(with_std=with_std)) def test_normalizer_mean_std(self): """Test for CNormalizerMeanStd.""" -- GitLab From a8e7138de173fc94d76c92d68f3365ded7b97770 Mon Sep 17 00:00:00 2001 From: Marco Melis Date: Wed, 1 Apr 2020 16:43:48 +0200 Subject: [PATCH 20/20] Update for code style and missing docstrings. --- .../tests/c_normalizer_testcases.py | 49 ++++++---- .../tests/test_c_normalizer_mean_std.py | 16 ++-- .../tests/test_c_normalizer_minmax.py | 92 +++++++++++-------- .../tests/test_c_normalizer_unitnorm.py | 58 +++++------- 4 files changed, 117 insertions(+), 98 deletions(-) diff --git a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py index 750e723b..d61131ba 100644 --- a/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py +++ b/src/secml/ml/features/normalization/tests/c_normalizer_testcases.py @@ -7,34 +7,49 @@ class CNormalizerTestCases(CPreProcessTestCases): """Unittests interface for Normalizers.""" def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): - """Tests if the sklearn normalizer and our normalizer yield same result. + """Compare scikit-learn normalizer with our implementation. + + Parameters + ---------- + array : CArray + norm_sklearn + Scikit-learn normalizer (from `sklearn.preprocessing`). + norm : CNormalizer + sparse : bool, optional + If False (default) sklearn normalizer only supports dense data. 
+ + Returns + ------- + norm_sklearn + Trained Scikit-learn normalizer (from `sklearn.preprocessing`). + norm : CNormalizer + Trained normalizer. + """ self.logger.info("Original array is:\n{:}".format(array)) - if sparse: - array_sk = array.get_data() - else: - array_sk = array.tondarray() + + array_sk = array.get_data() if sparse is True else array.tondarray() # Sklearn normalizer - sk_norm = norm_sklearn.fit(array_sk) - target = CArray(sk_norm.transform(array_sk)) + norm_sklearn.fit(array_sk) + transform_sklearn = CArray(norm_sklearn.transform(array_sk)) # Our normalizer - n = norm.fit(array) - result = n.transform(array) - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - self.assert_array_almost_equal(target, result) + norm.fit(array) + transform = norm.transform(array) - array_sk = array.tondarray() - sk_norm = norm_sklearn.fit(array_sk) + self.logger.info("sklearn result is:\n{:}".format(transform_sklearn)) + self.logger.info("Our result is:\n{:}".format(transform)) - return target, result, sk_norm, n, array_sk + self.assert_array_almost_equal(transform_sklearn, transform) + + return norm_sklearn, norm def _test_chain(self, x, pre_id_list, kwargs_list, y=None): """Tests if preprocess chain and manual chaining yield same result.""" x_chain = super(CNormalizerTestCases, self)._test_chain( x, pre_id_list, kwargs_list, y) + self.assertEqual((self.array_dense.shape[0], self.array_dense.shape[1] - 1), x_chain.shape) @@ -42,10 +57,10 @@ class CNormalizerTestCases(CPreProcessTestCases): def _test_chain_gradient(self, x, pre_id_list, kwargs_list, y=None): """Tests if gradient preprocess chain and - gradient of manual chaining yield same result. - """ + gradient of manual chaining yield same result.""" grad_chain = super(CNormalizerTestCases, self)._test_chain_gradient( x, pre_id_list, kwargs_list, y) + self.assertEqual((self.array_dense.shape[1],), grad_chain.shape) return grad_chain diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py index 06079b5c..a3234cf4 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_mean_std.py @@ -1,10 +1,12 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases + from sklearn.preprocessing import StandardScaler + from secml.ml.features.normalization import CNormalizerMeanStd class TestCNormalizerMeanStd(CNormalizerTestCases): - """Unittest for CNormalizerMeanStd.""" + """Unittests for CNormalizerMeanStd.""" def test_transform(self): """Test for `.transform()` method.""" @@ -31,8 +33,8 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): StandardScaler(with_std=with_std), CNormalizerMeanStd(with_std=with_std)) - def test_normalizer_mean_std(self): - """Test for CNormalizerMeanStd.""" + def test_mean_std(self): + """Test using specific mean/std.""" for (mean, std) in [(1.5, 0.1), ((1.0, 1.1, 1.2, 1.3), (0.0, 0.1, 0.2, 0.3))]: for array in [self.array_dense, self.array_sparse]: @@ -56,17 +58,13 @@ class TestCNormalizerMeanStd(CNormalizerTestCases): self.assert_array_almost_equal(array, rev) def test_chain(self): - """Tests a chain of preprocessors related to CNormalizerMeanStd.""" + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'mean-std'], [{'feature_range': (-5, 5)}, {}, {}]) - # Expected shape is (3, 3), as pca max n_components 
is 4-1 def test_chain_gradient(self): - """Tests the gradient of a chain of preprocessors - related to CNormalizerMeanStd. - """ - # Expected shape is (n_feats, ), so (4, ) + """Check gradient of a chain of preprocessors.""" self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std'], [{'feature_range': (-5, 5)}, {}]) diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py index f4683216..2ed9065a 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_minmax.py @@ -1,65 +1,79 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases + from sklearn.preprocessing import MinMaxScaler -from secml.ml.features.normalization import CNormalizerMinMax + from secml.array import CArray +from secml.ml.features.normalization import CNormalizerMinMax class TestCNormalizerMinMax(CNormalizerTestCases): - """Unittest for CNormalizerMinMax.""" + """Unittests for CNormalizerMinMax.""" def _sklearn_comp(self, array, norm_sklearn, norm, sparse=False): - """Tests if the sklearn normalizer (MinMaxScaler) and - our normalizer (CNormalizerMinMax) yield same result. - """ - target, result, sk_norm, our_norm, array_sk = super( - TestCNormalizerMinMax, self)._sklearn_comp( - array, norm_sklearn, norm, sparse) + """Compare scikit-learn normalizer with our implementation. + + Parameters + ---------- + array : CArray + norm_sklearn + Scikit-learn normalizer (from `sklearn.preprocessing`). + norm : CNormalizer + sparse : bool, optional + If False (default) sklearn normalizer only supports dense data. + + Returns + ------- + norm_sklearn + Trained Scikit-learn normalizer (from `sklearn.preprocessing`). + norm : CNormalizer + Trained normalizer. 
- # Testing out of range normalization + """ + norm_sklearn, norm = \ + super(TestCNormalizerMinMax, self)._sklearn_comp( + array, norm_sklearn, norm, sparse) self.logger.info("Testing out of range normalization") + array_sk = array.get_data() if sparse is True else array.tondarray() + # Sklearn normalizer (requires float dtype input) - target = CArray(sk_norm.transform(array_sk * 2)) + transform_sklearn = CArray(norm_sklearn.transform(array_sk * 2)) # Our normalizer - result = our_norm.transform(array * 2) - - self.logger.info("Correct result is:\n{:}".format(target)) - self.logger.info("Our result is:\n{:}".format(result)) - - self.assert_array_almost_equal(target, result) - - return target, result, sk_norm, our_norm, array_sk - - def test_norm_minmax(self): - """Test for CNormalizerMinMax.""" - self._sklearn_comp(self.array_dense, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.array_sparse, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.row_dense.atleast_2d(), MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.row_sparse, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.column_dense, MinMaxScaler(), - CNormalizerMinMax()) - self._sklearn_comp(self.column_sparse, MinMaxScaler(), - CNormalizerMinMax()) + transform = norm.transform(array * 2) + + self.logger.info("Correct result is:\n{:}".format(transform_sklearn)) + self.logger.info("Our result is:\n{:}".format(transform)) + + self.assert_array_almost_equal(transform_sklearn, transform) + + return norm_sklearn, norm + + def test_transform(self): + """Test for `.transform()` method.""" + self._sklearn_comp( + self.array_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp( + self.array_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp( + self.row_dense.atleast_2d(), MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp( + self.row_sparse, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp( + self.column_dense, MinMaxScaler(), CNormalizerMinMax()) + self._sklearn_comp( + self.column_sparse, MinMaxScaler(), CNormalizerMinMax()) def test_chain(self): - """Tests a chain of preprocessors related to CNormalizerMinMax.""" + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'min-max'], [{'feature_range': (-5, 5)}, {}, {'feature_range': (0, 1)}]) - # Expected shape is (3, 3), as pca max n_components is 4-1 def test_chain_gradient(self): - """Tests the gradient of a chain of preprocessors - related to CNormalizerMinMax. 
- """ - # Expected shape is (n_feats, ), so (4, ) + """Check gradient of a chain of preprocessors.""" self._test_chain_gradient(self.array_dense, ['min-max', 'mean-std', 'min-max'], [{'feature_range': (-5, 5)}, {}, diff --git a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py index 0c8875f8..e89df4c6 100644 --- a/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py +++ b/src/secml/ml/features/normalization/tests/test_c_normalizer_unitnorm.py @@ -1,5 +1,7 @@ from secml.ml.features.normalization.tests import CNormalizerTestCases + from sklearn.preprocessing import Normalizer + from secml.array import CArray from secml.ml.features.normalization import CNormalizerUnitNorm from secml.optim.function import CFunction @@ -8,46 +10,36 @@ from secml.optim.function import CFunction class TestCNormalizerUnitNorm(CNormalizerTestCases): """Unittest for CNormalizerUnitNorm.""" - def _sklearn_comp(self, array, norm_sklearn, norm, norm_type=None): - """Tests if the sklearn normalizer (Normalizer) and our normalizer - (CNormalizerUnitNorm) yield same result. - """ - self.logger.info("Norm type: {:}".format(norm_type)) - norm_sklearn = norm_sklearn(norm=norm_type) - norm = norm(norm=norm_type) - target, result, sk_norm, our_norm, array_sk = super( - TestCNormalizerUnitNorm, self)._sklearn_comp( - array, norm_sklearn, norm, True) - - return target, result, sk_norm, our_norm, array_sk - - def test_norm_unitnorm(self): - """Test for CNormalizerUnitNorm.""" - norm_type_lst = ["l1", "l2", "max"] - - for norm_type in norm_type_lst: - self._sklearn_comp(self.array_dense, Normalizer, - CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.array_sparse, Normalizer, - CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.row_dense.atleast_2d(), Normalizer, - CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.row_sparse, Normalizer, - CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.column_dense, Normalizer, - CNormalizerUnitNorm, norm_type) - self._sklearn_comp(self.column_sparse, Normalizer, - CNormalizerUnitNorm, norm_type) + def test_transform(self): + """Test for `.transform()` method.""" + for norm_type in ["l1", "l2", "max"]: + self._sklearn_comp(self.array_dense, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp(self.array_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp(self.row_dense.atleast_2d(), + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp(self.row_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp(self.column_dense, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) + self._sklearn_comp(self.column_sparse, + Normalizer(norm=norm_type), + CNormalizerUnitNorm(norm=norm_type)) def test_chain(self): - """Test a chain of preprocessors related to CNormalizerUnitNorm.""" + """Test a chain of preprocessors.""" self._test_chain(self.array_dense, ['min-max', 'pca', 'unit-norm'], [{'feature_range': (-5, 5)}, {}, {}]) - # Expected shape is (3, 3), as pca max n_components is 4-1 def _test_gradient(self): - """Check the normalizer gradient related to CNormalizerUnitNorm.""" + """Check the normalizer gradient.""" norm_type_lst = ["l1", "l2", "max"] -- GitLab