From 916cd574fab62bb44bc33c2bf225da61cb15198e Mon Sep 17 00:00:00 2001 From: lei-1126 Date: Mon, 29 Mar 2021 15:48:40 +0800 Subject: [PATCH 1/2] gaussian_kde.py: modify cumulative_distribution function --- copulas/univariate/gaussian_kde.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/copulas/univariate/gaussian_kde.py b/copulas/univariate/gaussian_kde.py index 50542f7a..90ab7ac0 100644 --- a/copulas/univariate/gaussian_kde.py +++ b/copulas/univariate/gaussian_kde.py @@ -1,5 +1,6 @@ import numpy as np +import pandas as pd from scipy.special import ndtr from scipy.stats import gaussian_kde @@ -101,9 +102,17 @@ def cumulative_distribution(self, X): self.check_fit() X = np.array(X) stdev = np.sqrt(self._model.covariance[0, 0]) - lower = ndtr((self._get_bounds()[0] - self._model.dataset) / stdev)[0] - uppers = ndtr((X[:, None] - self._model.dataset) / stdev) - return (uppers - lower).dot(self._model.weights) + # lower = ndtr((self._get_bounds()[0] - self._model.dataset) / stdev)[0] + # uppers = ndtr((X[:, None] - self._model.dataset) / stdev) + # return (uppers - lower).dot(self._model.weights) + + data_flatten = pd.Series(self._model.dataset.flatten()) + v_c = data_flatten.value_counts() + weights = v_c.values / data_flatten.__len__() + dataset_weighted = np.array(v_c.index).reshape(1, -1) + lower = ndtr((self._get_bounds()[0] - dataset_weighted) / stdev)[0] + uppers = ndtr((X[:, None] - dataset_weighted) / stdev) + return (uppers - lower).dot(weights) def percent_point(self, U, method="chandrupatla"): """Compute the inverse cumulative distribution value for each point in U. From 2cd71e7761a9e0a8de34ecfd1fb2addf5c67d53f Mon Sep 17 00:00:00 2001 From: lei-1126 Date: Fri, 2 Apr 2021 10:37:50 +0800 Subject: [PATCH 2/2] add self._lower,self._upper --- copulas/univariate/gaussian_kde.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/copulas/univariate/gaussian_kde.py b/copulas/univariate/gaussian_kde.py index 90ab7ac0..249e7405 100644 --- a/copulas/univariate/gaussian_kde.py +++ b/copulas/univariate/gaussian_kde.py @@ -40,10 +40,10 @@ def _get_model(self): def _get_bounds(self): X = self._params['dataset'] - lower = np.min(X) - (5 * np.std(X)) - upper = np.max(X) + (5 * np.std(X)) + self._lower = np.min(X) - (5 * np.std(X)) + self._upper = np.max(X) + (5 * np.std(X)) - return lower, upper + return self._lower, self._upper def probability_density(self, X): """Compute the probability density for each point in X. @@ -110,7 +110,10 @@ def cumulative_distribution(self, X): v_c = data_flatten.value_counts() weights = v_c.values / data_flatten.__len__() dataset_weighted = np.array(v_c.index).reshape(1, -1) - lower = ndtr((self._get_bounds()[0] - dataset_weighted) / stdev)[0] + if '_lower' not in dir(self): + lower = ndtr((self._get_bounds()[0] - dataset_weighted) / stdev)[0] + else: + lower = ndtr((self._lower - dataset_weighted) / stdev)[0] uppers = ndtr((X[:, None] - dataset_weighted) / stdev) return (uppers - lower).dot(weights)