import numpy as np
import pandas as pd
from scipy.special import ndtr
from scipy.stats import gaussian_kde


# NOTE(review): the two definitions below take ``self`` and are presumably
# methods of the KDE model class in copulas/univariate/gaussian_kde.py; the
# enclosing class is not visible here, so they are shown at module level.
def _get_bounds(self):
    """Compute and cache the bounds derived from the fitted dataset.

    The bounds are the minimum and maximum of ``self._params['dataset']``,
    each widened by five standard deviations of that dataset. The values are
    also stored on ``self._lower`` and ``self._upper`` so that later calls
    (for example ``cumulative_distribution``) can reuse them.

    Returns:
        tuple:
            ``(lower, upper)``.
    """
    X = self._params['dataset']
    margin = 5 * np.std(X)
    self._lower = np.min(X) - margin
    self._upper = np.max(X) + margin

    return self._lower, self._upper


def cumulative_distribution(self, X):
    """Compute the cumulative distribution value for each point in X.

    Each kernel contributes ``ndtr((x - sample) / stdev)`` minus the same
    quantity evaluated at the lower bound, and the contributions are combined
    with the weights of the fitted model.

    Repeated sample values are collapsed into a single kernel whose weight is
    the sum of the model weights of its duplicates. This gives the same
    result as evaluating every sample, but the intermediate matrix has one
    column per distinct value instead of one per sample.

    Args:
        X (numpy.ndarray):
            One-dimensional array-like of points to evaluate.

    Returns:
        numpy.ndarray:
            Array with one cumulative distribution value per point in ``X``.
    """
    self.check_fit()
    X = np.array(X)
    stdev = np.sqrt(self._model.covariance[0, 0])

    # Flatten first so ``np.unique`` always sees (and returns) 1-D arrays.
    samples = np.asarray(self._model.dataset).ravel()
    points, inverse = np.unique(samples, return_inverse=True)

    # Sum the model's own weights per distinct value rather than assuming
    # every sample weighs 1/n; this stays correct for weighted fits.
    weights = np.bincount(
        np.asarray(inverse).ravel(),
        weights=np.asarray(self._model.weights).ravel(),
        minlength=points.size,
    )

    # Reuse the bound cached by ``_get_bounds`` when one exists; otherwise
    # compute it (which also populates the cache).
    if hasattr(self, '_lower'):
        lower_bound = self._lower
    else:
        lower_bound = self._get_bounds()[0]

    lower = ndtr((lower_bound - points) / stdev)
    uppers = ndtr((X[:, None] - points) / stdev)
    return (uppers - lower).dot(weights)