diff --git a/qdx/Bind.py b/qdx/Bind.py index 0a145f2..db887ff 100644 --- a/qdx/Bind.py +++ b/qdx/Bind.py @@ -1,7 +1,7 @@ import numpy as np import matplotlib.pyplot as plt -from .utils import get_hist, GMM_slash +from .utils import get_hist, GMM_clip from .model import Linear1D, FixedSlopeLine, pXLine from .fit import fit_line, fit_hist_gaussian @@ -65,16 +65,16 @@ class Bind(object): else np.full(len(x), px) ) - def slash(self): - """Using Gaussian Mixture Method (GMM) to decompose the data into noise and slashes""" - data = GMM_slash( + def clip(self): + """Using Gaussian Mixture Method (GMM) to decompose the data into noise and available data""" + data = GMM_clip( np.array(list(zip(self.x[0], self.y[0], self.px[0])), dtype=object) ) self.x[0] = data[:, 0] self.y[0] = data[:, 1] self.px[0] = data[:, 2] - data = GMM_slash( + data = GMM_clip( np.array(list(zip(self.x[1], self.y[1], self.px[1])), dtype=object) ) self.x[1] = data[:, 0] @@ -276,8 +276,8 @@ class Bind(object): def _r_square(self, y, yp): mean = np.mean(y) SST = np.sum((y - mean) ** 2) - SSR = np.sum((yp - mean) ** 2) - return SSR / SST + SSE = np.sum((y - yp) ** 2) + return 1 - SSE / SST @property def RSquare1(self): diff --git a/qdx/calibration.py b/qdx/calibration.py index fd5b9bb..64013f9 100644 --- a/qdx/calibration.py +++ b/qdx/calibration.py @@ -62,7 +62,7 @@ class Calibration(object): for i in range(n): for j in range(m): bind: Bind = self.binds[i][j] - bind.slash() + bind.clip() bind.get_line() bind.get_kb(bias_b, delta_e) pbar.update(1) diff --git a/qdx/utils.py b/qdx/utils.py index 83bb1c7..10f7657 100644 --- a/qdx/utils.py +++ b/qdx/utils.py @@ -85,7 +85,7 @@ def draw_scatter(data, title, s=0.1): plt.close() -def get_hist(data, step=1, maxN=50): +def get_hist(data, step=1, maxN=50, return_edge=False): """Gets the boundary of histogram that the maximum count is bigger than threshold Parameters @@ -94,6 +94,8 @@ def get_hist(data, step=1, maxN=50): Minimum bin width. The bin width is an integer multiple of step. maxN : int, optional Maximum count threshold + return_edge: bool, optional + If True, then the bin edges are also returned. """ delta = step edge = np.arange(data.min(), data.max() + 1, delta) @@ -107,11 +109,22 @@ def get_hist(data, step=1, maxN=50): edge = np.arange(data.min(), data.max() + 1, step) count, _ = np.histogram(data, bins=edge) - return count / delta, (edge[1:] + edge[:-1]) / 2 + if return_edge: + return count / delta, (edge[1:] + edge[:-1]) / 2, edge + else: + return count / delta, (edge[1:] + edge[:-1]) / 2 -def GMM_slash(data): - """Using Gaussian Mixture Method (GMM) to decompose the data into noise and slashes""" +def GMM_clip(data, return_all=False): + """Using Gaussian Mixture Method (GMM) to decompose the data into noise and available data + + Parameters + ---------- + data : numpy.ndarray + Data to be clipped + return_all: bool, optional + If True, then all data will be returned. + """ fit_data = np.array([]) model = GaussianMixture(n_components=2) @@ -122,4 +135,7 @@ def GMM_slash(data): idx = np.where(ny == i)[0] fit_data = idx if len(idx) > len(fit_data) else fit_data - return data[fit_data] + if return_all: + return ny + else: + return data[fit_data]