import uproot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture


def _read_block(tree, block, count, m, minT, maxT):
    """Read the filtered left/right channel pairs of one block.

    Parameters
    ----------
    tree : mapping of branch name -> branch
        Opened tree; branches are looked up as ``adc<A>ch<C>``.
    block : int
        Block index. Two consecutive blocks share one ADC (``block // 2``);
        odd blocks use the channels starting at ``2 * m``.
    count : int
        Only the first ``count`` entries of each channel are used.
    m : int
        Number of left/right channel pairs in the block.
    minT, maxT : int
        An entry is kept only when ``minT <= left + right <= maxT``.

    Returns
    -------
    ldata, rdata : list of numpy.ndarray
        ``m`` filtered arrays for the left side and ``m`` for the right side.
    """
    ldata, rdata = [], []
    na = block // 2
    first_channel = 2 * m * (block % 2)
    for j in range(m):
        nc = j + first_channel
        # The partner (right-side) channel sits m channels after the left one.
        x = tree["adc{:d}ch{:d}".format(na, nc)].array(library="np")[:count]
        y = tree["adc{:d}ch{:d}".format(na, nc + m)].array(library="np")[:count]
        total = x + y
        idx = np.where((total >= minT) & (total <= maxT))[0]
        ldata.append(x[idx])
        rdata.append(y[idx])
    return ldata, rdata


def readFileData(file, count, n=6, m=8, minT=800, maxT=4000):
    """Read the data of all blocks from a root file.

    Parameters
    ----------
    file : str
        root file path
    count : int
        Number of leading entries taken from every channel. According to the
        original notes this is the count normalized by the counts of the
        Faraday cylinder.
    n : int, optional
        number of blocks, default 6
    m : int, optional
        number of left/right channel pairs per block, default 8
    minT/maxT : int, optional
        min / max threshold. Entries are kept only when the sum of the left
        and right sides lies in the interval [minT, maxT].

    Returns
    -------
    ldata, rdata : list of numpy.ndarray
        ``n * m`` filtered arrays per side, ordered block by block.
    """
    ldata, rdata = [], []
    # Use a context manager so the file handle is released after reading;
    # the arrays returned by ``.array(library="np")`` are already in memory.
    with uproot.open(file) as root_file:
        tree = root_file["Tree1"]
        for block in range(n):
            left, right = _read_block(tree, block, count, m, minT, maxT)
            ldata.extend(left)
            rdata.extend(right)
    return ldata, rdata


def readBlockData(file, count, n, m=8, minT=800, maxT=4000):
    """Read the data of a single block from a root file.

    Parameters
    ----------
    file : str
        root file path
    count : int
        Number of leading entries taken from every channel. According to the
        original notes this is the count normalized by the counts of the
        Faraday cylinder.
    n : int
        No.n block
    m : int, optional
        number of left/right channel pairs in the block, default 8
    minT/maxT : int, optional
        min / max threshold. Entries are kept only when the sum of the left
        and right sides lies in the interval [minT, maxT].

    Returns
    -------
    ldata, rdata : list of numpy.ndarray
        ``m`` filtered arrays per side.
    """
    with uproot.open(file) as root_file:
        return _read_block(root_file["Tree1"], n, count, m, minT, maxT)


def draw_scatter(data, title, s=0.1):
    """Draw every cluster as a scatter plot and save the figure.

    Parameters
    ----------
    data : iterable of numpy.ndarray
        Clusters to draw; column 0 is used as x and column 1 as y.
    title : str
        Passed to ``Figure.savefig`` as the output file name.
    s : float, optional
        size of scatter point, default 0.1
    """
    fig = plt.figure(figsize=(8, 8))
    try:
        ax = fig.add_subplot(1, 1, 1)
        for cluster in data:
            ax.scatter(cluster[:, 0], cluster[:, 1], s=s)
        fig.savefig(title, facecolor="w", transparent=False)
    finally:
        # Close this figure explicitly, even when plotting or saving fails,
        # so repeated calls do not accumulate open figures.
        plt.close(fig)


def get_hist(data, step=1, maxN=50, return_edge=False):
    """Histogram ``data`` with the narrowest bin width whose peak exceeds maxN.

    The bin width starts at ``step`` and grows by ``step`` until the fullest
    bin holds more than ``maxN`` entries. If the range is exhausted before
    that happens, the histogram falls back to a bin width of ``step``.

    Parameters
    ----------
    data : numpy.ndarray
        Values to histogram.
    step : int, optional
        Minimum bin width. The bin width is an integer multiple of step.
    maxN : int, optional
        Maximum count threshold
    return_edge: bool, optional
        If True, then the bin edges are also returned.

    Returns
    -------
    density : numpy.ndarray
        Bin counts divided by the bin width actually used.
    center : numpy.ndarray
        Bin centers.
    edge : numpy.ndarray
        Bin edges, only when ``return_edge`` is True.
    """
    lo, hi = data.min(), data.max()

    def _histogram(width):
        edges = np.arange(lo, hi + 1, width)
        counts, _ = np.histogram(data, bins=edges)
        return counts, edges

    delta = step
    count, edge = _histogram(delta)
    try:
        while count.max() <= maxN:
            delta += step
            count, edge = _histogram(delta)
    except ValueError:
        # The width outgrew the data range: fewer than two edges are left, so
        # ``count`` is empty and ``max()`` fails. Fall back to the minimum
        # width and reset ``delta`` so the normalization below matches the
        # bins that are actually returned.
        delta = step
        count, edge = _histogram(delta)

    density = count / delta
    center = (edge[1:] + edge[:-1]) / 2
    if return_edge:
        return density, center, edge
    return density, center


def GMM_clip(data, return_all=False):
    """Split the data with a two-component Gaussian mixture model (GMM).

    The model is fitted on the first two columns of ``data``. The component
    with the most members is treated as the usable data; the other one is
    discarded as noise.

    Parameters
    ----------
    data : numpy.ndarray
        Data to be clipped
    return_all: bool, optional
        If True, the component label of every row is returned instead of the
        clipped data.

    Returns
    -------
    numpy.ndarray
        Rows of ``data`` belonging to the largest component, or the label
        array when ``return_all`` is True.
    """
    features = data[:, :2]
    model = GaussianMixture(n_components=2)
    model.fit(features)
    labels = model.predict(features)
    if return_all:
        return labels

    # argmax returns the first maximum, so ties go to the lowest label.
    values, sizes = np.unique(labels, return_counts=True)
    largest = values[np.argmax(sizes)]
    return data[np.where(labels == largest)[0]]