import uproot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture


def readData(file, n, m=8, minT=800, maxT=4000):
    """Read the left/right channel pairs of one block from a ROOT file.

    The branches are looked up in the tree named ``"Tree1"`` under the names
    ``adc<A>ch<C>``.  Block ``n`` lives on ADC ``n // 2``; even blocks start
    at channel ``0`` and odd blocks at channel ``2 * m``.  Within a block,
    channel ``c`` is paired with channel ``c + m``.

    Parameters
    ----------
    file : str
        Path of the ROOT file.
    n : int
        Index of the block to read.
    m : int, optional
        Number of channel pairs in a block, default 8.
    minT, maxT : int, optional
        Event filter: an entry is kept only when the sum of the two paired
        channels lies in the closed interval ``[minT, maxT]``.

    Returns
    -------
    ldata, rdata : list
        Two lists of length ``m``.  ``ldata[j]`` holds the filtered values of
        channel ``j`` of the block and ``rdata[j]`` the values of its partner
        channel, entry for entry.
    """
    adc = n // 2
    first_channel = 2 * m * (n % 2)

    ldata, rdata = [], []
    # Open the file in a ``with`` block so the handle is released as soon as
    # the arrays have been read instead of lingering until garbage collection.
    with uproot.open(file) as root_file:
        tree = root_file["Tree1"]
        for j in range(m):
            channel = first_channel + j
            x = tree[f"adc{adc:d}ch{channel:d}"].array()
            y = tree[f"adc{adc:d}ch{channel + m:d}"].array()
            total = x + y
            idx = np.where((total >= minT) & (total <= maxT))[0]
            ldata.append(x[idx])
            rdata.append(y[idx])
    return ldata, rdata


def draw_scatter(data, title, s=0.1):
    """Scatter-plot every cluster on one axes and save the figure.

    Parameters
    ----------
    data : iterable of array
        Each element is indexed as ``cluster[:, 0]`` (x) and
        ``cluster[:, 1]`` (y) and drawn as its own scatter series.
    title : str
        Passed to ``Figure.savefig`` as the output target.
    s : float, optional
        Size of a scatter point, default 0.1.
    """
    fig = plt.figure(figsize=(8, 8))
    try:
        ax = fig.add_subplot(1, 1, 1)
        for cluster in data:
            ax.scatter(cluster[:, 0], cluster[:, 1], s=s)
        fig.savefig(title, facecolor="w", transparent=False)
    finally:
        # Close this specific figure, even when drawing or saving fails, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)


def get_hist(data, delta=1, maxN=50):
    """Histogram ``data`` with the narrowest bins whose peak exceeds ``maxN``.

    The bin width starts at ``delta`` and grows in steps of ``delta`` until
    the fullest bin holds more than ``maxN`` entries.  If no width achieves
    that (the bins run out before the threshold is reached), the histogram
    with width ``delta`` is returned instead.

    Parameters
    ----------
    data : numpy.ndarray
        Values to histogram.  Must be non-empty, otherwise ``min``/``max``
        raise ``ValueError``.
    delta : int, optional
        Minimum bin width.  The bin width is an integer multiple of delta.
    maxN : int, optional
        Maximum count threshold.

    Returns
    -------
    count : numpy.ndarray
        Number of entries per bin.
    center : numpy.ndarray
        Centre of each bin (midpoint of consecutive edges).
    """
    lower = data.min()
    upper = data.max() + 1

    def _histogram(width):
        """Return (counts, edges) for bins of ``width`` over [lower, upper)."""
        edges = np.arange(lower, upper, width)
        counts, _ = np.histogram(data, bins=edges)
        return counts, edges

    step = delta
    count, edge = _histogram(step)
    # An empty ``count`` means fewer than two edges fit in the range; widening
    # further cannot help, so stop instead of relying on ``max()`` raising.
    while count.size > 0 and count.max() <= maxN:
        step += delta
        count, edge = _histogram(step)

    if count.size == 0:
        # Threshold never reached: fall back to the finest binning.
        count, edge = _histogram(delta)
    return count, (edge[1:] + edge[:-1]) / 2


def GMM_slash(data):
    """Keep the rows of ``data`` that fall in the larger of two GMM clusters.

    A two-component Gaussian mixture is fitted to the first two columns of
    ``data``; every row is assigned to a component and the rows of the most
    populated component are returned (on a tie, the lower component label
    wins).  The fit is not seeded, so results may vary between calls.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array; only columns 0 and 1 are used for clustering.

    Returns
    -------
    numpy.ndarray
        The selected rows of ``data``, all columns, in their original order.
    """
    features = data[:, :2]
    model = GaussianMixture(n_components=2)
    model.fit(features)
    labels = model.predict(features)

    present, sizes = np.unique(labels, return_counts=True)
    # ``argmax`` returns the first maximum, matching "first largest wins".
    dominant = present[np.argmax(sizes)]
    return data[labels == dominant]