Q3D-Calibration/qdx/utils.py

import uproot
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture


def readFileData(file, count, n=6, m=8, minT=800, maxT=4000):
    """Read the left/right channel data of every block from a ROOT file.

    For each block ``i`` in ``range(n)`` and each index ``j`` in ``range(m)``
    the branches ``adc{i // 2}ch{nc}`` (left) and ``adc{i // 2}ch{nc + m}``
    (right) of ``Tree1`` are read, with ``nc = j + 2 * m * (i % 2)``.

    Parameters
    ----------
    file : str
        root file path
    count : int
        Only the first ``count`` entries of every branch are kept.
        (Described by the original author as the count normalized by the
        counts of the Faraday cylinder.)
    n : int, optional
        number of blocks, default 6
    m : int, optional
        number of left/right channel pairs read per block, default 8
    minT/maxT : int, optional
        min / max threshold. An entry is kept only when the sum of its left
        and right values lies in the closed interval [minT, maxT].

    Returns
    -------
    ldata, rdata : list of numpy.ndarray
        ``n * m`` filtered left / right arrays, ordered block by block.
    """
    name = "adc{:d}ch{:d}"
    ldata, rdata = [], []

    # Use the file as a context manager so the handle is released even when
    # a branch is missing or reading fails half-way through.
    with uproot.open(file) as root_file:
        tree = root_file["Tree1"]
        for i in range(n):
            na = i // 2
            # Two blocks share one ADC; the odd block starts 2 * m channels in.
            offset = 2 * m * (i % 2)
            for j in range(m):
                nc = j + offset
                x = tree[name.format(na, nc)].array(library="np")[:count]
                y = tree[name.format(na, nc + m)].array(library="np")[:count]
                total = x + y
                keep = (total >= minT) & (total <= maxT)
                ldata.append(x[keep])
                rdata.append(y[keep])

    return ldata, rdata


def readBlockData(file, count, n, m=8, minT=800, maxT=4000):
    """Read the left/right channel data of a single block from a ROOT file.

    For each index ``j`` in ``range(m)`` the branches ``adc{n // 2}ch{nc}``
    (left) and ``adc{n // 2}ch{nc + m}`` (right) of ``Tree1`` are read, with
    ``nc = j + 2 * m * (n % 2)``.

    Parameters
    ----------
    file : str
        root file path
    count : int
        Only the first ``count`` entries of every branch are kept.
        (Described by the original author as the count normalized by the
        counts of the Faraday cylinder.)
    n : int
        No.n block
    m : int, optional
        number of left/right channel pairs read for the block, default 8
    minT/maxT : int, optional
        min / max threshold. An entry is kept only when the sum of its left
        and right values lies in the closed interval [minT, maxT].

    Returns
    -------
    ldata, rdata : list of numpy.ndarray
        ``m`` filtered left / right arrays.
    """
    name = "adc{:d}ch{:d}"
    na = n // 2
    # Two blocks share one ADC; the odd block starts 2 * m channels in.
    offset = 2 * m * (n % 2)
    ldata, rdata = [], []

    # Use the file as a context manager so the handle is released even when
    # a branch is missing or reading fails half-way through.
    with uproot.open(file) as root_file:
        tree = root_file["Tree1"]
        for j in range(m):
            nc = j + offset
            x = tree[name.format(na, nc)].array(library="np")[:count]
            y = tree[name.format(na, nc + m)].array(library="np")[:count]
            total = x + y
            keep = (total >= minT) & (total <= maxT)
            ldata.append(x[keep])
            rdata.append(y[keep])

    return ldata, rdata


def draw_scatter(data, title, s=0.1):
    """Draw points using scatter and save the figure to a file

    Parameters
    ----------
    data : iterable of array-like
        Clusters to draw. Each cluster is indexed as ``cluster[:, 0]`` (x) and
        ``cluster[:, 1]`` (y) and gets its own ``scatter`` call, so every
        cluster is drawn as a separate series.
    title : str
        Passed to ``fig.savefig`` as the output file name.
    s : float, optional
        size of scatter point, default 0.1
    """
    fig = plt.figure(figsize=(8, 8))
    try:
        ax = fig.add_subplot(1, 1, 1)
        for cluster in data:
            ax.scatter(cluster[:, 0], cluster[:, 1], s=s)
        fig.savefig(title, facecolor="w", transparent=False)
    finally:
        # Always release this specific figure, even when plotting or saving
        # fails; otherwise it stays registered in pyplot and leaks memory.
        plt.close(fig)


def get_hist(data, step=1, maxN=50):
    """Gets the histogram whose maximum count is bigger than threshold

    The bin width starts at ``step`` and grows by ``step`` until the fullest
    bin holds more than ``maxN`` entries. If the bins run out before that
    happens, the histogram with the minimum width ``step`` is returned.

    Parameters
    ----------
    data : numpy.ndarray
        Values to histogram. Must be non-empty (``min`` / ``max`` are taken).
    step : int, optional
        Minimum bin width. The bin width is an integer multiple of step.
    maxN : int, optional
        Maximum count threshold

    Returns
    -------
    density : numpy.ndarray
        Bin counts divided by the bin width that was actually used.
    centers : numpy.ndarray
        Centers of the bins.
    """
    lo, hi = data.min(), data.max()

    def _binned(width):
        # Edges cover [lo, hi + 1) in steps of `width`.
        edge = np.arange(lo, hi + 1, width)
        count, _ = np.histogram(data, bins=edge)
        return count, edge

    delta = step
    count, edge = _binned(delta)
    try:
        while count.max() <= maxN:
            delta += step
            count, edge = _binned(delta)
    except ValueError:
        # The width grew until fewer than two edges were left, so `count` is
        # empty and `count.max()` raised: the threshold cannot be reached.
        # Fall back to the finest binning and reset the width as well, so the
        # normalization below matches the bins that are returned.
        delta = step
        count, edge = _binned(delta)

    return count / delta, (edge[1:] + edge[:-1]) / 2


def GMM_slash(data):
    """Keep the rows of the most populated component of a 2-component Gaussian mixture

    A ``GaussianMixture`` with two components is fitted on the first two
    columns of ``data``. Every row is then assigned to a component, and the
    rows of the component with the most members are returned (the first
    component wins a tie). The original author uses this to decompose the
    data into noise and slashes.
    """
    features = data[:, :2]

    mixture = GaussianMixture(n_components=2)
    mixture.fit(features)
    labels = mixture.predict(features)

    # Row indices of every component that actually received samples.
    members = [np.flatnonzero(labels == label) for label in np.unique(labels)]
    # max() returns the first of equally large groups.
    largest = max(members, key=len)

    return data[largest]