This repository has been archived on 2024-11-23. You can view files and clone it, but cannot push or open issues or pull requests.
GBM-data-tools/finder.py
2022-07-15 07:36:07 +00:00

1158 lines
43 KiB
Python

# finder.py: Module containing data finder and data catalog classes
#
# Authors: William Cleveland (USRA),
# Adam Goldstein (USRA) and
# Daniel Kocevski (NASA)
#
# Portions of the code are Copyright 2020 William Cleveland and
# Adam Goldstein, Universities Space Research Association
# All rights reserved.
#
# Written for the Fermi Gamma-ray Burst Monitor (Fermi-GBM)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
import os
import socket
import ssl
import sys
import time
from ftplib import FTP_TLS
from urllib.request import urlopen
import numpy as np
from astropy import units as astro_units
from astropy.coordinates import SkyCoord
from gbm.time import Met
class FtpFinder:
    """A base class for the interface to the HEASARC FTP archive of GBM data.

    Specifically, it creates a connection to heasarc.gsfc.nasa.gov. The
    connection is opened once, when the class body is executed, and is shared
    by every instance of this class and of its subclasses.

    Attributes:
        num_files (int): Number of files in the current directory
        files (list of str): The list of files in the current directory

    Note:
        This class should not be directly instantiated, but rather inherited.
    """
    _ftp = FTP_TLS(host='heasarc.gsfc.nasa.gov')
    _ftp.login()
    _ftp.prot_p()

    def __init__(self):
        self._downloading_file = None
        self._download_dir = None
        self._file_list = []

    def __del__(self):
        # The class-level connection is shared by all finders, so closing it
        # when one instance is garbage collected would break every other live
        # instance.  Only close a connection that belongs to this instance.
        own_ftp = self.__dict__.get('_ftp')
        if own_ftp is not None:
            own_ftp.close()

    def _reconnect(self):
        """Attempt a reconnect in case connection was lost.

        The new connection replaces the shared class-level connection so that
        all instances benefit from the reconnect, not only this one.
        """
        self._ftp.close()
        ftp = FTP_TLS(host='heasarc.gsfc.nasa.gov')
        ftp.login()
        ftp.prot_p()
        # drop any stale per-instance override, then publish the connection
        self.__dict__.pop('_ftp', None)
        FtpFinder._ftp = ftp

    def _append_chunk(self, chunk):
        """Append a chunk of downloaded bytes to the file being downloaded.

        Args:
            chunk (bytes): The byte data to be written
        """
        file_path = os.path.join(self._download_dir, self._downloading_file)
        with open(file_path, 'ab') as f:
            f.write(chunk)

    def _ftp_status(self, chunk):
        """FTP GET callback function that downloads and reports the percent
        progress of the download.

        Args:
            chunk (bytes): The byte data to be written
        """
        self._append_chunk(chunk)
        self._transferred_bytes += len(chunk)

        # the server may report no size (or zero); avoid dividing by it
        total = self._total_bytes
        if total:
            percent = float(self._transferred_bytes) / float(total)
        else:
            percent = 1.0

        # download bar
        bar = ('=' * int(percent * 30)).ljust(30)
        # format percent and print along with download bar
        percent = str("{0:.2f}".format(percent * 100.0))
        sys.stdout.write(
            "\r%s [%s] %s%%" % (self._downloading_file, bar, percent))
        # file download is finished
        if self._transferred_bytes == self._total_bytes:
            sys.stdout.write('\n')
        sys.stdout.flush()

    def _ftp_silent(self, chunk):
        """FTP GET callback function that silently downloads a file.

        Args:
            chunk (bytes): The byte data to be written
        """
        self._append_chunk(chunk)

    def _construct_path(self, id):
        # to be implemented by the derived classes
        return NotImplemented

    def _file_filter(self, file_list, filetype, extension, dets=None):
        """Filters the directory for the requested filetype, extension, and
        detectors

        Args:
            file_list (list of str): The file names to filter
            filetype (str): The type of file, e.g. 'cspec'
            extension (str): The file extension, e.g. '.pha'
            dets (list, optional): The detectors. If omitted, then files for
                                   all detectors are returned

        Returns:
            list: The filtered file list
        """
        files = [f for f in file_list
                 if filetype in f and f.endswith(extension)]
        if dets is not None:
            # a single detector may be given as a bare string
            if isinstance(dets, str):
                dets = [dets]
            files = [f for f in files
                     if any('_' + det + '_' in f for det in dets)]
        return files

    def _get(self, download_dir, files, verbose=True):
        """Downloads a list of files from FTP

        Args:
            download_dir (str): The download directory location
            files (list of str): The list of files to download
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.

        Returns:
            list of str: The full paths to the downloaded files
        """
        callback = self._ftp_status if verbose else self._ftp_silent

        os.makedirs(download_dir, exist_ok=True)
        self._download_dir = download_dir

        # download each file
        filepaths = []
        for file in files:
            # have to save in self because this can't be passed as an argument
            # in the callback
            self._downloading_file = file
            local_path = os.path.join(download_dir, file)

            # The callbacks append to the local file, so start from an empty
            # file; otherwise a repeated download would be appended to the
            # previous copy and corrupt it.
            open(local_path, 'wb').close()

            # download file
            self._ftp.voidcmd('TYPE I')
            self._total_bytes = self._ftp.size(file)
            self._transferred_bytes = 0
            self._ftp.retrbinary('RETR ' + file, callback=callback)

            filepaths.append(local_path)
        return filepaths

    @property
    def num_files(self):
        return len(self._file_list)

    @property
    def files(self):
        return self._file_list

    def ls(self, id):
        """List the directory contents of an FTP directory associated with
        a trigger or data set.

        Args:
            id (str): The id associated with a trigger or data set

        Returns:
            list of str: Alphabetically ordered file list

        Raises:
            RuntimeError: If the connection was lost and could not be
                          re-established
            FileExistsError: If the directory could not be listed
        """
        path = self._construct_path(id)
        try:
            files = self._ftp.nlst(path)
        except AttributeError:
            # the FTP object has no socket anymore: the connection was closed
            print('Connection appears to have failed. Attempting to reconnect...')
            try:
                self._reconnect()
            except Exception as err:
                raise RuntimeError('Failed to reconnect.') from err
            print('Reconnected.')
            # retry outside of the reconnect guard so that a missing directory
            # is still reported as FileExistsError, which callers rely on
            return self.ls(id)
        except Exception as err:
            raise FileExistsError('{} does not exist'.format(path)) from err

        return sorted(os.path.basename(f) for f in files)
class TriggerFtp(FtpFinder):
    """A class that interfaces with the HEASARC FTP trigger directories.
    An instance of this class will represent the available files associated
    with a single trigger.

    An instance can be created without a trigger number, however a trigger
    number will need to be set by set_trigger(tnum) to query and download files.
    An instance can also be changed from one trigger number to another without
    having to create a new instance.  If multiple instances are created and
    exist simultaneously, they will all use a single FTP connection.

    Note:
        Since HEASARC transitioned to FTPS, some have had issues with
        connecting to the HEASARC FTP site via Python's ftplib for no obvious
        reason while it works flawlessly for others (even on the same
        platform). Currently the thought is that this may be related to the
        underlying OpenSSL version that is installed.  If you have connection
        problems using this, you may consider upgrading your OpenSSL and see
        if that solves your problem.  A potential solution is to do the
        following:

        * $ pip3 install pyopenssl
        * $ pip3 install requests[security]

    Parameters:
        tnum (str, optional): A valid trigger number

    Attributes:
        num_files (int): Number of files in the current directory
        files (list of str): The list of files in the current directory
    """
    _root = '/fermi/data/gbm/triggers'

    def __init__(self, tnum=None):
        self._downloading_file = None
        self._download_dir = None
        self._tnum = None
        self._file_list = []
        if tnum is not None:
            self.set_trigger(tnum)

    def set_trigger(self, tnum):
        """Set the trigger number.  If the object was previously associated
        with a trigger number, this will effectively change the working
        directory to that of the new trigger number.  If the trigger number is
        invalid, the object is reset to having no trigger and an exception is
        raised.

        Args:
            tnum (str): A valid trigger number

        Raises:
            ValueError: If the trigger directory does not exist
        """
        try:
            file_list = self.ls(tnum)
            self._ftp.cwd(self._construct_path(tnum))
        except FileExistsError:
            self._tnum = None
            self._file_list = []
            raise ValueError('{} is not a valid trigger number'.format(tnum))
        # only commit the new state once listing and cwd both succeeded
        self._file_list = file_list
        self._tnum = tnum

    def _rsp_files(self, extension, ctime, cspec, dets=None):
        """Select the response files with the given extension; cspec
        responses are listed before ctime responses."""
        files = []
        if cspec:
            files.extend(
                self._file_filter(self.files, 'cspec', extension, dets=dets))
        if ctime:
            files.extend(
                self._file_filter(self.files, 'ctime', extension, dets=dets))
        return files

    def _cat_files(self):
        """Select the bcat, scat, and tcat catalog files, in that order."""
        files = []
        for filetype in ('bcat', 'scat', 'tcat'):
            files.extend(self._file_filter(self.files, filetype, 'fit'))
        return files

    def _localization_files(self):
        """Select every localization product of the trigger."""
        products = (('healpix', 'fit'), ('skymap', 'png'), ('loclist', 'txt'),
                    ('locprob', 'fit'), ('locplot', 'png'))
        files = []
        for filetype, extension in products:
            files.extend(self._file_filter(self.files, filetype, extension))
        return files

    def ls_ctime(self):
        """List all ctime files for the trigger

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'ctime', 'pha')

    def ls_cspec(self):
        """List all cspec files for the trigger

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'cspec', 'pha')

    def ls_tte(self):
        """List all tte files for the trigger

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'tte', 'fit')

    def ls_rsp(self, ctime=True, cspec=True):
        """List all response Type-I files for the trigger

        Args:
            ctime (bool, optional): If True, list the ctime responses.
                                    Default is True.
            cspec (bool, optional): If True, list the cspec responses.
                                    Default is True.

        Returns:
            list of str: The file list
        """
        return self._rsp_files('rsp', ctime, cspec)

    def ls_rsp2(self, ctime=True, cspec=True):
        """List all response Type-II files for the trigger

        Args:
            ctime (bool, optional): If True, list the ctime responses.
                                    Default is True.
            cspec (bool, optional): If True, list the cspec responses.
                                    Default is True.

        Returns:
            list of str: The file list
        """
        return self._rsp_files('rsp2', ctime, cspec)

    def ls_lightcurve(self):
        """List all lightcurve plots for the trigger

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'lc', 'pdf')

    def ls_cat_files(self):
        """List all catalog files for the trigger

        Returns:
            list of str: The file list
        """
        return self._cat_files()

    def ls_trigdat(self):
        """List the trigger data (trigdat) file for the trigger

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'trigdat', 'fit')

    def ls_localization(self):
        """List all localization files for the trigger

        Returns:
            list of str: The file list
        """
        return self._localization_files()

    def get_ctime(self, download_dir, dets=None, **kwargs):
        """Download the ctime files for the trigger

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'ctime', 'pha', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_cspec(self, download_dir, dets=None, **kwargs):
        """Download the cspec files for the trigger

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'cspec', 'pha', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_tte(self, download_dir, dets=None, **kwargs):
        """Download the TTE files for the trigger

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'tte', 'fit', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_rsp(self, download_dir, ctime=True, cspec=True, dets=None,
                **kwargs):
        """Download the response Type-I files for the trigger

        Args:
            download_dir (str): The download directory
            ctime (bool, optional): If True, download the ctime responses.
                                    Default is True.
            cspec (bool, optional): If True, download the cspec responses.
                                    Default is True.
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._rsp_files('rsp', ctime, cspec, dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_rsp2(self, download_dir, ctime=True, cspec=True, dets=None,
                 **kwargs):
        """Download the response Type-II files for the trigger

        Args:
            download_dir (str): The download directory
            ctime (bool, optional): If True, download the ctime responses.
                                    Default is True.
            cspec (bool, optional): If True, download the cspec responses.
                                    Default is True.
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._rsp_files('rsp2', ctime, cspec, dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_lightcurve(self, download_dir, **kwargs):
        """Download the lightcurve plots for the trigger

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'lc', 'pdf')
        self._get(download_dir, files, **kwargs)

    def get_cat_files(self, download_dir, **kwargs):
        """Download all catalog files for the trigger

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        self._get(download_dir, self._cat_files(), **kwargs)

    def get_trigdat(self, download_dir, **kwargs):
        """Download the trigger data (trigdat) file for the trigger

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'trigdat', 'fit')
        self._get(download_dir, files, **kwargs)

    def get_localization(self, download_dir, **kwargs):
        """Download all localization files for the trigger

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        self._get(download_dir, self._localization_files(), **kwargs)

    def get_healpix(self, download_dir, **kwargs):
        """Download the healpix localization file for the trigger.

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'healpix', 'fit')
        self._get(download_dir, files, **kwargs)

    def get_all(self, download_dir, **kwargs):
        """Download all files associated with the trigger

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        self._get(download_dir, self._file_list, **kwargs)

    def _construct_path(self, str_trigger_num):
        """Constructs the FTP path for a trigger

        Args:
            str_trigger_num (str): The trigger number

        Returns:
            str: The path of the FTP directory for the trigger
        """
        # the first two digits of the trigger number are the two-digit year
        year = '20' + str_trigger_num[0:2]
        # This is a path on the remote server, so it is always joined with
        # forward slashes; os.path.join would use backslashes on Windows.
        return '/'.join(
            (self._root, year, 'bn' + str_trigger_num, 'current'))
# mark: TODO: Need date range functionality
class ContinuousFtp(FtpFinder):
    """A class that interfaces with the HEASARC FTP continuous daily data
    directories. An instance of this class will represent the available files
    associated with a single day.

    An instance can be created without a time, however a time will need to be
    set by set_time() to query and download files.  An instance can also be
    changed from one time to another without having to create a new instance.
    If multiple instances are created and exist simultaneously, they will all
    use a single FTP connection.

    Note:
        Since HEASARC transitioned to FTPS, some have had issues with
        connecting to the HEASARC FTP site via Python's ftplib for no obvious
        reason while it works flawlessly for others (even on the same
        platform). Currently the thought is that this may be related to the
        underlying OpenSSL version that is installed.  If you have connection
        problems using this, you may consider upgrading your OpenSSL and see
        if that solves your problem.  A potential solution is to do the
        following:

        * $ pip3 install pyopenssl
        * $ pip3 install requests[security]

    Parameters:
        met (float, optional): A time in MET. Either met, utc, or gps must be
                               set.
        utc (str, optional): A UTC time in ISO format: YYYY-MM-DDTHH:MM:SS
        gps (float, optional): A GPS time

    Attributes:
        num_files (int): Number of files in the current directory
        files (list of str): The list of files in the current directory
    """
    _root = '/fermi/data/gbm/daily'

    def __init__(self, met=None, utc=None, gps=None):
        self._downloading_file = None
        self._download_dir = None
        self._file_list = []
        self._met = None
        # a time is optional at construction; it can be set later
        if met is not None or utc is not None or gps is not None:
            self.set_time(met=met, utc=utc, gps=gps)

    def set_time(self, met=None, utc=None, gps=None):
        """Set the time.  If the object was previously associated with a
        different time, this will effectively change the working directory to
        that of the new time.  If the time is invalid, the object is reset to
        having no time and an exception is raised.

        Only one of met, utc, or gps should be defined. If more than one is
        given, met takes precedence over utc, which takes precedence over gps.

        Args:
            met (float, optional): A time in MET.
            utc (str, optional): A UTC time in ISO format: YYYY-MM-DDTHH:MM:SS
            gps (float, optional): A GPS time

        Raises:
            ValueError: If no time is given or if no daily directory exists
                        for the time
        """
        if met is not None:
            self._met = Met(met)
        elif utc is not None:
            self._met = Met.from_iso(utc)
        elif gps is not None:
            self._met = Met.from_gps(gps)
        else:
            raise ValueError('Either met, utc, or gps must be specified')

        try:
            self._file_list = self.ls(self._met)
            self._ftp.cwd(self._construct_path(self._met))
        except FileExistsError:
            badtime = self._met
            self._met = None
            self._file_list = []
            raise ValueError('{} is not a valid MET'.format(badtime))

    def _tte_files(self, full_day, dets=None):
        """Select the TTE files (gzipped first, then uncompressed), optionally
        restricted to the hour that contains the current time."""
        files = []
        files.extend(self._file_filter(self.files, 'tte', 'fit.gz', dets=dets))
        files.extend(self._file_filter(self.files, 'tte', 'fit', dets=dets))
        if not full_day:
            files = self._filter_tte(files)
        return files

    def ls_ctime(self):
        """List all ctime files

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'ctime', 'pha')

    def ls_cspec(self):
        """List all cspec files

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'cspec', 'pha')

    def ls_poshist(self):
        """List the poshist file

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'poshist', 'fit')

    def ls_spechist(self):
        """List all spechist files

        Returns:
            list of str: The file list
        """
        return self._file_filter(self.files, 'spechist', 'fit')

    def ls_tte(self, full_day=False):
        """List all TTE files

        Args:
            full_day (bool, optional):
                If True, will return the TTE files for the full day.  If False,
                will return the TTE files for the hour covering the specified
                time. Default is False.

        Returns:
            list of str: The file list
        """
        return self._tte_files(full_day)

    def get_ctime(self, download_dir, dets=None, **kwargs):
        """Download the ctime files

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'ctime', 'pha', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_cspec(self, download_dir, dets=None, **kwargs):
        """Download the cspec files

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'cspec', 'pha', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_poshist(self, download_dir, **kwargs):
        """Download the poshist file

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'poshist', 'fit')
        self._get(download_dir, files, **kwargs)

    def get_spechist(self, download_dir, dets=None, **kwargs):
        """Download the spechist files

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        files = self._file_filter(self.files, 'spechist', 'fit', dets=dets)
        self._get(download_dir, files, **kwargs)

    def get_tte(self, download_dir, dets=None, full_day=False, **kwargs):
        """Download all TTE files associated with a time.

        Note:
            Unless you have a high-bandwidth connection and can handle
            downloading several GBs, it is not recommended to download the
            full day of TTE data.

        Args:
            download_dir (str): The download directory
            dets (list, optional): The detectors' data to download.
                                   If omitted, will download all.
            full_day (bool, optional):
                If True, will download the TTE files for the full day.  If
                False, will download the TTE files for the hour covering the
                specified time. Default is False.
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        self._get(download_dir, self._tte_files(full_day, dets=dets),
                  **kwargs)

    def get_all(self, download_dir, **kwargs):
        """Download all files within a daily directory.

        Note:
            Use at your own risk.  Unless you have a high-bandwidth connection
            and can handle downloading several GBs, this function is not
            recommended for use.

        Args:
            download_dir (str): The download directory
            verbose (bool, optional): If True, will output the download status.
                                      Default is True.
        """
        self._get(download_dir, self._file_list, **kwargs)

    def _construct_path(self, met_obj):
        """Constructs the FTP path for a time

        Args:
            met_obj (:class:`.time.Met`): The MET time object

        Returns:
            str: The path of the FTP directory for the time
        """
        # This is a path on the remote server, so it is always joined with
        # forward slashes; os.path.join would use backslashes on Windows.
        return '/'.join(
            (self._root, met_obj.datetime.strftime('%Y/%m/%d'), 'current'))

    def _filter_tte(self, files):
        """Filters a list of TTE files for only the files that contain the
        desired time

        Args:
            files (list of str): The list of TTE files

        Returns:
            list of str: The filtered list of files. Empty if no time has
            been set.
        """
        # without a time there is no hour to select
        if self._met is None:
            return []
        hour_id = self._met.ymd_h
        return [f for f in files if hour_id in f]
class HeasarcBrowse():
    """A class that interfaces with the HEASARC Browse API.  This can be
    called directly, but primarily intended as a base class.

    The class makes a query to HEASARC's w3query.pl perl script in
    BATCHRETRIEVALCATALOG mode.  All fields and rows are retrieved so that
    this class, on instantiation, contains the full set of catalog data.
    Any queries based on row or columns selections/slices are then done locally,
    instead of making repeated requests to the HEASARC.

    Parameters:
        table (str, optional): The name of the table to be passed to the
                               w3query.pl script. If omitted, no request is
                               made and the catalog is empty.
        verbose (bool, optional): Default is True

    Attributes:
        columns (np.array): The names of the columns available in the table
        num_cols (int): The total number of columns (fields) in the data table
        num_rows: (int): The total number of rows in the data table
    """

    def __init__(self, table=None, verbose=True):
        self._verbose = verbose

        # start out as an empty catalog so that the properties are usable
        # even when no table was requested
        self._header = np.array([], dtype=str)
        self._table = np.empty((0, 0), dtype=str)
        self._typedefs = np.array([], dtype=str)
        if table is None:
            return

        host = 'https://heasarc.gsfc.nasa.gov'
        script = 'cgi-bin/W3Browse/w3query.pl'
        query = 'tablehead=name=BATCHRETRIEVALCATALOG_2.0+{}&Fields=All'.format(
            table)
        # have to add this because HEASARC changed the default behavior without
        # telling anyone
        query += '&ResultMax=0'

        self._is_connected(host)
        self._header, self._table = self._read_table(
            host + '/' + script + '?' + query)
        self._typedefs = self._auto_typedefs()

    @property
    def num_rows(self):
        return self._table.shape[0]

    @property
    def num_cols(self):
        return self._table.shape[1]

    @property
    def columns(self):
        return self._header

    def _is_connected(self, host):
        """Check that the host can be reached.

        Args:
            host (str): The host URL, e.g. 'https://heasarc.gsfc.nasa.gov'

        Returns:
            bool: True if a connection to the host could be opened

        Raises:
            OSError: If the host could not be reached
        """
        hostname = host.split('/')[-1]
        try:
            # connect to the host -- tells us if the host is actually
            # reachable; the probe socket is closed right away
            with socket.create_connection((hostname, 80)):
                pass
        except OSError:
            raise OSError("Either you are not connected to the internet or "
                          "{0} is down.".format(host))
        return True

    def _read_table(self, url):
        """Read the table from HEASARC

        Args:
            url (str): The URL including the query to the HEASARC perl script

        Returns:
            header (np.array): The column names of the table
            table (np.array): The complete data table, unformatted
        """
        # NOTE(review): this context disables TLS certificate verification
        # (and is a private ssl API).  Kept for compatibility; consider
        # ssl.create_default_context() once certificate issues are ruled out.
        context = ssl._create_unverified_context()
        with urlopen(url, context=context) as page:
            if self._verbose:
                print('Downloading Catalog from HEASARC via w3query.pl...')
            t0 = time.time()
            # get content, decode, and split into lines
            lines = page.read().decode('utf8').splitlines(False)
            if self._verbose:
                print('Finished in {} s'.format(int(time.time() - t0)))

        return self._parse_lines(lines)

    @staticmethod
    def _parse_lines(lines):
        """Parse the raw lines returned by w3query.pl into a header and table.

        Args:
            lines (list of str): The lines of the response

        Returns:
            header (np.array): The column names of the table
            table (np.array): The complete data table, unformatted, with
                              missing values replaced by 'nan'
        """
        # now we have to do the following because HEASARC changed the behavior
        # of their public script without telling anyone: the first and last
        # lines are not part of the table
        lines = lines[1:-1]

        # table header
        header = np.array([col.strip() for col in lines[0].split('|')])

        # the table data; only lines with a delimiter are rows
        rows = [line for line in lines[1:] if '|' in line]
        table = np.array(
            [item.strip() for line in rows for item in line.split('|')],
            dtype=str)
        table = table.reshape(-1, header.size)

        # another undocumented and unannounced change to HEASARC browse:
        # they added an additional '|' delimiter at the beginning and end of
        # each line
        header = header[1:-1]
        table = table[:, 1:-1]

        # clean nulls from table
        table[(table == 'null') | (table == '')] = 'nan'
        return (header, table)

    @staticmethod
    def _guess_type(value):
        """Guess the type definition of a single non-null table entry.

        Args:
            value (str): The table entry

        Returns:
            str: One of 'int', 'float', 'datetime', or 'str'
        """
        # if an entry is a digit, set as integer
        if value.isdigit():
            return 'int'
        # otherwise try applying float
        try:
            float(value)
            return 'float'
        except ValueError:
            pass
        # if float fails, then must be a string, try datetime
        try:
            Met.from_iso(value)
            return 'datetime'
        except ValueError:
            # all else fails, this is definitely a string
            return 'str'

    def _auto_typedefs(self):
        """Auto-detect the datatype for each column of the table.  The HEASARC
        tables are returned as strings, with no definition of datatypes, so
        we have to do a little work to guess what the proper types are.  This
        usually works pretty well.  Can be overridden in a derived class after
        the base class __init__ has been called.

        The type is guessed from the first non-null entry of each column; a
        column without any non-null entry is left as 'str'.

        Returns:
            np.array: The type definition of each column
        """
        typedefs = []
        for i in range(self.num_cols):
            col = self._table[:, i]
            non_null = col[col != 'nan']
            if non_null.size == 0:
                # nothing to infer a type from
                typedefs.append('str')
            else:
                typedefs.append(self._guess_type(non_null[0]))
        return np.array(typedefs)

    def _apply_typedef(self, typedef, column):
        """Apply the type definition to a column of data.

        Args:
            typedef (str): The type definition
            column (np.array): A column of data

        Returns:
            np.array: The column of data converted to the requested type.
            For 'datetime' and 'str' the input array itself is returned
            (not a copy).
        """
        if typedef == 'int':
            try:
                newcol = column.astype('int')
            except ValueError:
                # nan doesn't work for ints, for now. Not the best solution...
                # np.where builds a new array wide enough for the sentinel,
                # so it cannot be truncated by the column's string width
                newcol = np.where(column == 'nan', '-99999', column)
                newcol = newcol.astype('int')
        elif typedef == 'float':
            newcol = column.astype('float')
        else:
            # 'datetime' and 'str' columns are kept as strings
            newcol = column
        return newcol

    def _colname_to_idx(self, colname):
        """Convert a column name to the index into the table array

        Args:
            colname (str): The column name

        Returns:
            int: The index into the table array

        Raises:
            ValueError: If the column does not exist
        """
        if colname not in self._header:
            raise ValueError('{} not a valid column name'.format(colname))
        return np.where(self._header == colname)[0][0]

    def get_table(self, columns=None):
        """Return the table data as a record array with proper type conversions.
        Missing values are treated as type-converted ``np.nan``.

        Args:
            columns (list of str, optional): The columns to return.  If omitted,
                                             returns all columns.

        Returns:
            np.recarray: A record array containing the requested data
        """
        if columns is None:
            columns = self.columns
        idx = [self._colname_to_idx(column) for column in columns]
        data = [self._apply_typedef(self._typedefs[i], self._table[:, i])
                for i in idx]
        return np.rec.fromarrays(data, names=','.join(columns))

    def column_range(self, column):
        """Return the data range for a given column

        Missing values (NaN) of float columns are ignored, unless the whole
        column is missing.

        Args:
            column (str): The column name

        Returns:
            tuple: The (lo, hi) range of the data column
        """
        idx = self._colname_to_idx(column)
        col = self._apply_typedef(self._typedefs[idx], self._table[:, idx])
        # np.sort returns a sorted copy.  An in-place sort must be avoided
        # because string columns are views into the table, and sorting them
        # would scramble the rows of the catalog.
        ordered = np.sort(col)
        if ordered.dtype.kind == 'f':
            # NaNs sort to the end and would otherwise become the upper bound
            valid = ordered[~np.isnan(ordered)]
            if valid.size > 0:
                ordered = valid
        return (ordered[0], ordered[-1])

    def slice(self, column, lo=None, hi=None):
        """Perform row slices of the data table based on a conditional of a
        single column

        Args:
            column (str): The column name
            lo (optional): The minimum (inclusive) value of the slice. If not
                           set, uses the lowest range of the data in the column.
            hi (optional): The maximum (inclusive) value of the slice. If not
                           set, uses the highest range of the data in the
                           column.

        Returns:
            :class:`HeasarcBrowse`: Returns a new catalog with the sliced rows
        """
        # have to apply the types and create a mask
        idx = self._colname_to_idx(column)
        col = self._apply_typedef(self._typedefs[idx], self._table[:, idx])
        if lo is None or hi is None:
            col_lo, col_hi = self.column_range(column)
            if lo is None:
                lo = col_lo
            if hi is None:
                hi = col_hi
        mask = (col >= lo) & (col <= hi)

        # create a new object and fill it with the sliced data
        obj = HeasarcBrowse()
        obj._header = np.copy(self._header)
        obj._table = self._table[mask, :]
        obj._typedefs = np.copy(self._typedefs)
        return obj

    def slices(self, columns):
        """Perform row slices of the data table based on a conditional of
        multiple columns

        Args:
            columns (list of tuples):
                A list of tuples, where each tuple is (column, lo, hi).  The
                'column' is the column name, 'lo' is the lowest bounding value,
                and 'hi' is the highest bounding value.  If no low or high
                bounding is desired, set to None. See :meth:`slice()` for more
                info.

        Returns:
            :class:`HeasarcBrowse`: Returns a new catalog with the sliced rows.
        """
        # each slice is applied to the result of the previous one
        obj = self
        for spec in columns:
            obj = obj.slice(spec[0], lo=spec[1], hi=spec[2])
        return obj
class TriggerCatalog(HeasarcBrowse):
    """Class that interfaces with the GBM Trigger Catalog via HEASARC Browse.

    Note:
        Because this calls HEASARC's w3query.pl script on initialization,
        it may take several seconds for the object to load.

    Parameters:
        coord_units_deg (bool, optional):
            If True, converts the hms sexagesimal format output by HEASARC to
            decimal degree.  Default is True.
        verbose (bool, optional): Default is True

    Attributes:
        columns (np.array): The names of the columns available in the table
        num_cols (int): The total number of columns (fields) in the data table
        num_rows: (int): The total number of rows in the data table
    """

    def __init__(self, coord_units_deg=True, **kwargs):
        super().__init__(table='fermigtrig', **kwargs)

        # the detector mask must stay a string rather than the
        # auto-detected type
        mask_col = self._colname_to_idx('detector_mask')
        self._typedefs[mask_col] = 'str'

        if not coord_units_deg:
            return

        # HEASARC only provides these coordinates in hms; convert them to
        # decimal degrees and update both the table and the typedefs
        ra_col = self._colname_to_idx('ra')
        dec_col = self._colname_to_idx('dec')
        sky = SkyCoord(self._table[:, ra_col], self._table[:, dec_col],
                       unit=(astro_units.hourangle, astro_units.deg))
        converted = ((ra_col, sky.ra.degree), (dec_col, sky.dec.degree))
        for col_idx, degrees in converted:
            self._table[:, col_idx] = degrees.astype('str')
        for col_idx, _ in converted:
            self._typedefs[col_idx] = 'float'
class BurstCatalog(HeasarcBrowse):
    """Class that interfaces with the GBM Burst Catalog via HEASARC Browse.

    Note:
        Because this calls HEASARC's w3query.pl script on initialization,
        it may take several seconds up to a couple of minutes for the object
        to load.

    Parameters:
        coord_units_deg (bool, optional):
            If True, converts the hms sexagesimal format output by HEASARC to
            decimal degree.  Default is True.
        verbose (bool, optional): Default is True

    Attributes:
        columns (np.array): The names of the columns available in the table
        num_cols (int): The total number of columns (fields) in the data table
        num_rows: (int): The total number of rows in the data table
    """

    def __init__(self, coord_units_deg=True, **kwargs):
        super().__init__(table='fermigbrst', **kwargs)

        # the detector mask must stay a string rather than the
        # auto-detected type
        mask_col = self._colname_to_idx('bcat_detector_mask')
        self._typedefs[mask_col] = 'str'

        if not coord_units_deg:
            return

        # HEASARC only provides these coordinates in hms; convert them to
        # decimal degrees and update both the table and the typedefs
        ra_col = self._colname_to_idx('ra')
        dec_col = self._colname_to_idx('dec')
        sky = SkyCoord(self._table[:, ra_col], self._table[:, dec_col],
                       unit=(astro_units.hourangle, astro_units.deg))
        converted = ((ra_col, sky.ra.degree), (dec_col, sky.dec.degree))
        for col_idx, degrees in converted:
            self._table[:, col_idx] = degrees.astype('str')
        for col_idx, _ in converted:
            self._typedefs[col_idx] = 'float'