HARPS-N Solar Data Download

How to download HARPS-N Solar data from DACE

This tutorial was originally developed for the Sun-as-a-Star Splinter at the Extremely Precise Radial Velocities Sixth Workshop (EPRV6) in June 2024, main contrutors: Khaled Al Moulla and Ryan Rubenzhal. It also includes extracts from another HARPS-N solar data downaload tutorial developed by Xavier Dumusque.

The Data & Analysis Center for Exoplanets DACE is a web platform developed by the University of Geneva to provide access to a variety of exoplanet-related data. It includes a query python package and extensive documentation and tutorials.

To install the DACE query package use:

                
                pip install dace_query

Import packages and set up output directory

                from   astropy.io          import fits
from   astropy.time        import Time
from   dace_query.sun      import Sun
from   glob                import glob
import matplotlib.cm       as     cm
from   matplotlib.colors   import LinearSegmentedColormap, Normalize
import matplotlib.pyplot   as     plt
import numpy               as     np
import os
import tarfile

output_directory = 'sun_harpn_ccf/'
output_directory_s2d = 'sun_harpn_s2d/'
os.makedirs(output_directory, exist_ok=True)

Define range of time and basic parameter, then query the HARPS-N solar files and download them. Any DRS-produced file can be downloaded, from 1d spectra to radial velocities. For this tutorial we will be downloading the CCFs as FITS files.

Tip: If you don't know the filter keywords by heart, make a query based on the dates only and then list all the dictionary keywords from the query output.

                # Filter: Barycentric Julian Date (BJD)
obj_date_bjd_min = Time('2018-07-01T00:00:00').jd-2400000
obj_date_bjd_max = Time('2018-07-01T23:59:59').jd-2400000

# Filter: Quality flag
spectro_analysis_qualflag_min = 0.95
spectro_analysis_qualflag_max = 1.00

# Filter dictionary
filters = {
'obj_date_bjd'             : {'min': obj_date_bjd_min             , 'max': obj_date_bjd_max             },
'spectro_analysis_qualflag': {'min': spectro_analysis_qualflag_min, 'max': spectro_analysis_qualflag_max},
}

# Query
# (optional: only to inspect the output before downloading it)
sun_query = Sun.query_database(filters=filters, sort={'obj_date_bjd': 'asc'})

# Download CCF files
Sun.download('ccf', filters=filters, output_directory=output_directory, output_filename='temp.tar', compressed=False)

# Download S1D files
Sun.download('s1d', filters=filters, output_directory=output_directory, output_filename="s1d_temp.tar" % file_rootpath[:-5],)

# Unzip
tar = tarfile.open(output_directory+'temp.tar', 'r:')
tar.extractall(output_directory)
tar.close()
# Delete temporary file
os.remove(output_directory+'temp.tar')

tar = tarfile.open(output_directory+'s1d_temp.tar', 'r:')
tar.extractall(output_directory)
tar.close()
# Delete temporary file
os.remove(output_directory_s2d+'s1d_temp.tar')

To extract the relevant information, loop over the files and extract the needed data from the headers. We here include the CCF and the RV extraction.

                files = sorted(glob(output_directory+'**/*.fits', recursive=True))
Nfile = len(files)

# Get CCF velocity grid from first file
hdul     = fits.open(files[0])
Ngrid    = hdul[1].data.shape[1]
grid_del = hdul[0].header['HIERARCH TNG RV STEP']
grid_min = hdul[0].header['HIERARCH TNG RV START']
grid_val = np.arange(Ngrid)*grid_del+grid_min

# Empty arrays for CCF variables
ccf_val  = np.empty((Nfile,Ngrid))
ccf_err  = np.empty((Nfile,Ngrid))

# Empty arrays for RV variables
time_val = np.empty(Nfile)
vrad_val = np.empty(Nfile)
vrad_err = np.empty(Nfile)

# Loop files
for i in range(Nfile):

    # Open FITS file
    hdul = fits.open(files[i])

    # Extract CCF variables
    ccf_val[i] = hdul[1].data[-1] # last row contains the order-combined CCF values
    ccf_err[i] = hdul[2].data[-1] # last row contains the order-combined CCF errors

    # Extract RV variables
    header      = hdul[0].header
    time_val[i] = header['HIERARCH TNG QC BJD']
    vrad_val[i] = header['HIERARCH TNG QC CCF RV']
    vrad_err[i] = header['HIERARCH TNG QC CCF RV ERROR']

    # Close FITS file
    hdul.close()

Example plot of the extracted data.

                # Normalization
cval   = time_val - int(time_val[-1])
vmax   = np.max([np.abs(np.nanmin(cval)),np.abs(np.nanmax(cval))])
norm   = Normalize(vmin=-vmax, vmax=vmax)

# Color map
values = [0.0,0.5,1.0]
colors = ['grey', 'orange', 'grey']
cmap   = LinearSegmentedColormap.from_list('rg', list(zip(values,colors)), N=256)

# Colors
cmsm   = cm.ScalarMappable(norm=norm, cmap=cmap)
color  = cmsm.to_rgba(cval)

# Figure
fig, axs = plt.subplots(1,2, figsize=(15,5))

# CCF time series
ax = axs[0]
for i in range(Nfile):
    ax.errorbar(grid_val, ccf_val[i], ccf_err[i], ls='-', color=color[i], ecolor='r')
ax.set_xlabel('RV [km/s]')
ax.set_ylabel('CCF [ADU]')
cb = plt.colorbar(cmsm, ax=ax)
cb.set_label(f'BJD $-$ {int(time_val[-1])} [d]')

# RV time series
ax = axs[1]
ax.errorbar(time_val - int(time_val[-1]), vrad_val, vrad_err, fmt='.', color='k', ecolor='r', capsize=2)
ax.set_xlabel(f'BJD $-$ {int(time_val[-1])} [d]')
ax.set_ylabel('RV [km/s]')

# Show
plt.show()

Extracting and plotting an order of 2D spectra.

                s1d = fits.getdata(str(output_directory_s1d+"r.HARPN.2018-07-01T13-20-27.252_S1D_A.fits"))

plt.figure(figsize=(10, 5))
plt.plot(s1d["wavelength_air"], s1d["flux"])
plt.xlabel("Wavelength in the air [A]")
plt.ylabel("Flux")