Multi-spectrum parallel fitting with spectral-cube and astropy
Authors
Eric Koch, Adam Ginsburg, Tom Robitaille
Learning Goals
- Enabling dask for spectral fitting
- Example of fitting many spaxels in parallel using dask array functions
A "spaxel" refers to the spectrum associated with a single pixel in the spatial dimensions.
Keywords
radio astronomy, spectral-line data cubes, spectral fitting, spatial fitting
Summary
This tutorial presents a proof-of-concept demonstration of spectral fitting across a whole spectral-line data cube. Using dask, this example splits the cube into subcubes and runs the fitting in parallel on these subcubes. Here, we use astropy.modeling to fit a single Gaussian to a small region in M33 mapped in CO(2-1) by the ALMA 7-m (ACA) array (Project ID 2019.1.01182.S).
Requires
pip install astropy spectral-cube dask dask_image aplpy
# Basic imports and settings for the notebook.
%matplotlib inline
import warnings
import numpy as np
import matplotlib.pyplot as plt
import astropy.units as u
We will download the $\sim10$ MB data cube available online for this tutorial:
from astropy.utils.data import download_file
datafile = download_file(
'https://zenodo.org/record/4050489/files/M33_ALMA_ACA_12CO21.cutout.fits',
cache=True, show_progress=True)
from spectral_cube import SpectralCube
# Note that the downloaded file name will not end in ".fits". Because of that, we need to specify the format.
# When the file name ends in ".fits", `format` does not need to be specified.
cube = SpectralCube.read(datafile, format='fits', use_dask=True)
# Use km/s as the spectral unit
cube = cube.with_spectral_unit(u.km / u.s)
This cube is quite small and can be processed in a single chunk by dask (the default for a cube this small when using use_dask=True). However, larger cubes will be split into $>1$ chunk.
To demonstrate processing a large cube, we will split the tutorial data cube into 16 chunks, each with a shape of (300, 16, 16). Because the main goal is to fit the spectra, we will keep a single chunk along the spectral axis (set below using -1 for the 0th axis).
We pass the chunk dimensions using the chunks keyword argument of the SpectralCube.rechunk function:
cube = cube.rechunk(chunks=(-1, 16, 16))
Each chunk now has a shape of (300, 16, 16), and there are 16 of these chunks in total.
Identifying spectra to fit
Model fitting requires sufficiently bright signal, and we want to begin the fitting with reasonable initial parameters.
We will first identify spectra with high signal-to-noise to fit and create a mask containing the signal. This step is important because fitting is expensive, and poorly-constrained fits would have to be removed later; it is best to exclude unfittable spectra from the start.
To create the signal mask, we will apply a few mathematical morphology operations to find coherent regions of signal. Signal masking is covered in more detail in the masking and moments tutorial.
The goal of the signal mask is to find the most likely regions of real signal, while still recovering the surrounding faint signal. To identify these regions, we will create initial "low" and "high" signal-to-noise masks, defined as emission above the $3\sigma$ and $6\sigma$ levels, respectively, where the noise level is estimated as the median absolute deviation along each spectrum:
# Estimate the noise along each line-of-sight
mad_std_plane = cube.mad_std(axis=0)
# Make a low and high mask
low_snr_mask = cube > 3 * mad_std_plane
high_snr_mask = cube > 6 * mad_std_plane
Next, we will find connected regions in low_snr_mask and label them. Both low_snr_mask and high_snr_mask are dask arrays, so we use dask_image.ndmeasure for these morphological operations instead of scipy.ndimage, which operates on numpy arrays. Both packages perform the same operations, but dask_image works on chunked arrays and can therefore handle huge files that will not fit into memory.
from dask_image import ndmeasure
# Find connected structures
structure = np.ones((3, 3, 3), dtype=bool)
low_snr_mask_labels, num_labels = ndmeasure.label(low_snr_mask.include(),
structure=structure)
print(f"Initial number of regions found: {num_labels.compute()}")
Initial number of regions found: 336
Many regions are found. However, we only expect a few real regions of signal based on visual inspection.
Most of the labeled regions are small and spurious, caused by noise. To remove these, we will apply 2 criteria:
- All regions in low_snr_mask must contain $>5$ pixels above $6\sigma$ (i.e., in the high_snr_mask).
- Regions in low_snr_mask must contain $>40$ pixels.
Regions that do not satisfy these criteria are removed from the mask.
The exact number of pixels for each criterion is approximate and was determined by checking different thresholds. A good rule-of-thumb is for regions in low_snr_mask to contain more pixels than the beam. $40$ pixels is approximately the number of pixels within the FWHM of the beam for these data.
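As a rough check, the number of pixels per beam can be computed from the beam attached to the cube and the pixel scale in its WCS. This is a minimal sketch, assuming the cube has a single attached beam and a celestial WCS in degrees:
from astropy.wcs.utils import proj_plane_pixel_area
# Pixel area of the celestial (spatial) part of the WCS, in deg^2
pix_area = proj_plane_pixel_area(cube.wcs.celestial) * u.deg**2
# Pixels per beam from the beam solid angle. The area within the beam FWHM
# is smaller by a constant factor, so treat this as an approximate check.
pixels_per_beam = (cube.beam.sr / pix_area.to(u.sr)).value
print(f"Approximate pixels per beam: {pixels_per_beam:.0f}")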
num_pixels_in_high_snr_mask = ndmeasure.sum_labels(high_snr_mask.include(),
label_image=low_snr_mask_labels,
index=range(1, num_labels.compute() + 1)).compute()
num_pixels_in_low_snr_mask = ndmeasure.sum_labels(low_snr_mask.include(),
label_image=low_snr_mask_labels,
index=range(1, num_labels.compute() + 1)).compute()
signal_mask = low_snr_mask.include().compute()
low_snr_mask_labels = low_snr_mask_labels.compute()
for num, (high_pix_num, low_pix_num) in enumerate(zip(num_pixels_in_high_snr_mask, num_pixels_in_low_snr_mask)):
if high_pix_num > 5 and low_pix_num > 40:
continue
signal_mask[low_snr_mask_labels == num + 1] = False
The remaining number of regions is:
signal_mask_labels, num_labels = ndmeasure.label(signal_mask,
structure=structure)
print(f"Final number of regions found: {num_labels.compute()}")
Final number of regions found: 3
This is far closer to our expectation based on visual inspection.
We apply a final step to better recover faint emission. Because we cut all signal below $3\sigma$, the low level emission around the remaining regions will not be included. To improve the recovery, we extend the signal mask by 1 spectral channel in each direction:
# Extend the signal mask by 1 spectral channel in each direction for the remaining structures
# This is to include low level emission below the 3 sigma cutoff, but only for the remaining structures
# that are likely to be real.
# The same operation can be achieved by "rolling"
posns = np.where(signal_mask > 0)
# Clip the shifted channel indices to stay within the spectral axis
signal_mask[np.clip(posns[0] + 1, 0, signal_mask.shape[0] - 1), posns[1], posns[2]] = True
signal_mask[np.clip(posns[0] - 1, 0, signal_mask.shape[0] - 1), posns[1], posns[2]] = True
In total, 3 regions satisfy our criteria as "real" emission.
We will mask the cube in two ways for this tutorial:
- masked_cube, which is masked with signal_mask -- we will use this cube to estimate initial parameters by making moment maps.
- spatmasked_cube, which only masks in the spatial dimensions -- we will use this cube for fitting so that the fits use the entire spectrum wherever there is sufficiently bright signal.
masked_cube = cube.with_mask(signal_mask)
spatmasked_cube = cube.with_mask(signal_mask.sum(axis=0) > 0)
To visualize the structures kept in the signal mask, we create a moment 0 (integrated intensity) map:
masked_moment0 = masked_cube.moment0()
plt.imshow(masked_moment0.value, origin='lower', cmap='inferno')
cbar = plt.colorbar()
cbar.set_label('Integrated Intensity (K km/s)')
The regions kept after masking are what we expect: two Giant Molecular Clouds (GMCs) in the field-of-view that are connected by faint emission (so these count as 1 region in the mask) and two fainter regions that have been cut off at the edge of the tutorial data.
The white regions are where the data have been masked completely (i.e., the whole spectrum is NaN in the masked cubes).
All of the unmasked spectra will be included in the fitting below.
Initial guesses
We want to make reasonable initial model guesses to assist the fits in converging. For a single Gaussian model, we need to estimate:
- The peak amplitude
- The mean velocity (velocity at the peak)
- The line width (standard deviation of the Gaussian)
These 3 quantities can be estimated from (1) the peak of each spectrum; (2) the moment 1 (centroid); and (3) the line width from the second moment or an equivalent estimator.
To make these maps, we will use masked_cube, which applies the full signal mask, including along the spectral dimension.
# 1. peak of each spectra
masked_peaktemp = masked_cube.max(axis=0)
plt.imshow(masked_peaktemp.value, origin='lower', cmap='inferno')
cbar = plt.colorbar()
cbar.set_label('Peak Temperature (K)')
# 2. Moment 1 (centroid velocity)
masked_moment1 = masked_cube.moment1()
plt.imshow(masked_moment1.value, origin='lower', cmap='bwr')
cbar = plt.colorbar()
cbar.set_label('Centroid Velocity (km / s)')
# 3. Second moment line width
masked_lwidth = masked_cube.linewidth_sigma()
plt.imshow(masked_lwidth.value, origin='lower', cmap='inferno')
cbar = plt.colorbar()
cbar.set_label('Line Width (km / s)')
An alternate "effective" line width can be estimated by assuming that each spectrum is a single Gaussian. For the tutorial data cube, this is a reasonable assumption.
The "effective" line width uses the relation between peak intensity, line width, and integrated intensity for a Gaussian: $$ I = \sqrt{2\pi} T_{\rm peak} \sigma. $$
Solving for the "effective" line width gives: $$ \sigma = \frac{I}{\sqrt{2\pi} T_{\rm peak}}. $$
See Heyer et al. (2001) and Sun et al. (2018).
# 3b. "equivalent" line width
masked_equivwidth = masked_moment0 / (np.sqrt(2 * np.pi) * masked_peaktemp)
plt.imshow(masked_equivwidth.value, origin='lower', cmap='inferno')
cbar = plt.colorbar()
cbar.set_label('Equiv. line width (km / s)')
The line widths estimated from the second moment and "effective" methods are similar for the tutorial data. We will use the effective line width as the initial line width guesses for the fitting, but the line width from the second moment works similarly well.
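As a quick check, the two line width estimates can be compared pixel-by-pixel; a median ratio near 1 indicates the two estimators agree:
# Ratio of the "effective" line width to the second moment line width
width_ratio = (masked_equivwidth / masked_lwidth).value
print(f"Median line width ratio: {np.nanmedian(width_ratio):.2f}")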
These three maps will be fed in as "guesses" for fitting.
Chunk fitting
Above, we specifically set cube to have 16 chunks, each with a shape of $(300, 16, 16)$. Note that each chunk spans the entire spectral dimension. This is critical for spectral fitting. To enforce this generally for large cubes, use cube.rechunk(chunks=(-1, 'auto', 'auto')).
To fit the entire cube in parallel, we start with a function for fitting the spectra in each chunk. This will be a bit more complex than the single-spectrum fitting in the spectral-cube and astropy fitting tutorial, with the main difference being a loop over all spectra in the chunk (in this case, $16^2=256$ spectra).
spectrum_fit_gaussian below takes a chunk to fit, along with the spectrum_axis and guesses arrays. The additional keyword arguments enable extra printing to the terminal (debug=True) and provide information about the location of each chunk in the whole cube (block_info; see below and the dask documentation).
# Import the models and fitting subpackages from astropy
from astropy.modeling import models, fitting
# We will also need utilities from dask.array as we handle "chunks" from the dask cube.
import dask.array as da
def spectrum_fit_gaussian(subcube, spectrum_axis=None, guesses=None, block_info=None, debug=False):
'''
Fit a single Gaussian model to all spectra in the given subcube.
Masked spectra (i.e., all NaNs) are skipped when fitting.
Parameters
----------
subcube : `~dask.array.Array`
The dask array chunk to be fit.
spectrum_axis : numpy.ndarray
The spectral axis values. This can *NOT* be an `astropy.units.Quantity` with units.
guesses : list of numpy.ndarray
3-element list containing the (1) peak amplitude, (2) mean velocity, and
(3) line width initial guesses.
block_info : None, dict
Information about the `subcube` block. Used by `~dask.array.map_blocks`.
debug : bool, optional
Enable to print basic information to the terminal.
Returns
-------
results : numpy.ndarray
The fit results for the Gaussian fits. The array will have the shape:
(3, spatial dimension 1, spatial dimension 2).
'''
# We expect a 3D shape in all cases.
subcube_3D = da.atleast_3d(subcube)
# But if the initial number of dimensions is 1 (i.e., a single spectrum), we
# want the 0th axis to always be the spectral axis.
if subcube.ndim == 1:
subcube_3D = subcube_3D.swapaxes(0, 1)
# Initialize an all NaN results array
results = np.full((3,) + subcube_3D.shape[1:], np.NaN)
# When no block_info is given, or dask uses a string "placeholder",
# assume there is only one block
if block_info is None or isinstance(block_info, str):
loc = [0, 0, 0]
else:
# Otherwise, we need the location of the lower corner of `subcube`
# to select the correct guesses.
loc = [block_range[0] for block_range in block_info[0]['array-location']]
if debug:
print(f"array-location {loc}")
spatial_shape = subcube_3D.shape[1:]
for y, x in np.ndindex(spatial_shape):
# Use the block offset to correctly slice the guess arrays
y_full, x_full = y + loc[1], x + loc[2]
spec = subcube_3D[:, y, x].compute()
# Skip spectra that are entirely masked (all NaN after filling)
if not np.any(np.isfinite(spec)):
continue
# You may need a second check here to enforce enough finite points
# for the fit (i.e., more than the number of free parameters in the
# model for unregularized fits)
# The 1D Gaussian model with initial guesses for parameters
g_init = models.Gaussian1D(amplitude=guesses[0][y_full, x_full],
mean=guesses[1][y_full, x_full],
stddev=guesses[2][y_full, x_full])
# If the initial guesses have any NaNs, also skip the fitting.
if np.isnan(g_init.parameters).any():
continue
# And fit with the Levenberg-Marquardt algorithm and least squares statistic.
fit_g = fitting.LevMarLSQFitter()
# The initial model, spectral axis (in km/s) and spectrum are passed for the fit
g_fit = fit_g(g_init, spectrum_axis, spec)
results[:, y, x] = g_fit.parameters
return results
Before fitting the whole cube, we will test this function on a single spectrum. The center pixel has a strong CO(2-1) detection, so we will use it for testing. Note that this is the same spectrum that we use in the general fitting with spectral-cube tutorial.
Both here and for fitting the whole cube, we will use spatmasked_cube, where only the spatial extent of the signal mask is used. This allows the full spectrum to be included in the fit wherever we expect there to be bright emission.
# Pixel location
y, x = 32, 32
spec = spatmasked_cube[:, y, x]
# Slice out the guesses at this location
guesses = [masked_peaktemp[y:y+1, x:x+1].value, masked_moment1[y:y+1, x:x+1].value,
masked_equivwidth[y:y+1, x:x+1].value]
# Feed in the data. Our "subcube" or chunk is a single spectrum in this case.
fit_params = spectrum_fit_gaussian(spatmasked_cube._get_filled_data(fill=np.NaN)[:, y, x], cube.spectral_axis.value, guesses)
# subcube will be 3D when fitting the full cube
fit_params = fit_params.squeeze()
# Convert the output results to an astropy.model
g_fit_spec = models.Gaussian1D(amplitude=fit_params[0].squeeze(),
mean=fit_params[1].squeeze(),
stddev=fit_params[2].squeeze())
print(g_fit_spec)
plt.plot(spec.spectral_axis, spec.value, drawstyle='steps-mid')
plt.plot(spec.spectral_axis, g_fit_spec(spec.spectral_axis.value))
plt.ylabel("Brightness Temperature (K)")
plt.xlabel("Radio Velocity (km / s)")
Model: Gaussian1D
Inputs: ('x',)
Outputs: ('y',)
Model set size: 1
Parameters:
    amplitude              mean              stddev
------------------ ------------------- -----------------
1.0974557824901818 -211.55636300015527 4.187837281376681
The resulting fit is reasonable. There may be additional line structure that requires a more complex model, but that is beyond the scope of this example.
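If more complex line structure needed to be modeled, astropy.modeling compound models offer a natural extension. The following is a minimal sketch of a two-component Gaussian fit to this spectrum; the initial parameter values are hypothetical placeholders, not tuned guesses:
# A hypothetical two-component initial model, built by summing two Gaussian1D models
g2_init = models.Gaussian1D(amplitude=1.0, mean=-213., stddev=2.) + models.Gaussian1D(amplitude=0.5, mean=-208., stddev=4.)
g2_fit = fitting.LevMarLSQFitter()(g2_init, spec.spectral_axis.value, spec.value)
print(g2_fit.parameters)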
Fitting the whole cube
At this stage, most of the hard work is already done! To expand from fitting a chunk above to the whole cube, we can use dask to handle both loading the data and parallelizing the process.
The magic of dask is using the dask.array.map_blocks function, which will apply a function separately to each block of data. This enables a function to be parallelized across the cube, while using less memory since only a few chunks of the data will be loaded at a time.
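To see the idea in isolation, here is a toy example on a random dask array (separate from the tutorial data): the function is applied to each chunk, and the reduced spectral axis is dropped from the output with drop_axis.
# Toy example: per-chunk peak along axis 0, dropping that axis from the output
arr = da.random.random((300, 64, 64), chunks=(-1, 16, 16))
peak_map = da.map_blocks(lambda block: block.max(axis=0), arr, drop_axis=[0], dtype=float)
print(peak_map.shape)  # (64, 64)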
spectral-cube has a flexible API that provides easy access to dask.array.map_blocks and parallelization through the SpectralCube.apply_function_parallel_spectral function.
We will run the fitting on 4 threads (i.e., 4 subcubes will be fit in parallel). This is set with spatmasked_cube.use_dask_scheduler. We will also specify the guess arrays, using the equivalent width for the line width guesses.
Because the output will be the fit parameters from the Gaussian model, we need to supply the output chunk shape. In this case, the chunks will have a shape of (3, 16, 16):
- 3 parameters in the Gaussian model, and
- (16, 16) is the spatial chunk size we set above.
chunk_size = (3, 16, 16)
Additionally, we need to specify that the output will not contain the spectral axis, which is the 0th axis in the cube. We tell dask this by setting drop_axis=[0].
Putting this all together, the call to fit a Gaussian to every spaxel is:
# Set the number of workers to run in parallel.
spatmasked_cube.use_dask_scheduler('threads', num_workers=4)
# Call the parallel function wrapper to fit every spaxel.
test = spatmasked_cube.apply_function_parallel_spectral(spectrum_fit_gaussian,
return_new_cube=False, # Tell spectral-cube the output is not a SpectralCube
accepts_chunks=True, # `spectrum_fit_gaussian` accepts chunks
drop_axis=[0], # The output will no longer contain the spectral axis
chunks=chunk_size, # See above regarding the chunk size
spectrum_axis=cube.spectral_axis.value, # Pass the spectral axis for fitting
guesses=[masked_peaktemp.value, # These are the initial guesses to start the fitting with
masked_moment1.value,
masked_equivwidth.value],)
By default, compute=False, so the above call does not actually run the fitting. Instead, it constructs the compute graph dask will use for the fitting. Because of that, we can first check whether the output array in test has the expected shape:
test.shape
(3, 64, 64)
Printing test shows additional info, including how many tasks dask has determined the operation will require:
test
The output shape is correct: each chunk will return a (3, 16, 16) array, and the 16 chunks together make up the fit results for the whole cube.
test is a dask array. To actually run the fitting, we call compute:
out_test = test.compute()
The output shape should be [num parameters, spatial_axis_0, spatial_axis_1]; in this case, (3, 64, 64):
out_test.shape
(3, 64, 64)
Everything appears to be correct. Finally, we will inspect the results of the fitting.
Fit results
To finish off this fitting example, we will visually examine the fit parameters and compare them with the initial guesses.
First, the peak versus fitted amplitudes:
plt.figure(figsize=(18, 6))
plt.subplot(131)
plt.title('Peak Temperature (K)', fontsize=14)
plt.imshow(masked_peaktemp.value, vmin=0, origin='lower')
plt.colorbar()
plt.subplot(132)
plt.title('Fit Amplitude (K)', fontsize=14)
plt.imshow(out_test[0], vmin=0, origin='lower')
plt.colorbar()
plt.subplot(133)
plt.title('Fit Residual (K)', fontsize=14)
plt.imshow(masked_peaktemp.value - out_test[0], cmap='bwr', vmax=1.2, vmin=-1.2, origin='lower')
plt.colorbar()
print(f"Max residual: {np.nanmax(masked_moment0.value - out_test[0])}")
print(f"Min residual: {np.nanmin(masked_moment0.value - out_test[0])}")
Max residual: 11.402426366853376 Min residual: -1.269384834177893
The fit amplitudes closely match the initial guesses, though a few outliers in the fits warrant reviewing.
Next, the centroid velocity:
plt.figure(figsize=(18, 6))
plt.subplot(131)
plt.title('Moment 1 (km/s)', fontsize=14)
plt.imshow(masked_moment1.value, cmap='bwr', origin='lower')
plt.colorbar()
plt.subplot(132)
plt.title('Fit Centroid Velocity (km/s)', fontsize=14)
plt.imshow(out_test[1], cmap='bwr', origin='lower')
plt.colorbar()
plt.subplot(133)
plt.title('Fit Residual (km/s)', fontsize=14)
plt.imshow(masked_moment1.value - out_test[1], cmap='bwr', vmax=3, vmin=-3, origin='lower')
plt.colorbar()
print(f"Max residual: {np.nanmax(masked_moment1.value - out_test[1])}")
print(f"Min residual: {np.nanmin(masked_moment1.value - out_test[1])}")
Max residual: 2.620709728272601 Min residual: -2.819628567830705
These again are quite similar. The fitted centroid velocity looks smoother. For reference, the channel width is 0.7 km/s.
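The channel width can be checked directly from the cube's spectral axis:
# Width of a spectral channel from adjacent spectral axis values
chan_width = np.abs(np.diff(cube.spectral_axis.value)).mean() * cube.spectral_axis.unit
print(f"Channel width: {chan_width:.2f}")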
Lastly, the line widths:
plt.figure(figsize=(18, 6))
plt.subplot(131)
plt.title('Equivalent Line Width (km/s)', fontsize=14)
plt.imshow(masked_equivwidth.value, vmin=0, origin='lower')
plt.colorbar()
plt.subplot(132)
plt.title('Fit Line Width (km/s)', fontsize=14)
plt.imshow(out_test[2], vmin=0, origin='lower')
plt.colorbar()
plt.subplot(133)
plt.title('Fit Residual (km/s)', fontsize=14)
plt.imshow(masked_equivwidth.value - out_test[2], cmap='bwr', vmax=5, vmin=-5, origin='lower')
plt.colorbar()
print(f"Max residual: {np.nanmax(masked_equivwidth.value - out_test[2])}")
print(f"Min residual: {np.nanmin(masked_equivwidth.value - out_test[2])}")
Max residual: 1.1474030039027872 Min residual: -5.1946133796482785
The structure in the maps has changed somewhat between the equivalent line width and fitted line width, and the fitted line width tends to be larger. This suggests (1) the equivalent line width tends to underestimate the line width, and (2) there is additional line structure that is not fully captured by a single Gaussian.
We can make one final comparison by computing the integrated intensity for the model and comparing it to the data itself:
plt.figure(figsize=(18, 6))
integrated_intensity = np.sqrt(2 * np.pi) * out_test[0] * u.K * out_test[2] * u.km / u.s
plt.subplot(131)
plt.title('Moment 0 (K km/s)', fontsize=14)
plt.imshow(masked_moment0.value, vmin=0, origin='lower')
plt.colorbar()
plt.subplot(132)
plt.title('Fit Moment 0 (K km/s)', fontsize=14)
plt.imshow(integrated_intensity.value, vmin=0, origin='lower')
plt.colorbar()
plt.subplot(133)
plt.title('Fit Residual (K km/s)', fontsize=14)
plt.imshow(masked_moment0.value - integrated_intensity.value, cmap='bwr', vmax=1.5, vmin=-1.5, origin='lower')
plt.colorbar()
The moment 0 integrated intensity maps are similar, though the fitted values tend to be larger. This is expected because of how the signal mask is created, with the $3\sigma$ signal-to-noise cut-off. Because the fit is continuous, the fitted integrated intensity may better capture the faint emission down to the noise level.
Altogether, the fitting has generally been successful for this proof-of-concept cube fitting using dask with astropy.modeling.
Note that we have not included goodness-of-fit statistics in this example. These are, of course, crucial when using the fits for scientific results. However, this example is primarily a proof-of-concept and will need to be expanded and generalized for full usage.
To evaluate the whole model cube, the 2D parameter arrays can be passed to the 1D Gaussian model:
full_model = models.Gaussian1D(amplitude=out_test[0], mean=out_test[1], stddev=out_test[2])
The model cube can be evaluated by passing the cube's spectral axis, though expanded to 3D:
**Be careful running the following with large cubes!** This will produce an array with the same dimensions as the whole cube.
model_cube = full_model(cube.spectral_axis[:, None, None].value)
And to compare, we can make an equivalent peak intensity map using the max along the spectral dimension:
plt.imshow(model_cube.max(0), origin='lower')
This appears as expected. model_cube can be used to inspect the fits and compare to the data.
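As one example of a simple check, the scatter of the data-minus-model residuals can be compared to the per-spaxel noise. This is an illustrative sketch using the noise map from earlier, not a full goodness-of-fit analysis:
# Residual rms along each spectrum, normalized by the per-spaxel noise.
# Values well above 1 suggest a poor fit or unmodeled line structure.
data = cube.filled_data[:].value
resid_norm = np.nanstd(data - model_cube, axis=0) / mad_std_plane.value
plt.imshow(resid_norm, origin='lower', cmap='inferno')
cbar = plt.colorbar()
cbar.set_label('Residual rms / noise')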
Performance
For large cubes, the parallelization may become input/output (I/O) limited when dask reads in chunks of the cube. Because we require the full spectral dimension in each chunk, we may already need the DaskSpectralCube.rechunk function; above, we used it simply to force 16 chunks for this tutorial. However, when zarr and fsspec are installed, the rechunked cube can be temporarily saved as a zarr file by setting save_to_tmp_dir=True. This may improve the fitting performance at the expense of the time to write out the temporary file and the need for $\sim2\times$ the storage space.
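For example, the rechunking step above could instead be written as the following sketch (assuming zarr and fsspec are installed):
# Rechunk and save the intermediate result to a temporary zarr store so that
# later operations read the already-rechunked data from disk.
cube = cube.rechunk(chunks=(-1, 16, 16), save_to_tmp_dir=True)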