[1]:
import numpy as np
import numpy.testing as npt
import xarray as xr
import xrft
import dask.array as dsar
from matplotlib import colors
import matplotlib.pyplot as plt
%matplotlib inline

Parallelized Bartlett’s Method

For long data sets that have reached statistical equilibrium, it is useful to chunk the data, calculate the periodogram for each chunk and then take the average to reduce variance.

[2]:
n = int(2**8)
da = xr.DataArray(np.random.rand(n,int(n/2),int(n/2)), dims=['time','y','x'])
da
[2]:
<xarray.DataArray (time: 256, y: 128, x: 128)>
array([[[ 0.493341,  0.28303 , ...,  0.434256,  0.616031],
        [ 0.777314,  0.629644, ...,  0.152931,  0.445424],
        ...,
        [ 0.562456,  0.022227, ...,  0.88538 ,  0.054687],
        [ 0.381456,  0.908454, ...,  0.843443,  0.706326]],

       [[ 0.469143,  0.241104, ...,  0.249369,  0.830898],
        [ 0.283305,  0.438634, ...,  0.893666,  0.242556],
        ...,
        [ 0.897823,  0.187038, ...,  0.977466,  0.270899],
        [ 0.252733,  0.425873, ...,  0.228847,  0.954393]],

       ...,
       [[ 0.936424,  0.793693, ...,  0.406293,  0.272336],
        [ 0.917752,  0.83908 , ...,  0.954489,  0.151129],
        ...,
        [ 0.081756,  0.016332, ...,  0.524886,  0.87095 ],
        [ 0.677224,  0.41488 , ...,  0.12199 ,  0.689685]],

       [[ 0.193302,  0.113419, ...,  0.083486,  0.784332],
        [ 0.695728,  0.376776, ...,  0.278004,  0.026373],
        ...,
        [ 0.677775,  0.255296, ...,  0.112851,  0.46325 ],
        [ 0.598086,  0.529324, ...,  0.267431,  0.65419 ]]])
Dimensions without coordinates: time, y, x

One dimension

Discrete Fourier Transform

[3]:
daft = xrft.dft(da.chunk({'time':int(n/4)}), dim=['time'], shift=False , chunks_to_segments=True).compute()
daft
[3]:
<xarray.DataArray 'fftn-917988d0ce7d7da01b5f7a3cf2bb9a26' (time_segment: 4, freq_time: 64, y: 128, x: 128)>
array([[[[ 30.737014+0.j      , ...,  31.659135+0.j      ],
         ...,
         [ 31.308938+0.j      , ...,  31.768846+0.j      ]],

        ...,
        [[  1.928097-0.118076j, ...,   0.732440+2.07656j ],
         ...,
         [  0.225814+1.256083j, ...,   0.244113-1.276807j]]],


       ...,
       [[[ 37.777908+0.j      , ...,  30.996848+0.j      ],
         ...,
         [ 28.650088+0.j      , ...,  35.362874+0.j      ]],

        ...,
        [[ -1.780642+0.477772j, ...,   2.575858+1.71943j ],
         ...,
         [  3.149759-2.664934j, ...,   1.872009-2.977565j]]]])
Coordinates:
  * time_segment       (time_segment) int64 0 1 2 3
  * freq_time          (freq_time) float64 0.0 0.01562 0.03125 0.04688 ...
  * y                  (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * x                  (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
    freq_time_spacing  float64 0.01562
[4]:
data = da.chunk({'time':int(n/4)}).data
data_rs = data.reshape((4,int(n/4),int(n/2),int(n/2)))
da_rs = xr.DataArray(data_rs, dims=['time_segment','time','y','x'])
da1 = xr.DataArray(dsar.fft.fftn(data_rs, axes=[1]).compute(),
                   dims=['time_segment','freq_time','y','x'])
da1
[4]:
<xarray.DataArray (time_segment: 4, freq_time: 64, y: 128, x: 128)>
array([[[[ 30.737014+0.j      , ...,  31.659135+0.j      ],
         ...,
         [ 31.308938+0.j      , ...,  31.768846+0.j      ]],

        ...,
        [[  1.928097-0.118076j, ...,   0.732440+2.07656j ],
         ...,
         [  0.225814+1.256083j, ...,   0.244113-1.276807j]]],


       ...,
       [[[ 37.777908+0.j      , ...,  30.996848+0.j      ],
         ...,
         [ 28.650088+0.j      , ...,  35.362874+0.j      ]],

        ...,
        [[ -1.780642+0.477772j, ...,   2.575858+1.71943j ],
         ...,
         [  3.149759-2.664934j, ...,   1.872009-2.977565j]]]])
Dimensions without coordinates: time_segment, freq_time, y, x

We assert that our calculations give equal results.

[5]:
npt.assert_almost_equal(da1, daft.values)

Power Spectrum

[6]:
ps = xrft.power_spectrum(da.chunk({'time':int(n/4)}), dim=['time'], chunks_to_segments=True)
ps
[6]:
<xarray.DataArray 'concatenate-183433100cd82e429170a4fe2f9c4cbb' (time_segment: 4, freq_time: 64, y: 128, x: 128)>
dask.array<truediv, shape=(4, 64, 128, 128), dtype=float64, chunksize=(1, 32, 128, 128)>
Coordinates:
  * time_segment       (time_segment) int64 0 1 2 3
  * freq_time          (freq_time) float64 -0.5 -0.4844 -0.4688 -0.4531 ...
  * y                  (y) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * x                  (x) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
    freq_time_spacing  float64 0.01562

Taking the mean over the segments gives the Barlett’s estimate.

[7]:
ps = ps.mean(['time_segment','y','x'])
ps
[7]:
<xarray.DataArray 'concatenate-183433100cd82e429170a4fe2f9c4cbb' (freq_time: 64)>
dask.array<mean_agg-aggregate, shape=(64,), dtype=float64, chunksize=(32,)>
Coordinates:
  * freq_time          (freq_time) float64 -0.5 -0.4844 -0.4688 -0.4531 ...
    freq_time_spacing  float64 0.01562
[8]:
fig, ax = plt.subplots()
ax.semilogx(ps.freq_time[int(n/8)+1:], ps[int(n/8)+1:])
[8]:
[<matplotlib.lines.Line2D at 0x10ebc9518>]
_images/chunk_example_12_1.png

Two dimension

Discrete Fourier Transform

[9]:
daft = xrft.dft(da.chunk({'y':32,'x':32}), dim=['y','x'], shift=False , chunks_to_segments=True).compute()
daft
[9]:
<xarray.DataArray 'fftn-8077935acd6b48b40d6593c688c326b2' (time: 256, y_segment: 4, freq_y: 32, x_segment: 4, freq_x: 32)>
array([[[[[ 505.090962 +0.j      , ...,    3.673241 +2.033024j],
          ...,
          [ 506.979486 +0.j      , ...,    2.672219 +8.645102j]],

         ...,
         [[  -1.746757 -1.347122j, ...,   -2.183099+17.472835j],
          ...,
          [   3.450049 +3.832201j, ...,   -4.072164 -7.279733j]]],


        ...,
        [[[ 504.971751 +0.j      , ...,   -6.610465-12.385931j],
          ...,
          [ 512.756185 +0.j      , ...,   -4.344255 -8.458134j]],

         ...,
         [[  -7.979198 -7.454325j, ...,   -2.962019 +6.43059j ],
          ...,
          [   4.024805 +3.72519j , ...,   -8.242673 -8.259182j]]]],



       ...,
       [[[[ 518.573138 +0.j      , ...,    0.573928-10.006888j],
          ...,
          [ 520.423164 +0.j      , ...,   -1.110088 +0.141936j]],

         ...,
         [[   2.043005 -3.116515j, ...,    8.697924 -5.116488j],
          ...,
          [   3.702009 -7.202762j, ...,  -12.007770 +3.514272j]]],


        ...,
        [[[ 523.615806 +0.j      , ...,   -9.301065 +4.935474j],
          ...,
          [ 521.535950 +0.j      , ...,    6.826755 +1.688166j]],

         ...,
         [[   2.157400-14.676636j, ...,   -1.865237-11.408717j],
          ...,
          [   0.651302 +0.531716j, ...,    5.861882 +5.968681j]]]]])
Coordinates:
  * time            (time) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 ...
  * y_segment       (y_segment) int64 0 1 2 3
  * freq_y          (freq_y) float64 0.0 0.03125 0.0625 0.09375 0.125 0.1562 ...
  * x_segment       (x_segment) int64 0 1 2 3
  * freq_x          (freq_x) float64 0.0 0.03125 0.0625 0.09375 0.125 0.1562 ...
    freq_y_spacing  float64 0.03125
    freq_x_spacing  float64 0.03125
[10]:
data = da.chunk({'y':32,'x':32}).data
data_rs = data.reshape((256,4,32,4,32))
da_rs = xr.DataArray(data_rs, dims=['time','y_segment','y','x_segment','x'])
da2 = xr.DataArray(dsar.fft.fftn(data_rs, axes=[2,4]).compute(),
                   dims=['time','y_segment','freq_y','x_segment','freq_x'])
da2
[10]:
<xarray.DataArray (time: 256, y_segment: 4, freq_y: 32, x_segment: 4, freq_x: 32)>
array([[[[[ 505.090962 +0.j      , ...,    3.673241 +2.033024j],
          ...,
          [ 506.979486 +0.j      , ...,    2.672219 +8.645102j]],

         ...,
         [[  -1.746757 -1.347122j, ...,   -2.183099+17.472835j],
          ...,
          [   3.450049 +3.832201j, ...,   -4.072164 -7.279733j]]],


        ...,
        [[[ 504.971751 +0.j      , ...,   -6.610465-12.385931j],
          ...,
          [ 512.756185 +0.j      , ...,   -4.344255 -8.458134j]],

         ...,
         [[  -7.979198 -7.454325j, ...,   -2.962019 +6.43059j ],
          ...,
          [   4.024805 +3.72519j , ...,   -8.242673 -8.259182j]]]],



       ...,
       [[[[ 518.573138 +0.j      , ...,    0.573928-10.006888j],
          ...,
          [ 520.423164 +0.j      , ...,   -1.110088 +0.141936j]],

         ...,
         [[   2.043005 -3.116515j, ...,    8.697924 -5.116488j],
          ...,
          [   3.702009 -7.202762j, ...,  -12.007770 +3.514272j]]],


        ...,
        [[[ 523.615806 +0.j      , ...,   -9.301065 +4.935474j],
          ...,
          [ 521.535950 +0.j      , ...,    6.826755 +1.688166j]],

         ...,
         [[   2.157400-14.676636j, ...,   -1.865237-11.408717j],
          ...,
          [   0.651302 +0.531716j, ...,    5.861882 +5.968681j]]]]])
Dimensions without coordinates: time, y_segment, freq_y, x_segment, freq_x

We assert that our calculations give equal results.

[11]:
npt.assert_almost_equal(da2, daft.values)

Power Spectrum

[14]:
ps = xrft.power_spectrum(da.chunk({'time':1,'y':64,'x':64}), dim=['y','x'],
                         chunks_to_segments=True, window='True', detrend='linear')
ps = ps.mean(['time','y_segment','x_segment'])
ps
[14]:
<xarray.DataArray 'concatenate-34ef1d78d80632d6b25c65df82f67753' (freq_y: 64, freq_x: 64)>
dask.array<mean_agg-aggregate, shape=(64, 64), dtype=float64, chunksize=(32, 32)>
Coordinates:
  * freq_y          (freq_y) float64 -0.5 -0.4844 -0.4688 -0.4531 -0.4375 ...
  * freq_x          (freq_x) float64 -0.5 -0.4844 -0.4688 -0.4531 -0.4375 ...
    freq_y_spacing  float64 0.01562
    freq_x_spacing  float64 0.01562
[19]:
fig, ax = plt.subplots()
ps.plot(ax=ax, norm=colors.LogNorm(), vmin=6.5e-4, vmax=7.5e-4)
[19]:
<matplotlib.collections.QuadMesh at 0x1210117b8>
_images/chunk_example_20_1.png
[ ]: