Aligning Field and SHIFT Data

import pandas as pd
 import geopandas as gdp
 import rioxarray as rxr
 import xarray as xr
 import rasterio as rio
 import numpy as np
 import sys
 sys.path.append('/efs/SHIFT-Python-Utilities/')
 from shift_python_utilities.intake_shift import shift_catalog
 from joblib import Parallel, delayed
 import holoviews as hv
 hv.extension('bokeh')
 import hvplot.xarray

 cat = shift_catalog()

Read in our field data csv using Pandas
Convert the dataframe to a Geopandas dataframe using the latitudes and longitudes
Remove sites missing lattitudes and longitudes
Filter sites by target type. For this dataset we are only interested in the soil and canopy targets

df = pd.read_csv('/efs/edlang1/FieldData_yangello_DAAC.csv')

gdf = gdp.GeoDataFrame(
    df, geometry=gdp.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326"
)

# Filter out sites missing lat/lon values
gdf = gdf[~np.isnan(np.asarray(gdf.latitude))].reset_index(drop=True)

# Filter by target type
gdf = gdf[(gdf.target_type == 'Canopy') | (gdf.target_type == "Soil")].reset_index(drop=True)
gdf

Georeference our SHIFT data

ds = cat.aviris_v1_gridded().read_chunked()
ds.rio.write_crs(rio.CRS.from_wkt(",".join(ds.attrs['coordinate system string'])), inplace=True)
ds.rio.crs

Our field sites are 30 by 30 meter grids:

Convert the geodataframe’s CRS to UTM so it is meter based instead of degrees.
Using Shapely, create a 15 meter square (cap_style of 3) buffer around our field sites.

gdf = gdf.to_crs(gdf.estimate_utm_crs())
# create a 30 by 30 square around each point
for i in range(len(gdf)):
    gdf.loc[i, 'geometry'] = gdf.iloc[i].geometry.buffer(15, cap_style=3)

The SHIFT data is sample in 5nm increments while our field data is sampled at 1nm increments. To align the datasets we will use our SHIFT wavelengths to filter our geodataframe.

field_data_df = gdf.filter(ds.wavelength.astype(int).astype(str).values)

Next, we are going to loop through our field sites:

Clip our dataset using our buffer polygon.
Reproject the clipped data area down to a single reflectance value.

Clipping and reprojecting each clipped area for every time is computationally intensive. We can parallelize the the reprojection process to increase the speed.

%%time

field_data = []
aviris_data = []
for ind, row in gdf.iterrows():

    field_area = ds.rio.clip([row.geometry], all_touched=False)

    def reproj(i):
        return field_area.reflectance.isel(time=i).transpose('wavelength', 'y', 'x').rio.reproject(field_area.rio.crs, shape=(1,1))

    temp = Parallel(n_jobs=4, prefer="threads")(delayed(reproj)(i) for i in range(field_area.time.__len__()))

    temp = xr.concat(temp, dim='time').to_dataset().squeeze().reflectance.values.T[None, ...]
    aviris_data += [temp]

    field_data += [np.asarray(field_data_df.iloc[ind])[...,None].T]

    print(f"Site: {row.unique_id} ({ind}) complete")

print()
field_data = np.concatenate(field_data, axis=0)
aviris_data = np.concatenate(aviris_data, axis=0)

Once we have our data we can use the numpy arrays to create an xarray dataset.

coords = {'wavelength':(['wavelength'], ds.wavelength.values.astype(int)), 'site':(['site'], list(gdf.unique_id)), 'time':(ds['time'].values)}
data_vars = {'field_data':(['site', 'wavelength'], field_data), 'aviris_data':(['site', 'wavelength', 'time'], aviris_data)}

out_ds = xr.Dataset(data_vars=data_vars, coords=coords)
out_ds

Using the hvplot xarray extension we can plot the field and AVIRIS refelectance for each site and observe how the AVIRIS data compares to the field data at each recording date.

out_ds.hvplot(x='wavelength').opts(xticks=10)