API Reference

Complete function signatures and parameters for all public functions.

Module: indices

spi()

Calculate Standardized Precipitation Index for monitoring both dry (drought) and wet (flood/excess) conditions.

def spi(
    precip,
    scale,
    periodicity='monthly',
    data_start_year=None,
    calibration_start_year=1991,
    calibration_end_year=2020,
    fitting_params=None,
    return_params=False,
    var_name=None,
    distribution='gamma'
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data with dimensions (time, lat, lon)
scale int Yes - Time scale in months (1, 3, 6, 12, 24, etc.)
periodicity str No ‘monthly’ Temporal resolution (‘monthly’ or ‘daily’)
data_start_year int No None First year of data (auto-detected if None)
calibration_start_year int No 1991 Calibration period start
calibration_end_year int No 2020 Calibration period end
fitting_params dict No None Pre-fitted parameters for operational use
return_params bool No False If True, return (result, params) tuple
var_name str No None Variable name if precip is a Dataset
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’

Returns:

  • xarray.DataArray: SPI values with same dimensions as input
    • Variable name: spi_{distribution}_{scale}_month (e.g., spi_gamma_12_month)
    • Range: typically -3 to +3
    • Negative values: dry conditions (drought)
    • Positive values: wet conditions (flooding/excess)
    • Attributes: scale, distribution, calibration_period

Raises:

  • ValueError: Invalid input dimensions or parameters
  • RuntimeError: Distribution fitting failures

Example:

import xarray as xr
from indices import spi

precip = xr.open_dataset('precip.nc')['precip']

# Default (Gamma distribution)
spi_12 = spi(precip, scale=12)

# With Pearson III distribution
spi_12_p3 = spi(precip, scale=12, distribution='pearson3')

# Save and reuse parameters
spi_12, params = spi(precip, scale=12, return_params=True)

spi_multi_scale()

Calculate SPI for multiple time scales simultaneously.

def spi_multi_scale(
    precip,
    scales,
    distribution='gamma',
    **kwargs
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data
scales list of int Yes - List of time scales [3, 6, 12]
distribution str No ‘gamma’ Distribution type
**kwargs - No - Same parameters as spi()

Returns:

  • xarray.Dataset: Dataset with one variable per scale
    • Variables: spi_{distribution}_{scale}_month (e.g., spi_gamma_3_month, spi_pearson3_12_month)

Example:

from indices import spi_multi_scale

scales = [3, 6, 12]
spi_all = spi_multi_scale(precip, scales=scales)

# With Pearson III
spi_all_p3 = spi_multi_scale(precip, scales=scales, distribution='pearson3')

spei()

Calculate Standardized Precipitation Evapotranspiration Index.

def spei(
    precip,
    pet=None,
    temperature=None,
    latitude=None,
    scale=12,
    periodicity='monthly',
    data_start_year=None,
    calibration_start_year=1991,
    calibration_end_year=2020,
    fitting_params=None,
    return_params=False,
    distribution='gamma',
    pet_method='thornthwaite',
    temp_min=None,
    temp_max=None
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data (mm/month)
pet xarray.DataArray No None Potential evapotranspiration (mm/month)
temperature xarray.DataArray No None Mean temperature for PET calculation (if pet not provided)
latitude xarray.DataArray No None Latitude for PET calculation
scale int No 12 Time scale in months
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’. Pearson III recommended for SPEI.
pet_method str No ‘thornthwaite’ PET method: ‘thornthwaite’ or ‘hargreaves’
temp_min xarray.DataArray No None Minimum temperature (required for Hargreaves)
temp_max xarray.DataArray No None Maximum temperature (required for Hargreaves)
All others - - - Same as spi()

Returns:

  • xarray.DataArray: SPEI values
    • Variable name: spei_{distribution}_{scale}_month (e.g., spei_pearson3_12_month)

Example:

from indices import spei

# Default (Gamma with Thornthwaite PET)
spei_12 = spei(precip, pet=pet, scale=12)

# Pearson III (recommended for SPEI)
spei_12 = spei(precip, pet=pet, scale=12, distribution='pearson3')

# Auto-compute PET from temperature (Thornthwaite)
spei_12 = spei(precip, temperature=temp, latitude=lat, scale=12)

# Hargreaves PET method (better for arid regions)
spei_12 = spei(precip, temperature=temp_mean, latitude=lat, scale=12,
               pet_method='hargreaves', temp_min=tmin, temp_max=tmax)

spei_multi_scale()

Calculate SPEI for multiple time scales.

def spei_multi_scale(
    precip,
    pet=None,
    scales=[1, 3, 6, 12],
    distribution='gamma',
    **kwargs
) -> xarray.Dataset

Parameters: Same pattern as spi_multi_scale() but requires pet. Pearson III or Log-Logistic recommended for SPEI.


spi_global()

Calculate SPI for global-scale datasets with automatic memory management.

def spi_global(
    precip_path,
    output_path,
    scale=12,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500,
    var_name=None,
    save_params=True,
    distribution='gamma'
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip_path str Yes - Path to precipitation NetCDF file
output_path str Yes - Path for output SPI NetCDF file
scale int No 12 Accumulation scale in months
periodicity str No ‘monthly’ Temporal resolution
calibration_start_year int No 1991 Calibration period start
calibration_end_year int No 2020 Calibration period end
chunk_size int No 500 Spatial chunk size (lat and lon)
var_name str No None Precipitation variable name (auto-detected)
save_params bool No True Save fitting parameters
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’

Returns:

  • xarray.Dataset: Dataset with computed SPI

Example:

from indices import spi_global

result = spi_global(
    'global_chirps_monthly.nc',
    'spi_12_global.nc',
    scale=12,
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500
)

spei_global()

Calculate SPEI for global-scale datasets with automatic memory management.

def spei_global(
    precip_path,
    pet_path,
    output_path,
    scale=12,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500,
    precip_var_name=None,
    pet_var_name=None,
    save_params=True,
    distribution='gamma'
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip_path str Yes - Path to precipitation NetCDF file
pet_path str Yes - Path to PET NetCDF file
output_path str Yes - Path for output SPEI NetCDF file
scale int No 12 Accumulation scale
chunk_size int No 500 Spatial chunk size
distribution str No ‘gamma’ Distribution type. Pearson III or Log-Logistic recommended for SPEI.
All others - - - Same as spi_global()

Returns:

  • xarray.Dataset: Dataset with computed SPEI

Example:

from indices import spei_global

result = spei_global(
    'global_precip.nc',
    'global_pet.nc',
    'spei_12_global.nc',
    scale=12,
    chunk_size=500
)

estimate_memory_requirements()

Estimate memory requirements before running computation.

def estimate_memory_requirements(
    precip,
    var_name=None,
    available_memory_gb=None
) -> dict

Parameters:

Parameter Type Required Default Description
precip str or xarray.DataArray Yes - Precipitation data or path
var_name str No None Variable name (for NetCDF path)
available_memory_gb float No None Override system memory detection

Returns:

  • dict with keys:
    • input_size_gb: Input data size
    • peak_memory_gb: Estimated peak memory
    • recommended_chunk_size: Suggested chunk dimensions
    • fits_in_memory: Boolean
    • recommendation: Human-readable advice

Example:

from indices import estimate_memory_requirements

mem = estimate_memory_requirements('global_chirps.nc')
print(f"Peak memory: {mem['peak_memory_gb']:.1f} GB")
print(f"Recommended chunk: {mem['recommended_chunk_size']}")
print(f"Recommendation: {mem['recommendation']}")

Module: chunked

ChunkedProcessor

Main class for memory-efficient chunked processing.

class ChunkedProcessor:
    def __init__(
        self,
        chunk_lat=500,
        chunk_lon=500,
        n_workers=None,
        temp_dir=None,
        verbose=True
    )

Parameters:

Parameter Type Required Default Description
chunk_lat int No 500 Chunk size in latitude dimension
chunk_lon int No 500 Chunk size in longitude dimension
n_workers int No None Number of parallel workers
temp_dir str No None Temporary directory for intermediate files
verbose bool No True Print progress messages

Methods:

compute_spi_chunked()

def compute_spi_chunked(
    self,
    precip,
    output_path,
    scale,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    var_name=None,
    save_params=True,
    params_path=None,
    compress=True,
    complevel=4,
    callback=None,
    distribution='gamma'
) -> xarray.Dataset

compute_spei_chunked()

def compute_spei_chunked(
    self,
    precip,
    pet,
    output_path,
    scale,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    precip_var_name=None,
    pet_var_name=None,
    save_params=True,
    params_path=None,
    compress=True,
    complevel=4,
    callback=None,
    distribution='gamma'
) -> xarray.Dataset

Example:

from chunked import ChunkedProcessor

# Create processor
processor = ChunkedProcessor(chunk_lat=500, chunk_lon=500)

# Define progress callback
def progress(current, total, message):
    print(f"[{current}/{total}] {message}")

# Run computation
result = processor.compute_spi_chunked(
    precip='global_precip.nc',
    output_path='spi_12_global.nc',
    scale=12,
    calibration_start_year=1991,
    calibration_end_year=2020,
    save_params=True,
    callback=progress
)

estimate_memory()

Estimate memory requirements for a dataset.

def estimate_memory(
    n_time,
    n_lat,
    n_lon,
    dtype=np.float64,
    available_memory_gb=None
) -> MemoryEstimate

Parameters:

Parameter Type Required Default Description
n_time int Yes - Number of time steps
n_lat int Yes - Number of latitude points
n_lon int Yes - Number of longitude points
dtype numpy.dtype No float64 Data type
available_memory_gb float No None Override system memory

Returns:

  • MemoryEstimate named tuple with:
    • input_size_bytes: Raw input size
    • peak_memory_bytes: Estimated peak
    • available_bytes: System available memory
    • fits_in_memory: Boolean
    • recommended_chunk_lat: Suggested chunk size
    • recommended_chunk_lon: Suggested chunk size

Example:

from chunked import estimate_memory

mem = estimate_memory(528, 2160, 4320)
print(f"Input: {mem.input_size_bytes / 1e9:.1f} GB")
print(f"Peak: {mem.peak_memory_bytes / 1e9:.1f} GB")
print(f"Fits in memory: {mem.fits_in_memory}")

Module: utils

calculate_pet()

Calculate potential evapotranspiration using Thornthwaite or Hargreaves-Samani method.

def calculate_pet(
    temperature,
    latitude,
    data_start_year,
    method='thornthwaite',
    temp_min=None,
    temp_max=None
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
temperature xarray.DataArray or numpy.ndarray Yes - Monthly mean temperature data (C)
latitude float or array Yes - Latitude in degrees
data_start_year int Yes - First year of the temperature data
method str No ‘thornthwaite’ PET method: ‘thornthwaite’ or ‘hargreaves’
temp_min xarray.DataArray or numpy.ndarray No None Monthly minimum temperature (required for Hargreaves)
temp_max xarray.DataArray or numpy.ndarray No None Monthly maximum temperature (required for Hargreaves)

Returns:

  • xarray.DataArray: PET values (mm/month)
    • Attributes include method name and reference

Method Comparison:

Method Inputs Best For Reference
Thornthwaite T_mean, latitude Humid regions, quick estimates Thornthwaite (1948)
Hargreaves T_mean, T_min, T_max, latitude Arid/semi-arid regions Hargreaves & Samani (1985)

Example:

from utils import calculate_pet

# Thornthwaite method (default)
pet = calculate_pet(temp_mean, latitude=lat, data_start_year=1958)

# Hargreaves method (better for arid regions)
pet = calculate_pet(
    temp_mean, latitude=lat, data_start_year=1958,
    method='hargreaves',
    temp_min=tmin,
    temp_max=tmax
)

eto_thornthwaite()

Low-level function to calculate PET using the Thornthwaite equation.

def eto_thornthwaite(
    temperature_celsius,
    latitude_degrees,
    data_start_year
) -> numpy.ndarray

Parameters:

Parameter Type Required Description
temperature_celsius numpy array Yes Monthly mean temperatures (C)
latitude_degrees float Yes Latitude in degrees (-90 to 90)
data_start_year int Yes First year of data

Returns:

  • numpy.ndarray: PET values (mm/month)

eto_hargreaves()

Low-level function to calculate PET using the Hargreaves-Samani equation.

def eto_hargreaves(
    temp_mean_celsius,
    temp_min_celsius,
    temp_max_celsius,
    latitude_degrees,
    data_start_year
) -> numpy.ndarray

Parameters:

Parameter Type Required Description
temp_mean_celsius numpy array Yes Monthly mean temperatures (C)
temp_min_celsius numpy array Yes Monthly minimum temperatures (C)
temp_max_celsius numpy array Yes Monthly maximum temperatures (C)
latitude_degrees float Yes Latitude in degrees (-90 to 90)
data_start_year int Yes First year of data

Returns:

  • numpy.ndarray: PET values (mm/month)

Note: The Hargreaves equation:

\[PET = 0.0023 \times R_a \times (T_{mean} + 17.8) \times \sqrt{T_{max} - T_{min}}\]

where \(R_a\) is extraterrestrial radiation calculated from latitude and day of year.


get_optimal_chunk_size()

Calculate optimal chunk dimensions based on available memory.

def get_optimal_chunk_size(
    n_time,
    n_lat,
    n_lon,
    available_memory_gb=None,
    memory_multiplier=12.0,
    safety_factor=0.7
) -> Tuple[int, int]

Parameters:

Parameter Type Required Default Description
n_time int Yes - Number of time steps
n_lat int Yes - Number of latitude points
n_lon int Yes - Number of longitude points
available_memory_gb float No None Override system memory
memory_multiplier float No 12.0 Peak memory multiplier
safety_factor float No 0.7 Memory safety margin

Returns:

  • Tuple[int, int]: (chunk_lat, chunk_lon)

Example:

from utils import get_optimal_chunk_size

chunk_lat, chunk_lon = get_optimal_chunk_size(528, 2160, 4320)
print(f"Optimal chunk size: {chunk_lat} x {chunk_lon}")

format_bytes()

Format byte count as human-readable string.

def format_bytes(n_bytes: int) -> str

Example:

from utils import format_bytes

print(format_bytes(1073741824))  # "1.00 GB"
print(format_bytes(536870912))   # "512.00 MB"

get_array_memory_size()

Calculate memory footprint of an array.

def get_array_memory_size(
    shape: Tuple[int, ...],
    dtype=np.float64
) -> int

Returns:

  • int: Size in bytes

Example:

from utils import get_array_memory_size

size = get_array_memory_size((528, 2160, 4320), np.float32)
print(f"Array size: {size / 1e9:.2f} GB")

Module: runtheory

identify_events()

Identify complete climate extreme events from time series. Works for both dry (drought) and wet (flood/excess) events based on threshold direction.

def identify_events(
    index_timeseries,
    threshold=-1.0,
    min_duration=1
) -> pandas.DataFrame

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI time series (single location)
threshold float No -1.0 Event threshold (negative for drought, positive for wet)
min_duration int No 1 Minimum event duration (months)

Returns:

  • pandas.DataFrame with columns:
    • event_id: Event number (1, 2, 3, …)
    • start_date: Event start
    • end_date: Event end
    • duration: Months
    • magnitude: Cumulative deficit (dry events) or surplus (wet events)
    • intensity: magnitude / duration
    • peak: Most extreme SPI/SPEI value (minimum for dry events, maximum for wet events)
    • peak_date: When peak occurred
    • interarrival: Months since previous event

Example:

from runtheory import identify_events

spi_loc = spi.isel(lat=50, lon=100)

# Drought events (negative threshold)
dry_events = identify_events(spi_loc, threshold=-1.2, min_duration=3)
print(f"Found {len(dry_events)} drought events")

# Wet events (positive threshold)
wet_events = identify_events(spi_loc, threshold=+1.2, min_duration=3)
print(f"Found {len(wet_events)} wet events")

calculate_timeseries()

Create month-by-month climate extreme event monitoring time series. Works for both drought (negative threshold) and wet (positive threshold) conditions.

def calculate_timeseries(
    index_timeseries,
    threshold=-1.0
) -> pandas.DataFrame

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI time series
threshold float No -1.0 Event threshold (negative for drought, positive for wet)

Returns:

  • pandas.DataFrame with columns:
    • time: Date
    • index_value: SPI/SPEI value
    • is_event: Boolean
    • event_id: Current event number (or 0)
    • duration: Current event duration
    • magnitude_cumulative: Accumulated deficit
    • magnitude_instantaneous: Current month’s severity
    • intensity: magnitude_cumulative / duration

Example:

from runtheory import calculate_timeseries

ts = calculate_timeseries(spi_loc, threshold=-1.2)

# Check current status
current = ts.iloc[-1]
if current['is_event']:
    print(f"IN EVENT: {current['duration']} months")
    print(f"Cumulative magnitude: {current['magnitude_cumulative']:.2f}")

calculate_period_statistics()

Calculate gridded climate extreme event statistics for a time period. Works for both drought (negative threshold) and wet (positive threshold) events.

def calculate_period_statistics(
    index_data,
    threshold=-1.0,
    start_year=None,
    end_year=None,
    min_duration=1
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
index_data xarray.DataArray Yes - Gridded SPI/SPEI (time, lat, lon)
threshold float No -1.0 Event threshold (negative=dry, positive=wet)
start_year int No None Period start (uses all if None)
end_year int No None Period end (uses all if None)
min_duration int No 1 Minimum event duration

Returns:

  • xarray.Dataset with variables (lat, lon):
    • num_events: Event count
    • total_event_months: Total months in events
    • total_magnitude: Sum of all magnitudes
    • mean_magnitude: Average per event
    • max_magnitude: Largest event
    • worst_peak: Most severe value
    • mean_intensity: Average intensity
    • max_intensity: Maximum intensity
    • pct_time_in_event: Percentage of time in events

Example:

from runtheory import calculate_period_statistics

# Statistics for 2023
stats_2023 = calculate_period_statistics(
    spi,
    threshold=-1.2,
    start_year=2023,
    end_year=2023
)

# Plot
stats_2023.num_events.plot(title='Dry/Wet Events in 2023')
stats_2023.to_netcdf('output/netcdf/stats_2023.nc')

calculate_annual_statistics()

Calculate period statistics for each year.

def calculate_annual_statistics(
    index_data,
    threshold=-1.0,
    min_duration=1
) -> xarray.Dataset

Parameters: Same as calculate_period_statistics() but no start/end year

Returns:

  • xarray.Dataset with dimensions (year, lat, lon)
    • Same variables as period statistics
    • Additional dimension: year

Example:

from runtheory import calculate_annual_statistics

annual = calculate_annual_statistics(spi, threshold=-1.2)

# Access specific year
stats_2020 = annual.sel(year=2020)

# Time series of regional average
regional_avg = annual.num_events.mean(dim=['lat', 'lon'])
regional_avg.plot()

compare_periods()

Compare event statistics across multiple time periods. Works for both dry (drought) and wet events.

def compare_periods(
    index_data,
    periods,
    period_names=None,
    threshold=-1.0,
    min_duration=1
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
index_data xarray.DataArray Yes - Gridded SPI/SPEI
periods list of tuples Yes - [(start1, end1), (start2, end2), …]
period_names list of str No None Names for each period
threshold float No -1.0 Event threshold (negative=dry, positive=wet)
min_duration int No 1 Minimum event duration

Returns:

  • xarray.Dataset with dimensions (period, lat, lon)
    • Same variables as period statistics
    • Additional dimension: period

Example:

from runtheory import compare_periods

comparison = compare_periods(
    spi,
    periods=[(1991, 2020), (2021, 2024)],
    period_names=['Historical', 'Recent'],
    threshold=-1.2
)

# Calculate change
diff = comparison.sel(period='Recent') - comparison.sel(period='Historical')
diff.num_events.plot(title='Change in Events', cmap='RdBu_r')

summarize_events()

Calculate summary statistics from events DataFrame.

def summarize_events(
    events_df
) -> dict

Parameters:

  • events_df (pandas.DataFrame): Output from identify_events()

Returns:

  • dict with keys:
    • num_events: Total count
    • mean_duration: Average duration
    • max_duration: Longest event
    • mean_magnitude: Average magnitude
    • max_magnitude: Largest magnitude
    • most_severe_peak: Worst peak value
    • mean_interarrival: Average time between events

Example:

from runtheory import identify_events, summarize_events

events = identify_events(spi_loc, threshold=-1.2)
summary = summarize_events(events)
print(f"Total events: {summary['num_events']}")
print(f"Mean duration: {summary['mean_duration']:.1f} months")

Module: visualization

plot_index()

Plot climate index time series with color-coded severity. Works for both dry (drought) and wet (flood) conditions.

def plot_index(
    index_timeseries,
    threshold=-1.0,
    title=None,
    ax=None,
    figsize=(14, 6)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI data
threshold float No -1.0 Event threshold (negative=dry, positive=wet) line
title str No None Plot title
ax matplotlib.axes.Axes No None Existing axes
figsize tuple No (14, 6) Figure size

Returns:

  • matplotlib.figure.Figure: Figure object

Example:

from visualization import plot_index

plot_index(spi_loc, threshold=-1.2,
                   title='SPI-12 Time Series')
plt.savefig('output/plots/single/spi_timeseries.png', dpi=300)

plot_events()

Plot time series with individual events highlighted.

def plot_events(
    index_timeseries,
    events_df,
    threshold=-1.0,
    title=None,
    ax=None,
    figsize=(14, 6)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - Index data
events_df pandas.DataFrame Yes - From identify_events()
threshold float No -1.0 Threshold line
title str No None Plot title
ax matplotlib.axes.Axes No None Existing axes
figsize tuple No (14, 6) Figure size

Returns:

  • matplotlib.figure.Figure

Example:

from visualization import plot_events

events = identify_events(spi_loc, threshold=-1.2)
plot_events(spi_loc, events, threshold=-1.2)
plt.savefig(f'output/plots/single/events_lat{lat}_lon{lon}.png')

plot_event_timeline()

Create 5-panel plot showing event evolution. Works for both dry (drought) and wet conditions.

def plot_event_timeline(
    timeseries_df,
    title=None,
    figsize=(14, 12)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
timeseries_df pandas.DataFrame Yes - From calculate_timeseries()
title str No None Main title
figsize tuple No (14, 12) Figure size

Returns:

  • matplotlib.figure.Figure with 5 panels:
    1. Index value
    2. Duration
    3. Magnitude (cumulative) - blue
    4. Magnitude (instantaneous) - red
    5. Intensity

Example:

from visualization import plot_event_timeline

ts = calculate_timeseries(spi_loc, threshold=-1.2)
plot_event_timeline(ts, title='Event Evolution')
plt.savefig('output/plots/single/timeline.png', dpi=300)

plot_spatial_stats()

Plot spatial map of event statistics. Works for both dry (drought) and wet events.

def plot_spatial_stats(
    stats_dataset,
    variable='num_events',
    title=None,
    cmap=None,
    figsize=(12, 8)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
stats_dataset xarray.Dataset Yes - From calculate_period_statistics()
variable str No ‘num_events’ Variable to plot
title str No None Plot title
cmap str No None Colormap (auto-selected if None)
figsize tuple No (12, 8) Figure size

Returns:

  • matplotlib.figure.Figure

Example:

from visualization import plot_spatial_stats

stats = calculate_period_statistics(spi, start_year=2020, end_year=2024)
plot_spatial_stats(stats, variable='num_events',
                           title='Drought Events 2020-2024')
plt.savefig('output/plots/spatial/stats_2020-2024.png', dpi=300)

generate_location_filename()

Generate consistent filename with location coordinates.

def generate_location_filename(
    base_name,
    lat,
    lon,
    extension='png'
) -> str

Parameters:

Parameter Type Required Default Description
base_name str Yes - File base name
lat float Yes - Latitude
lon float Yes - Longitude
extension str No ‘png’ File extension

Returns:

  • str: Filename like base_name_lat31.82_lon-7.07.png

Example:

from visualization import generate_location_filename

filename = generate_location_filename('dry_events', 31.82, -7.07, 'png')
# Returns: 'dry_events_lat31.82_lon-7.07.png'

full_path = f'output/plots/single/{filename}'
plt.savefig(full_path, dpi=300)

Complete Workflow Example

import sys
sys.path.insert(0, 'src')

import xarray as xr
from indices import spi
from utils import calculate_pet
from runtheory import (identify_events,
                       calculate_period_statistics,
                       compare_periods)
from visualization import (plot_events,
                          plot_spatial_stats,
                          generate_location_filename)

# 1. Load data
precip = xr.open_dataset('input/chirps.nc')['precip']

# 2. Calculate SPI
spi_12 = spi(precip, scale=12, calibration_start_year=1991,
             calibration_end_year=2020)
spi_12.to_netcdf('output/netcdf/spi_12.nc')

# 3. Single location analysis
lat_idx, lon_idx = 50, 100
lat_val = spi_12.lat.values[lat_idx]
lon_val = spi_12.lon.values[lon_idx]
spi_loc = spi_12.isel(lat=lat_idx, lon=lon_idx)

events = identify_events(spi_loc, threshold=-1.2, min_duration=3)
filename = generate_location_filename('dry_events', lat_val, lon_val, 'csv')
events.to_csv(f'output/csv/{filename}')

# 4. Visualize
plot_events(spi_loc, events, threshold=-1.2)
filename = generate_location_filename('plot_events', lat_val, lon_val, 'png')
plt.savefig(f'output/plots/single/{filename}', dpi=300)

# 5. Gridded statistics
stats_2023 = calculate_period_statistics(spi_12, threshold=-1.2,
                                         start_year=2023, end_year=2023)
stats_2023.to_netcdf('output/netcdf/event_stats_2023.nc')

plot_spatial_stats(stats_2023, variable='num_events',
                           title='Dry/Wet Events in 2023')
plt.savefig('output/plots/spatial/stats_2023.png', dpi=300)

# 6. Compare periods
comparison = compare_periods(
    spi_12,
    periods=[(1991, 2020), (2021, 2024)],
    period_names=['Historical', 'Recent']
)

diff = comparison.sel(period='Recent') - comparison.sel(period='Historical')
diff.num_events.plot(title='Change in Dry/Wet Events', cmap='RdBu_r')
plt.savefig('output/plots/spatial/comparison.png', dpi=300)

See Also

Back to top