API Reference

Complete function signatures and parameters for all public functions.

Module: indices

spi()

Calculate Standardized Precipitation Index for monitoring both dry (drought) and wet (flood/excess) conditions.

def spi(
    precip,
    scale,
    periodicity='monthly',
    data_start_year=None,
    calibration_start_year=1991,
    calibration_end_year=2020,
    fitting_params=None,
    return_params=False,
    var_name=None,
    distribution='gamma'
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data with dimensions (time, lat, lon)
scale int Yes - Time scale in months (1, 3, 6, 12, 24, etc.)
periodicity str No ‘monthly’ Temporal resolution (‘monthly’ or ‘daily’)
data_start_year int No None First year of data (auto-detected if None)
calibration_start_year int No 1991 Calibration period start
calibration_end_year int No 2020 Calibration period end
fitting_params dict No None Pre-fitted parameters for operational use
return_params bool No False If True, return (result, params) tuple
var_name str No None Variable name if precip is a Dataset
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’

Returns:

  • xarray.DataArray: SPI values with same dimensions as input
    • Variable name: spi_{distribution}_{scale}_month (e.g., spi_gamma_12_month)
    • Range: typically -3 to +3
    • Negative values: dry conditions (drought)
    • Positive values: wet conditions (flooding/excess)
    • Attributes: scale, distribution, calibration_period

Raises:

  • ValueError: Invalid input dimensions or parameters
  • RuntimeError: Distribution fitting failures

Example:

import xarray as xr
from indices import spi

precip = xr.open_dataset('precip.nc')['precip']

# Default (Gamma distribution)
spi_12 = spi(precip, scale=12)

# With Pearson III distribution
spi_12_p3 = spi(precip, scale=12, distribution='pearson3')

# Save and reuse parameters
spi_12, params = spi(precip, scale=12, return_params=True)

spi_multi_scale()

Calculate SPI for multiple time scales simultaneously.

def spi_multi_scale(
    precip,
    scales,
    distribution='gamma',
    **kwargs
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data
scales list of int Yes - List of time scales [3, 6, 12]
distribution str No ‘gamma’ Distribution type
**kwargs - No - Same parameters as spi()

Returns:

  • xarray.Dataset: Dataset with one variable per scale
    • Variables: spi_{distribution}_{scale}_month (e.g., spi_gamma_3_month, spi_pearson3_12_month)

Example:

from indices import spi_multi_scale

scales = [3, 6, 12]
spi_all = spi_multi_scale(precip, scales=scales)

# With Pearson III
spi_all_p3 = spi_multi_scale(precip, scales=scales, distribution='pearson3')

spei()

Calculate Standardized Precipitation Evapotranspiration Index.

def spei(
    precip,
    pet=None,
    temperature=None,
    latitude=None,
    scale=12,
    periodicity='monthly',
    data_start_year=None,
    calibration_start_year=1991,
    calibration_end_year=2020,
    fitting_params=None,
    return_params=False,
    distribution='gamma',
    pet_method='thornthwaite',
    temp_min=None,
    temp_max=None
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
precip xarray.DataArray Yes - Precipitation data (mm/month)
pet xarray.DataArray No None Potential evapotranspiration (mm/month)
temperature xarray.DataArray No None Mean temperature for PET calculation (if pet not provided)
latitude xarray.DataArray No None Latitude for PET calculation
scale int No 12 Time scale in months
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’. Pearson III recommended for SPEI.
pet_method str No ‘thornthwaite’ PET method: ‘thornthwaite’ or ‘hargreaves’
temp_min xarray.DataArray No None Minimum temperature (required for Hargreaves)
temp_max xarray.DataArray No None Maximum temperature (required for Hargreaves)
All others - - - Same as spi()

Returns:

  • xarray.DataArray: SPEI values
    • Variable name: spei_{distribution}_{scale}_month (e.g., spei_pearson3_12_month)

Example:

from indices import spei

# Default (Gamma with Thornthwaite PET)
spei_12 = spei(precip, pet=pet, scale=12)

# Pearson III (recommended for SPEI)
spei_12 = spei(precip, pet=pet, scale=12, distribution='pearson3')

# Auto-compute PET from temperature (Thornthwaite)
spei_12 = spei(precip, temperature=temp, latitude=lat, scale=12)

# Hargreaves PET method (better for arid regions)
spei_12 = spei(precip, temperature=temp_mean, latitude=lat, scale=12,
               pet_method='hargreaves', temp_min=tmin, temp_max=tmax)

spei_multi_scale()

Calculate SPEI for multiple time scales.

def spei_multi_scale(
    precip,
    pet=None,
    scales=[1, 3, 6, 12],
    distribution='gamma',
    **kwargs
) -> xarray.Dataset

Parameters: Same pattern as spi_multi_scale() but requires pet. Pearson III or Log-Logistic recommended for SPEI.


spi_global()

Calculate SPI for global-scale datasets with automatic memory management.

def spi_global(
    precip_path,
    output_path,
    scale=12,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500,
    var_name=None,
    save_params=True,
    distribution='gamma'
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip_path str Yes - Path to precipitation NetCDF file
output_path str Yes - Path for output SPI NetCDF file
scale int No 12 Accumulation scale in months
periodicity str No ‘monthly’ Temporal resolution
calibration_start_year int No 1991 Calibration period start
calibration_end_year int No 2020 Calibration period end
chunk_size int No 500 Spatial chunk size (lat and lon)
var_name str No None Precipitation variable name (auto-detected)
save_params bool No True Save fitting parameters
distribution str No ‘gamma’ Distribution type: ‘gamma’, ‘pearson3’, ‘log_logistic’, ‘gev’, ‘gen_logistic’

Returns:

  • xarray.Dataset: Dataset with computed SPI

Example:

from indices import spi_global

result = spi_global(
    'global_chirps_monthly.nc',
    'spi_12_global.nc',
    scale=12,
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500
)

spei_global()

Calculate SPEI for global-scale datasets with automatic memory management.

def spei_global(
    precip_path,
    pet_path,
    output_path,
    scale=12,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    chunk_size=500,
    precip_var_name=None,
    pet_var_name=None,
    save_params=True,
    distribution='gamma'
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
precip_path str Yes - Path to precipitation NetCDF file
pet_path str Yes - Path to PET NetCDF file
output_path str Yes - Path for output SPEI NetCDF file
scale int No 12 Accumulation scale
chunk_size int No 500 Spatial chunk size
distribution str No ‘gamma’ Distribution type. Pearson III or Log-Logistic recommended for SPEI.
All others - - - Same as spi_global()

Returns:

  • xarray.Dataset: Dataset with computed SPEI

Example:

from indices import spei_global

result = spei_global(
    'global_precip.nc',
    'global_pet.nc',
    'spei_12_global.nc',
    scale=12,
    chunk_size=500
)

estimate_memory_requirements()

Estimate memory requirements before running computation.

def estimate_memory_requirements(
    precip,
    var_name=None,
    available_memory_gb=None
) -> dict

Parameters:

Parameter Type Required Default Description
precip str or xarray.DataArray Yes - Precipitation data or path
var_name str No None Variable name (for NetCDF path)
available_memory_gb float No None Override system memory detection

Returns:

  • dict with keys:
    • input_size_gb: Input data size
    • peak_memory_gb: Estimated peak memory
    • recommended_chunk_size: Suggested chunk dimensions
    • fits_in_memory: Boolean
    • recommendation: Human-readable advice

Example:

from indices import estimate_memory_requirements

mem = estimate_memory_requirements('global_chirps.nc')
print(f"Peak memory: {mem['peak_memory_gb']:.1f} GB")
print(f"Recommended chunk: {mem['recommended_chunk_size']}")
print(f"Recommendation: {mem['recommendation']}")

Module: chunked

ChunkedProcessor

Main class for memory-efficient chunked processing.

class ChunkedProcessor:
    def __init__(
        self,
        chunk_lat=500,
        chunk_lon=500,
        n_workers=None,
        temp_dir=None,
        verbose=True
    )

Parameters:

Parameter Type Required Default Description
chunk_lat int No 500 Chunk size in latitude dimension
chunk_lon int No 500 Chunk size in longitude dimension
n_workers int No None Number of parallel workers
temp_dir str No None Temporary directory for intermediate files
verbose bool No True Print progress messages

Methods:

compute_spi_chunked()

def compute_spi_chunked(
    self,
    precip,
    output_path,
    scale,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    var_name=None,
    save_params=True,
    params_path=None,
    compress=True,
    complevel=4,
    callback=None,
    distribution='gamma'
) -> xarray.Dataset

compute_spei_chunked()

def compute_spei_chunked(
    self,
    precip,
    pet,
    output_path,
    scale,
    periodicity='monthly',
    calibration_start_year=1991,
    calibration_end_year=2020,
    precip_var_name=None,
    pet_var_name=None,
    save_params=True,
    params_path=None,
    compress=True,
    complevel=4,
    callback=None,
    distribution='gamma'
) -> xarray.Dataset

Example:

from chunked import ChunkedProcessor

# Create processor
processor = ChunkedProcessor(chunk_lat=500, chunk_lon=500)

# Define progress callback
def progress(current, total, message):
    print(f"[{current}/{total}] {message}")

# Run computation
result = processor.compute_spi_chunked(
    precip='global_precip.nc',
    output_path='spi_12_global.nc',
    scale=12,
    calibration_start_year=1991,
    calibration_end_year=2020,
    save_params=True,
    callback=progress
)

estimate_memory()

Estimate memory requirements for a dataset.

def estimate_memory(
    n_time,
    n_lat,
    n_lon,
    dtype=np.float64,
    available_memory_gb=None
) -> MemoryEstimate

Parameters:

Parameter Type Required Default Description
n_time int Yes - Number of time steps
n_lat int Yes - Number of latitude points
n_lon int Yes - Number of longitude points
dtype numpy.dtype No float64 Data type
available_memory_gb float No None Override system memory

Returns:

  • MemoryEstimate named tuple with:
    • input_size_bytes: Raw input size
    • peak_memory_bytes: Estimated peak
    • available_bytes: System available memory
    • fits_in_memory: Boolean
    • recommended_chunk_lat: Suggested chunk size
    • recommended_chunk_lon: Suggested chunk size

Example:

from chunked import estimate_memory

mem = estimate_memory(528, 2160, 4320)
print(f"Input: {mem.input_size_bytes / 1e9:.1f} GB")
print(f"Peak: {mem.peak_memory_bytes / 1e9:.1f} GB")
print(f"Fits in memory: {mem.fits_in_memory}")

Module: utils

calculate_pet()

Calculate potential evapotranspiration using Thornthwaite or Hargreaves-Samani method.

def calculate_pet(
    temperature,
    latitude,
    data_start_year,
    method='thornthwaite',
    temp_min=None,
    temp_max=None
) -> xarray.DataArray

Parameters:

Parameter Type Required Default Description
temperature xarray.DataArray or numpy.ndarray Yes - Monthly mean temperature data (C)
latitude float or array Yes - Latitude in degrees
data_start_year int Yes - First year of the temperature data
method str No ‘thornthwaite’ PET method: ‘thornthwaite’ or ‘hargreaves’
temp_min xarray.DataArray or numpy.ndarray No None Monthly minimum temperature (required for Hargreaves)
temp_max xarray.DataArray or numpy.ndarray No None Monthly maximum temperature (required for Hargreaves)

Returns:

  • xarray.DataArray: PET values (mm/month)
    • Attributes include method name and reference

Method Comparison:

Method Inputs Best For Reference
Thornthwaite T_mean, latitude Humid regions, quick estimates Thornthwaite (1948)
Hargreaves T_mean, T_min, T_max, latitude Arid/semi-arid regions Hargreaves & Samani (1985)

Example:

from utils import calculate_pet

# Thornthwaite method (default)
pet = calculate_pet(temp_mean, latitude=lat, data_start_year=1958)

# Hargreaves method (better for arid regions)
pet = calculate_pet(
    temp_mean, latitude=lat, data_start_year=1958,
    method='hargreaves',
    temp_min=tmin,
    temp_max=tmax
)

eto_thornthwaite()

Low-level function to calculate PET using the Thornthwaite equation.

def eto_thornthwaite(
    temperature_celsius,
    latitude_degrees,
    data_start_year
) -> numpy.ndarray

Parameters:

Parameter Type Required Description
temperature_celsius numpy array Yes Monthly mean temperatures (C)
latitude_degrees float Yes Latitude in degrees (-90 to 90)
data_start_year int Yes First year of data

Returns:

  • numpy.ndarray: PET values (mm/month)

eto_hargreaves()

Low-level function to calculate PET using the Hargreaves-Samani equation.

def eto_hargreaves(
    temp_mean_celsius,
    temp_min_celsius,
    temp_max_celsius,
    latitude_degrees,
    data_start_year
) -> numpy.ndarray

Parameters:

Parameter Type Required Description
temp_mean_celsius numpy array Yes Monthly mean temperatures (C)
temp_min_celsius numpy array Yes Monthly minimum temperatures (C)
temp_max_celsius numpy array Yes Monthly maximum temperatures (C)
latitude_degrees float Yes Latitude in degrees (-90 to 90)
data_start_year int Yes First year of data

Returns:

  • numpy.ndarray: PET values (mm/month)

Note: The Hargreaves equation:

\[PET = 0.0023 \times R_a \times (T_{mean} + 17.8) \times \sqrt{T_{max} - T_{min}}\]

where \(R_a\) is extraterrestrial radiation calculated from latitude and day of year.


get_optimal_chunk_size()

Calculate optimal chunk dimensions based on available memory.

def get_optimal_chunk_size(
    n_time,
    n_lat,
    n_lon,
    available_memory_gb=None,
    memory_multiplier=12.0,
    safety_factor=0.7
) -> Tuple[int, int]

Parameters:

Parameter Type Required Default Description
n_time int Yes - Number of time steps
n_lat int Yes - Number of latitude points
n_lon int Yes - Number of longitude points
available_memory_gb float No None Override system memory
memory_multiplier float No 12.0 Peak memory multiplier
safety_factor float No 0.7 Memory safety margin

Returns:

  • Tuple[int, int]: (chunk_lat, chunk_lon)

Example:

from utils import get_optimal_chunk_size

chunk_lat, chunk_lon = get_optimal_chunk_size(528, 2160, 4320)
print(f"Optimal chunk size: {chunk_lat} x {chunk_lon}")

format_bytes()

Format byte count as human-readable string.

def format_bytes(n_bytes: int) -> str

Example:

from utils import format_bytes

print(format_bytes(1073741824))  # "1.00 GB"
print(format_bytes(536870912))   # "512.00 MB"

get_array_memory_size()

Calculate memory footprint of an array.

def get_array_memory_size(
    shape: Tuple[int, ...],
    dtype=np.float64
) -> int

Returns:

  • int: Size in bytes

Example:

from utils import get_array_memory_size

size = get_array_memory_size((528, 2160, 4320), np.float32)
print(f"Array size: {size / 1e9:.2f} GB")

Module: runtheory

identify_events()

Identify complete climate extreme events from time series. Works for both dry (drought) and wet (flood/excess) events based on threshold direction.

def identify_events(
    index_timeseries,
    threshold=-1.0,
    min_duration=1
) -> pandas.DataFrame

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI time series (single location)
threshold float No -1.0 Event threshold (negative for drought, positive for wet)
min_duration int No 1 Minimum event duration (months)

Returns:

  • pandas.DataFrame with columns:
    • event_id: Event number (1, 2, 3, …)
    • start_date: Event start
    • end_date: Event end
    • duration: Months
    • magnitude: Cumulative deficit (dry events) or surplus (wet events)
    • intensity: magnitude / duration
    • peak: Most extreme SPI/SPEI value (minimum for dry events, maximum for wet events)
    • peak_date: When peak occurred
    • interarrival: Months since previous event

Example:

from runtheory import identify_events

spi_loc = spi.isel(lat=50, lon=100)

# Drought events (negative threshold)
dry_events = identify_events(spi_loc, threshold=-1.2, min_duration=3)
print(f"Found {len(dry_events)} drought events")

# Wet events (positive threshold)
wet_events = identify_events(spi_loc, threshold=+1.2, min_duration=3)
print(f"Found {len(wet_events)} wet events")

calculate_timeseries()

Create month-by-month climate extreme event monitoring time series. Works for both drought (negative threshold) and wet (positive threshold) conditions.

def calculate_timeseries(
    index_timeseries,
    threshold=-1.0
) -> pandas.DataFrame

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI time series
threshold float No -1.0 Event threshold (negative for drought, positive for wet)

Returns:

  • pandas.DataFrame with columns:
    • time: Date
    • index_value: SPI/SPEI value
    • is_event: Boolean
    • event_id: Current event number (or 0)
    • duration: Current event duration
    • magnitude_cumulative: Accumulated deficit
    • magnitude_instantaneous: Current month’s severity
    • intensity: magnitude_cumulative / duration

Example:

from runtheory import calculate_timeseries

ts = calculate_timeseries(spi_loc, threshold=-1.2)

# Check current status
current = ts.iloc[-1]
if current['is_event']:
    print(f"IN EVENT: {current['duration']} months")
    print(f"Cumulative magnitude: {current['magnitude_cumulative']:.2f}")

calculate_period_statistics()

Calculate gridded climate extreme event statistics for a time period. Works for both drought (negative threshold) and wet (positive threshold) events.

def calculate_period_statistics(
    index_data,
    threshold=-1.0,
    start_year=None,
    end_year=None,
    min_duration=1
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
index_data xarray.DataArray Yes - Gridded SPI/SPEI (time, lat, lon)
threshold float No -1.0 Event threshold (negative=dry, positive=wet)
start_year int No None Period start (uses all if None)
end_year int No None Period end (uses all if None)
min_duration int No 1 Minimum event duration

Returns:

  • xarray.Dataset with variables (lat, lon):
    • num_events: Event count
    • total_event_months: Total months in events
    • total_magnitude: Sum of all magnitudes
    • mean_magnitude: Average per event
    • max_magnitude: Largest event
    • worst_peak: Most severe value
    • mean_intensity: Average intensity
    • max_intensity: Maximum intensity
    • pct_time_in_event: Percentage of time in events

Example:

from runtheory import calculate_period_statistics

# Statistics for 2023
stats_2023 = calculate_period_statistics(
    spi,
    threshold=-1.2,
    start_year=2023,
    end_year=2023
)

# Plot
stats_2023.num_events.plot(title='Dry/Wet Events in 2023')
stats_2023.to_netcdf('output/netcdf/stats_2023.nc')

calculate_annual_statistics()

Calculate period statistics for each year.

def calculate_annual_statistics(
    index_data,
    threshold=-1.0,
    min_duration=1
) -> xarray.Dataset

Parameters: Same as calculate_period_statistics() but no start/end year

Returns:

  • xarray.Dataset with dimensions (year, lat, lon)
    • Same variables as period statistics
    • Additional dimension: year

Example:

from runtheory import calculate_annual_statistics

annual = calculate_annual_statistics(spi, threshold=-1.2)

# Access specific year
stats_2020 = annual.sel(year=2020)

# Time series of regional average
regional_avg = annual.num_events.mean(dim=['lat', 'lon'])
regional_avg.plot()

compare_periods()

Compare event statistics across multiple time periods. Works for both dry (drought) and wet events.

def compare_periods(
    index_data,
    periods,
    period_names=None,
    threshold=-1.0,
    min_duration=1
) -> xarray.Dataset

Parameters:

Parameter Type Required Default Description
index_data xarray.DataArray Yes - Gridded SPI/SPEI
periods list of tuples Yes - [(start1, end1), (start2, end2), …]
period_names list of str No None Names for each period
threshold float No -1.0 Event threshold (negative=dry, positive=wet)
min_duration int No 1 Minimum event duration

Returns:

  • xarray.Dataset with dimensions (period, lat, lon)
    • Same variables as period statistics
    • Additional dimension: period

Example:

from runtheory import compare_periods

comparison = compare_periods(
    spi,
    periods=[(1991, 2020), (2021, 2024)],
    period_names=['Historical', 'Recent'],
    threshold=-1.2
)

# Calculate change
diff = comparison.sel(period='Recent') - comparison.sel(period='Historical')
diff.num_events.plot(title='Change in Events', cmap='RdBu_r')

summarize_events()

Calculate summary statistics from events DataFrame.

def summarize_events(
    events_df
) -> dict

Parameters:

  • events_df (pandas.DataFrame): Output from identify_events()

Returns:

  • dict with keys:
    • num_events: Total count
    • mean_duration: Average duration
    • max_duration: Longest event
    • mean_magnitude: Average magnitude
    • max_magnitude: Largest magnitude
    • most_severe_peak: Worst peak value
    • mean_interarrival: Average time between events

Example:

from runtheory import identify_events, summarize_events

events = identify_events(spi_loc, threshold=-1.2)
summary = summarize_events(events)
print(f"Total events: {summary['num_events']}")
print(f"Mean duration: {summary['mean_duration']:.1f} months")

Module: visualization

plot_index()

Plot climate index time series with color-coded severity. Works for both dry (drought) and wet (flood) conditions.

def plot_index(
    index_timeseries,
    threshold=-1.0,
    title=None,
    ax=None,
    figsize=(14, 6)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - SPI/SPEI data
threshold float No -1.0 Event threshold (negative=dry, positive=wet) line
title str No None Plot title
ax matplotlib.axes.Axes No None Existing axes
figsize tuple No (14, 6) Figure size

Returns:

  • matplotlib.figure.Figure: Figure object

Example:

from visualization import plot_index

plot_index(spi_loc, threshold=-1.2,
                   title='SPI-12 Time Series')
plt.savefig('output/plots/single/spi_timeseries.png', dpi=300)

plot_events()

Plot time series with individual events highlighted.

def plot_events(
    index_timeseries,
    events_df,
    threshold=-1.0,
    title=None,
    ax=None,
    figsize=(14, 6)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
index_timeseries xarray.DataArray or pandas.Series Yes - Index data
events_df pandas.DataFrame Yes - From identify_events()
threshold float No -1.0 Threshold line
title str No None Plot title
ax matplotlib.axes.Axes No None Existing axes
figsize tuple No (14, 6) Figure size

Returns:

  • matplotlib.figure.Figure

Example:

from visualization import plot_events

events = identify_events(spi_loc, threshold=-1.2)
plot_events(spi_loc, events, threshold=-1.2)
plt.savefig(f'output/plots/single/events_lat{lat}_lon{lon}.png')

plot_event_timeline()

Create 5-panel plot showing event evolution. Works for both dry (drought) and wet conditions.

def plot_event_timeline(
    timeseries_df,
    title=None,
    figsize=(14, 12)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
timeseries_df pandas.DataFrame Yes - From calculate_timeseries()
title str No None Main title
figsize tuple No (14, 12) Figure size

Returns:

  • matplotlib.figure.Figure with 5 panels:
    1. Index value
    2. Duration
    3. Magnitude (cumulative) - blue
    4. Magnitude (instantaneous) - red
    5. Intensity

Example:

from visualization import plot_event_timeline

ts = calculate_timeseries(spi_loc, threshold=-1.2)
plot_event_timeline(ts, title='Event Evolution')
plt.savefig('output/plots/single/timeline.png', dpi=300)

plot_spatial_stats()

Plot spatial map of event statistics. Works for both dry (drought) and wet events.

def plot_spatial_stats(
    stats_dataset,
    variable='num_events',
    title=None,
    cmap=None,
    figsize=(12, 8)
) -> matplotlib.figure.Figure

Parameters:

Parameter Type Required Default Description
stats_dataset xarray.Dataset Yes - From calculate_period_statistics()
variable str No ‘num_events’ Variable to plot
title str No None Plot title
cmap str No None Colormap (auto-selected if None)
figsize tuple No (12, 8) Figure size

Returns:

  • matplotlib.figure.Figure

Example:

from visualization import plot_spatial_stats

stats = calculate_period_statistics(spi, start_year=2020, end_year=2024)
plot_spatial_stats(stats, variable='num_events',
                           title='Drought Events 2020-2024')
plt.savefig('output/plots/spatial/stats_2020-2024.png', dpi=300)

generate_location_filename()

Generate consistent filename with location coordinates.

def generate_location_filename(
    base_name,
    lat,
    lon,
    extension='png'
) -> str

Parameters:

Parameter Type Required Default Description
base_name str Yes - File base name
lat float Yes - Latitude
lon float Yes - Longitude
extension str No ‘png’ File extension

Returns:

  • str: Filename like base_name_lat31.82_lon-7.07.png

Example:

from visualization import generate_location_filename

filename = generate_location_filename('dry_events', 31.82, -7.07, 'png')
# Returns: 'dry_events_lat31.82_lon-7.07.png'

full_path = f'output/plots/single/{filename}'
plt.savefig(full_path, dpi=300)

Complete Workflow Example

import sys
sys.path.insert(0, 'src')

import xarray as xr
from indices import spi
from utils import calculate_pet
from runtheory import (identify_events,
                       calculate_period_statistics,
                       compare_periods)
from visualization import (plot_events,
                          plot_spatial_stats,
                          generate_location_filename)

# 1. Load data
precip = xr.open_dataset('input/chirps.nc')['precip']

# 2. Calculate SPI
spi_12 = spi(precip, scale=12, calibration_start_year=1991,
             calibration_end_year=2020)
spi_12.to_netcdf('output/netcdf/spi_12.nc')

# 3. Single location analysis
lat_idx, lon_idx = 50, 100
lat_val = spi_12.lat.values[lat_idx]
lon_val = spi_12.lon.values[lon_idx]
spi_loc = spi_12.isel(lat=lat_idx, lon=lon_idx)

events = identify_events(spi_loc, threshold=-1.2, min_duration=3)
filename = generate_location_filename('dry_events', lat_val, lon_val, 'csv')
events.to_csv(f'output/csv/{filename}')

# 4. Visualize
plot_events(spi_loc, events, threshold=-1.2)
filename = generate_location_filename('plot_events', lat_val, lon_val, 'png')
plt.savefig(f'output/plots/single/{filename}', dpi=300)

# 5. Gridded statistics
stats_2023 = calculate_period_statistics(spi_12, threshold=-1.2,
                                         start_year=2023, end_year=2023)
stats_2023.to_netcdf('output/netcdf/event_stats_2023.nc')

plot_spatial_stats(stats_2023, variable='num_events',
                           title='Dry/Wet Events in 2023')
plt.savefig('output/plots/spatial/stats_2023.png', dpi=300)

# 6. Compare periods
comparison = compare_periods(
    spi_12,
    periods=[(1991, 2020), (2021, 2024)],
    period_names=['Historical', 'Recent']
)

diff = comparison.sel(period='Recent') - comparison.sel(period='Historical')
diff.num_events.plot(title='Change in Dry/Wet Events', cmap='RdBu_r')
plt.savefig('output/plots/spatial/comparison.png', dpi=300)

See Also

Back to top