Source code for gastropy.signal.preprocessing

"""Time-domain EGG signal preprocessing functions.

Provides composable artifact removal functions that operate on raw EGG
signals before bandpass filtering. All functions accept 1D (single
channel) or 2D ``(n_channels, n_samples)`` arrays and return the same
shape.

References
----------
Dalmaijer, E. S. (2025). electrography v1.1.1.
https://github.com/esdalmaijer/electrography

Gharibans, A. A., Smarr, B., Kunkel, D. C., Kriegsfeld, L. J.,
Mousa, H., & Coleman, T. P. (2018). Artifact rejection methodology
enables continuous, noninvasive measurement of gastric myoelectric
activity in ambulatory subjects. *Scientific Reports*, 8, 5019.
https://doi.org/10.1038/s41598-018-23302-9
"""

import numpy as np


[docs] def hampel_filter(data, k=3, n_sigma=3.0): """Remove spike artifacts using the Hampel median identifier. Slides a window of length ``2k+1`` across the signal. Any sample that deviates from the local median by more than ``n_sigma * 1.4826 * MAD`` is replaced by the local median. The factor 1.4826 makes the scaled MAD a consistent estimator of the standard deviation under a Gaussian assumption (Davies & Gather, 1993). Parameters ---------- data : array_like EGG signal(s). Accepts shape ``(n_samples,)`` or ``(n_channels, n_samples)``. k : int, optional Half-window size: each sample is compared to its ``k`` neighbours on either side (window length = ``2k+1``). Default is 3. n_sigma : float, optional Outlier threshold in scaled MAD units. Samples further than ``n_sigma`` from the local median are replaced. Default is 3.0. Returns ------- cleaned : np.ndarray Signal with spike outliers replaced by local medians. Same shape as input. References ---------- Davies, P. L., & Gather, U. (1993). The identification of multiple outliers. *Journal of the American Statistical Association*, 88, 782–792. Dalmaijer, E. S. (2025). electrography v1.1.1. https://github.com/esdalmaijer/electrography Examples -------- >>> import numpy as np >>> from gastropy.signal import hampel_filter >>> sig = np.sin(2 * np.pi * 0.05 * np.arange(0, 300, 0.1)) >>> sig[50] = 100.0 # inject a spike >>> cleaned = hampel_filter(sig) >>> np.abs(cleaned[50]) < 5.0 # spike removed True """ data = np.asarray(data, dtype=float) squeeze = data.ndim == 1 if squeeze: data = data[np.newaxis, :] # (1, n_samples) n_channels, n_samples = data.shape signal = data.copy() for i in range(n_samples): si = max(0, i - k) ei = min(n_samples, i + k + 1) window = data[:, si:ei] med = np.nanmedian(window, axis=1) # (n_channels,) d = np.abs(window - med[:, np.newaxis]) sd = 1.4826 * np.nanmedian(d, axis=1) # (n_channels,) replace = np.abs(data[:, i] - med) > n_sigma * sd signal[replace, i] = med[replace] return signal[0] if squeeze else signal
[docs] def mad_filter(data, n_sigma=3.0): """Remove outliers using a global median absolute deviation filter. Computes a single global median and MAD across the entire signal. Samples deviating by more than ``n_sigma * 1.4826 * MAD`` are replaced by the global median. Faster than ``hampel_filter`` but less adaptive to signal drift. Parameters ---------- data : array_like EGG signal(s). Accepts shape ``(n_samples,)`` or ``(n_channels, n_samples)``. n_sigma : float, optional Outlier threshold in scaled MAD units. Default is 3.0. Returns ------- cleaned : np.ndarray Signal with global outliers replaced by the median. Same shape as input. References ---------- Dalmaijer, E. S. (2025). electrography v1.1.1. https://github.com/esdalmaijer/electrography Examples -------- >>> import numpy as np >>> from gastropy.signal import mad_filter >>> rng = np.random.default_rng(0) >>> sig = rng.standard_normal(1000) >>> sig[200] = 50.0 # inject a large global outlier >>> cleaned = mad_filter(sig) >>> np.abs(cleaned[200]) < 5.0 True """ data = np.asarray(data, dtype=float) squeeze = data.ndim == 1 if squeeze: data = data[np.newaxis, :] signal = data.copy() med = np.nanmedian(signal, axis=1, keepdims=True) # (n_channels, 1) d = np.abs(signal - med) sd = 1.4826 * np.nanmedian(d, axis=1) # (n_channels,) threshold = n_sigma * sd for ch in range(signal.shape[0]): replace = d[ch] > threshold[ch] signal[ch, replace] = med[ch, 0] return signal[0] if squeeze else signal
[docs] def remove_movement_artifacts(data, sfreq, freq=0.05, window=1.0): """Attenuate movement artifacts using an LMMSE Wiener filter. Estimates local signal variance within sliding windows and computes a Wiener-like (LMMSE) correction that suppresses time segments dominated by movement noise while preserving the gastric rhythm. The algorithm: 1. For each sample, compute the local mean ``E[y]`` and variance ``var_y`` in a window of length ``window / freq`` seconds. 2. Estimate the signal-of-interest variance ``var_e`` as the mean of all local variances. 3. Compute the predicted noise contribution:: x_hat = E[y] + clip(var_y - var_e, 0) / max(var_y, var_e) * (y - E[y]) 4. Return the residual ``y - x_hat``. Parameters ---------- data : array_like EGG signal(s). Accepts shape ``(n_samples,)`` or ``(n_channels, n_samples)``. sfreq : float Sampling frequency in Hz. freq : float, optional Centre frequency of interest in Hz. Used to set the window length (``window / freq`` seconds). Default is 0.05 Hz (normogastric centre, 3 cpm). window : float, optional Window length in cycles of ``freq``. Default is 1.0. Returns ------- cleaned : np.ndarray Movement-corrected signal. Same shape as input. Notes ----- The filter always subtracts the estimated noise component ``x_hat``, which contains the local mean ``E[y]``. As a result the output is effectively mean-centered within each sliding window, and any DC offset in the original signal will be removed. A constant (zero-variance) signal maps to all zeros. This is the expected behaviour for EGG preprocessing, where a zero-mean signal is assumed, but should be noted when applying to signals with a meaningful baseline. References ---------- Gharibans, A. A., Smarr, B., Kunkel, D. C., Kriegsfeld, L. J., Mousa, H., & Coleman, T. P. (2018). Artifact rejection methodology enables continuous, noninvasive measurement of gastric myoelectric activity in ambulatory subjects. *Scientific Reports*, 8, 5019. https://doi.org/10.1038/s41598-018-23302-9 Dalmaijer, E. S. (2025). electrography v1.1.1. https://github.com/esdalmaijer/electrography Examples -------- >>> import numpy as np >>> from gastropy.signal import remove_movement_artifacts >>> t = np.arange(0, 300, 0.1) >>> sig = np.sin(2 * np.pi * 0.05 * t) >>> cleaned = remove_movement_artifacts(sig, sfreq=10.0) >>> cleaned.shape == sig.shape True """ data = np.asarray(data, dtype=float) squeeze = data.ndim == 1 if squeeze: data = data[np.newaxis, :] n_channels, n_samples = data.shape # Window length in samples (half-window for centred sliding window) win_sec = window / freq win_half = int((win_sec * sfreq) // 2) si_boundary = win_half ei_boundary = n_samples - win_half # Pass 1: compute local mean and variance at each sample e_y = np.zeros_like(data) var_y = np.zeros_like(data) for i in range(n_samples): si_ = 0 if i < si_boundary else i - win_half ei_ = n_samples if i > ei_boundary else i + win_half segment = data[:, si_:ei_] e_y[:, i] = np.mean(segment, axis=1) var_y[:, i] = np.var(segment, axis=1) # Estimate signal-of-interest variance as mean of local variances var_e = np.mean(var_y, axis=1) # (n_channels,) # Pass 2: compute predicted noise x_hat and subtract x_hat = np.zeros_like(data) for i in range(n_samples): a = var_y[:, i] - var_e a = np.maximum(a, 0.0) b = np.maximum(var_y[:, i], var_e) # When b=0 the signal has zero variance everywhere (e.g. constant # signal): no correction needed, so the weight is 0. with np.errstate(invalid="ignore", divide="ignore"): weight = np.where(b == 0.0, 0.0, a / b) x_hat[:, i] = e_y[:, i] + weight * (data[:, i] - e_y[:, i]) cleaned = data - x_hat return cleaned[0] if squeeze else cleaned
__all__ = ["hampel_filter", "mad_filter", "remove_movement_artifacts"]