Electrocardiogram signals¶
In this notebook we do some signal processing with ECG traces (recordings of electrical activity in the heart.) The data comes from https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:75600/tab/2, and there is a related paper (Novel genetic ensembles of classifiers applied to myocardium dysfunction recognition based on ECG signals, by Paweł Pławiak) at https://doi.org/10.1016/j.swevo.2017.10.002
import numpy as np
import scipy
import scipy.fft
import matplotlib.pyplot as plt
import pandas as pd
We will consider 17 different files. The first one is an ECG trace of a healthy heart; the others each exhibit one of 16 different medical problems. In the original dataset there are many different examples of each type, and the paper discusses various machine learning models that can be used to distinguish between the types. Here we will just do some basic processing on one example of each type.
data_dir = '../data/ecg/'
def make_ecg_dict():
with open(data_dir + 'types.txt') as f:
lines = f.readlines()
ecg_dict = {}
for line in lines:
parts = line.split(',')
i = int(parts[0])
c = parts[1]
n = parts[2].strip()
ecg_dict[i] = (i,c,n)
ecg_dict[c] = (i,c,n)
return ecg_dict
ecg_dict = make_ecg_dict()
def get_ecg_data(n):
index, code, full_name = ecg_dict[n]
data = scipy.io.loadmat(data_dir + code + '.mat')['val']
return (index, code, full_name, data)
get_ecg_data(1)[3].shape
(1, 3600)
def show_ecg(n):
index, code, full_name, data = get_ecg_data(n)
data = (data - np.mean(data))/np.std(data)
indices = np.arange(data.shape[0])
times = indices / 360
peak_indices = scipy.signal.find_peaks(data, height=1)[0]
peak_times = peak_indices / 360
peak_values = data[peak_indices]
trough_indices = scipy.signal.find_peaks(-data, height=1)[0]
trough_times = trough_indices / 360
trough_values = data[trough_indices]
value_range = np.max(np.abs(data))
filter = scipy.signal.butter(4, 0.2, 'high', output='sos')
filtered_data = scipy.signal.sosfilt(filter, data)[500:]
filtered_times = np.arange(len(filtered_data)) / 360
filtered_value_range = np.max(np.abs(filtered_data))
fig, ax = plt.subplots(1,3,figsize=(12,4))
ax[0].set_title(full_name)
ax[0].plot(times, data)
ax[0].scatter(peak_times, peak_values, color='red',s=10)
ax[0].scatter(trough_times, trough_values, color='green',s=10)
ax[0].set_ylim(-1.1*value_range, 1.1*value_range)
ax[1].set_title('High-pass filtered')
ax[1].plot(filtered_times, filtered_data)
ax[1].set_ylim(-1.1*filtered_value_range,1.1*filtered_value_range)
ax[2].set_title('Power spectrum')
frequencies, powers = scipy.signal.periodogram(data, fs=360)
ax[2].plot(frequencies[frequencies < 50],powers[frequencies < 50])
mean_peak_gap = np.mean(np.diff(peak_times))
mean_prominence = scipy.signal.peak_prominences(data, peak_indices)[0].mean()
print(f"Mean peak gap : {mean_peak_gap:.3f}s")
print(f"Mean prominence: {mean_prominence:.3f}")
show_ecg('Bigeminy')
Mean peak gap : 0.394s Mean prominence: 2.929