Capturing a Heartbeat from an Image ¶
Introduction ¶
My life was changed. There I stood in a dark doctor's office with my wife, staring at a staticky screen and listening to a gentle "bubum... bubum..." through speakers mounted in the corners of the room. It was my son's heartbeat, and my first time hearing it. This was at our 8-week ultrasound, and he was about the size of a blueberry. After this -- the most profound moment of my life so far -- they sent us home with a cheap roll of grainy images to help us remember the moment, but I would never hear the sound of that heartbeat -- the first sound I heard from him -- again. But I could get close...
One of the images they sent home with us had the sound wave of his heartbeat printed on it, along with a scale for amplitude and the label "133bpm."
In this project, I'll be taking that image and reproducing the sound of my son's first heartbeat.
About the Data ¶
The data for this project is the image of the sound wave produced during my son's first ultrasound -- more specifically, an iPhone picture of the take-home printout provided by the ultrasound technician.
Methods ¶
My approach to extracting the sound from the image is fairly straightforward: scan over the pixels of the image from left to right, recording the min and max row indices where the image changes from black to white and vice versa. Then I interpolate a sound wave between the mins and maxes with a cubic spline, resample it at the playback rate of a WAV audio file, and stretch it in time so that it plays at the desired 133 beats per minute.
Now that I've written it out, maybe the process isn't quite so straightforward, so let's dive in.
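Before we get to the real code, here's a minimal sketch of the whole pipeline as one function. The helper names here are hypothetical placeholders for the pieces we'll build in the sections below, not actual functions from any library.
# Hypothetical outline of the pipeline implemented in this post.
# Each helper is a placeholder for a step defined in the sections below.
def image_to_heartbeat(path, bpm=133, playback_rate=48000):
    img = load_and_binarize(path)        # grayscale + binary threshold
    img = remove_x_axis(img)             # filter out the bright axis rows
    signal = extract_signal(img)         # pixel transitions -> amplitude points
    signal = remove_outliers(signal)     # DBSCAN keeps the dense "main" cluster
    wave = smooth_and_stretch(signal, bpm, playback_rate)  # cubic spline + time-stretch
    write_wav(wave, playback_rate)       # int16 WAV output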
Packages and Setup ¶
For processing the image, we'll rely on the OpenCV library, and on numpy for matrix operations on the raw pixel values. We'll also use DBSCAN clustering to identify and remove outliers and noise that might carry over from the image to the sound.
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler
from scipy.io.wavfile import write
from scipy.interpolate import interp1d
Image Processing ¶
To prepare the image for signal extraction, we'll need to first rotate the image so that the x-axis of the sound wave is horizontal. Then we'll crop the image to contain two full periods of the heartbeat's signal and use binary thresholding to convert the image to black and white.
img = cv2.imread('./img/hb_2.jpg')
img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
# OpenCV loads images as BGR; convert to RGB so matplotlib shows the colors correctly
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.title("Original Image")
plt.imshow(img)
plt.show()
def rotate_image(image, angle):
    """Rotate the image by `angle` degrees about its center."""
    image_center = tuple(np.array(image.shape[1::-1]) / 2)
    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
    result = cv2.warpAffine(image, rot_mat, image.shape[1::-1], flags=cv2.INTER_LINEAR)
    return result

def crop_image(image):
    """Crop to a window containing two full heartbeat periods."""
    return image[1900:2700, 200:2000]
img = cv2.imread('./img/hb_2.jpg', cv2.IMREAD_GRAYSCALE)
ret, img = cv2.threshold(img, 75, 255, cv2.THRESH_BINARY)
img = rotate_image(img, 92)
img = crop_image(img)
plt.title("Grayscaled, cropped, binary thresholding")
plt.imshow(img)
plt.show()
The image is almost ready for processing; however, the x-axis shows prominently as a thick horizontal line. This will add background noise to the final sound output, so we'll try to remove it.
To do this, we'll make a simple detector for rows of pixels with high illumination. The detector will take the average pixel value for each row of the image, and if the average exceeds a threshold, we'll treat that row as part of the axis and erase it. To determine the threshold, let's take a look at a visualization of the average pixel values by row.
avg_vals = []
for row in img:
    avg_vals.append(row.mean())
plt.plot(avg_vals)
plt.show()
The spike in the middle represents the pixels of the x-axis, and while they average around 250 in brightness, the surrounding rows don't exceed 200, so we can set our threshold to 200 and filter out the x-axis from the image.
filtered_img = np.empty((0, img.shape[1]))
for i in range(img.shape[0]):
    row = img[i]
    if row.mean() < 200:
        filtered_img = np.append(filtered_img, row.reshape(1, -1), axis=0)
    else:
        # axis row detected: replace it with a copy of the previous (already-filtered) row
        filtered_img = np.append(filtered_img, filtered_img[i-1].reshape(1, -1), axis=0)
filtered_img.shape
(800, 1800)
Let's take a look at how our filtering worked out.
plt.imshow(filtered_img)
plt.show()
The filter worked fairly well. There are still some areas around the x-axis with brighter pixel values, but our final signal should be much cleaner now.
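As an aside, the same row filter can be written in a vectorized form with numpy, which avoids growing the array one row at a time inside a loop. This is a minimal sketch, assuming the same 200 threshold and the same replace-with-previous-row behavior as above.
# Vectorized sketch of the x-axis filter (same 200 threshold as above).
# Rows whose mean brightness exceeds the threshold are replaced with
# the nearest preceding "good" row instead of being appended one by one.
bright = img.mean(axis=1) >= 200           # True for rows belonging to the x-axis
filtered_img = img.astype(float)
for i in np.where(bright)[0]:
    filtered_img[i] = filtered_img[i - 1]  # copy the previous (already-filtered) row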
Signal Extraction ¶
Now let's implement the pipeline for signal extraction. As explained in the Methods section, our algorithm will scan the image from left to right, recording the row indices where the pixel values change from black to white and vice versa.
def extract_signal(img):
    """Estimate signal amplitude from image of sound wave.

    For each column of the binarized image array, scan pixels
    and append indices to the signal if the current pixel
    and previous pixel have different values.

    Args:
        img (np.ndarray): numpy representation of image pixels

    Returns:
        (np.ndarray): Indices of pixel value changes, estimating
            sound signal amplitude.
    """
    # init empty signal
    signal = np.empty((0, 2))
    for j in range(img.shape[1]):          # cols
        for i in range(1, img.shape[0]):   # rows (start at 1 so we don't wrap around to the last row)
            prev = img[i-1, j]
            curr = img[i, j]
            if prev != curr:  # pixel value changed: record (column, flipped row index)
                signal = np.append(signal, np.array([[j, img.shape[0] - i]]), axis=0)
    return signal
def interpolate_nan(signal):
    """Linearly interpolate nan and inf values in the
    extracted signal.
    """
    inds = np.arange(signal.shape[0])
    good = np.where(np.isfinite(signal))
    f = interp1d(inds[good], signal[good], bounds_error=False)
    out = np.where(np.isfinite(signal), signal, f(inds))
    return out
def extract_signal_alternate(img):
    """An alternate extraction method that ensures that
    sampled points alternate between crest and trough.

    Note:
        Not used. The resulting signal is "too" sinusoidal, so it
        just produces beeps rather than the more staticky
        beats as they actually sounded.
    """
    def scan(col, direction):
        # walk down (or up) a column until the first non-zero pixel
        col_len = col.shape[0]
        if direction == 'down':
            i = 0
            while i < col_len:
                if col[i] != 0:
                    break
                i += 1
        else:
            i = col_len - 1
            while i >= 0:
                if col[i] != 0:
                    break
                i -= 1
        return i if i not in [-1, col_len] else np.nan

    # init empty signal
    signal = np.empty((0, 2))
    for j in range(img.shape[1]):  # cols
        col = img[:, j]
        top = scan(col, 'down')
        bottom = scan(col, 'up')
        signal = np.append(signal, np.array([[j, img.shape[0] - top]]), axis=0)
        signal = np.append(signal, np.array([[j, img.shape[0] - bottom]]), axis=0)
    signal[:, 1] = interpolate_nan(signal[:, 1])
    return signal
signal = extract_signal(filtered_img)
plt.title("Extracted signal")
plt.scatter(x=signal[:, 0], y=signal[:, 1])
plt.show()
Above, we can see the key points that will form our signal. However, there are some outlier points that will cause extra noise in the final signal, so our next step will be removing the outliers through density-based clustering.
DBSCAN Noise Removal ¶
Clustering isn't typically used for outlier removal in signal data, but since this signal is produced from an image, it proves quite useful here. By using a density-based clustering algorithm, points that are relatively contiguous will be clustered together, and the outliers will form smaller, isolated clusters. So by implementing DBSCAN and filtering the data for the inliers cluster, we can clean up our final signal.
Since we want to avoid creating many small clusters in favor of creating one "main" cluster, the epsilon and minimum samples values for our DBSCAN implementation will be larger than usual.
# outlier removal with DBSCAN
dbs = DBSCAN(min_samples=10, eps=30)
dbs.fit(signal)
DBSCAN(eps=30, min_samples=10)
df = pd.DataFrame(signal, columns=['x', 'y'])
df['cluster'] = dbs.labels_
plt.title("Original Signal")
for c in df.cluster.unique():
    dummy = df.query(f"cluster == {c}")
    plt.scatter(x=dummy.x, y=dummy.y, label=c)
plt.legend()
plt.show()
Above, you can see that the inliers form cluster "0." The outlier points are also nicely highlighted. Let's filter those out and review what our cleaned signal looks like.
filtered_df = df[df['cluster'].isin([0])].reset_index(drop=True)
plt.title('Outliers Removed')
plt.scatter(x=filtered_df.x, y=filtered_df.y, c=filtered_df.cluster)
plt.show()
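Here we hard-coded cluster 0 as the inlier cluster after eyeballing the plot. If you'd rather not rely on that label, a small sketch like the following picks the most populous cluster automatically (ignoring the -1 label that DBSCAN reserves for noise points):
# Pick the most populous DBSCAN cluster as the "main" signal,
# ignoring the -1 label that DBSCAN reserves for noise points.
labels = pd.Series(dbs.labels_)
main_cluster = labels[labels != -1].value_counts().idxmax()
filtered_df = df[df['cluster'] == main_cluster].reset_index(drop=True)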
Alright, now we have the peak and valley points of our heartbeat signal. In the next steps, we'll downsample this to a smaller number of data points, and then re-upsample the signal into a waveform using cubic spline interpolation.
Signal Scaling and Interpolation ¶
Let's normalize the soon-to-be audio by scaling the data points between 0 and 1. Then we'll downsample by keeping every third data point, which also helps ensure the alternation between peaks and valleys in our final sound wave.
filtered_df['amplitude'] = MinMaxScaler().fit_transform(filtered_df['y'].to_numpy().reshape(-1,1))
processed_signal = filtered_df[['amplitude']].to_numpy()
processed_signal.shape
(16624, 1)
# downsample
downsampled_signal = processed_signal[::3, :]
downsampled_signal.shape
(5542, 1)
Let's take a look at the downsampled data as a time series signal.
plt.plot(downsampled_signal)
plt.show()
If we listened to this signal as audio, we would immediately notice a few things. First, the sound would be high-pitched, and the beats would be much faster than 133 beats per minute, so we'll need to slow down the audio by reducing the frequency, or increasing the period, of the signal. Second, the sound would be very "sharp" -- there would be excessive high end in the audio. We can mitigate this by smoothing the signal with cubic spline interpolation as we re-upsample the audio, essentially turning sharp corners in the signal into smooth curves.
We'll start by calculating how much we need to slow down and upsample the signal, based on the 48 kHz playback rate we'll use for the WAV file and our desired 133 beats per minute.
baby_bpm = 133
playback_rate = 48000  # WAV playback rate in Hz
n_samples = downsampled_signal.shape[0]
beats_in_photo = 2

# at the raw playback rate, how fast would the two beats in the photo play?
curr_elapsed_time_minutes = (n_samples / playback_rate) / 60
curr_bpm = beats_in_photo / curr_elapsed_time_minutes
slowdown_factor = curr_bpm / baby_bpm
print(f"Current BPM: {curr_bpm:0.2f}. Desired BPM: {baby_bpm}. Slowdown factor: {slowdown_factor:0.2f}.")
Current BPM: 1039.34. Desired BPM: 133. Slowdown factor: 7.81.
So now we know by how much we need to slow down our audio, but first we'll set up the interpolation for smoothing.
interp = interp1d(x=range(downsampled_signal.shape[0]), y=downsampled_signal, kind='cubic', axis=0)
And now to slow it down...
adj_signal = []
for i in np.arange(0, downsampled_signal.shape[0] - 1, 1/slowdown_factor):
    adj_signal.append(interp(i))
adj_signal = np.array(adj_signal)
adj_signal.shape
(43301, 1)
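Since interp1d accepts array inputs, the same resampling can also be done in one vectorized call rather than a Python loop. A small equivalent sketch:
# Equivalent vectorized resampling: evaluate the spline at all
# fractional sample positions at once instead of one at a time.
positions = np.arange(0, downsampled_signal.shape[0] - 1, 1/slowdown_factor)
adj_signal = interp(positions)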
One of our final steps is to center the start of the signal at zero, or silence. This will help mitigate any background hum.
center = adj_signal[:30].mean() # beginning of the signal should pretty much be silent, or 0
centered_signal = adj_signal - center
Let's take a look at the final signal!
plt.title("Smoothed (cubic spline interpolation) and time-adjusted sound wave")
plt.plot(centered_signal)
plt.show()
Finally, let's write our signal out to a WAV audio file.
Audio Output ¶
# repeat the two-beat clip: doubling 4 times yields 16 copies (~14 seconds of audio)
upsample = centered_signal
for i in range(4):
    upsample = np.append(upsample, upsample, axis=0)
upsample.shape
(692816, 1)
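For what it's worth, numpy's tile produces the same repetition in a single call:
# Same result as the doubling loop: 16 end-to-end copies of the clip
upsample = np.tile(centered_signal, (16, 1))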
volume_reduction_factor = 0.1
bit_adj = 32767  # max value of a signed 16-bit integer
# normalize to [-1, 1], reduce the volume, and scale to the int16 range for WAV output
scaled = np.int16(upsample * volume_reduction_factor / np.max(np.abs(upsample)) * bit_adj)
ts = dt.datetime.now()
ts_str = ts.strftime("%Y%m%d%H%M%S")
file_name = f'./audio/heartbeat_{ts_str}.wav'
write(file_name, playback_rate, scaled)
And here it is...
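If you're following along in a notebook, you can play the result inline with IPython's Audio widget:
# Play the generated WAV inline in a Jupyter notebook
from IPython.display import Audio
Audio(file_name)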
There are earlier versions of the audio in the same repository, so you can check them out to hear how some of the signal processing we walked through helped clean up the final audio. Enjoy!
Conclusion ¶
Listening to the sound of my son's first audible heartbeats was one of the most important moments of my life, and with this project I was able to capture a bit of that magic.
The audio produced isn't exactly the same as the sound we heard in the ultrasound room, but it's very close! To be honest, I didn't expect this experiment to work at all -- it seemed like a pretty silly notion. The fact that it actually worked, and that I can listen to the audio and remember the feeling of that moment, is amazing, and it's why I love working with data. It can do so much more than forecast sales or generate text; it can change people's lives for the better.