Compare two different Audio in Python
Do not miss this exclusive book on Binary Tree Problems. Get it now for free.
In this article, we have explored how to compare two different audio in Python using librosa library.
Table of contents:
- Waveforms and domains
- Oboe
- Clarinet
- Time Stretch
- Log Power Spectrogram
- MFCC
Waveforms and domains
Waveform wrt sound represents movement of particles in a gaseous, liquid, or solid medium.
Time domain when refering wrt sound is depection of particles movement with the help of analysis of time.
Frequency domain when refering wrt sound is depection of particles movement with the help of analysis of frequency (explained later).
CODE
#Time Domain
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display
from IPython.display import Audio
import IPython.display as ipd
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
ipd.Audio(y, rate=sr)
plt.figure(figsize=(15, 5))
lr.display.waveplot(y, sr, alpha=0.8)
plt.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio2))
ipd.Audio(y, rate=sr)
plt.figure(figsize=(15, 5))
lr.display.waveplot(y, sr, alpha=0.8)
plt.show()
Output :
In the output of first audio we can predict that the movement of particles wrt time is gradually decreasing. Hence formation of a triangle. While for second audio the movement of particle first increases and then decreases.
Frequency Domain
import numpy as np
import matplotlib.pyplot as plot
from scipy import pi
from scipy.fftpack import fft
import librosa as lr
import librosa.display
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
signalAmplitude = np.sin(y)
plot.subplot(211)
plot.plot(y, signalAmplitude,'bs')
plot.xlabel('time')
plot.ylabel('amplitude')
plot.subplot(212)
plot.magnitude_spectrum(signalAmplitude,Fs=4)
plot.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio2))
signalAmplitude = np.sin(y)
plot.subplot(211)
plot.plot(y, signalAmplitude,'bs')
plot.xlabel('time')
plot.ylabel('amplitude')
plot.subplot(212)
plot.magnitude_spectrum(signalAmplitude,Fs=4)
plot.show()
Output :
From the output we can say that First is having a constant movement of partiles except at one significant point while for Second we are having many back and forth movement of particles.
Oboe
Oboe is an instrument which is placed between the lips and blown which causes both reeds to vibrate against each other. They open and close very rapidly, sending bursts of energy into the air column inside the instrument and causing it to vibrate in sympathy.
We now potray the movement of particles of our audio when passed through oboe.
Code
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display
from IPython.display import Audio
import IPython.display as ipd
import scipy
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
f = np.linspace(0, sr, 4096)
print(y.shape)
X = scipy.fft(y[10000:14096])
X_mag = np.absolute(X)
plt.figure(figsize=(14, 5))
plt.plot(f[:2000], X_mag[:2000]) # magnitude spectrum
plt.xlabel('Frequency (Hz)')
plt.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio2))
f = np.linspace(0, sr, 4096)
print(y.shape)
X = scipy.fft(y[10000:14096])
X_mag = np.absolute(X)
plt.figure(figsize=(14, 5))
plt.plot(f[:2000], X_mag[:2000]) # magnitude spectrum
plt.xlabel('Frequency (Hz)')
plt.show()
Output :
Clarinet
It is a single-reed woodwind instrument used orchestrally and in military and brass bands and possessing a distinguished solo repertory.
We now potray the movement of particles of our audio when passed through clarinet.
CODE
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display
from IPython.display import Audio
import IPython.display as ipd
import scipy
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
f = np.linspace(0, sr, 4096)
ipd.Audio(y, rate=sr)
print(y.shape)
X = scipy.fft(y[10000:14096])
X_mag = np.absolute(X)
plt.figure(figsize=(14, 5))
plt.plot(f[:500], X_mag[:500]) # magnitude spectrum
plt.xlabel('Frequency (Hz)')
plt.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio2))
f = np.linspace(0, sr, 4096)
ipd.Audio(y, rate=sr)
print(y.shape)
X = scipy.fft(y[10000:14096])
X_mag = np.absolute(X)
plt.figure(figsize=(14, 5))
plt.plot(f[:500], X_mag[:500]) # magnitude spectrum
plt.xlabel('Frequency (Hz)')
plt.show()
Output :
Time Stretch
In this code we have increased the time that is twice the speed of original audio and decreses the time as well by half.
CODE
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
y_fast = lr.effects.time_stretch(y, 2.0)
time = np.arange(0,len(y_fast))/sr
fig, ax = plt.subplots()
ax.plot(time,y_fast)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()
y_slow = lr.effects.time_stretch(y, 0.5)
time = np.arange(0,len(y_slow))/sr
fig, ax = plt.subplots()
ax.plot(time,y_slow)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio2))
y_fast = lr.effects.time_stretch(y, 2.0)
time = np.arange(0,len(y_fast))/sr
fig, ax = plt.subplots()
ax.plot(time,y_fast)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()
y_slow = lr.effects.time_stretch(y, 0.5)
time = np.arange(0,len(y_slow))/sr
fig, ax = plt.subplots()
ax.plot(time,y_slow)
ax.set(xlabel='Time(s)',ylabel='sound amplitude')
plt.show()
Output :
Log Power Spectrogram
The Log Power Spectrogram is used to demonstrate the distribution of power across frequency components of a given signal.
As we know, any audio signal can be split into multiple signals having different frequencies or a range of frequencies. This is proved using Fourier Analysis.
CODE
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import librosa as lr
import librosa.display
audio1='First'
y, sr = lr.load('./{}.wav'.format(audio1))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
plt.subplot(4, 2, 2)
lr.display.specshow(D, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Log-frequency power spectrogram')
plt.show()
audio2='Second'
y, sr = lr.load('./{}.wav'.format(audio))
D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
plt.subplot(4, 2, 2)
lr.display.specshow(D, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Log-frequency power spectrogram')
plt.show()
Output :
MFCC
The output of the below code is .txt file which is no included here but uploaded in our GitHub account do give it a look.
Code
import librosa
import numpy as np
RATE = 24000
N_MFCC = 13
def get_wav(language_num):
'''
Load wav file from disk and down-samples to RATE
:param language_num (list): list of file names
:return (numpy array): Down-sampled wav file
'''
y, sr = librosa.load('./{}.wav'.format(language_num))
return(librosa.core.resample(y=y,orig_sr=sr,target_sr=RATE, scale=True))
def to_mfcc(wav):
'''
Converts wav file to Mel Frequency Ceptral Coefficients
:param wav (numpy array): Wav form
:return (2d numpy array: MFCC
'''
return(librosa.feature.mfcc(y=wav, sr=RATE, n_mfcc=N_MFCC))
if __name__ == '__main__':
audio1 = 'First'
X= get_wav(audio1)
X=to_mfcc(X)
c = np.savetxt('file1.txt', X, delimiter =', ')
a = open("file1.txt", 'r')# open file in read mode
audio2 = 'Second'
X= get_wav(audio2)
X=to_mfcc(X)
c = np.savetxt('file2.txt', X, delimiter =', ')
a = open("file2.txt", 'r')# open file in read mode
print("the file contains:")
print(a.read())
With this article at OpenGenus, you must have the complete idea of how to compare to audio in Python using librosa library.
Sign up for FREE 3 months of Amazon Music. YOU MUST NOT MISS.