Working with Audio Files in Python
import numpy as np
import matplotlib.pyplot as plt
# Apply the 'seaborn-v0_8-whitegrid' style sheet to every plot created after this point.
plt.style.use('seaborn-v0_8-whitegrid')
# Explicitly switch axis grid lines on by default, regardless of what the style sheet sets.
plt.rcParams['axes.grid'] = True
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 1
----> 1 import numpy as np
2 import matplotlib.pyplot as plt
4 plt.style.use('seaborn-v0_8-whitegrid')
ModuleNotFoundError: No module named 'numpy'
def find_files(start_path: str, extensions: List[str] = AUDIO_EXTENSIONS) -> List[str]:
    """Collect the paths of all files below ``start_path`` with a matching extension.

    The directory tree is walked recursively. Matching is case-insensitive:
    both the file name and each extension are lower-cased before comparison.
    Extensions may be given with or without the leading dot (``"wav"`` and
    ``".wav"`` are treated the same).

    Parameters
    ----------
    start_path : str
        Directory at which the recursive walk starts.
    extensions : List[str]
        File extensions to accept. Defaults to ``AUDIO_EXTENSIONS``.

    Returns
    -------
    List[str]
        Matching paths (each built by joining the walked directory with the
        file name), in sorted order.
    """
    # Normalise every extension to the lower-case, dot-prefixed form once up
    # front; a tuple lets ``str.endswith`` test all of them in a single call.
    suffixes = tuple(
        ext.lower() if ext.startswith(".") else f".{ext.lower()}"
        for ext in extensions
    )
    return sorted(
        os.path.join(folder, name)
        for folder, _, names in os.walk(start_path)
        for name in names
        if name.lower().endswith(suffixes)
    )
def get_media_length_in_dir(file_path):
    """
    Print the duration and sample count of every WAV file in a directory.

    Only the direct entries of the directory are inspected (no recursion).
    Entries are processed in sorted order so the report is reproducible, and
    the extension check ignores case, so files such as ``TAKE1.WAV`` are
    measured rather than skipped.
    Supported file types: .wav, .wave

    Parameters
    ----------
    file_path : str
        Path to the directory containing the audio files.

    Returns
    -------
    None
        The report is written to standard output.

    Raises
    ------
    OSError
        If the directory cannot be listed (propagated from ``os.listdir``).
    """
    # os.listdir makes no ordering guarantee; sort for a stable report.
    file_list = sorted(os.listdir(file_path))
    print(f"Files in directory: {file_path}")
    print("")
    for file_name in file_list:
        single_file_path = os.path.join(file_path, file_name)
        print(f"Processing file: {single_file_path}")
        # Compare against the lower-cased name so upper/mixed-case extensions
        # (".WAV", ".Wave") are recognised as well.
        if file_name.lower().endswith((".wav", ".wave")):
            waveform, sample_rate = torchaudio.load(single_file_path)
            # Axis 1 of the loaded waveform is taken as the number of samples
            # per channel (torchaudio's default layout is channels-first).
            total_length_samples = waveform.shape[1]
            total_length_seconds = total_length_samples / sample_rate
            minutes, seconds = divmod(total_length_seconds, 60)
            print(
                f"Length in {single_file_path}: "
                f"{int(minutes)} minutes, {seconds:.2f} seconds"
            )
            print(
                f"Number of elements in {single_file_path}: "
                f"{total_length_samples}"
            )
        else:
            print(f"Skipping {single_file_path} (unsupported file type)")
        # Blank line separates the report of one entry from the next.
        print("")
def load_audio(file_path: str):
    """Read an audio file with ``wav.read`` (scipy) and return it as one channel.

    Data with more than one dimension is collapsed by averaging along axis 1
    (presumably the channel axis, as produced by scipy's WAV reader); data
    that is already one-dimensional is returned as read.

    Parameters
    ----------
    file_path : str
        Path of the file to read.

    Returns
    -------
    tuple
        ``(waveform, sample_rate)`` -- note that this order is the reverse of
        what ``wav.read`` itself returns.
    """
    rate, samples = wav.read(file_path)
    is_multichannel = samples.ndim > 1
    if is_multichannel:
        # Down-mix to mono by taking the mean across axis 1.
        samples = samples.mean(axis=1)
    return samples, rate
def truncate_to_fixed_length(signal: np.ndarray, length: int) -> np.ndarray:
    """Return ``signal`` adjusted to ``length`` entries along its first axis.

    A signal shorter than ``length`` is extended at the end with constant
    (zero) padding; otherwise the first ``length`` entries are returned as a
    slice of the input.

    Parameters
    ----------
    signal : np.ndarray
        Input audio signal.
    length : int
        Target number of entries.

    Returns
    -------
    np.ndarray
        The padded copy or the leading slice of ``signal``.
    """
    shortfall = length - len(signal)
    if shortfall <= 0:
        # Already long enough: keep only the leading ``length`` entries.
        return signal[:length]
    # Too short: append ``shortfall`` zeros after the existing samples.
    return np.pad(signal, (0, shortfall), mode='constant')