Working with Audio Files in Python

Working with Audio Files in Python

import numpy as np
import matplotlib.pyplot as plt

# Select the 'seaborn-v0_8-whitegrid' plotting style for the figures below.
plt.style.use('seaborn-v0_8-whitegrid')
# Explicitly enable the axes grid. This is assigned after the style is
# applied, so this value is the one left in effect.
plt.rcParams['axes.grid'] = True

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Cell In[1], line 1
----> 1 import numpy as np
      2 import matplotlib.pyplot as plt
      4 plt.style.use('seaborn-v0_8-whitegrid')

ModuleNotFoundError: No module named 'numpy'

def find_files(start_path: str, extensions: List[str] = AUDIO_EXTENSIONS) -> List[str]:
    """
    Recursively collect files under a directory whose names end with one of
    the given extensions.

    Parameters
    ----------
    start_path : str
        Directory at which the recursive walk starts.
    extensions : List[str]
        Extensions to match, with or without a leading dot. Matching is
        case-insensitive.

    Returns
    -------
    List[str]
        Sorted list of matching paths, each joined onto the directory in
        which the file was found.
    """
    # Normalise every extension to lower case with a leading dot; a tuple lets
    # str.endswith test all of them in a single call.
    suffixes = tuple(
        ext.lower() if ext.startswith(".") else "." + ext.lower()
        for ext in extensions
    )

    found = []
    for dirpath, _dirnames, filenames in os.walk(start_path):
        found.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.lower().endswith(suffixes)
        )

    found.sort()
    return found

def get_media_length_in_dir(file_path):
    """
    Print the duration and sample count of every WAV file in a directory.

    Only the entries returned by ``os.listdir`` are inspected (no recursion).
    Entries are processed in sorted order so the report is reproducible, and
    the extension check is case-insensitive so files such as ``CLIP.WAV`` are
    not skipped.
    Supported file types: .wav, .wave

    Parameters
    ----------
    file_path : str
        Path to the directory containing the audio files.

    Returns
    -------
    None
        All results are written to standard output.
    """
    # os.listdir gives no ordering guarantee; sort for a deterministic report.
    file_list = sorted(os.listdir(file_path))
    print(f"Files in directory: {file_path}")
    print("")

    for file_name in file_list:
        single_file_path = os.path.join(file_path, file_name)
        print(f"Processing file: {single_file_path}")

        # Compare on the lower-cased name so upper/mixed-case extensions match.
        if file_name.lower().endswith((".wav", ".wave")):
            waveform, sample_rate = torchaudio.load(single_file_path)
            # NOTE(review): assumes the loaded tensor is laid out as
            # (channels, samples), so shape[1] is the per-channel sample
            # count -- confirm against the torchaudio version in use.
            total_length_samples = waveform.shape[1]
            total_length_seconds = total_length_samples / sample_rate
            minutes, seconds = divmod(total_length_seconds, 60)
            print(
                f"Length in {single_file_path}: "
                f"{int(minutes)} minutes, {seconds:.2f} seconds"
            )
            print(
                f"Number of elements in {single_file_path}: "
                f"{total_length_samples}"
            )

        else:
            print(f"Skipping {single_file_path} (unsupported file type)")

        print("")

def load_audio(file_path: str):
    """
    Read an audio file with ``wav.read`` and return it as a single channel.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read.

    Returns
    -------
    tuple
        ``(waveform, sample_rate)``. When the data read has more than one
        dimension, the waveform is the mean over axis 1 (the channels are
        averaged); otherwise it is returned as read.
    """
    rate, samples = wav.read(file_path)

    # One-dimensional data is already mono: hand it back untouched.
    if samples.ndim <= 1:
        return samples, rate

    # Collapse the channel axis by averaging.
    return samples.mean(axis=1), rate

def truncate_to_fixed_length(signal: np.ndarray, length: int) -> np.ndarray:
    """
    Pad or truncate an audio signal to a specified length along its first axis.

    Parameters
    ----------
    signal : np.ndarray
        Input signal. Its length is measured with ``len(signal)``, i.e. along
        the first axis.
    length : int
        Desired number of entries along the first axis. Must be non-negative.

    Returns
    -------
    np.ndarray
        The signal zero-padded at the end when it is shorter than ``length``,
        otherwise the slice ``signal[:length]``.

    Raises
    ------
    ValueError
        If ``length`` is negative. A negative value would otherwise slice
        from the end and not produce the requested length.
    """
    if length < 0:
        raise ValueError(f"length must be non-negative, got {length}")

    missing = length - len(signal)
    if missing <= 0:
        return signal[:length]

    # Pad only the first axis so padding mirrors the truncation above; a bare
    # (0, missing) would be broadcast to every axis of a multi-dimensional
    # input and widen the trailing axes as well.
    pad_width = [(0, missing)] + [(0, 0)] * (np.ndim(signal) - 1)
    return np.pad(signal, pad_width, mode='constant')