Python Forum
Computing correlation in audio files
Thread Rating:
  • 0 Vote(s) - 0 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Computing correlation in audio files
#1
Hi all. I am trying to compute a correlation between audio files in terms of waveform. The code breaks in a few different places and I am not sure how to debug it. I was hoping someone would be able to help me out.

# compare.py 
import argparse
from numpy import correlate

def initialize():
    parser = argparse.ArgumentParser()
    parser.add_argument("-i ", "--source-file", help="source file")
    parser.add_argument("-o ", "--target-file", help="target file")
    args = parser.parse_args()

    SOURCE_FILE = args.source_file if args.source_file else None
    TARGET_FILE = args.target_file if args.target_file else None

    SOURCE_FILE = "Comparison1.wav"
    TARGET_FILE = "Comparison2.wav"
  
    if not SOURCE_FILE or not TARGET_FILE:
      raise Exception("Source or Target files not specified.")
    return SOURCE_FILE, TARGET_FILE
  
if __name__ == "__main__":
    SOURCE_FILE, TARGET_FILE = initialize()
    correlate(SOURCE_FILE, TARGET_FILE)

# correlation.py
import commands 
import numpy 

# seconds to sample audio file for
sample_time = 500
# number of points to scan cross correlation over
span = 150
# step size (in points) of cross correlation
step = 1
# minimum number of points that must overlap in cross correlation
# exception is raised if this cannot be met
min_overlap = 20
# report match when cross correlation has a peak exceeding threshold
threshold = 0.5

# calculate fingerprint
def calculate_fingerprints(filename):
    fpcalc_out = commands.getoutput('fpcalc -raw -length %i %s'
                                    % (sample_time, filename))
    fingerprint_index = fpcalc_out.find('FINGERPRINT=') + 12
    # convert fingerprint to list of integers
    fingerprints = map(int, fpcalc_out[fingerprint_index:].split(','))
    
    return fingerprints
  
# returns correlation between lists
def correlation(listx, listy):
    if len(listx) == 0 or len(listy) == 0:
        # Error checking in main program should prevent us from ever being
        # able to get here.
        raise Exception('Empty lists cannot be correlated.')
    if len(listx) > len(listy):
        listx = listx[:len(listy)]
    elif len(listx) < len(listy):
        listy = listy[:len(listx)]
    
    covariance = 0
    for i in range(len(listx)):
        covariance += 32 - bin(listx[i] ^ listy[i]).count("1")
    covariance = covariance / float(len(listx))
    
    return covariance/32
  
# return cross correlation, with listy offset from listx
def cross_correlation(listx, listy, offset):
    if offset > 0:
        listx = listx[offset:]
        listy = listy[:len(listx)]
    elif offset < 0:
        offset = -offset
        listy = listy[offset:]
        listx = listx[:len(listy)]
    if min(len(listx), len(listy)) < min_overlap:
        # Error checking in main program should prevent us from ever being
        # able to get here.
        return 
    #raise Exception('Overlap too small: %i' % min(len(listx), len(listy)))
    return correlation(listx, listy)
  
# cross correlate listx and listy with offsets from -span to span
def compare(listx, listy, span, step):
    if span > min(len(listx), len(listy)):
        # Error checking in main program should prevent us from ever being
        # able to get here.
        raise Exception('span >= sample size: %i >= %i\n'
                        % (span, min(len(listx), len(listy)))
                        + 'Reduce span, reduce crop or increase sample_time.')
    corr_xy = []
    for offset in numpy.arange(-span, span + 1, step):
        corr_xy.append(cross_correlation(listx, listy, offset))
    return corr_xy
  
# return index of maximum value in list
def max_index(listx):
    max_index = 0
    max_value = listx[0]
    for i, value in enumerate(listx):
        if value > max_value:
            max_value = value
            max_index = i
    return max_index
  
def get_max_corr(corr, source, target):
    max_corr_index = max_index(corr)
    max_corr_offset = -span + max_corr_index * step
    print("max_corr_index = ", max_corr_index, "max_corr_offset = ", max_corr_offset)
# report matches
    if corr[max_corr_index] > threshold:
        print('%s and %s match with correlation of %.4f at offset %i'
             % (source, target, corr[max_corr_index], max_corr_offset)) 

def correlate(source, target):
    fingerprint_source = calculate_fingerprints(source)
    fingerprint_target = calculate_fingerprints(target)
    
    corr = compare(fingerprint_source, fingerprint_target, span, step)
    max_corr_offset = get_max_corr(corr, source, target)
Reply


Possibly Related Threads…
Thread Author Replies Views Last Post
  I am getting a valueError. And not sure why? My goal is to visualize the correlation ReadytoCode 0 416 Dec-11-2023, 05:33 AM
Last Post: ReadytoCode
  Computing GC Content uwl 3 883 Jun-26-2023, 09:37 PM
Last Post: Pedroski55
  Error in find pearson correlation function erneelgupta 1 1,810 Mar-01-2022, 03:41 PM
Last Post: stevendaprano
  How to increase the size of a png picture for the heatmap of the correlation? lulu43366 9 3,383 Oct-06-2021, 04:15 PM
Last Post: deanhystad
  How to remove a column or two columns in a correlation heatmap? lulu43366 3 5,077 Sep-30-2021, 03:47 PM
Last Post: lulu43366
  Matlab to Python -- Parallel Computing zistambo 1 1,934 Jun-10-2020, 04:59 PM
Last Post: pyzyx3qwerty
  Other modules for reading audio files? jedzz 0 1,569 Mar-25-2020, 11:07 PM
Last Post: jedzz
  Correlation thomaschu 0 1,521 Jan-29-2020, 05:45 PM
Last Post: thomaschu
  computing entropy using pickle files baran01 2 2,374 Dec-30-2019, 09:45 PM
Last Post: micseydel
  Correlation of Incidents using time difference Rajhesh 1 1,776 Jun-27-2019, 03:44 PM
Last Post: Larz60+

Forum Jump:

User Panel Messages

Announcements
Announcement #1 8/1/2020
Announcement #2 8/2/2020
Announcement #3 8/6/2020