diff --git a/CHANGES.rst b/CHANGES.rst index c90e607bc..7bd59e48c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,7 @@ Version 0.16.dev0 New features: +* `TempoDetector` can operate on live audio signals (#292) * Added chord evaluation (#309) Bug fixes: @@ -18,6 +19,7 @@ Bug fixes: API relevant changes: +* `BufferProcessor` uses `data` instead of `buffer` for data storage (#292) * `DBNBeatTrackingProcessor` expects 1D inputs (#299) Other changes: diff --git a/bin/TempoDetector b/bin/TempoDetector index 3ae90dbc5..3c67fa2e2 100755 --- a/bin/TempoDetector +++ b/bin/TempoDetector @@ -50,12 +50,15 @@ def main(): # version p.add_argument('--version', action='version', version='TempoDetector.2016') # input/output options - io_arguments(p, output_suffix='.bpm.txt') + io_arguments(p, output_suffix='.bpm.txt', online=True) ActivationsProcessor.add_arguments(p) # signal processing arguments SignalProcessor.add_arguments(p, norm=False, gain=0) # tempo arguments - TempoEstimationProcessor.add_arguments(p) + TempoEstimationProcessor.add_arguments(p, method='comb', min_bpm=40., + max_bpm=250., act_smooth=0.14, + hist_smooth=9, hist_buffer=10., + alpha=0.79) # mirex stuff g = p.add_mutually_exclusive_group() g.add_argument('--mirex', dest='tempo_format', diff --git a/madmom/features/beats.py b/madmom/features/beats.py index e7b7dcf38..ee2fcaff6 100755 --- a/madmom/features/beats.py +++ b/madmom/features/beats.py @@ -10,11 +10,13 @@ from __future__ import absolute_import, division, print_function import sys + import numpy as np -from madmom.processors import Processor, SequentialProcessor, ParallelProcessor -from madmom.audio.signal import smooth as smooth_signal -from madmom.ml.nn import average_predictions +from ..audio.signal import smooth as smooth_signal +from ..ml.nn import average_predictions +from ..processors import (OnlineProcessor, ParallelProcessor, Processor, + SequentialProcessor, BufferProcessor) # classes for tracking (down-)beats with RNNs @@ -404,8 +406,15 
@@ class BeatTrackingProcessor(Processor): look_ahead : float, optional Look `look_ahead` seconds in both directions to determine the local tempo and align the beats accordingly. + tempo_estimator : :class:`TempoEstimationProcessor`, optional + Use this processor to estimate the (local) tempo. If 'None' a default + tempo estimator will be created and used. fps : float, optional Frames per second. + kwargs : dict, optional + Keyword arguments passed to + :class:`madmom.features.tempo.TempoEstimationProcessor` if no + `tempo_estimator` was given. Notes ----- @@ -449,25 +458,21 @@ class BeatTrackingProcessor(Processor): """ LOOK_ASIDE = 0.2 - LOOK_AHEAD = 10 - # tempo defaults - TEMPO_METHOD = 'comb' - MIN_BPM = 40 - MAX_BPM = 240 - ACT_SMOOTH = 0.09 - HIST_SMOOTH = 7 - ALPHA = 0.79 + LOOK_AHEAD = 10. def __init__(self, look_aside=LOOK_ASIDE, look_ahead=LOOK_AHEAD, fps=None, - **kwargs): - # import the TempoEstimation here otherwise we have a loop - from .tempo import TempoEstimationProcessor + tempo_estimator=None, **kwargs): # save variables self.look_aside = look_aside self.look_ahead = look_ahead self.fps = fps # tempo estimator - self.tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs) + if tempo_estimator is None: + # import the TempoEstimation here otherwise we have a loop + from .tempo import TempoEstimationProcessor + # create default tempo estimator + tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs) + self.tempo_estimator = tempo_estimator def process(self, activations, **kwargs): """ @@ -880,7 +885,7 @@ def _process_dbn(process_tuple): return process_tuple[0].viterbi(process_tuple[1]) -class DBNBeatTrackingProcessor(Processor): +class DBNBeatTrackingProcessor(OnlineProcessor): """ Beat tracking with RNNs and a dynamic Bayesian network (DBN) approximated by a Hidden Markov Model (HMM). 
@@ -1003,27 +1008,7 @@ def reset(self): self.last_beat = 0 self.tempo = 0 - def process(self, activations, **kwargs): - """ - Detect the beats in the given activation function. - - Parameters - ---------- - activations : numpy array - Beat activation function. - - Returns - ------- - beats : numpy array - Detected beat positions [seconds]. - - """ - if self.online: - return self.process_forward(activations, **kwargs) - else: - return self.process_viterbi(activations, **kwargs) - - def process_viterbi(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Detect the beats in the given activation function with Viterbi decoding. @@ -1088,7 +1073,7 @@ def process_viterbi(self, activations, **kwargs): # convert the detected beats to seconds and return them return (beats + first) / float(self.fps) - def process_forward(self, activations, reset=True, **kwargs): + def process_online(self, activations, reset=True, **kwargs): """ Detect the beats in the given activation function with the forward algorithm. @@ -1145,7 +1130,7 @@ def process_forward(self, activations, reset=True, **kwargs): sys.stderr.write('\r%s' % ''.join(display)) sys.stderr.flush() # forward path often reports multiple beats close together, thus report - # only beats more than the minumum interval apart + # only beats more than the minimum interval apart beats_ = [] for frame in np.nonzero(beats)[0]: cur_beat = (frame + self.counter) / float(self.fps) @@ -1164,6 +1149,10 @@ def process_forward(self, activations, reset=True, **kwargs): # return beat(s) return np.array(beats_) + process_forward = process_online + + process_viterbi = process_offline + @staticmethod def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, num_tempi=NUM_TEMPI, transition_lambda=TRANSITION_LAMBDA, @@ -1249,6 +1238,378 @@ def add_arguments(parser, min_bpm=MIN_BPM, max_bpm=MAX_BPM, return g +class MultiAgentBeatTrackingProcessor(OnlineProcessor): + """ + Beat Tracking via Multiple Agents. 
Oriented towards the paper "Beat + Tracking for multiple applications: A multi-agent system architecture with + state recovery" by Lobato Oliveira et al., 2012 + + Parameters + ---------- + max_agents : int + Max number of agents. + num_tempi : int + Number of tempi to be considered. + induction_time : float + Window length in seconds used for inducting the agents. + + """ + MAX_AGENTS = 30 + NUM_TEMPI = 3 + INDUCTION_TIME = 2. + + class Agent(object): + """ + Agent class for tracking the beats. Each agent has a tempo, a score, + a current prediction where the next beat is and a complete history + of all previously detected beats. + + Parameters + ---------- + inner_window : int + Inner window in frames to each side where a beat is accepted. + outer_window : float + Outer window factor to each side depending on the interval. + threshold : float + Threshold value for accepting a beat. + correction_factor : float + Allow agent to adapt to errors. + inherit_score_factor : float + Child agents will inherit a percentage of their parent's score. + + """ + # TODO: Should we use @classmethod to set all values from outside? + INNER_WINDOW = 5 + OUTER_WINDOW = 0.4 + THRESHOLD = 0.05 + CORRECTION_FACTOR = 0.25 + INHERIT_SCORE_FACTOR = 0.9 + + # used for normalizing the score, set from outside + _MAX_INTERVAL = None + _MIN_INTERVAL = None + + def __init__(self, score=0, interval=0, prediction=0, beats=[]): + self.score = score + self.interval = interval + self.prediction = prediction + self.beats = beats + + def __hash__(self): + return hash(self.prediction + self.interval) + + def __eq__(self, other): + # TODO: Maybe allow for slight variations here + eq_prediction = self.prediction == other.prediction + eq_interval = self.interval == other.interval + return eq_prediction and eq_interval + + def fork(self, error): + """ + Return child agents based on the given error. The children + inherit a part of the parent's score and all of the parent's + detections.
The interval and the prediction for the next beat + are adjusted to the error of the last prediction. + + """ + # TODO: Check whether all children are really needed + agents = [ + # create agent with same tempo but adjusted prediction + self.__class__(score=self.score * self.INHERIT_SCORE_FACTOR, + interval=self.interval, + prediction=self.prediction + error, + beats=self.beats), + # create agent with adjusted tempo and prediction + self.__class__(score=self.score * self.INHERIT_SCORE_FACTOR, + interval=self.interval + error, + prediction=self.prediction + error, + beats=self.beats), + # create agent with adjusted tempo and prediction by half + self.__class__(score=self.score * self.INHERIT_SCORE_FACTOR, + interval=self.interval + int(error * 0.5), + prediction=self.prediction + int(error * 0.5), + beats=self.beats) + ] + # only return agents which are within tempo range + return [agent for agent in agents if + self._MIN_INTERVAL <= agent.interval <= self._MAX_INTERVAL] + + def accept(self, activation, idx): + """ + Accept beat at global frame position idx if the + activation exceeds the threshold. + + """ + if activation > self.THRESHOLD: + self.beats = self.beats + [idx] + + def process(self, activations, idx): + """ + Set the next prediction, score the agent and create + new child agents if necessary. This method is called after + the outer window has passed for offline and online. + + Parameters + ---------- + activations : list + Activation window which surrounds the prediction by + outer window length on both sides. For online mode we wait + until all of that information is available before calling + this method. + idx : int + Global frame counter index of the last activation, since we + calculate everything in absolute frame times.
+ + Returns + ------- + agents : list + New child agent objects + + """ + # calculate frames to look around + frames = int(self.interval * self.OUTER_WINDOW) + # get predicted activation + act = activations[int(idx - self.prediction)] + # get max index within outer window + max_idx = idx - len(activations) + np.argmax(activations) + # distance between max activation and predicted position + error = max_idx - self.prediction + # faster agents should not get a better score + normalization = self.interval / self._MIN_INTERVAL + # if max activation was in inner window + if abs(error) <= self.INNER_WINDOW: + # if no beat has been accepted yet, accept max (for offline) + if self.beats[-1] < idx - len(activations): + self.accept(max(activations), max_idx) + # update prediction + self.prediction = max_idx + self.interval + # reward agent for detecting the beat + self.score += (1 - abs(error) / frames) * normalization * act + # adapt agent to error + self.interval += int(error * self.CORRECTION_FACTOR) + self.prediction += int(error * self.CORRECTION_FACTOR) + # return no new child agents + return [] + # if max activation was in outer window + else: + # if no beat has been accepted yet, accept act (for offline) + if self.beats[-1] < idx - len(activations): + self.accept(act, self.prediction) + # update prediction + self.prediction += self.interval + # create child agents + new_agents = self.fork(error) + # penalize agent for not detecting the beat + self.score -= (abs(error) / frames) * normalization * act + # return new child agents + return new_agents + + def __init__(self, fps=None, tempo_estimator=None, online=False, **kwargs): + # pylint: disable=unused-argument + super(MultiAgentBeatTrackingProcessor, self).__init__(online=online) + # save variables + self.fps = fps + # tempo estimator + if tempo_estimator is None: + # import the TempoEstimation here otherwise we have a loop + from .tempo import TempoEstimationProcessor + # create default tempo estimator + 
tempo_estimator = TempoEstimationProcessor(fps=fps, **kwargs) + self.tempo_estimator = tempo_estimator + self.agents = [] + # TODO: Not sure if thats nice? + self.Agent._MIN_INTERVAL = tempo_estimator.min_interval + self.Agent._MAX_INTERVAL = tempo_estimator.max_interval + if self.online: + self.visualize = kwargs.get('verbose', False) + self.buffer = BufferProcessor(int(self.INDUCTION_TIME * self.fps)) + self.last_beat = 0 + self.counter = 0 + + def reset(self): + """Reset the MultiAgentBeatTrackingProcessor.""" + self.buffer.reset() + self.agents = [] + self.last_beat = 0 + self.counter = 0 + + def process_offline(self, activations, **kwargs): + """ + Detect the beats in the given activation function. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + beats : numpy array + Detected beat positions [seconds]. + + """ + # smooth activations + act_smooth = int(self.fps * self.tempo_estimator.act_smooth) + activations = smooth_signal(activations, act_smooth) + # create an interval histogram over the induction time window + induction_window = activations[:int(self.INDUCTION_TIME * self.fps)] + histogram = self.tempo_estimator.interval_histogram(induction_window) + # get N most likely tempi + from .tempo import detect_tempo + tempi = detect_tempo(histogram, self.fps)[:self.NUM_TEMPI, 0] + # convert tempi to intervals + intervals = 60.0 * self.fps / tempi + # induct agents for each interval + for interval in intervals.astype(int): + self.induct_agents(activations[:interval], interval) + # iterate through all activations + for idx, activation in enumerate(activations): + # process activation for each agent + new_agents = [] + for agent in self.agents: + # calculate number of frames to look around the prediction + # TODO: Should this be calculated as a @property in agent? 
+ outer_frames = int(agent.interval * agent.OUTER_WINDOW) + # skip if not the time yet for this agent + if agent.prediction + outer_frames != idx: + continue + # get activations of outer windows surrounding the prediction + context = activations[max(0, idx - outer_frames * 2):idx] + # process agent and extend new agents + new_agents.extend(agent.process(context, idx)) + # TODO: Those lines are the same for offline/online: refactor? + # append new agents to agents list + self.agents.extend(new_agents) + # sort all agents by score + self.agents.sort(key=lambda a: a.score, reverse=True) + # remove duplicates by using the agents __eq__ method + self.agents = list(dict.fromkeys(self.agents)) + # kill worst agents if too many + self.agents = self.agents[:self.MAX_AGENTS] + # return beats of best agent + return np.array(self.agents[0].beats) / self.fps + + def process_online(self, activations, reset=True, **kwargs): + """ + Detect the beats in the given activation function for online mode. + + Parameters + ---------- + activations : numpy array + Beat activation function. + reset : bool, optional + Reset the BeatTrackingProcessor to its initial state before + processing. + + Returns + ------- + beats : numpy array + Detected beat positions [seconds]. 
+ + """ + # reset to initial state + if reset: + self.reset() + beats_ = [] + for activation in activations: + # shift buffer and put new activation at end of buffer + buffer = self.buffer(activation) + # induct agents after induction time has passed + if self.counter == self.INDUCTION_TIME * self.fps: + # create histogram of induction window + histogram = self.tempo_estimator.interval_histogram(buffer) + # get N most likely tempi + from .tempo import detect_tempo + tempi = detect_tempo(histogram, self.fps)[:self.NUM_TEMPI, 0] + # convert tempi to intervals + intervals = 60.0 * self.fps / tempi + # induct agents on past interval frames + for interval in intervals.astype(int): + act = buffer[-interval:] + self.induct_agents(act, interval, self.counter - interval) + # guess beat if possible for each agent + for agent in self.agents: + # skip if beat was already detected inside this inner window + if agent.beats[-1] > self.counter - agent.INNER_WINDOW * 2: + continue + # skip if not the time yet for this agent to guess + if self.counter < agent.prediction or \ + self.counter > agent.prediction + agent.INNER_WINDOW: + continue + # get max activation of the past inner window + max_act = max(buffer[-agent.INNER_WINDOW:]) + # accept the current frame as a beat + agent.accept(max_act, self.counter) + # set score and predictions deferred after outer window has passed + # this way we get a little peek into the future + new_agents = [] + for agent in self.agents: + # calculate number of frames to look around the prediction + outer_frames = int(agent.interval * agent.OUTER_WINDOW) + # skip if not the time yet for this agent + if agent.prediction + outer_frames != self.counter: + continue + # get activations of outer windows surrounding the prediction + context = buffer[-outer_frames * 2:] + # process agent and extend new agents + new_agents.extend(agent.process(context, self.counter)) + # append new agents to agents list + self.agents.extend(new_agents) + # sort all agents by 
score + self.agents.sort(key=lambda a: a.score, reverse=True) + # remove duplicates by using the agents __eq__ method + self.agents = list(dict.fromkeys(self.agents)) + # kill worst agents if too many + self.agents = self.agents[:self.MAX_AGENTS] + # if best agent found a beat this frame + is_beat = self.agents and self.agents[0].beats[-1] == self.counter + # beats have to lie apart at least min_interval + beat_distance = self.counter - self.tempo_estimator.min_interval + # if current frame is considered a beat return it as result + if is_beat and self.last_beat < beat_distance: + beats_.append(self.counter) + self.last_beat = self.counter + # increase frame counter + self.counter += 1 + # return beat(s) + return np.array(beats_) / self.fps + + def induct_agents(self, activations, interval, start=0): + """ + Introduce agents with a given interval by letting them start at + the biggest N maxima inside the given activations. + + Parameters + ---------- + activations : list + Activation function window where agents should be introduced. + interval : int + Time interval which the introduced agents should have. + start : int, optional + Global frame number where agents start. + + """ + from scipy.signal import argrelextrema + # get all maxima within activations window + maxima = argrelextrema(activations, np.greater)[0] + # if no maxima could be found just use max value + if len(maxima) == 0: + maxima = np.array([activations.argmax()]) + # pick N maxima indices where activation is highest + best_idx = activations[maxima].argsort(axis=0)[::-1][:self.MAX_AGENTS] + # pick best maxima + best_maxima = maxima[best_idx] + # for each best maxima init an agent + for max_idx in best_maxima: + new_agent = self.Agent(score=activations[max_idx], + interval=interval, + prediction=start + max_idx + interval, + beats=[start + max_idx]) + # append new agent to agents list + # TODO: Should this method return the agents instead of appending? 
+ self.agents.append(new_agent) + + class DBNDownBeatTrackingProcessor(Processor): """ Downbeat tracking with RNNs and a dynamic Bayesian network (DBN) diff --git a/madmom/features/onsets.py b/madmom/features/onsets.py index 970fe8997..00f258d13 100755 --- a/madmom/features/onsets.py +++ b/madmom/features/onsets.py @@ -13,9 +13,9 @@ from scipy.ndimage import uniform_filter from scipy.ndimage.filters import maximum_filter -from ..processors import (Processor, SequentialProcessor, ParallelProcessor, - BufferProcessor) from ..audio.signal import smooth as smooth_signal +from ..processors import (BufferProcessor, OnlineProcessor, ParallelProcessor, + Processor, SequentialProcessor, ) from ..utils import combine_events EPSILON = np.spacing(1) @@ -1018,7 +1018,7 @@ def add_arguments(parser, **kwargs): return OnsetPeakPickingProcessor.add_arguments(parser, **kwargs) -class OnsetPeakPickingProcessor(Processor): +class OnsetPeakPickingProcessor(OnlineProcessor): """ This class implements the onset peak-picking functionality. It transparently converts the chosen values from seconds to frames. @@ -1100,10 +1100,9 @@ def __init__(self, threshold=THRESHOLD, smooth=SMOOTH, pre_avg=PRE_AVG, combine=COMBINE, delay=DELAY, online=ONLINE, fps=FPS, **kwargs): # pylint: disable=unused-argument - # TODO: make this an IOProcessor by defining input/output processings - # super(PeakPicking, self).__init__(peak_picking, write_events) - # adjust some params for online mode? - if online: + # instantiate OnlineProcessor + super(OnsetPeakPickingProcessor, self).__init__(online=online) + if self.online: # set some parameters to 0 (i.e. 
no future information available) smooth = 0 post_avg = 0 @@ -1121,7 +1120,6 @@ def __init__(self, threshold=THRESHOLD, smooth=SMOOTH, pre_avg=PRE_AVG, self.post_max = post_max self.combine = combine self.delay = delay - self.online = online self.fps = fps def reset(self): @@ -1130,27 +1128,7 @@ def reset(self): self.counter = 0 self.last_onset = None - def process(self, activations, **kwargs): - """ - Detect the onsets in the given activation function. - - Parameters - ---------- - activations : numpy array - Onset activation function. - - Returns - ------- - onsets : numpy array - Detected onsets [seconds]. - - """ - if self.online: - return self.process_online(activations, **kwargs) - else: - return self.process_sequence(activations, **kwargs) - - def process_sequence(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Detect the onsets in the given activation function. @@ -1245,6 +1223,8 @@ def process_online(self, activations, reset=True, **kwargs): # return the onsets return onsets + process_sequence = process_offline + @staticmethod def add_arguments(parser, threshold=THRESHOLD, smooth=None, pre_avg=None, post_avg=None, pre_max=None, post_max=None, @@ -1300,8 +1280,9 @@ def add_arguments(parser, threshold=THRESHOLD, smooth=None, pre_avg=None, '[default=%(default).2f]') if post_avg is not None: g.add_argument('--post_avg', action='store', type=float, - default=post_avg, help='build average over N ' - 'following seconds [default=%(default).2f]') + default=post_avg, + help='build average over N following seconds ' + '[default=%(default).2f]') if pre_max is not None: g.add_argument('--pre_max', action='store', type=float, default=pre_max, diff --git a/madmom/features/tempo.py b/madmom/features/tempo.py index 1e60ca9a1..da88433d5 100755 --- a/madmom/features/tempo.py +++ b/madmom/features/tempo.py @@ -9,11 +9,20 @@ from __future__ import absolute_import, division, print_function +import sys + import numpy as np -from 
madmom.processors import Processor -from madmom.audio.signal import smooth as smooth_signal +from ..audio.signal import smooth as smooth_signal +from ..processors import BufferProcessor, OnlineProcessor +METHOD = 'comb' +ALPHA = 0.79 +MIN_BPM = 40. +MAX_BPM = 250. +ACT_SMOOTH = 0.14 +HIST_SMOOTH = 9 +HIST_BUFFER = 10. NO_TEMPO = np.nan @@ -215,7 +224,7 @@ def detect_tempo(histogram, fps): if len(peaks) == 0: # a flat histogram has no peaks, use the center bin if len(bins): - ret = np.asarray([tempi[len(bins) / 2], 1.]) + ret = np.asarray([tempi[len(bins) // 2], 1.]) else: # otherwise: no peaks, no tempo ret = np.asarray([NO_TEMPO, 0.]) @@ -234,8 +243,358 @@ def detect_tempo(histogram, fps): return np.atleast_2d(ret) -# tempo estimation processor class -class TempoEstimationProcessor(Processor): +# tempo histogram processor classes +class TempoHistogramProcessor(OnlineProcessor): + """ + Tempo Histogram Processor class. + + Parameters + ---------- + min_bpm : float + Minimum tempo to detect [bpm]. + max_bpm : float + Maximum tempo to detect [bpm]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. + fps : float, optional + Frames per second. + + Notes + ----- + This abstract class provides the basic tempo histogram functionality. + Please use one of the following implementations: + + - :class:`CombFilterTempoHistogramProcessor`, + - :class:`ACFTempoHistogramProcessor` or + - :class:`DBNTempoHistogramProcessor`. + + """ + + def __init__(self, min_bpm, max_bpm, hist_buffer=HIST_BUFFER, fps=None, + online=False, **kwargs): + # pylint: disable=unused-argument + super(TempoHistogramProcessor, self).__init__(online=online) + self.min_bpm = min_bpm + self.max_bpm = max_bpm + self.hist_buffer = hist_buffer + self.fps = fps + if self.online: + self._hist_buffer = BufferProcessor((int(hist_buffer * self.fps), + len(self.intervals))) + + @property + def min_interval(self): + """Minimum beat interval [frames].""" + return int(np.floor(60. 
* self.fps / self.max_bpm)) + + @property + def max_interval(self): + """Maximum beat interval [frames].""" + return int(np.ceil(60. * self.fps / self.min_bpm)) + + @property + def intervals(self): + """Beat intervals [frames].""" + return np.arange(self.min_interval, self.max_interval + 1) + + def reset(self): + """Reset the tempo histogram aggregation buffer.""" + self._hist_buffer.reset() + + +class CombFilterTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with a bank of resonating comb filters. + + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + alpha : float, optional + Scaling factor for the comb filter. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. + fps : float, optional + Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. + + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, alpha=ALPHA, + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): + # pylint: disable=unused-argument + super(CombFilterTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) + self.alpha = alpha + if self.online: + self._comb_buffer = BufferProcessor((self.max_interval + 1, + len(self.intervals))) + + def reset(self): + """Reset to initial state.""" + super(CombFilterTempoHistogramProcessor, self).reset() + self._comb_buffer.reset() + + def process_offline(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with a bank of resonating + comb filters. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. 
+ + """ + return interval_histogram_comb(activations, self.alpha, + self.min_interval, self.max_interval) + + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with a bank of resonating + comb filters in online mode. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset to initial state before processing. + + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # reset to initial state + if reset: + self.reset() + # indices at which to retrieve y[n - τ] + idx = [-self.intervals, np.arange(len(self.intervals))] + # iterate over all activations + for act in activations: + # online feed backward comb filter (y[n] = x[n] + α * y[n - τ]) + y_n = act + self.alpha * self._comb_buffer[idx] + # shift output buffer with new value + self._comb_buffer(y_n) + # determine the tau with the highest value + act_max = y_n == np.max(y_n, axis=-1)[..., np.newaxis] + # compute the max bins + bins = y_n * act_max + # use a buffer to only keep a certain number of bins + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) + # build a histogram together with the intervals and return it + return np.sum(bins, axis=0), self.intervals + + +class ACFTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with autocorrelation. + + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. + fps : float, optional + Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. 
+ + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): + # pylint: disable=unused-argument + super(ACFTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) + if self.online: + self._act_buffer = BufferProcessor((self.max_interval + 1, 1)) + + def reset(self): + """Reset to initial state.""" + super(ACFTempoHistogramProcessor, self).reset() + self._act_buffer.reset() + + def process_offline(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with the autocorrelation + function. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # build the tempo (i.e. inter beat interval) histogram and return it + return interval_histogram_acf(activations, self.min_interval, + self.max_interval) + + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with the autocorrelation + function in online mode. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset to initial state before processing. + + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # reset to initial state + if reset: + self.reset() + # iterate over all activations + # TODO: speed this up! 
+ for act in activations: + # online ACF (y[n] = x[n] * x[n - τ]) + bins = act * self._act_buffer[-self.intervals].T + # shift activation buffer with new value + self._act_buffer(act) + # use a buffer to only keep a certain number of bins + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) + # build a histogram together with the intervals and return it + return np.sum(bins, axis=0), self.intervals + + +class DBNTempoHistogramProcessor(TempoHistogramProcessor): + """ + Create a tempo histogram with a dynamic Bayesian network (DBN). + + Parameters + ---------- + min_bpm : float, optional + Minimum tempo to detect [bpm]. + max_bpm : float, optional + Maximum tempo to detect [bpm]. + hist_buffer : float + Aggregate the tempo histogram over `hist_buffer` seconds. + fps : float, optional + Frames per second. + online : bool, optional + Operate in online (i.e. causal) mode. + + """ + + def __init__(self, min_bpm=MIN_BPM, max_bpm=MAX_BPM, + hist_buffer=HIST_BUFFER, fps=None, online=False, **kwargs): + # pylint: disable=unused-argument + super(DBNTempoHistogramProcessor, self).__init__( + min_bpm=min_bpm, max_bpm=max_bpm, hist_buffer=hist_buffer, fps=fps, + online=online, **kwargs) + from .beats import DBNBeatTrackingProcessor + self.dbn = DBNBeatTrackingProcessor( + min_bpm=self.min_bpm, max_bpm=self.max_bpm, fps=self.fps, + online=online, **kwargs) + + def reset(self): + """Reset DBN to initial state.""" + super(DBNTempoHistogramProcessor, self).reset() + self.dbn.hmm.reset() + + def process_offline(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals with a DBN. + + Parameters + ---------- + activations : numpy array + Beat activation function. + + Returns + ------- + histogram_bins : numpy array + Bins of the beat interval histogram. + histogram_delays : numpy array + Corresponding delays [frames]. 
+ + """ + # get the best state path by calling the viterbi algorithm + path, _ = self.dbn.hmm.viterbi(activations.astype(np.float32)) + intervals = self.dbn.st.state_intervals[path] + # get the counts of the bins + bins = np.bincount(intervals, + minlength=self.dbn.st.intervals.max() + 1) + # truncate everything below the minimum interval of the state space + bins = bins[self.dbn.st.intervals.min():] + # build a histogram together with the intervals and return it + return bins, self.dbn.st.intervals + + def process_online(self, activations, reset=True, **kwargs): + """ + Compute the histogram of the beat intervals with a DBN using the + forward algorithm. + + Parameters + ---------- + activations : numpy float + Beat activation function. + reset : bool, optional + Reset DBN to initial state before processing. + + Returns + ------- + histogram_bins : numpy array + Bins of the tempo histogram. + histogram_delays : numpy array + Corresponding delays [frames]. + + """ + # reset to initial state + if reset: + self.reset() + # use forward path to get best state + fwd = self.dbn.hmm.forward(activations, reset=reset) + # choose the best state for each step + states = np.argmax(fwd, axis=1) + intervals = self.dbn.st.state_intervals[states] + # convert intervals to bins + bins = np.zeros((len(activations), len(self.intervals))) + bins[np.arange(len(activations)), intervals - self.min_interval] = 1 + # shift buffer and put new bins at end of buffer + bins = self._hist_buffer(bins) + # build a histogram together with the intervals and return it + return np.sum(bins, axis=0), self.intervals + + +class TempoEstimationProcessor(OnlineProcessor): """ Tempo Estimation Processor class. @@ -255,6 +614,12 @@ class TempoEstimationProcessor(Processor): Scaling factor for the comb filter. fps : float, optional Frames per second. + histogram_processor : :class:`TempoHistogramProcessor`, optional + Processor used to create a tempo histogram. 
If 'None', a default + combfilter histogram processor will be created and used. + kwargs : dict, optional + Keyword arguments passed to :class:`CombFilterTempoHistogramProcessor` + if no `histogram_processor` was given. Examples -------- @@ -278,38 +643,63 @@ class TempoEstimationProcessor(Processor): [ 82.19178, 0.09629]]) """ - # default values for tempo estimation - METHOD = 'comb' - MIN_BPM = 40. - MAX_BPM = 250. - HIST_SMOOTH = 9 - ACT_SMOOTH = 0.14 - ALPHA = 0.79 def __init__(self, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, alpha=ALPHA, - fps=None, **kwargs): + act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, fps=None, + online=False, histogram_processor=None, **kwargs): # pylint: disable=unused-argument - # save variables + super(TempoEstimationProcessor, self).__init__(online=online) self.method = method - self.min_bpm = min_bpm - self.max_bpm = max_bpm self.act_smooth = act_smooth self.hist_smooth = hist_smooth - self.alpha = alpha self.fps = fps + if self.online: + self.visualize = kwargs.get('verbose', False) + if histogram_processor is None: + if method == 'acf': + histogram_processor = ACFTempoHistogramProcessor + elif method == 'comb': + histogram_processor = CombFilterTempoHistogramProcessor + elif method == 'dbn': + histogram_processor = DBNTempoHistogramProcessor + else: + raise ValueError('tempo histogram method unknown.') + # instantiate histogram processor + histogram_processor = histogram_processor( + min_bpm=min_bpm, max_bpm=max_bpm, fps=fps, online=online, + **kwargs) + self.histogram_processor = histogram_processor + + @property + def min_bpm(self): + """Minimum tempo [bpm].""" + return self.histogram_processor.min_bpm + + @property + def max_bpm(self): + """Maximum tempo [bpm].""" + return self.histogram_processor.max_bpm + + @property + def intervals(self): + """Beat intervals [frames].""" + return self.histogram_processor.intervals @property def min_interval(self): """Minimum beat 
interval [frames].""" - return int(np.floor(60. * self.fps / self.max_bpm)) + return self.histogram_processor.min_interval @property def max_interval(self): """Maximum beat interval [frames].""" - return int(np.ceil(60. * self.fps / self.min_bpm)) + return self.histogram_processor.max_interval + + def reset(self): + """Reset to initial state.""" + self.histogram_processor.reset() - def process(self, activations, **kwargs): + def process_offline(self, activations, **kwargs): """ Detect the tempi from the (beat) activations. @@ -335,9 +725,51 @@ def process(self, activations, **kwargs): # detect the tempi and return them return detect_tempo(histogram, self.fps) - def interval_histogram(self, activations): + def process_online(self, activations, reset=True, **kwargs): + """ + Detect the tempi from the (beat) activations in online mode. + + Parameters + ---------- + activations : numpy array + Beat activation function processed frame by frame. + reset : bool, optional + Reset the TempoEstimationProcessor to its initial state before + processing. + + Returns + ------- + tempi : numpy array + Array with the dominant tempi [bpm] (first column) and their + relative strengths (second column). + """ - Compute the histogram of the beat intervals with the selected method. 
+ # build the tempo histogram depending on the chosen method + histogram = self.interval_histogram(activations, reset=reset) + # smooth the histogram + histogram = smooth_histogram(histogram, self.hist_smooth) + # detect the tempo and append it to the found tempi + tempo = detect_tempo(histogram, self.fps) + # visualize tempo + if self.visualize: + display = '' + # display the 3 most likely tempi and their strengths + for i, display_tempo in enumerate(tempo[:3], start=1): + # display tempo + display += '| ' + str(round(display_tempo[0], 1)) + ' ' + # display strength + display += min(int(display_tempo[1] * 50), 18) * '*' + # fill up the rest with spaces + display = display.ljust(i * 26) + # print the tempi + sys.stderr.write('\r%s' % ''.join(display) + '|') + sys.stderr.flush() + # return tempo + return tempo + + def interval_histogram(self, activations, **kwargs): + """ + Compute the histogram of the beat intervals. Parameters ---------- @@ -352,31 +784,7 @@ def interval_histogram(self, activations): Corresponding delays [frames]. """ - # build the tempo (i.e. 
inter beat interval) histogram and return it - if self.method == 'acf': - return interval_histogram_acf(activations, self.min_interval, - self.max_interval) - elif self.method == 'comb': - return interval_histogram_comb(activations, self.alpha, - self.min_interval, - self.max_interval) - elif self.method == 'dbn': - from .beats import DBNBeatTrackingProcessor - # instantiate a DBN for beat tracking - dbn = DBNBeatTrackingProcessor(min_bpm=self.min_bpm, - max_bpm=self.max_bpm, - num_tempi=None, fps=self.fps) - # get the best state path by calling the viterbi algorithm - path, _ = dbn.hmm.viterbi(activations.astype(np.float32)) - intervals = dbn.st.state_intervals[path] - # get the counts of the bins - bins = np.bincount(intervals, minlength=dbn.st.intervals.max() + 1) - # truncate everything below the minimum interval of the state space - bins = bins[dbn.st.intervals.min():] - # build a histogram together with the intervals and return it - return bins, dbn.st.intervals - else: - raise ValueError('tempo estimation method unknown') + return self.histogram_processor(activations, **kwargs) def dominant_interval(self, histogram): """ @@ -398,9 +806,9 @@ def dominant_interval(self, histogram): return dominant_interval(histogram, self.hist_smooth) @staticmethod - def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, - act_smooth=ACT_SMOOTH, hist_smooth=HIST_SMOOTH, - alpha=ALPHA): + def add_arguments(parser, method=None, min_bpm=None, max_bpm=None, + act_smooth=None, hist_smooth=None, hist_buffer=None, + alpha=None): """ Add tempo estimation related arguments to an existing parser. @@ -418,6 +826,8 @@ def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, Smooth the activation function over `act_smooth` seconds. hist_smooth : int, optional Smooth the tempo histogram over `hist_smooth` bins. + hist_buffer : float, optional + Aggregate the tempo histogram over `hist_buffer` seconds. 
alpha : float, optional Scaling factor for the comb filter. @@ -455,6 +865,11 @@ def add_arguments(parser, method=METHOD, min_bpm=MIN_BPM, max_bpm=MAX_BPM, default=hist_smooth, help='smooth the tempo histogram over N bins ' '[default=%(default)d]') + if hist_buffer is not None: + g.add_argument('--hist_buffer', action='store', type=float, + default=hist_buffer, + help='aggregate the tempo histogram over N seconds ' + '[default=%(default).2f]') if alpha is not None: g.add_argument('--alpha', action='store', type=float, default=alpha, diff --git a/madmom/processors.py b/madmom/processors.py index b387e5466..c50693378 100644 --- a/madmom/processors.py +++ b/madmom/processors.py @@ -15,16 +15,15 @@ from __future__ import absolute_import, division, print_function -import os -import sys import argparse import itertools as it import multiprocessing as mp +import os +import sys +from collections import MutableSequence import numpy as np -from collections import MutableSequence - class Processor(object): """ @@ -121,13 +120,110 @@ def process(self, data, **kwargs): Processed data. """ - raise NotImplementedError('must be implemented by subclass.') + raise NotImplementedError('Must be implemented by subclass.') def __call__(self, *args, **kwargs): # this magic method makes a Processor callable return self.process(*args, **kwargs) +class OnlineProcessor(Processor): + """ + Abstract base class for processing data in online mode. + + Derived classes must implement the following methods: + + - process_online(): process the data in online mode, + - process_offline(): process the data in offline mode. + + """ + + def __init__(self, online=False): + self.online = online + + def process(self, data, **kwargs): + """ + Process the data either in online or offline mode. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + kwargs : dict, optional + Keyword arguments for processing. 
+ + Returns + ------- + depends on the implementation of subclass + Processed data. + + Notes + ----- + This method is used to pass the data to either `process_online` or + `process_offline`, depending on the `online` setting of the processor. + + """ + if self.online: + return self.process_online(data, **kwargs) + return self.process_offline(data, **kwargs) + + def process_online(self, data, reset=True, **kwargs): + """ + Process the data in online mode. + + This method must be implemented by the derived class and should process + the given data frame by frame and return the processed output. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + reset : bool, optional + Reset the processor to its initial state before processing. + kwargs : dict, optional + Keyword arguments for processing. + + Returns + ------- + depends on the implementation of subclass + Processed data. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + def process_offline(self, data, **kwargs): + """ + Process the data in offline mode. + + This method must be implemented by the derived class and should process + the given data and return the processed output. + + Parameters + ---------- + data : depends on the implementation of subclass + Data to be processed. + kwargs : dict, optional + Keyword arguments for processing. + + Returns + ------- + depends on the implementation of subclass + Processed data. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + def reset(self): + """ + Reset the OnlineProcessor. + + This method must be implemented by the derived class and should reset + the processor to its initial state. + + """ + raise NotImplementedError('Must be implemented by subclass.') + + class OutputProcessor(Processor): """ Class for processing data and/or feeding it into some sort of output. 
@@ -157,7 +253,7 @@ def process(self, data, output, **kwargs): """ # pylint: disable=arguments-differ - raise NotImplementedError('must be implemented by subclass.') + raise NotImplementedError('Must be implemented by subclass.') # functions for processing file(s) with a Processor @@ -198,9 +294,8 @@ def _process(process_tuple): elif isinstance(process_tuple[0], Processor): # call the Processor with data and kwargs return process_tuple[0](*process_tuple[1:-1], **process_tuple[-1]) - else: - # just call whatever we got here (e.g. a function) without kwargs - return process_tuple[0](*process_tuple[1:-1]) + # just call whatever we got here (e.g. a function) without kwargs + return process_tuple[0](*process_tuple[1:-1]) class SequentialProcessor(MutableSequence, Processor): @@ -639,11 +734,16 @@ class BufferProcessor(Processor): ---------- buffer_size : int or tuple Size of the buffer (time steps, [additional dimensions]). + init : numpy array, optional + Init the buffer with this array. + init_value : float, optional + If only `buffer_size` is given but no `init`, use this value to + initialise the buffer. Notes ----- - If `buffer_size` (or the first value thereof) is 1, only the un-buffered - current value is returned. + If `buffer_size` (or the first item thereof in case of tuple) is 1, + only the un-buffered current value is returned. If context is needed, `buffer_size` must be set to >1. E.g. SpectrogramDifference needs a context of two frames to be able to @@ -664,7 +764,20 @@ def __init__(self, buffer_size=None, init=None, init_value=0): init = np.ones(buffer_size) * init_value # save variables self.buffer_size = buffer_size - self.buffer = init + self.init = init + self.data = init + + def reset(self, init=None): + """ + Reset BufferProcessor to its initial state. + + Parameters + ---------- + init : numpy array, shape (num_hiddens,), optional + Reset BufferProcessor to this initial state. 
+ + """ + self.data = init if init is not None else self.init def process(self, data, **kwargs): """ @@ -689,14 +802,31 @@ def process(self, data, **kwargs): # length of the data data_length = len(data) # remove `data_length` from buffer at the beginning and append new data - self.buffer = np.roll(self.buffer, -data_length, axis=0) - self.buffer[-data_length:] = data + self.data = np.roll(self.data, -data_length, axis=0) + self.data[-data_length:] = data # return the complete buffer - return self.buffer + return self.data # alias for easier / more intuitive calling buffer = process + def __getitem__(self, index): + """ + Direct access to the buffer data. + + Parameters + ---------- + index : int, slice, ndarray, + Any NumPy indexing method to access the buffer data directly. + + Returns + ------- + numpy array or subclass thereof + Requested view of the buffered data. + + """ + return self.data[index] + # function to process live input def process_online(processor, infile, outfile, **kwargs): diff --git a/tests/test_bin.py b/tests/test_bin.py index f0d01f4ae..3766ab2f4 100644 --- a/tests/test_bin.py +++ b/tests/test_bin.py @@ -868,6 +868,7 @@ def setUp(self): pj(ACTIVATIONS_PATH, "sample.beats_blstm.npz")) self.result = np.loadtxt( pj(DETECTIONS_PATH, "sample.tempo_detector.txt")) + self.online_results = np.array([176.47, 88.24, 0.58]) def test_help(self): self.assertTrue(run_help(self.bin)) @@ -901,6 +902,15 @@ def test_run(self): result = np.loadtxt(tmp_result) self.assertTrue(np.allclose(result, self.result, atol=1e-5)) + def test_online(self): + run_program([self.bin, 'online', sample_file, '-o', tmp_result]) + result = np.loadtxt(tmp_result) + self.assertTrue(np.allclose(result[-1], self.online_results)) + run_program([self.bin, 'single', '--online', sample_file, '-o', + tmp_result]) + result = np.loadtxt(tmp_result) + self.assertTrue(np.allclose(result, self.online_results)) + # clean up def teardown(): diff --git a/tests/test_features_tempo.py 
b/tests/test_features_tempo.py index 5b8565a46..a5e145512 100644 --- a/tests/test_features_tempo.py +++ b/tests/test_features_tempo.py @@ -19,7 +19,22 @@ COMB_TEMPI = np.array([[176.470, 0.475], [117.647, 0.177], [240.0, 0.154], [68.966, 0.099], [82.192, 0.096]]) - +COMB_TEMPI_ONLINE = [[176.470588, 0.289414003], [115.384615, 0.124638601], + [230.769231, 0.0918372569], [84.5070423, 0.0903815502], + [75.0000000, 0.0713704506], [53.5714286, 0.0701783497], + [65.9340659, 0.0696296514], [49.1803279, 0.0676349815], + [61.2244898, 0.0646209647], [40.8163265, 0.0602941909]] +ACF_TEMPI = np.array([[176.470, 0.246], [86.956, 0.226], [58.823, 0.181], + [43.795, 0.137], [115.384, 0.081], [70.588, 0.067], + [50.847, 0.058]]) +ACF_TEMPI_ONLINE = [[176.470588, 0.253116038], [88.2352941, 0.231203195], + [58.8235294, 0.187827698], [43.7956204, 0.139373027], + [115.384615, 0.0749783568], [69.7674419, 0.0599632291], + [50.4201681, 0.0535384559]] +DBN_TEMPI = np.array([[176.470, 1]]) +DBN_TEMPI_ONLINE = [[176.470588, 0.580877380], [86.9565217, 0.244729904], + [74.0740741, 0.127887992], [40.8163265, 0.0232523621], + [250.000000, 0.0232523621]] HIST = interval_histogram_comb(act, 0.79, min_tau=24, max_tau=150) @@ -86,6 +101,7 @@ class TestTempoEstimationProcessorClass(unittest.TestCase): def setUp(self): self.processor = TempoEstimationProcessor(fps=fps) + self.online_processor = TempoEstimationProcessor(fps=fps, online=True) def test_types(self): self.assertIsInstance(self.processor.method, str) @@ -93,6 +109,53 @@ def test_types(self): self.assertIsInstance(self.processor.max_bpm, float) self.assertIsInstance(self.processor.act_smooth, float) self.assertIsInstance(self.processor.hist_smooth, int) + self.assertIsInstance(self.processor.fps, float) + self.assertIsInstance(self.processor.histogram_processor, + TempoHistogramProcessor) + + def test_values(self): + self.assertTrue(self.processor.method == 'comb') + self.assertTrue(self.processor.min_bpm == 40) + 
self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.act_smooth == 0.14) + self.assertTrue(self.processor.hist_smooth == 9) + self.assertTrue(self.processor.fps == 100) + # test default values of the histogram processor + self.assertTrue(self.processor.histogram_processor.alpha == 0.79) + self.assertTrue(self.processor.histogram_processor.min_interval == 24) + self.assertTrue(self.processor.histogram_processor.max_interval == 150) + + def test_process(self): + tempi = self.processor(act) + self.assertTrue(np.allclose(tempi, COMB_TEMPI, atol=0.01)) + + def test_process_online(self): + # process all activations at once + tempi = self.online_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, COMB_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + self.online_processor.reset() + tempi = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # without resetting results are different + tempi = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], + [85.7142857, 0.11437361], + [115.384615, 0.10919612]])) + + +class TestCombFilterTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = CombFilterTempoHistogramProcessor(fps=fps) + self.online_processor = CombFilterTempoHistogramProcessor(fps=fps, + online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) self.assertIsInstance(self.processor.alpha, float) self.assertIsInstance(self.processor.fps, float) # properties @@ -100,20 +163,244 @@ def test_types(self): self.assertIsInstance(self.processor.max_interval, int) def test_values(self): - self.assertTrue(self.processor.method == 'comb') self.assertTrue(self.processor.min_bpm == 40) self.assertTrue(self.processor.max_bpm 
== 250) - self.assertTrue(self.processor.act_smooth == 0.14) - self.assertTrue(self.processor.hist_smooth == 9) self.assertTrue(self.processor.alpha == 0.79) self.assertTrue(self.processor.fps == 100) self.assertTrue(self.processor.min_interval == 24) self.assertTrue(self.processor.max_interval == 150) - def test_process(self): - tempi = self.processor(act) + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) self.assertTrue(np.allclose(tempi, COMB_TEMPI, atol=0.01)) + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, COMB_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + tempo_processor.reset() + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], COMB_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.470588, 0.31322337], + [85.7142857, 0.11437361], + [115.384615, 0.10919612]])) + + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 10.5064280455)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 44)) + self.assertTrue(np.allclose(np.sum(hist), 231.568316445)) + self.assertTrue(np.allclose(np.mean(hist), 1.82337257043)) + self.assertTrue(np.allclose(np.median(hist), 1.48112542203)) + + def test_process_online(self): + # offline results + hist_offline, delays_offline = self.processor(act) + # calling with all activations at once + hist, 
delays = self.online_processor(act) + # result must be the same as for offline processing + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # calling frame by frame after resetting + self.online_processor.reset() + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + # the final result must be the same as for offline processing + hist, delays = result[-1] + hist_, delays_ = self.processor(act) + self.assertTrue(np.allclose(hist, hist_)) + self.assertTrue(np.allclose(delays, delays_)) + # result after 100 frames + hist, delays = result[99] + self.assertTrue(np.allclose(hist.max(), 2.03108930086)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 12)) + self.assertTrue(np.allclose(hist.argmin(), 44)) + self.assertTrue(np.allclose(np.sum(hist), 175.034206851)) + self.assertTrue(np.allclose(np.mean(hist), 1.37822210119)) + self.assertTrue(np.allclose(np.median(hist), 1.23250838113)) + # the final result must be the same as for offline processing + hist, delays = result[-1] + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # results must be different without resetting + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + hist, delays = result[-1] + self.assertTrue(np.allclose(hist.max(), 18.1385269354)) + self.assertTrue(np.allclose(hist.min(), 1.23250838113)) + self.assertTrue(np.allclose(hist.argmax(), 11)) + self.assertTrue(np.allclose(hist.argmin(), 72)) + self.assertTrue(np.allclose(np.sum(hist), 332.668525522)) + self.assertTrue(np.allclose(np.mean(hist), 2.61943720884)) + self.assertTrue(np.allclose(np.median(hist), 1.96220625848)) + + +class TestACFTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = ACFTempoHistogramProcessor(fps=fps) + self.online_processor = ACFTempoHistogramProcessor(fps=fps, + 
online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) + self.assertIsInstance(self.processor.fps, float) + # properties + self.assertIsInstance(self.processor.min_interval, int) + self.assertIsInstance(self.processor.max_interval, int) + + def test_values(self): + self.assertTrue(self.processor.min_bpm == 40) + self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.fps == 100) + self.assertTrue(self.processor.min_interval == 24) + self.assertTrue(self.processor.max_interval == 150) + + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) + self.assertTrue(np.allclose(tempi, ACF_TEMPI, atol=0.01)) + + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, ACF_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + tempo_processor.reset() + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], ACF_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], [[176.4705882, 0.2414368], + [86.95652174, 0.2248635], + [58.25242718, 0.1878183]])) + + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 0.772242703961)) + self.assertTrue(np.allclose(hist.min(), 0.0550745515184)) + self.assertTrue(np.allclose(hist.argmax(), 11)) + self.assertTrue(np.allclose(hist.argmin(), 103)) + self.assertTrue(np.allclose(np.sum(hist), 28.4273056042)) + 
self.assertTrue(np.allclose(np.mean(hist), 0.223837052001)) + self.assertTrue(np.allclose(np.median(hist), 0.147368463433)) + + def test_process_online(self): + # offline results + hist_offline, delays_offline = self.processor(act) + # calling with all activations at once + hist, delays = self.online_processor(act) + # result must be the same as for offline processing + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + # calling frame by frame after resetting + self.online_processor.reset() + result = [self.online_processor(np.atleast_1d(a), reset=False) + for a in act] + # the final result must be the same as for offline processing + hist, delays = result[-1] + hist_, delays_ = self.processor(act) + self.assertTrue(np.allclose(hist, hist_)) + self.assertTrue(np.allclose(delays, delays_)) + # result after 100 frames + hist, delays = result[99] + self.assertTrue(np.allclose(hist.max(), 0.19544739526)) + self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 46)) + self.assertTrue(np.allclose(hist.argmin(), 76)) + self.assertTrue(np.allclose(np.sum(hist), 3.58546628975)) + self.assertTrue(np.allclose(np.mean(hist), 0.0282320180295)) + self.assertTrue(np.allclose(np.median(hist), 0.00471735456373)) + # the final result must be the same as for offline processing + hist, delays = result[-1] + self.assertTrue(np.allclose(hist, hist_offline)) + self.assertTrue(np.allclose(delays, delays_offline)) + + +class TestDBNTempoHistogramProcessorClass(unittest.TestCase): + + def setUp(self): + self.processor = DBNTempoHistogramProcessor(fps=fps) + self.online_processor = DBNTempoHistogramProcessor(fps=fps, + online=True) + + def test_types(self): + self.assertIsInstance(self.processor.min_bpm, float) + self.assertIsInstance(self.processor.max_bpm, float) + self.assertIsInstance(self.processor.fps, float) + # properties + self.assertIsInstance(self.processor.min_interval, int) + 
self.assertIsInstance(self.processor.max_interval, int) + + def test_values(self): + self.assertTrue(self.processor.min_bpm == 40) + self.assertTrue(self.processor.max_bpm == 250) + self.assertTrue(self.processor.fps == 100) + self.assertTrue(self.processor.min_interval == 24) + self.assertTrue(self.processor.max_interval == 150) + + def test_tempo(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.processor, fps=fps) + tempi = tempo_processor(act) + self.assertTrue(np.allclose(tempi, DBN_TEMPI, atol=0.01)) + + def test_tempo_online(self): + tempo_processor = TempoEstimationProcessor( + histogram_processor=self.online_processor, fps=fps, online=True) + # process all activations at once + tempi = tempo_processor(act, reset=False) + self.assertTrue(np.allclose(tempi, DBN_TEMPI_ONLINE)) + # process frame by frame; with resetting results are the same + tempo_processor.reset() + tempo_processor.reset() + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1], DBN_TEMPI_ONLINE)) + # without resetting results are different + tempi = [tempo_processor(np.atleast_1d(a), reset=False) for a in act] + self.assertTrue(np.allclose(tempi[-1][:3], + [[176.4705882, 0.472499032], + [84.5070423, 0.432130320], + [74.0740741, 0.0699384753]])) + + def test_process(self): + hist, delays = self.processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 281)) + self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 0)) + self.assertTrue(np.allclose(np.sum(hist), 281)) + self.assertTrue(np.allclose(np.mean(hist), 2.2125984252)) + self.assertTrue(np.allclose(np.median(hist), 0)) + + def test_process_online(self): + hist, delays = self.online_processor(act) + self.assertTrue(np.allclose(delays, np.arange(24, 151))) + self.assertTrue(np.allclose(hist.max(), 106)) + 
self.assertTrue(np.allclose(hist.min(), 0)) + self.assertTrue(np.allclose(hist.argmax(), 10)) + self.assertTrue(np.allclose(hist.argmin(), 1)) + self.assertTrue(np.allclose(np.sum(hist), 281)) + self.assertTrue(np.allclose(np.mean(hist), 2.2125984252)) + self.assertTrue(np.allclose(np.median(hist), 0)) + class TestWriteTempoFunction(unittest.TestCase): diff --git a/tests/test_processors.py b/tests/test_processors.py index 9964249d3..2cd9236bb 100644 --- a/tests/test_processors.py +++ b/tests/test_processors.py @@ -8,7 +8,6 @@ from __future__ import absolute_import, division, print_function import tempfile import unittest -import sys from madmom.processors import * from madmom.models import * @@ -31,7 +30,7 @@ class TestBufferProcessor(unittest.TestCase): def test_1d(self): buffer = BufferProcessor(5, init=np.zeros(5)) - self.assertTrue(np.allclose(buffer.buffer, 0)) + self.assertTrue(np.allclose(buffer.data, 0)) # shift in two new values result = buffer(np.arange(2)) self.assertTrue(np.allclose(result, [0, 0, 0, 0, 1])) @@ -45,9 +44,9 @@ def test_1d(self): def test_2d(self): buffer = BufferProcessor((5, 2), init=np.zeros((5, 2))) - print(buffer.buffer) - self.assertTrue(buffer.buffer.shape == (5, 2)) - self.assertTrue(np.allclose(buffer.buffer, 0)) + print(buffer.data) + self.assertTrue(buffer.data.shape == (5, 2)) + self.assertTrue(np.allclose(buffer.data, 0)) # shift in new values result = buffer(np.arange(2).reshape((1, -1))) self.assertTrue(result.shape == (5, 2)) @@ -71,6 +70,14 @@ def test_2d(self): self.assertTrue(result.shape == (5, 2)) self.assertTrue(np.allclose(result.ravel(), np.arange(4, 14))) + def test_reset(self): + buffer = BufferProcessor(5, init=np.ones(5)) + self.assertTrue(np.allclose(buffer.data, 1)) + result = buffer(np.arange(2)) + self.assertTrue(np.allclose(result, [1, 1, 1, 0, 1])) + buffer.reset() + self.assertTrue(np.allclose(buffer.data, 1)) + # clean up def teardown():