Module note_seq.performance_encoder_decoder
Classes for converting between performance input and model input/output.
Expand source code
# Copyright 2021 The Magenta Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for converting between performance input and model input/output."""
import math
from note_seq import encoder_decoder
from note_seq import performance_lib
from note_seq.encoder_decoder import EventSequenceEncoderDecoder
from note_seq.performance_lib import PerformanceEvent
import numpy as np
# Number of floats used to encode NOTE_ON and NOTE_OFF events, using modulo-12
# encoding. 5 floats for: valid, octave_cos, octave_sin, note_cos, note_sin.
MODULO_PITCH_ENCODER_WIDTH = 5
# Number of floats used to encode TIME_SHIFT and VELOCITY events using
# modulo-bins encoding. 3 floats for: valid, event_cos, event_sin.
MODULO_VELOCITY_ENCODER_WIDTH = 3
MODULO_TIME_SHIFT_ENCODER_WIDTH = 3
# Event ranges shared by every modulo encoding. Each entry is a 4-tuple:
# (event_type, min_value, max_value, encoder_width). TIME_SHIFT and VELOCITY
# entries depend on constructor arguments and are appended per instance by
# PerformanceModuloEncoding.
MODULO_EVENT_RANGES = [
(PerformanceEvent.NOTE_ON, performance_lib.MIN_MIDI_PITCH,
performance_lib.MAX_MIDI_PITCH, MODULO_PITCH_ENCODER_WIDTH),
(PerformanceEvent.NOTE_OFF, performance_lib.MIN_MIDI_PITCH,
performance_lib.MAX_MIDI_PITCH, MODULO_PITCH_ENCODER_WIDTH),
]
class PerformanceModuloEncoding(object):
  """Modulo (circular) encoding for performance events.

  Each supported event type owns a contiguous run of floats in the encoded
  vector. The width of that run is the last field of the matching entry in
  `self._event_ranges`. Values are embedded as (cosine, sine) pairs read from
  lookup tables that are built once in the constructor.
  """

  def __init__(self, num_velocity_bins=0,
               max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS):
    """Initializer for PerformanceModuloEncoding.

    Args:
      num_velocity_bins: Number of velocity bins. The VELOCITY range and its
          lookup table are only created when this is greater than zero.
      max_shift_steps: Maximum number of shift steps supported.
    """
    self._max_shift_steps = max_shift_steps
    self._num_velocity_bins = num_velocity_bins

    # List concatenation builds a new list, so appending the VELOCITY range
    # below never mutates the module-level MODULO_EVENT_RANGES.
    ranges = MODULO_EVENT_RANGES + [
        (PerformanceEvent.TIME_SHIFT, 1, max_shift_steps,
         MODULO_TIME_SHIFT_ENCODER_WIDTH)
    ]
    if num_velocity_bins > 0:
      ranges.append(
          (PerformanceEvent.VELOCITY, 1, num_velocity_bins,
           MODULO_VELOCITY_ENCODER_WIDTH))
    self._event_ranges = ranges

    # Lookup table for modulo-12 encoding of pitch classes.
    # Possible values for semitone_steps are 1 and 7. A value of 1 places notes
    # consecutively on the unit circle; a value of 7 is meant to follow each
    # note with the one 7 semitones above it. semitone_steps = 1 seems to
    # produce better results and is the recommended value, and unit tests only
    # exist for semitone_steps = 1. If support for semitone_steps = 7 is ever
    # enabled, make it a parameter of this method and add unit tests for it.
    # NOTE(review): the angle below is derived from the table row itself, so
    # as written every semitone_steps value fills the table identically.
    semitone_steps = 1
    pitch_classes = np.zeros((12, 2))
    for step in range(12):
      slot = (step * semitone_steps) % 12
      theta = (float(slot) * math.pi) / 6.0
      pitch_classes[slot] = self._unit_vector(theta)
    self._pitch_class_table = pitch_classes

    # Lookup table for modulo-144 encoding of notes: each note sits on a unit
    # circle of 144 positions spanning 12 octaves. Only 128 MIDI notes exist,
    # so the last 16 positions are never used.
    notes = np.zeros((144, 2))
    for note in range(144):
      notes[note] = self._unit_vector((float(note) * math.pi) / 72.0)
    self._note_table = notes

    # Lookup table for modulo-bins encoding of time shifts.
    shifts = np.zeros((max_shift_steps, 2))
    for shift in range(max_shift_steps):
      theta = (float(shift) * 2.0 * math.pi) / float(max_shift_steps)
      shifts[shift] = self._unit_vector(theta)
    self._time_shift_table = shifts

    # Lookup table for modulo-bins encoding of velocities (optional).
    if num_velocity_bins > 0:
      velocities = np.zeros((num_velocity_bins, 2))
      for velocity_bin in range(num_velocity_bins):
        theta = ((float(velocity_bin) * 2.0 * math.pi) /
                 float(num_velocity_bins))
        velocities[velocity_bin] = self._unit_vector(theta)
      self._velocity_table = velocities

  @staticmethod
  def _unit_vector(angle):
    """Returns [cos(angle), sin(angle)] for an angle given in radians."""
    return [math.cos(angle), math.sin(angle)]

  @property
  def input_size(self):
    """Total number of floats in an encoded vector (sum of all slot widths)."""
    size = 0
    for event_range in self._event_ranges:
      # Entries are 4-tuples; the encoder width is the last field.
      _, _, _, width = event_range
      size += width
    return size

  def encode_modulo_event(self, event):
    """Locates the slot of `event` in the encoded vector.

    Args:
      event: An object exposing `event_type` and `event_value`.

    Returns:
      A tuple (offset, event_type, value): the index of the first float of the
      event type's slot, the event type, and the event value shifted so that
      the range minimum maps to zero.

    Raises:
      ValueError: If the event type is not one of the configured ranges.
    """
    slot_start = 0
    for kind, lowest, _, width in self._event_ranges:
      if event.event_type == kind:
        return slot_start, kind, event.event_value - lowest
      slot_start += width
    raise ValueError('Unknown event type: %s' % event.event_type)

  def embed_pitch_class(self, value):
    """Returns the (cos, sin) table row for a pitch class in [0, 12).

    Raises:
      ValueError: If `value` lies outside [0, 12).
    """
    out_of_range = value < 0 or value >= 12
    if out_of_range:
      raise ValueError('Unexpected pitch class value: %s' % value)
    return self._pitch_class_table[value]

  def embed_note(self, value):
    """Returns the (cos, sin) table row for a note position in [0, 144).

    Raises:
      ValueError: If `value` lies outside [0, 144).
    """
    out_of_range = value < 0 or value >= 144
    if out_of_range:
      raise ValueError('Unexpected note value: %s' % value)
    return self._note_table[value]

  def embed_time_shift(self, value):
    """Returns the (cos, sin) table row for a zero-based time shift.

    Raises:
      ValueError: If `value` lies outside [0, max_shift_steps).
    """
    out_of_range = value < 0 or value >= self._max_shift_steps
    if out_of_range:
      raise ValueError('Unexpected time shift value: %s' % value)
    return self._time_shift_table[value]

  def embed_velocity(self, value):
    """Returns the (cos, sin) table row for a zero-based velocity bin.

    Raises:
      ValueError: If `value` lies outside [0, num_velocity_bins). With zero
          velocity bins every value is rejected.
    """
    out_of_range = value < 0 or value >= self._num_velocity_bins
    if out_of_range:
      raise ValueError('Unexpected velocity value: %s' % value)
    return self._velocity_table[value]
class ModuloPerformanceEventSequenceEncoderDecoder(EventSequenceEncoderDecoder):
  """An EventSequenceEncoderDecoder for modulo encoding performance events.

  Inputs are produced with a modulo/circular encoding
  (PerformanceModuloEncoding); labels are encoded and decoded with a one-hot
  encoding (PerformanceOneHotEncoding).
  """

  def __init__(self, num_velocity_bins=0,
               max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS):
    """Initialize a ModuloPerformanceEventSequenceEncoderDecoder object.

    Args:
      num_velocity_bins: Number of velocity bins.
      max_shift_steps: Maximum number of shift steps supported.
    """
    # Both encodings are configured with exactly the same settings.
    settings = dict(
        num_velocity_bins=num_velocity_bins, max_shift_steps=max_shift_steps)
    self._modulo_encoding = PerformanceModuloEncoding(**settings)
    self._one_hot_encoding = PerformanceOneHotEncoding(**settings)

  @property
  def input_size(self):
    """Length of the input vector, as defined by the modulo encoding."""
    return self._modulo_encoding.input_size

  @property
  def num_classes(self):
    """Number of label classes, as defined by the one-hot encoding."""
    return self._one_hot_encoding.num_classes

  @property
  def default_event_label(self):
    """Label of the one-hot encoding's default event."""
    one_hot = self._one_hot_encoding
    return one_hot.encode_event(one_hot.default_event)

  def events_to_input(self, events, position):
    """Returns the input vector for the given position in the event sequence.

    The vector is all zeros except for the slot of the event's type, which
    holds a valid bit followed by one or two (cosine, sine) pairs.

    Args:
      events: A list-like sequence of events.
      position: An integer event position in the event sequence.

    Returns:
      An input vector, a list of floats.
    """
    vector = [0.0] * self.input_size
    modulo = self._modulo_encoding
    start, event_type, value = modulo.encode_modulo_event(events[position])

    vector[start] = 1.0  # valid bit for the event
    cursor = start + 1

    note_types = (performance_lib.PerformanceEvent.NOTE_ON,
                  performance_lib.PerformanceEvent.NOTE_OFF)
    if event_type not in note_types:
      # This must be a velocity, or a time-shift event. Encode it using
      # modulo-bins embedding.
      if event_type == performance_lib.PerformanceEvent.TIME_SHIFT:
        pair = modulo.embed_time_shift(value)
      else:
        pair = modulo.embed_velocity(value)
      vector[cursor] = pair[0]
      vector[cursor + 1] = pair[1]
      return vector

    # Encode the note on a circle of 144 notes, covering 12 octaves.
    note_pair = modulo.embed_note(value)
    vector[cursor] = note_pair[0]
    vector[cursor + 1] = note_pair[1]
    cursor += 2

    # Encode the note's pitch class, using the encoder's lookup table.
    class_pair = modulo.embed_pitch_class(value % 12)
    vector[cursor] = class_pair[0]
    vector[cursor + 1] = class_pair[1]
    return vector

  def events_to_label(self, events, position):
    """Returns the label for the given position in the event sequence.

    The label is the zero-based class index assigned by the one-hot encoding.

    Args:
      events: A list-like sequence of events.
      position: An integer event position in the event sequence.

    Returns:
      A label, an integer.
    """
    target = events[position]
    return self._one_hot_encoding.encode_event(target)

  def class_index_to_event(self, class_index, events):
    """Returns the event for the given class index.

    This is the reverse process of the self.events_to_label method.

    Args:
      class_index: An integer in the range [0, self.num_classes).
      events: A list-like sequence of events. This object is not used in this
          implementation.

    Returns:
      An event value.
    """
    return self._one_hot_encoding.decode_event(class_index)

  def labels_to_num_steps(self, labels):
    """Returns the total number of time steps for a sequence of class labels.

    Args:
      labels: A list-like sequence of integers in the range
          [0, self.num_classes).

    Returns:
      The total number of time steps for the label sequence, as determined by
      the one-hot encoding.
    """
    decoded = []
    for label in labels:
      decoded.append(self.class_index_to_event(label, decoded))
    return sum(map(self._one_hot_encoding.event_to_num_steps, decoded))
class PerformanceOneHotEncoding(encoder_decoder.OneHotEncoding):
  """One-hot encoding for performance events.

  Class indices are laid out range after range, in the order the ranges are
  listed in `self._event_ranges`; each range (event_type, min_value,
  max_value) occupies `max_value - min_value + 1` consecutive indices.
  """

  def __init__(self, num_velocity_bins=0,
               max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS,
               min_pitch=performance_lib.MIN_MIDI_PITCH,
               max_pitch=performance_lib.MAX_MIDI_PITCH):
    """Initializes the encoding.

    Args:
      num_velocity_bins: Number of velocity bins. A VELOCITY range is only
          added when this is greater than zero.
      max_shift_steps: Largest TIME_SHIFT value that can be encoded.
      min_pitch: Smallest pitch encodable for NOTE_ON / NOTE_OFF.
      max_pitch: Largest pitch encodable for NOTE_ON / NOTE_OFF.
    """
    ranges = [
        (PerformanceEvent.NOTE_ON, min_pitch, max_pitch),
        (PerformanceEvent.NOTE_OFF, min_pitch, max_pitch),
        (PerformanceEvent.TIME_SHIFT, 1, max_shift_steps)
    ]
    if num_velocity_bins > 0:
      ranges.append((PerformanceEvent.VELOCITY, 1, num_velocity_bins))
    self._event_ranges = ranges
    self._max_shift_steps = max_shift_steps

  @property
  def num_classes(self):
    """Total number of class indices across all event ranges."""
    total = 0
    for _, lowest, highest in self._event_ranges:
      total += highest - lowest + 1
    return total

  @property
  def default_event(self):
    """A TIME_SHIFT event of the maximum supported number of steps."""
    return PerformanceEvent(
        event_type=PerformanceEvent.TIME_SHIFT,
        event_value=self._max_shift_steps)

  def encode_event(self, event):
    """Returns the class index of `event`.

    Raises:
      ValueError: If the event type is not one of the configured ranges.
    """
    base = 0
    for kind, lowest, highest in self._event_ranges:
      if event.event_type == kind:
        return base + event.event_value - lowest
      base += highest - lowest + 1
    raise ValueError('Unknown event type: %s' % event.event_type)

  def decode_event(self, index):
    """Returns the PerformanceEvent that `index` stands for.

    Raises:
      ValueError: If `index` does not fall inside any configured range.
    """
    base = 0
    for kind, lowest, highest in self._event_ranges:
      last_index = base + highest - lowest
      if base <= index <= last_index:
        return PerformanceEvent(
            event_type=kind, event_value=lowest + index - base)
      base += highest - lowest + 1
    raise ValueError('Unknown event index: %s' % index)

  def event_to_num_steps(self, event):
    """Returns the steps spanned by `event`: its value for TIME_SHIFT, else 0."""
    is_shift = event.event_type == PerformanceEvent.TIME_SHIFT
    return event.event_value if is_shift else 0
class NotePerformanceEventSequenceEncoderDecoder(
    EventSequenceEncoderDecoder):
  """Multiple one-hot encoding for event tuples.

  Every event is a 4-tuple of PerformanceEvents: (TIME_SHIFT, NOTE_ON,
  VELOCITY, DURATION). It is encoded as six class indices: time shift
  major/minor, pitch, velocity and duration major/minor. Time shift and
  duration are each split into a (major, minor) pair, with
  value = major * steps_per_segment + minor.
  """

  def __init__(self, num_velocity_bins, max_shift_steps=1000,
               max_duration_steps=1000,
               min_pitch=performance_lib.MIN_MIDI_PITCH,
               max_pitch=performance_lib.MAX_MIDI_PITCH):
    """Initializes the encoder/decoder.

    Args:
      num_velocity_bins: Number of velocity classes.
      max_shift_steps: Largest time shift; `max_shift_steps + 1` values
          (including 0) are split into segments.
      max_duration_steps: Largest duration; durations start at 1, so exactly
          `max_duration_steps` values are split into segments.
      min_pitch: Pitch that maps to NOTE_ON class index 0.
      max_pitch: Largest encodable pitch.

    Raises:
      AssertionError: If either step count only admits a single segment
          (for example when it is prime).
      ValueError: Raised by `min()` when a step count has no candidate
          divisor at all (fewer than two steps).
    """
    self._min_pitch = min_pitch

    def optimal_num_segments(steps):
      # Among the divisors i of `steps` (excluding `steps` itself), pick the
      # one minimizing i + steps / i, i.e. the summed size of the major and
      # minor one-hot vectors.
      segments_indices = [(i, i + steps / i) for i in range(1, steps)
                          if steps % i == 0]
      return min(segments_indices, key=lambda v: v[1])[0]

    # Add 1 because we need to represent 0 time shifts.
    self._shift_steps_segments = optimal_num_segments(max_shift_steps + 1)
    assert self._shift_steps_segments > 1, (
        'max_shift_steps + 1 must split into more than one segment')
    self._shift_steps_per_segment = (
        (max_shift_steps + 1) // self._shift_steps_segments)

    self._max_duration_steps = max_duration_steps
    self._duration_steps_segments = optimal_num_segments(max_duration_steps)
    assert self._duration_steps_segments > 1, (
        'max_duration_steps must split into more than one segment')
    self._duration_steps_per_segment = (
        max_duration_steps // self._duration_steps_segments)

    self._num_classes = [
        # TIME_SHIFT major
        self._shift_steps_segments,
        # TIME_SHIFT minor
        self._shift_steps_per_segment,
        # NOTE_ON
        max_pitch - min_pitch + 1,
        # VELOCITY
        num_velocity_bins,
        # DURATION major
        self._duration_steps_segments,
        # DURATION minor
        self._duration_steps_per_segment,
    ]

  @property
  def input_size(self):
    """Length of the input vector: the six one-hot vectors concatenated."""
    return sum(self._num_classes)

  @property
  def num_classes(self):
    """List with the number of classes of each of the six sub-encodings."""
    return self._num_classes

  @property
  def shift_steps_segments(self):
    """Number of major segments used for time shifts."""
    return self._shift_steps_segments

  @property
  def duration_steps_segments(self):
    """Number of major segments used for durations."""
    return self._duration_steps_segments

  @property
  def shift_steps_per_segment(self):
    """Number of time-shift values inside one major segment."""
    return self._shift_steps_per_segment

  @property
  def duration_steps_per_segment(self):
    """Number of duration values inside one major segment."""
    return self._duration_steps_per_segment

  @property
  def default_event_label(self):
    """Label of the default event: shift 0, pitch 60, velocity 1, duration 1."""
    return self._encode_event(
        (PerformanceEvent(PerformanceEvent.TIME_SHIFT, 0),
         PerformanceEvent(PerformanceEvent.NOTE_ON, 60),
         PerformanceEvent(PerformanceEvent.VELOCITY, 1),
         PerformanceEvent(PerformanceEvent.DURATION, 1)))

  def _encode_event(self, event):
    """Converts an event 4-tuple into its six class indices.

    Args:
      event: A (TIME_SHIFT, NOTE_ON, VELOCITY, DURATION) tuple of objects
          exposing `event_value`.

    Returns:
      A tuple (time_shift_major, time_shift_minor, note_on, velocity,
      duration_major, duration_minor).
    """
    time_shift_major = event[0].event_value // self._shift_steps_per_segment
    time_shift_minor = event[0].event_value % self._shift_steps_per_segment

    note_on = event[1].event_value - self._min_pitch

    # Velocity values start at 1, class indices at 0.
    velocity = event[2].event_value - 1

    # Don't need to represent 0 duration, so subtract 1.
    duration_value = event[3].event_value - 1
    duration_major = duration_value // self._duration_steps_per_segment
    duration_minor = duration_value % self._duration_steps_per_segment
    return (time_shift_major, time_shift_minor, note_on, velocity,
            duration_major, duration_minor)

  def events_to_input(self, events, position):
    """Returns the concatenated one-hot input vector for `events[position]`.

    Args:
      events: A list-like sequence of event 4-tuples.
      position: An integer event position in the event sequence.

    Returns:
      A 1-D numpy array of length `self.input_size`.
    """
    event = events[position]
    encoded = self._encode_event(event)

    one_hots = []
    for i, encoded_sub_event in enumerate(encoded):
      one_hot = [0.0] * self._num_classes[i]
      one_hot[encoded_sub_event] = 1.0
      one_hots.append(one_hot)

    return np.hstack(one_hots)

  def events_to_label(self, events, position):
    """Returns the tuple of six class indices for `events[position]`."""
    event = events[position]
    return self._encode_event(event)

  def class_index_to_event(self, class_index, events):
    """Decodes six class indices back into an event 4-tuple.

    This is the reverse of `_encode_event`.

    Args:
      class_index: A sequence of six class indices, in the order returned by
          `events_to_label`.
      events: Not used in this implementation.

    Returns:
      A (TIME_SHIFT, NOTE_ON, VELOCITY, DURATION) tuple of PerformanceEvents.
    """
    class_indices = class_index
    time_shift = (class_indices[0] * self._shift_steps_per_segment +
                  class_indices[1])
    pitch = class_indices[2] + self._min_pitch
    velocity = class_indices[3] + 1
    duration = (class_indices[4] * self._duration_steps_per_segment +
                class_indices[5]) + 1

    return (PerformanceEvent(PerformanceEvent.TIME_SHIFT, time_shift),
            PerformanceEvent(PerformanceEvent.NOTE_ON, pitch),
            PerformanceEvent(PerformanceEvent.VELOCITY, velocity),
            PerformanceEvent(PerformanceEvent.DURATION, duration))

  def labels_to_num_steps(self, labels):
    """Returns the total number of time steps for a sequence of labels.

    The total is the sum of every event's time shift plus the duration of the
    last event in the sequence.

    Args:
      labels: A list-like sequence of labels, each a sequence of six class
          indices.

    Returns:
      The total number of steps; 0 for an empty sequence.
    """
    steps = 0
    # Explicit sentinel: without it an empty `labels` left the loop variable
    # unbound and the check after the loop raised UnboundLocalError.
    last_event = None
    for label in labels:
      last_event = self.class_index_to_event(label, None)
      steps += last_event[0].event_value
    if last_event is not None:
      steps += last_event[3].event_value
    return steps
Classes
class ModuloPerformanceEventSequenceEncoderDecoder (num_velocity_bins=0, max_shift_steps=100)
-
An EventSequenceEncoderDecoder for modulo encoding performance events.
ModuloPerformanceEventSequenceEncoderDecoder is an EventSequenceEncoderDecoder that uses modulo/circular encoding for encoding performance input events, and otherwise uses one hot encoding for encoding and decoding of labels.
Initialize a ModuloPerformanceEventSequenceEncoderDecoder object.
Args
num_velocity_bins
- Number of velocity bins.
max_shift_steps
- Maximum number of shift steps supported.
Expand source code
class ModuloPerformanceEventSequenceEncoderDecoder(EventSequenceEncoderDecoder): """An EventSequenceEncoderDecoder for modulo encoding performance events. ModuloPerformanceEventSequenceEncoderDecoder is an EventSequenceEncoderDecoder that uses modulo/circular encoding for encoding performance input events, and otherwise uses one hot encoding for encoding and decoding of labels. """ def __init__(self, num_velocity_bins=0, max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS): """Initialize a ModuloPerformanceEventSequenceEncoderDecoder object. Args: num_velocity_bins: Number of velocity bins. max_shift_steps: Maximum number of shift steps supported. """ self._modulo_encoding = PerformanceModuloEncoding( num_velocity_bins=num_velocity_bins, max_shift_steps=max_shift_steps) self._one_hot_encoding = PerformanceOneHotEncoding( num_velocity_bins=num_velocity_bins, max_shift_steps=max_shift_steps) @property def input_size(self): return self._modulo_encoding.input_size @property def num_classes(self): return self._one_hot_encoding.num_classes @property def default_event_label(self): return self._one_hot_encoding.encode_event( self._one_hot_encoding.default_event) def events_to_input(self, events, position): """Returns the input vector for the given position in the event sequence. Returns a modulo/circular encoding for the given position in the performance event sequence. Args: events: A list-like sequence of events. position: An integer event position in the event sequence. Returns: An input vector, a list of floats. """ input_ = [0.0] * self.input_size offset, event_type, value = (self._modulo_encoding .encode_modulo_event(events[position])) input_[offset] = 1.0 # valid bit for the event offset += 1 if event_type in (performance_lib.PerformanceEvent.NOTE_ON, performance_lib.PerformanceEvent.NOTE_OFF): # Encode the note on a circle of 144 notes, covering 12 octaves. 
cosine_sine_pair = self._modulo_encoding.embed_note(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] offset += 2 # Encode the note's pitch class, using the encoder's lookup table. value %= 12 cosine_sine_pair = self._modulo_encoding.embed_pitch_class(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] else: # This must be a velocity, or a time-shift event. Encode it using # modulo-bins embedding. if event_type == performance_lib.PerformanceEvent.TIME_SHIFT: cosine_sine_pair = self._modulo_encoding.embed_time_shift(value) else: cosine_sine_pair = self._modulo_encoding.embed_velocity(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] return input_ def events_to_label(self, events, position): """Returns the label for the given position in the event sequence. Returns the zero-based index value for the given position in the event sequence, as determined by the one hot encoding. Args: events: A list-like sequence of events. position: An integer event position in the event sequence. Returns: A label, an integer. """ return self._one_hot_encoding.encode_event(events[position]) def class_index_to_event(self, class_index, events): """Returns the event for the given class index. This is the reverse process of the self.events_to_label method. Args: class_index: An integer in the range [0, self.num_classes). events: A list-like sequence of events. This object is not used in this implementation. Returns: An event value. """ return self._one_hot_encoding.decode_event(class_index) def labels_to_num_steps(self, labels): """Returns the total number of time steps for a sequence of class labels. Args: labels: A list-like sequence of integers in the range [0, self.num_classes). Returns: The total number of time steps for the label sequence, as determined by the one-hot encoding. 
""" events = [] for label in labels: events.append(self.class_index_to_event(label, events)) return sum(self._one_hot_encoding.event_to_num_steps(event) for event in events)
Ancestors
Methods
def class_index_to_event(self, class_index, events)
-
Returns the event for the given class index.
This is the reverse process of the self.events_to_label method.
Args
class_index
- An integer in the range [0, self.num_classes).
events
- A list-like sequence of events. This object is not used in this implementation.
Returns
An event value.
Expand source code
def class_index_to_event(self, class_index, events): """Returns the event for the given class index. This is the reverse process of the self.events_to_label method. Args: class_index: An integer in the range [0, self.num_classes). events: A list-like sequence of events. This object is not used in this implementation. Returns: An event value. """ return self._one_hot_encoding.decode_event(class_index)
def events_to_input(self, events, position)
-
Returns the input vector for the given position in the event sequence.
Returns a modulo/circular encoding for the given position in the performance event sequence.
Args
events
- A list-like sequence of events.
position
- An integer event position in the event sequence.
Returns
An input vector, a list of floats.
Expand source code
def events_to_input(self, events, position): """Returns the input vector for the given position in the event sequence. Returns a modulo/circular encoding for the given position in the performance event sequence. Args: events: A list-like sequence of events. position: An integer event position in the event sequence. Returns: An input vector, a list of floats. """ input_ = [0.0] * self.input_size offset, event_type, value = (self._modulo_encoding .encode_modulo_event(events[position])) input_[offset] = 1.0 # valid bit for the event offset += 1 if event_type in (performance_lib.PerformanceEvent.NOTE_ON, performance_lib.PerformanceEvent.NOTE_OFF): # Encode the note on a circle of 144 notes, covering 12 octaves. cosine_sine_pair = self._modulo_encoding.embed_note(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] offset += 2 # Encode the note's pitch class, using the encoder's lookup table. value %= 12 cosine_sine_pair = self._modulo_encoding.embed_pitch_class(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] else: # This must be a velocity, or a time-shift event. Encode it using # modulo-bins embedding. if event_type == performance_lib.PerformanceEvent.TIME_SHIFT: cosine_sine_pair = self._modulo_encoding.embed_time_shift(value) else: cosine_sine_pair = self._modulo_encoding.embed_velocity(value) input_[offset] = cosine_sine_pair[0] input_[offset + 1] = cosine_sine_pair[1] return input_
def events_to_label(self, events, position)
-
Returns the label for the given position in the event sequence.
Returns the zero-based index value for the given position in the event sequence, as determined by the one hot encoding.
Args
events
- A list-like sequence of events.
position
- An integer event position in the event sequence.
Returns
A label, an integer.
Expand source code
def events_to_label(self, events, position): """Returns the label for the given position in the event sequence. Returns the zero-based index value for the given position in the event sequence, as determined by the one hot encoding. Args: events: A list-like sequence of events. position: An integer event position in the event sequence. Returns: A label, an integer. """ return self._one_hot_encoding.encode_event(events[position])
def labels_to_num_steps(self, labels)
-
Returns the total number of time steps for a sequence of class labels.
Args
labels
- A list-like sequence of integers in the range [0, self.num_classes).
Returns
The total number of time steps for the label sequence, as determined by the one-hot encoding.
Expand source code
def labels_to_num_steps(self, labels): """Returns the total number of time steps for a sequence of class labels. Args: labels: A list-like sequence of integers in the range [0, self.num_classes). Returns: The total number of time steps for the label sequence, as determined by the one-hot encoding. """ events = [] for label in labels: events.append(self.class_index_to_event(label, events)) return sum(self._one_hot_encoding.event_to_num_steps(event) for event in events)
Inherited members
class NotePerformanceEventSequenceEncoderDecoder (num_velocity_bins, max_shift_steps=1000, max_duration_steps=1000, min_pitch=0, max_pitch=127)
-
Multiple one-hot encoding for event tuples.
Expand source code
class NotePerformanceEventSequenceEncoderDecoder( EventSequenceEncoderDecoder): """Multiple one-hot encoding for event tuples.""" def __init__(self, num_velocity_bins, max_shift_steps=1000, max_duration_steps=1000, min_pitch=performance_lib.MIN_MIDI_PITCH, max_pitch=performance_lib.MAX_MIDI_PITCH): self._min_pitch = min_pitch def optimal_num_segments(steps): segments_indices = [(i, i + steps / i) for i in range(1, steps) if steps % i == 0] return min(segments_indices, key=lambda v: v[1])[0] # Add 1 because we need to represent 0 time shifts. self._shift_steps_segments = optimal_num_segments(max_shift_steps + 1) assert self._shift_steps_segments > 1 self._shift_steps_per_segment = ( (max_shift_steps + 1) // self._shift_steps_segments) self._max_duration_steps = max_duration_steps self._duration_steps_segments = optimal_num_segments(max_duration_steps) assert self._duration_steps_segments > 1 self._duration_steps_per_segment = ( max_duration_steps // self._duration_steps_segments) self._num_classes = [ # TIME_SHIFT major self._shift_steps_segments, # TIME_SHIFT minor self._shift_steps_per_segment, # NOTE_ON max_pitch - min_pitch + 1, # VELOCITY num_velocity_bins, # DURATION major self._duration_steps_segments, # DURATION minor self._duration_steps_per_segment, ] @property def input_size(self): return sum(self._num_classes) @property def num_classes(self): return self._num_classes @property def shift_steps_segments(self): return self._shift_steps_segments @property def duration_steps_segments(self): return self._duration_steps_segments @property def shift_steps_per_segment(self): return self._shift_steps_per_segment @property def duration_steps_per_segment(self): return self._duration_steps_per_segment @property def default_event_label(self): return self._encode_event( (PerformanceEvent(PerformanceEvent.TIME_SHIFT, 0), PerformanceEvent(PerformanceEvent.NOTE_ON, 60), PerformanceEvent(PerformanceEvent.VELOCITY, 1), PerformanceEvent(PerformanceEvent.DURATION, 1))) def 
_encode_event(self, event): time_shift_major = event[0].event_value // self._shift_steps_per_segment time_shift_minor = event[0].event_value % self._shift_steps_per_segment note_on = event[1].event_value - self._min_pitch velocity = event[2].event_value - 1 # Don't need to represent 0 duration, so subtract 1. duration_value = event[3].event_value - 1 duration_major = duration_value // self._duration_steps_per_segment duration_minor = duration_value % self._duration_steps_per_segment return (time_shift_major, time_shift_minor, note_on, velocity, duration_major, duration_minor) def events_to_input(self, events, position): event = events[position] encoded = self._encode_event(event) one_hots = [] for i, encoded_sub_event in enumerate(encoded): one_hot = [0.0] * self._num_classes[i] one_hot[encoded_sub_event] = 1.0 one_hots.append(one_hot) return np.hstack(one_hots) def events_to_label(self, events, position): event = events[position] return self._encode_event(event) def class_index_to_event(self, class_index, events): class_indices = class_index time_shift = (class_indices[0] * self._shift_steps_per_segment + class_indices[1]) pitch = class_indices[2] + self._min_pitch velocity = class_indices[3] + 1 duration = (class_indices[4] * self._duration_steps_per_segment + class_indices[5]) + 1 return (PerformanceEvent(PerformanceEvent.TIME_SHIFT, time_shift), PerformanceEvent(PerformanceEvent.NOTE_ON, pitch), PerformanceEvent(PerformanceEvent.VELOCITY, velocity), PerformanceEvent(PerformanceEvent.DURATION, duration)) def labels_to_num_steps(self, labels): steps = 0 for label in labels: event = self.class_index_to_event(label, None) steps += event[0].event_value if event: steps += event[3].event_value return steps
Ancestors
Instance variables
var duration_steps_per_segment
-
Expand source code
@property def duration_steps_per_segment(self): return self._duration_steps_per_segment
var duration_steps_segments
-
Expand source code
@property def duration_steps_segments(self): return self._duration_steps_segments
var shift_steps_per_segment
-
Expand source code
@property def shift_steps_per_segment(self): return self._shift_steps_per_segment
var shift_steps_segments
-
Expand source code
@property def shift_steps_segments(self): return self._shift_steps_segments
Inherited members
class PerformanceModuloEncoding (num_velocity_bins=0, max_shift_steps=100)
-
Modulo encoding for performance events.
Initializer for PerformanceModuloEncoding.
Args
num_velocity_bins
- Number of velocity bins.
max_shift_steps
- Maximum number of shift steps supported.
Expand source code
class PerformanceModuloEncoding(object): """Modulo encoding for performance events.""" def __init__(self, num_velocity_bins=0, max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS): """Initiaizer for PerformanceModuloEncoding. Args: num_velocity_bins: Number of velocity bins. max_shift_steps: Maximum number of shift steps supported. """ self._event_ranges = MODULO_EVENT_RANGES + [ (PerformanceEvent.TIME_SHIFT, 1, max_shift_steps, MODULO_TIME_SHIFT_ENCODER_WIDTH) ] if num_velocity_bins > 0: self._event_ranges.append( (PerformanceEvent.VELOCITY, 1, num_velocity_bins, MODULO_VELOCITY_ENCODER_WIDTH)) self._max_shift_steps = max_shift_steps self._num_velocity_bins = num_velocity_bins # Create a lookup table for modulo-12 encoding of pitch classes. # Possible values for semitone_steps are 1 and 7. A value of 1 corresponds # to placing notes consecutively on the unit circle. A value of 7 # corresponds to following each note with one that is 7 semitones above it. # semitone_steps = 1 seems to produce better results, and is the recommended # value. Moreover, unit tests are provided only for semitone_steps = 1. If # in the future you plan to enable support for semitone_steps = 7, then # please make semitone_steps a parameter of this method, and add unit tests # for it. semitone_steps = 1 self._pitch_class_table = np.zeros((12, 2)) for i in range(12): row = (i * semitone_steps) % 12 angle = (float(row) * math.pi) / 6.0 self._pitch_class_table[row] = [math.cos(angle), math.sin(angle)] # Create a lookup table for modulo-144 encoding of notes. Encode each note # on a unit circle of 144 notes, spanning 12 octaves. Since there are only # 128 midi notes, the last 16 positions on the unit circle will not be used. self._note_table = np.zeros((144, 2)) for i in range(144): angle = (float(i) * math.pi) / 72.0 self._note_table[i] = [math.cos(angle), math.sin(angle)] # Create a lookup table for modulo-bins encoding of time_shifts. 
self._time_shift_table = np.zeros((max_shift_steps, 2)) for i in range(max_shift_steps): angle = (float(i) * 2.0 * math.pi) / float(max_shift_steps) self._time_shift_table[i] = [math.cos(angle), math.sin(angle)] # Create a lookup table for modulo-bins encoding of velocities. if num_velocity_bins > 0: self._velocity_table = np.zeros((num_velocity_bins, 2)) for i in range(num_velocity_bins): angle = (float(i) * 2.0 * math.pi) / float(num_velocity_bins) self._velocity_table[i] = [math.cos(angle), math.sin(angle)] @property def input_size(self): total = 0 for _, _, _, encoder_width in self._event_ranges: total += encoder_width return total def encode_modulo_event(self, event): offset = 0 for event_type, min_value, _, encoder_width in self._event_ranges: if event.event_type == event_type: value = event.event_value - min_value return offset, event_type, value offset += encoder_width raise ValueError('Unknown event type: %s' % event.event_type) def embed_pitch_class(self, value): if value < 0 or value >= 12: raise ValueError('Unexpected pitch class value: %s' % value) return self._pitch_class_table[value] def embed_note(self, value): if value < 0 or value >= 144: raise ValueError('Unexpected note value: %s' % value) return self._note_table[value] def embed_time_shift(self, value): if value < 0 or value >= self._max_shift_steps: raise ValueError('Unexpected time shift value: %s' % value) return self._time_shift_table[value] def embed_velocity(self, value): if value < 0 or value >= self._num_velocity_bins: raise ValueError('Unexpected velocity value: %s' % value) return self._velocity_table[value]
Instance variables
var input_size
-
Expand source code
@property def input_size(self): total = 0 for _, _, _, encoder_width in self._event_ranges: total += encoder_width return total
Methods
def embed_note(self, value)
-
Expand source code
def embed_note(self, value): if value < 0 or value >= 144: raise ValueError('Unexpected note value: %s' % value) return self._note_table[value]
def embed_pitch_class(self, value)
-
Expand source code
def embed_pitch_class(self, value): if value < 0 or value >= 12: raise ValueError('Unexpected pitch class value: %s' % value) return self._pitch_class_table[value]
def embed_time_shift(self, value)
-
Expand source code
def embed_time_shift(self, value): if value < 0 or value >= self._max_shift_steps: raise ValueError('Unexpected time shift value: %s' % value) return self._time_shift_table[value]
def embed_velocity(self, value)
-
Expand source code
def embed_velocity(self, value): if value < 0 or value >= self._num_velocity_bins: raise ValueError('Unexpected velocity value: %s' % value) return self._velocity_table[value]
def encode_modulo_event(self, event)
-
Expand source code
def encode_modulo_event(self, event): offset = 0 for event_type, min_value, _, encoder_width in self._event_ranges: if event.event_type == event_type: value = event.event_value - min_value return offset, event_type, value offset += encoder_width raise ValueError('Unknown event type: %s' % event.event_type)
class PerformanceOneHotEncoding (num_velocity_bins=0, max_shift_steps=100, min_pitch=0, max_pitch=127)
-
One-hot encoding for performance events.
Expand source code
class PerformanceOneHotEncoding(encoder_decoder.OneHotEncoding): """One-hot encoding for performance events.""" def __init__(self, num_velocity_bins=0, max_shift_steps=performance_lib.DEFAULT_MAX_SHIFT_STEPS, min_pitch=performance_lib.MIN_MIDI_PITCH, max_pitch=performance_lib.MAX_MIDI_PITCH): self._event_ranges = [ (PerformanceEvent.NOTE_ON, min_pitch, max_pitch), (PerformanceEvent.NOTE_OFF, min_pitch, max_pitch), (PerformanceEvent.TIME_SHIFT, 1, max_shift_steps) ] if num_velocity_bins > 0: self._event_ranges.append( (PerformanceEvent.VELOCITY, 1, num_velocity_bins)) self._max_shift_steps = max_shift_steps @property def num_classes(self): return sum(max_value - min_value + 1 for event_type, min_value, max_value in self._event_ranges) @property def default_event(self): return PerformanceEvent( event_type=PerformanceEvent.TIME_SHIFT, event_value=self._max_shift_steps) def encode_event(self, event): offset = 0 for event_type, min_value, max_value in self._event_ranges: if event.event_type == event_type: return offset + event.event_value - min_value offset += max_value - min_value + 1 raise ValueError('Unknown event type: %s' % event.event_type) def decode_event(self, index): offset = 0 for event_type, min_value, max_value in self._event_ranges: if offset <= index <= offset + max_value - min_value: return PerformanceEvent( event_type=event_type, event_value=min_value + index - offset) offset += max_value - min_value + 1 raise ValueError('Unknown event index: %s' % index) def event_to_num_steps(self, event): if event.event_type == PerformanceEvent.TIME_SHIFT: return event.event_value else: return 0
Ancestors
Inherited members