#!/usr/pkg/bin/python
"""determine primary frequency of wave file

When reading the source, note that we use 'sample' to indicate what the
actual audio hardware calls a sample, and 'chunk' to indicate the "sample"
of the audio file we are converting. It can be confusing even still."""
import sys, os, wave, pwd
sys.path.append(os.sep.join([pwd.getpwuid(os.geteuid())[5], 'lib', 'python']))
from com.jcomeau.midi import midi
from power import *
from struct import *

self = sys.argv.pop(0).split(os.sep)[-1].split('.')[0]
verbose, debugging = True, True
ticklength = .5 / 96  # default MIDI tick, 96 per quarter note = 500 msec
# you need to make the chunklength big enough to see at least half of one
# cycle, and preferably a full cycle, of the longest wavelength you want
# to detect. if that's 30 Hz, one cycle is 33.3 milliseconds. since one
# tick is 1/192 seconds, or 5.2 ms, a chunk has to be about 6 ticks long.
# make sure, for easier programming, to make chunklength a multiple
# of ticklength
chunklength = ticklength * 6
notes = midi.NoteMapping('all', 'frequency')
if len(sys.argv) < 1:
    sys.stderr.write("Usage: %s INFILE[...]\n" % self)
    sys.exit(0)

def verboseprint(*args):
    "helpful printout"
    global verbose
    if verbose:
        sys.stderr.write("%s\n" % repr(args))

def dbgprint(*args):
    "debugging printout"
    global debugging
    if debugging:
        sys.stderr.write("%s\n" % repr(args))

def soundchunk(wavefile, startposition=None, fraction=1.0):
    """get a fractional-second chunk from sum of channels and return as list

    'fraction' is the fraction of one second to read (clamped to 1.0);
    'startposition', if given, is a seek offset in seconds."""
    channels = wavefile.getnchannels()
    samplewidth = wavefile.getsampwidth()
    framerate = wavefile.getframerate()
    #dbgprint("getting sound chunk")
    # struct codes for signed 8-, 16- and 32-bit samples
    # (renamed from 'format' to avoid shadowing the builtin)
    sampleformat = {1: 'b', 2: 'h', 4: 'l'}
    if fraction > 1:
        fraction = 1
    chunksize = int(fraction * framerate) & ~1  # make it an even number
    if startposition is not None:
        wavefile.setpos(int(startposition * framerate))
    chunk = wavefile.readframes(chunksize)
    # '//' keeps this integer division correct under Python 3 as well
    chunk = unpack(sampleformat[samplewidth] * (len(chunk) // samplewidth),
                   chunk)
    #dbgprint('chunk', chunk[0:10])
    # sum the interleaved channels into a single mono signal
    total = array([0] * (len(chunk) // channels))
    for channel in range(channels):
        total = total + array(list(chunk)[channel::channels])
    #dbgprint('total', total[0:10])
    return list(total)

def dcfilter(samples):
    """eliminate DC component from signal

    returns the samples with their mean subtracted; parameter renamed
    from 'array' to avoid shadowing the array() constructor."""
    if len(samples) == 0:
        return samples
    dc_component = average(samples)
    return [sample - dc_component for sample in samples]

def average(samples):
    "compute average of samples in array"
    return float(sum(samples)) / len(samples)

def averagepower(wavefile):
    """compute mean spectral power per sample over the whole file

    rewinds the file before returning so the caller can reread it;
    exits the program if the file holds no wave data."""
    length, total = 0, 0
    framerate = wavefile.getframerate()
    while True:
        try:
            chunk = soundchunk(wavefile)
        except Exception:
            break
        if len(chunk) == 0:
            break
        chunkfft = powerspectrum(dcfilter(chunk), framerate).tolist()
        # bug fix: accumulate the power of every chunk; the original
        # assignment 'total = sum(...)' kept only the final chunk
        total += sum(chunkfft)
        length += len(chunk)
    wavefile.setpos(0)
    if length == 0:
        sys.stderr.write("no wave data found")
        sys.exit(1)
    # local renamed from 'averagepower' to stop shadowing this function
    mean = float(total) / length
    verboseprint('average power: %.2f' % mean)
    return mean

def nearestnotes():
    "map each integer frequency in Hz to the nearest MIDI note number"
    nearest = dict()
    global notes
    twelfth = 1.0 / 12
    for note in notes.keys():
        # calculate half a semitone above and below
        minimum = notes[note] * pow(2, -twelfth / 2)
        maximum = notes[note] * pow(2, twelfth / 2)
        for frequency in range(int(minimum), int(maximum)):
            nearest[frequency] = note
    return nearest

def reducepower(song, peak):
    "set the power level of each note to a fraction of peak, and peak to 127"
    verboseprint('peak power', peak)
    dbgprint('reducepower', song)
    POWER = 1  # offset of power level in each note list
    for timeslot in song:
        # notes in a timeslot are appended strongest-first, so a weak
        # note means everything after it is weaker still
        for position in range(len(timeslot)):
            timeslot[position][POWER] = \
                int((timeslot[position][POWER] / float(peak)) * 126) + 1
            # drop any notes less than 1/10th the strength of the strongest
            if position > 0 and \
               timeslot[position][POWER] < timeslot[position - 1][POWER] / 10:
                while len(timeslot) > position:
                    timeslot.pop()
                break

def combinetimes(song, delta):
    """combine the timeslots into actual track of on-off events

    returns a raw MIDI track list: [MTrk, 0, [deltatime, status, note,
    velocity], ...]; a Note On with velocity 0 serves as Note Off."""
    dbgprint('combinetimes', song)
    global ticklength, chunklength
    ticksperchunk = int(chunklength / ticklength)
    current = []  # [note, velocity] pairs currently sounding
    time = 0
    lastevent = 0  # last 'time' an event was appended to track
    track = [midi.MTrk, 0]
    for timeslot in song:
        verboseprint('timeslot', timeslot)
        # renamed from 'notes'/'list' to avoid shadowing the global
        # note mapping and the builtin
        newnotes = [entry[0] for entry in timeslot]
        currentnotes = [entry[0] for entry in current]
        for note in newnotes:
            if note not in currentnotes:
                # note was not in currentnotes, so output Note On event
                # and add to current, note value and velocity
                velocity = timeslot[newnotes.index(note)][1]
                track.append([time - lastevent, 0x90, note, velocity])
                lastevent = time
                current.append([note, velocity])
            # if the note is already sounding there is nothing to do
        # now go through currentnotes and see if all are really current
        # if not, output Note Off events and remove them from the list
        for note in currentnotes:
            if note not in newnotes:
                track.append([time - lastevent, 0x90, note, 0])
                lastevent = time
                # bug fix: remove by value; the original popped by an
                # index into the stale 'currentnotes' snapshot, which
                # removed the wrong element once an earlier pop had
                # shifted 'current'
                current = [entry for entry in current if entry[0] != note]
        time += ticksperchunk
    return track

def wav2mid():
    "convert each wave file named on the command line to FILENAME.mid"
    global chunklength, notes
    midinotes = nearestnotes()
    while len(sys.argv) > 0:
        filename = sys.argv.pop(0)
        verboseprint(filename)
        wavefile = wave.open(filename, "rb")
        outfilename = '%s.mid' % filename
        outfile = open(outfilename, "wb")
        poweraverage = averagepower(wavefile)
        channels = wavefile.getnchannels()
        samplewidth = wavefile.getsampwidth()
        framerate = wavefile.getframerate()
        nyquist = framerate // 2
        comptype = wavefile.getcomptype()
        songlist = []
        peakpower = 0  # strength of strongest signal in song
        dbgprint("stats for %s (%s): channels=%d, samplewidth=%d, framerate=%d" % \
                 (filename, comptype, channels, samplewidth, framerate))
        while True:
            try:
                chunk = soundchunk(wavefile, None, chunklength)
            except Exception:
                break
            if len(chunk) == 0:
                break
            # weed DC out of signal
            chunkfft = powerspectrum(dcfilter(chunk), framerate).tolist()
            notelist = []
            # make a list of strongest frequencies in each chunk
            while True:
                # make sure to include nyquist frequency in check for max
                # ignore anything less than 30 Hz
                maximum = max(chunkfft[30:nyquist + 1])
                if maximum < 10 * poweraverage:
                    dbgprint('current maximum signal %d < 10 * poweraverage (%d)' % \
                             (maximum, poweraverage))
                    break
                if maximum > peakpower:
                    peakpower = maximum
                    verboseprint('peakpower now', peakpower)
                frequency = chunkfft.index(maximum)
                try:
                    note = midinotes[frequency]
                except KeyError:
                    # frequency outside the mapped note range
                    break
                strength = maximum / poweraverage
                chunkfft[frequency] = 0  # so the next max() finds the runner-up
                notesonly = [entry[0] for entry in notelist]
                if note in notesonly:
                    # add this frequency's strength to the primary frequency
                    # for that note
                    # NOTE(review): new notes are stored with the *ratio*
                    # (maximum / poweraverage) but accumulated with the raw
                    # 'maximum' — preserved from the original; confirm intent
                    notelist[notesonly.index(note)][1] += maximum
                    if notelist[notesonly.index(note)][1] > peakpower:
                        peakpower = notelist[notesonly.index(note)][1]
                        verboseprint('peakpower now', peakpower)
                else:
                    notelist.append([note, strength])
            songlist.append(notelist)
        wavefile.close()
        reducepower(songlist, peakpower)
        track = combinetimes(songlist, chunklength)
        track.append([0, 0xff, 0x2f, 0])  # end of track marker
        midi.UndumpMidiHeader(outfile, [midi.MThd, 6, 0, 1, 96])
        midi.UndumpMidiTrack(outfile, track)
        # bug fix: close the output file so buffered MIDI data is flushed
        outfile.close()

if __name__ == "__main__":
    wav2mid()