#!/usr/pkg/bin/python
"""Turn genome data into music, plus 2-bit packing helpers.

Three commands live in this file; main() picks one from the name the
script was invoked under:

    fa2mid        convert a FASTA file to a single-track Standard MIDI file
    packgenome    pack FASTA files into a 2-bit-per-base ".2bit" format
    unpackgenome  reverse of packgenome

The code targets old Python 2 interpreters (2.3 and up).  It is kept free
of syntax that Python 3 rejects, but the output paths build byte data with
chr() and str, so actually writing files assumes Python 2 byte strings.
"""
Copyright = """
fa2mid.py - convert from fastA format to standard MIDI
Copyright (C) 2003  John Comeau

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""

import sys

try:
    import os
    import re
except ImportError:
    sys.stderr.write("Needs os and re; upgrade preferably to version 2.3\n")
    sys.exit(1)

# Global data

# define boolean True on older Python interpreters (before 2.3); written
# through globals() because a plain "True = 1" is a syntax error on Python 3
try:
    True
except NameError:
    globals()['True'] = 1
    globals()['False'] = 0

# Python 3 has no separate long type; its int is already unbounded
try:
    long
except NameError:
    long = int

LongLength = 4
ShortLength = 2
Octave = 12  # 12 half-notes make an octave
NormalVelocity = 64  # use for note off events, base all notes around this
ByteMask = long(0xff)  # must be longword to avoid warnings about left shifts
DeltaZero = chr(0)  # delta-time of zero prefixes many MIDI events
EndOfTrack = DeltaZero + "\xff\x2f\x00"
LyricEvent = DeltaZero + "\xff\x05"
TextEvent = DeltaZero + "\xff\x01"
CopyrightNotice = DeltaZero + "\xff\x02"
RCSid = "$Id: fa2mid.py,v 1.75 2003/10/10 19:55:08 jcomeau Exp $"
MThd = "MThd"  # MIDI header ID string
MTrk = "MTrk"  # MIDI track ID string
# map bases to their inverse
BasePairs = {'T': 'A', 'C': 'G', 'A': 'T', 'G': 'C',
             't': 'a', 'c': 'g', 'a': 't', 'g': 'c', 'N': 'N'}
# State carried from one GenomeNoteData() call to the next
CurrentCodon = "NNN"  # 'N' signifies unknown base
Frame = 0  # 0-based representation of frames 1, 2, or 3
Coding = {0: 'X', 1: 'X', 2: 'X'}  # 'X' indicates genetic code terminator
EOL = ""  # end-of-line characters found in input stream


# Subroutines

def VarLen(LongValue):
    """Encode LongValue as a MIDI variable-length quantity.

    Seven bits go into each output byte, most significant group first;
    every byte except the last has its high bit set.  Zero (and any
    negative value, since the packing loop is skipped) encodes as a
    single zero byte.
    """
    Buffer, ReturnString = 0, ""
    # Pack the 7-bit groups into Buffer so that the most significant group
    # ends up in the lowest byte, with continuation bits already set.
    while LongValue > 0:
        Buffer = Buffer + (LongValue & 0x7f)
        LongValue = LongValue >> 7
        if LongValue > 0:
            Buffer = Buffer << 8
            Buffer = Buffer | 0x80
    # Emit bytes from the low end until one without a continuation bit
    while True:
        ReturnString = ReturnString + chr(Buffer & ByteMask)
        if Buffer & 0x80:
            Buffer = Buffer >> 8
        else:
            break
    return ReturnString


def ReadVarLen(Stream):
    """Decode one variable-length quantity read from Stream.

    NOTE(review): NextBytes() is not defined anywhere in this file, so as
    it stands this function raises NameError when called.
    """
    Value, Byte, Signal = long(0), 0, 0x80
    while Signal > 0:
        Byte = ord(NextBytes(Stream, 1))  # NextByte() will croak on EOF
        Signal = Signal & Byte  # stays 0x80 only while the high bit is set
        Byte = Byte & ~Signal  # strip the continuation bit
        Value = (Value << 7) + Byte
    return Value


def _FixedWidth(Value, Width):
    "Return the low Width bytes of Value as a big-endian string."
    ReturnString = ""
    for Index in range(Width, 0, -1):
        Shift = (Index - 1) * 8
        Byte = (Value & (ByteMask << Shift)) >> Shift
        ReturnString = ReturnString + chr(Byte)
    return ReturnString


def _ReadFixedWidth(Stream, Width):
    """Read Width bytes from Stream and return them as a big-endian integer.

    NOTE(review): relies on NextBytes(), which this file does not define.
    """
    Value = 0
    Raw = NextBytes(Stream, Width)
    for Index in range(0, Width):
        Value = (Value << 8) + ord(Raw[Index:Index + 1])
    return Value


def FixedLong(LongValue):
    "Return LongValue as a 4-byte big-endian string (higher bits dropped)."
    return _FixedWidth(LongValue, LongLength)


def ReadFixedLong(Stream):
    "Read a 4-byte big-endian integer from Stream."
    return _ReadFixedWidth(Stream, LongLength)


def FixedShort(ShortValue):
    "Return ShortValue as a 2-byte big-endian string (higher bits dropped)."
    return _FixedWidth(ShortValue, ShortLength)


def ReadFixedShort(Stream):
    "Read a 2-byte big-endian integer from Stream."
    return _ReadFixedWidth(Stream, ShortLength)


def NoteOn(Delay, Channel, Note, Velocity):
    "Return a note-on event (status 0x90 + Channel) preceded by Delay."
    return VarLen(Delay) + chr(0x90 + Channel) + chr(Note) + chr(Velocity)


def NoteOff(Delay, Channel, Note, Velocity):
    "Return a note-off event (status 0x80 + Channel) preceded by Delay."
    return VarLen(Delay) + chr(0x80 + Channel) + chr(Note) + chr(Velocity)


def NoteDef(Value, Velocity, Duration):  # for bases
    "Return a complete note on channel 0: on now, off after Duration."
    Channel = 0
    return NoteOn(0, Channel, Value, Velocity) + \
        NoteOff(Duration, Channel, Value, NormalVelocity)


def NoteMapping():
    "Map letters for base-pairs and amino acids to Standard MIDI notes"
    notes = dict()
    nextnote = 57  # MIDI note 57, the A below middle C (60)
    for letter in range(ord('A'), ord('Z') + 1):
        notes[chr(letter)] = nextnote
        if nextnote % 12 == 11 or nextnote % 12 == 4:
            nextnote = nextnote + 1  # 'mi' to 'fa', or 'ti' to 'do'
        else:
            nextnote = nextnote + 2
    return notes


def RelativeVelocity(BaseVelocity, NoteMidpoint, Note, Power, Options):
    """Adjust velocity so that high-pitched notes don't overpower lower notes

    BaseVelocity is scaled by (NoteMidpoint / Note) ** Power, capped at
    127 and truncated to an integer.  The adjustment is reported through
    debugprint().
    """
    adjustment = float(NoteMidpoint) / float(Note)
    modifiedAdjustment = pow(adjustment, Power)  # modify the curve
    velocity = float(BaseVelocity) * modifiedAdjustment
    velocity = min(velocity, 127.0)  # cannot exceed velocity value of 127
    debugprint(Options, "adjusting velocity from " + str(BaseVelocity) +
               " to " + str(int(velocity)))
    return int(velocity)


def _MetaText(Prefix, text):
    "Return a text-carrying meta event, or '' when text is empty."
    if len(text):
        return Prefix + VarLen(len(text)) + text
    return ''


def Lyric(text):
    "Return a lyric meta event (FF 05) for text, or '' if text is empty."
    return _MetaText(LyricEvent, text)


def EmbedText(text):
    "Return a text meta event (FF 01) for text, or '' if text is empty."
    return _MetaText(TextEvent, text)


def EmbedCopyright(text):
    "Return a copyright meta event (FF 02) for text, or '' if empty."
    return _MetaText(CopyrightNotice, text)


def BaseNoteDef(Duration, Options):
    """Return a dictionary mapping base letters to their complete notes.

    Uppercase bases are always present; lowercase ones (played softer)
    are added unless Options.exons_only is set.  With Options.lyriclevel
    above zero each note is preceded by a lyric event naming the base.
    """
    NoteMap = NoteMapping()
    MezzoPiano, MezzoForte = 64, 80  # really 48, 64 but this works better
    # drop everything down so the amino acids sound higher
    T = NoteMap['T'] - (2 * Octave)
    T = T - (Options.lower_t * Octave)  # drop to level of F by default
    C = NoteMap['C'] - (2 * Octave)
    A = NoteMap['A'] - (2 * Octave)
    G = NoteMap['G'] - (2 * Octave)
    power = 0.8  # used to adjust relative velocity, obtained by trial+error
    pitches = (('T', T), ('C', C), ('A', A), ('G', G))
    notemap = {}
    for base, pitch in pitches:
        notemap[base] = NoteDef(
            pitch, RelativeVelocity(MezzoForte, C, pitch, power, Options),
            Duration)
    if not Options.exons_only:
        for base, pitch in pitches:
            notemap[base.lower()] = NoteDef(
                pitch, RelativeVelocity(MezzoPiano, C, pitch, power, Options),
                Duration)
    if Options.lyriclevel > 0:
        for key, value in list(notemap.items()):
            notemap[key] = Lyric(key) + value
    return notemap


def mRNA_mapping(Options):
    """Return the table translating DNA letters into mRNA codon letters.

    Uppercase 'T' becomes 'U'; the other uppercase letters map to
    themselves.  Lowercase bases normally translate like their uppercase
    forms.  With Options.exons_only set they are kept lowercase, so a
    codon containing one never matches the uppercase genetic code.
    A missing exons_only attribute counts as False.
    """
    exons_only = getattr(Options, 'exons_only', False)
    mRNA = {'T': 'U', 'C': 'C', 'A': 'A', 'G': 'G', 'N': 'N'}
    for base in ('t', 'c', 'a', 'g'):
        if exons_only:
            mRNA[base] = base
        else:
            mRNA[base] = mRNA[base.upper()]
    return mRNA


def AminoAcids():
    """Entire genetic code with all amino acid representations.

    Lists all amino acids normally coded by mRNA by their 1-letter and
    3-letter abbreviations, and all the codons which code for them.
    Note the 'U' (Uracil) instead of 'T' (Thymine) since amino acids
    are produced using mRNA not DNA.
    """
    return {
        'G': ('Glycine', 'Gly', ('GGU', 'GGC', 'GGA', 'GGG',)),
        'A': ('Alanine', 'Ala', ('GCU', 'GCC', 'GCA', 'GCG',)),
        # these were spelled with 'T', which mRNA_mapping() never produces
        'V': ('Valine', 'Val', ('GUU', 'GUC', 'GUA', 'GUG',)),
        'L': ('Leucine', 'Leu',
              ('UUA', 'UUG', 'CUU', 'CUC', 'CUA', 'CUG',)),
        'I': ('Isoleucine', 'Ile', ('AUU', 'AUC', 'AUA',)),
        'P': ('Proline', 'Pro', ('CCU', 'CCC', 'CCA', 'CCG',)),
        'F': ('Phenylalanine', 'Phe', ('UUU', 'UUC',)),
        'Y': ('Tyrosine', 'Tyr', ('UAU', 'UAC',)),
        'W': ('Tryptophan', 'Trp', ('UGG',)),
        'S': ('Serine', 'Ser',
              ('UCU', 'UCC', 'UCA', 'UCG', 'AGU', 'AGC',)),
        'T': ('Threonine', 'Thr', ('ACU', 'ACC', 'ACA', 'ACG',)),
        'C': ('Cysteine', 'Cys', ('UGU', 'UGC',)),
        'M': ('Methionine', 'Met', ('AUG',)),
        'N': ('Asparagine', 'Asn', ('AAU', 'AAC',)),
        'Q': ('Glutamine', 'Gln', ('CAA', 'CAG',)),
        'D': ('Aspartate', 'Asp', ('GAU', 'GAC',)),
        'E': ('Glutamate', 'Glu', ('GAA', 'GAG',)),
        'K': ('Lysine', 'Lys', ('AAA', 'AAG',)),
        'R': ('Arginine', 'Arg',
              ('CGU', 'CGC', 'CGA', 'CGG', 'AGA', 'AGG',)),
        'H': ('Histidine', 'His', ('CAU', 'CAC',)),
        'X': ('terminator', 'ter', ('UAA', 'UAG', 'UGA',)),
    }


def GeneticCode():
    "Returns a dictionary of codon keys with amino acid values"
    code = dict()  # initialize dictionary
    for key, value in AminoAcids().items():
        for codon in value[2]:
            code[codon] = key
    return code


def Abbreviations():
    """Returns mapping of one-letter amino acid abbreviations for lyrics

    Each value is a 5-tuple indexed by lyric level (0 to 4).
    """
    abbr = dict()
    for key, value in AminoAcids().items():
        abbr[key] = ("", "", "(" + key + ")",
                     "(" + value[1] + ")", "(" + value[0] + ")")
        # e. g. K: "", "", "(K)", "(Lys)", "(Lysine)"
    return abbr


def MidiHeader():
    "Return the MThd chunk: format 0, one track, 96 ticks per quarter note."
    ChunkLength = FixedLong(6)  # 6 bytes
    MidiFileFormat = FixedShort(0)  # 0, simplest format
    NumberOfTracks = FixedShort(1)  # 1 track
    TimeDivision = FixedShort(96)  # ticks per quarter note
    return MThd + ChunkLength + MidiFileFormat + NumberOfTracks + TimeDivision


def MidiTrackCommon(Options):
    """Return the events that open the track.

    These are the time signature, the tempo, the copyright notice taken
    from Options.copyright, and program changes for channels 0 to 3
    (bases, then the three reading frames).
    """
    TimeSignaturePrefix = DeltaZero + "\xff\x58"  # meta-event for time sig.
    TimeSignatureLength = chr(4)  # length of data
    TimeSignatureNumerator = chr(4)  # for 4/4 time
    TimeSignatureDenominator = chr(2)  # 2 ** -dd, where dd = 2, gives 1/4
    TimeSignatureClocks = chr(24)  # number of MIDI clocks in metronome tick
    TimeSignatureQuarterNote = chr(8)  # 8 notated 32nd notes per 24 clocks
    TimeSignature = TimeSignaturePrefix + TimeSignatureLength + \
        TimeSignatureNumerator + TimeSignatureDenominator + \
        TimeSignatureClocks + TimeSignatureQuarterNote
    TempoPrefix = DeltaZero + "\xff\x51"  # meta-event for tempo specification
    TempoLength = chr(3)  # bytes for tempo data (24-bit time specification)
    TempoSpecification = FixedLong(500000)[1:4]  # microseconds per 1/4 note
    Tempo = TempoPrefix + TempoLength + TempoSpecification
    CopyrightEvent = EmbedCopyright(Options.copyright)
    # instrument options are 1-based, the MIDI program byte is 0-based
    ProgramChange = DeltaZero + "\xc0" + chr(Options.instrument - 1) + \
        DeltaZero + "\xc1" + chr(Options.instrument1 - 1) + \
        DeltaZero + "\xc2" + chr(Options.instrument2 - 1) + \
        DeltaZero + "\xc3" + chr(Options.instrument3 - 1)
    # examples: 1 = Grand Piano, 13 = Marimba
    return (TimeSignature + Tempo + CopyrightEvent + ProgramChange)


def debugprint(options, text):
    "Write text to stderr when options.verbose is set and text is non-empty."
    if (options.verbose) and len(text) > 0:
        sys.stderr.write(text + "\n")


def GenomeNoteData(DataString, NoteMap, BaseNoteMap, Options):
    """Change a single line of base-pair data into notes.

    Every base found in BaseNoteMap contributes its own note.  Every base
    known to the mRNA table also shifts the current codon, ends the amino
    acid note sounding in the current reading frame, possibly starts a
    new one on channel Frame + 1, and advances the frame.  Lines starting
    with '>' are comments and produce nothing.

    The globals CurrentCodon, Frame, Coding and EOL carry state between
    calls, so lines must be fed in file order.

    This is the most complex routine because of the different options
    it has to handle. If there are any serious bugs in the program they
    are likely to be found here
    """
    global CurrentCodon, Frame, Coding, EOL
    if DataString[0:1] == '>':  # comment line, so skip it
        return ""
    Notes = []  # collected pieces, joined once at the end
    Code = GeneticCode()
    abbreviations = Abbreviations()
    mRNA = mRNA_mapping(Options)
    for Base in DataString:  # should be in GATCgatc but could be N or \n
        if Base in BaseNoteMap:
            debugprint(Options, "playing note for " + Base)
            if Options.lyriclevel > 0 and len(EOL) > 0:
                Notes.append(Lyric("\r\n"))  # for WinAmp
                EOL = ''
            Notes.append(BaseNoteMap[Base])
        elif Base < ' ':  # any control character should be end-of-line
            EOL = EOL + Base
        else:
            debugprint(Options, "skipping datum " + Base)
        # after the note for that base is played, ...
        # ... start the note for the amino acid created by that codon
        if Base not in mRNA:
            continue
        CurrentCodon = CurrentCodon[1:3] + mRNA[Base]
        delay = 0
        if Base == 'N':  # there was no delay above
            delay = Options.duration
            # no lyric is emitted for 'N': the caller appends its own 'NNN'
        if Coding[Frame] != 'X':
            debugprint(Options, "ending note " + Coding[Frame] +
                       " after " + str(delay) + " MIDI clocks")
            Notes.append(NoteOff(delay,  # end current note
                                 Frame + 1,  # first frame is 2nd channel
                                 NoteMap[Coding[Frame]], NormalVelocity))
        Codes = CurrentCodon in Code and Code[CurrentCodon] != 'X'
        # Outside --code_all, a frame starts coding only at 'M' (AUG) and
        # keeps going for as long as it is already coding.
        if Codes and (Options.code_all or
                      Coding[Frame] != 'X' or
                      Code[CurrentCodon] == 'M'):
            Coding[Frame] = Code[CurrentCodon]
            debugprint(Options, "starting note for amino acid " +
                       Coding[Frame] +
                       " " + abbreviations[Coding[Frame]][4])
            velocity = RelativeVelocity(32, NoteMap['F'],
                                        NoteMap[Coding[Frame]], 2.0, Options)
            Notes.append(
                Lyric(abbreviations[Coding[Frame]][Options.lyriclevel]))
            Notes.append(NoteOn(0, Frame + 1,
                                NoteMap[Coding[Frame]], velocity))
        elif (not Codes) and Coding[Frame] != 'X':
            debugprint(Options, "note was already ended above")
            Coding[Frame] = 'X'
        else:
            debugprint(Options, "setting Coding[Frame] to 'X'")
            Coding[Frame] = 'X'
        Frame = (Frame + 1) % 3
        debugprint(Options, "Frame is now " + str(Frame + 1))
    return "".join(Notes)


def EndDataTrack(Options):
    "Clean up any loose ends then send end-of-track meta-event"
    Data, NoteMap = '', NoteMapping()
    for key, value in Coding.items():
        if value != 'X':
            debugprint(Options, "ending note for " +
                       value + " which should have ended before")
            Data = Data + NoteOff(0, key + 1, NoteMap[value], NormalVelocity)
    return Data + EndOfTrack


def CheckMinMax(optionInstance, option, value, parser, *arguments):
    """optparse callback that accepts value only within a closed range.

    arguments holds the two bounds, supplied through callback_args.  An
    accepted value is stored on parser.values under the option's dest.

    Raises:
        OptionValueError: if value lies outside the bounds.
    """
    # imported here because this function is the only one that raises it
    from optparse import OptionValueError
    (lowest, highest) = arguments
    if value < lowest or value > highest:
        raise OptionValueError(
            "value of " + option + " (" + str(value) +
            ") must be between " + str(lowest) + " and " + str(highest))
    setattr(parser.values, optionInstance.dest, value)


def initialize_fa2mid():
    """Parse the command line for fa2mid.

    Returns:
        (options, Input, Output).  Input and Output are file names, or
        "-" for standard input/output.  Output defaults to Input + ".mid"
        when Input is a file and no second argument is given.

    Prints the help text and exits when no arguments are given.
    """
    try:
        from optparse import OptionParser
    except ImportError:
        sys.stderr.write("Needs optparse, upgrade preferably to 2.3\n")
        sys.exit(1)
    EighthNotes, AcousticBass, ShowIntrons = 144, 33, False  # defaults
    ChoirAahs = 53  # other voices worth trying: 54 Voice Oohs, 55 SynthVoice
    parser = OptionParser(
        usage="%prog [options] INFILE|- [OUTFILE|-] (note: '-' means stdio)")

    def add_ranged(short_name, long_name, bounds, dest, default, help_text):
        "Add an integer option whose value CheckMinMax checks against bounds."
        parser.add_option(short_name, long_name, type="int",
                          action="callback", callback=CheckMinMax,
                          callback_args=bounds, dest=dest,
                          default=default, help=help_text)

    add_ranged("-d", "--duration", (1, 2048), "duration", EighthNotes,
               "MIDI clocks per eighth note")
    add_ranged("-i", "--instrument", (1, 128), "instrument", AcousticBass,
               "1-based Standard MIDI instrument number for bases")
    add_ranged("-1", "--frame1_instrument", (1, 128), "instrument1",
               ChoirAahs, "MIDI instrument number for frame 1 amino acids")
    add_ranged("-2", "--frame2_instrument", (1, 128), "instrument2",
               ChoirAahs, "MIDI instrument number for frame 2 amino acids")
    add_ranged("-3", "--frame3_instrument", (1, 128), "instrument3",
               ChoirAahs, "MIDI instrument number for frame 3 amino acids")
    add_ranged("-l", "--with_lyrics", (0, 4), "lyriclevel", 0,
               "output genome data as MIDI lyrics (various verbosity levels)")
    add_ranged("-t", "--lower_T", (0, 2), "lower_t", 2,
               "lower note for Thymine by this many octaves relative to C")
    invoked = program_name(sys.argv[0])
    if invoked is None:  # e.g. imported by a script with an unusual name
        invoked_as = sys.argv[0]
    else:
        invoked_as = invoked.group(0)
    parser.add_option("-c", "--copyright",
                      default="Created by: " + invoked_as +
                      ' ' + ' '.join(sys.argv[1:]) + Copyright,
                      help="string (enclosed in quotes) to use as copyright "
                           "notice")
    parser.add_option("-e", "--exons_only", action="store_true",
                      default=ShowIntrons,
                      help="ignore lowercased (intron) data")
    parser.add_option("-a", "--code_all", action="store_true",
                      default=False,
                      help="don't wait for AUG to start coding")
    parser.add_option("-v", "--verbose", action="store_true",
                      default=False,
                      help="output debugging information while processing")
    Input, Output = "-", "-"
    options, arguments = parser.parse_args()
    if len(arguments) < 1:
        parser.print_help()
        sys.exit(0)
    elif arguments[0] != "-":
        Input, Output = arguments[0], arguments[0] + ".mid"
    if len(arguments) > 1:
        # an explicit "-" selects stdout, as the usage text promises;
        # it used to be ignored in favour of INFILE.mid
        Output = arguments[1]
    return (options, Input, Output)


def _WriteTrack(InputFile, Emit, NoteMap, BaseNoteMap, Options):
    "Pass the header and all track data for InputFile, in order, to Emit."
    # the track length is written as 0 here and patched by the caller
    Emit(MidiHeader() + MTrk + FixedLong(0) + MidiTrackCommon(Options))
    while True:
        Line = InputFile.readline()
        if len(Line) == 0:
            break
        Emit(GenomeNoteData(Line, NoteMap, BaseNoteMap, Options))
    debugprint(Options, "gradually cutting off amino acid tones")
    Emit(GenomeNoteData("NNN", NoteMap, BaseNoteMap, Options))
    Emit(EndDataTrack(Options))


def fa2mid():
    """Convert the FASTA input named on the command line to a MIDI file.

    When writing to a file the data is streamed out and the track length
    patched in afterwards.  When writing to standard output, which cannot
    be reopened, the whole file is built in memory first.
    """
    (Options, Input, Output) = initialize_fa2mid()
    debugprint(Options, "processing: " + ' '.join(sys.argv))
    Buffered = (Output == "-")
    NoteMap = NoteMapping()
    BaseNoteMap = BaseNoteDef(Options.duration, Options)
    Adjustment = len(MidiHeader()) + len(MTrk)  # offset of the track length
    if Input == "-":
        InputFile = sys.stdin
    else:
        InputFile = open(Input, "r")
    try:
        if Buffered:
            OutputFile = sys.stdout
        else:
            OutputFile = open(Output, "wb")
        try:
            if Buffered:
                # we'll have to buffer entire output; better have lots of
                # swapspace!
                Chunks = []
                _WriteTrack(InputFile, Chunks.append,
                            NoteMap, BaseNoteMap, Options)
                OutputData = "".join(Chunks)
                OutputFile.write(
                    OutputData[0:Adjustment] +
                    FixedLong(len(OutputData) - Adjustment - LongLength) +
                    OutputData[Adjustment + LongLength:])
            else:
                _WriteTrack(InputFile, OutputFile.write,
                            NoteMap, BaseNoteMap, Options)
        finally:
            OutputFile.close()
    finally:
        InputFile.close()
    if not Buffered:
        # go back and fill in the real track length
        OutputFile = open(Output, "rb+")
        try:
            OutputFile.seek(0, 2)
            OutputFileLength = OutputFile.tell()
            OutputFile.seek(Adjustment)
            OutputFile.write(
                FixedLong(OutputFileLength - Adjustment - LongLength))
        finally:
            OutputFile.close()


def _PackFields(Source, Sink, DataLength, Symbols, UseMask, Verbose):
    """Write one 2-bit field per base, four to a byte, from Source to Sink.

    The field is the symbol's index in Symbols modulo 4, or with UseMask
    its index divided by 4.  Characters not in Symbols are skipped.  A
    final partial byte is padded with zero bits.
    """
    count = 0
    chunk = 0
    while count < DataLength:
        base = Source.read(1)
        if len(base) == 0:
            sys.stderr.write("... unexpected end of input\n")
            break
        index = Symbols.find(base)
        if index < 0:
            if Verbose:
                sys.stderr.write("... skipping: " + str(ord(base)) + ";")
            continue
        if UseMask:
            field = index // 4
        else:
            field = index % 4
        chunk = (chunk << 2) | field
        count = count + 1
        if (count % 4) == 0:
            Sink.write(chr(chunk))
            chunk = 0
    if (count % 4) != 0:
        if Verbose:
            sys.stderr.write("remaining " + str(count % 4) +
                             " bytes will be packed as one byte now\n")
        while (count % 4) != 0:
            chunk = chunk << 2
            count = count + 1
        Sink.write(chr(chunk))  # write final data


def _PackFile(Source, Path, Symbols, Verbose):
    "Pack the already-open FASTA file Source into Path + '.2bit'."
    header = Source.readline()
    match = re.match(r'(>[^>:\s]+)\s*$', header)
    if match is None:
        sys.stderr.write("invalid header: " + header + "\n")
        return
    header = match.group(1)
    datastart = Source.tell()
    datalength = 0
    datapattern = re.compile('([' + Symbols + r']*)\s*$')
    while True:
        line = Source.readline()
        if len(line) == 0:
            break
        match = datapattern.match(line)
        if match is None:
            sys.stderr.write("invalid data: " + line + "\n")
            sys.stderr.write("output file will be incomplete\n")
            break  # pack only the valid data that precedes this line
        datalength = datalength + len(match.group(1))
        if Verbose:
            sys.stderr.write(str(datalength) + " bytes: " +
                             match.group(1) + "\n")
    # start compressing the data
    outfile = Path + ".2bit"
    try:
        sink = open(outfile, "wb")
    except IOError:
        sys.stderr.write("cannot create " + outfile + "\n")
        return
    try:
        sink.write(header + ":1-" + str(datalength) + "\nP")
        # that last 'P' after the newline is to ease coding the unpacking
        # also so that `head -n 1 file.fa.2bit` can be used
        Source.seek(datastart)
        _PackFields(Source, sink, datalength, Symbols, False, Verbose)
        # now write mask data to output
        # [GATC] has mask of 0; [gatc] 1; and [NnMR] 2
        # mask of 3 can be used for something else later(?)
        Source.seek(datastart)
        _PackFields(Source, sink, datalength, Symbols, True, Verbose)
    finally:
        sink.close()


def packgenome():
    """Pack each FASTA file named on the command line into FILE.2bit.

    The output is a header line ">name:1-LENGTH", the letter 'P', the
    bases at two bits each, then a two-bit mask per base saying which
    group of four in 'GATCgatcNnMR' the original letter came from.
    Files that cannot be opened or have a bad header are reported on
    stderr and skipped.
    """
    verbose = False
    packstring = 'GATCgatcNnMR'
    for path in sys.argv[1:]:
        try:
            source = open(path, "r")
        except IOError:
            sys.stderr.write("cannot open " + path + "\n")
            continue
        try:
            _PackFile(source, path, packstring, verbose)
        finally:
            source.close()


def _UnpackBases(Packed, Output, DataLength, Symbols, Verbose):
    """Write DataLength bases from Packed to Output as 50-column lines.

    Only the first four symbols can result; the mask pass restores the
    rest.  Returns False if Packed ends early, in which case the
    unfinished line is not written.
    """
    count = 0
    line = ''
    while count < DataLength:
        packed = Packed.read(1)
        if len(packed) == 0:
            sys.stderr.write("...unexpected end of file\n")
            return False
        chunk = ord(packed)
        if Verbose:
            sys.stderr.write(".")
        for index in range(3, -1, -1):  # most significant pair first
            base = Symbols[(chunk >> (index * 2)) & 3]
            if Verbose:
                sys.stderr.write(base)
            line = line + base
            count = count + 1
            if count >= DataLength:
                if Verbose:
                    sys.stderr.write("... count >= datalength\n")
                break
            # checked per base: 50 is not a multiple of 4
            if len(line) == 50:
                Output.write(line + "\n")
                line = ''
    if len(line) > 0:
        Output.write(line + "\n")  # write final data
    return True


def _ApplyMask(Packed, Output, DataLength, Symbols, Verbose):
    """Rewrite the lines of Output in place using the mask data in Packed.

    Output must be positioned at the first data line.  Each line is read,
    corrected base by base, and written back over itself.  Returns False
    if the mask data runs out or a write fails.
    """
    count = 0
    line = ''
    corrected = ''
    linestart = 0
    while count < DataLength:
        packed = Packed.read(1)
        if len(packed) == 0:
            sys.stderr.write("... failed correcting output file at " +
                             str(count) + " out of " + str(DataLength) +
                             " bytes.\n")
            return False  # just give up and move on to the next file
        chunk = ord(packed)
        for index in range(3, -1, -1):
            if line == '':
                # fetch the next line, then return to its start so the
                # corrected version overwrites it
                linestart = Output.tell()
                line = Output.readline()
                if Verbose:
                    sys.stderr.write("line = " + line + "\n")
                Output.seek(linestart)
            column = count % 50
            base = line[column:column + 1]  # uncorrected base
            offset = Symbols.find(base)  # uncorrected offset
            offset = offset + (4 * ((chunk >> (2 * index)) & 3))
            corrected = corrected + Symbols[offset:offset + 1]
            count = count + 1
            if count >= DataLength:
                if Verbose:
                    sys.stderr.write("... count >= datalength\n")
                break
            if len(corrected) == 50:
                if Verbose:
                    sys.stderr.write("writing " + corrected +
                                     " at offset " + str(linestart) + "\n")
                try:
                    Output.write(corrected + "\n")
                    Output.flush()
                except IOError:
                    sys.stderr.write("... incomplete output\n")
                    return False
                line = ''
                corrected = ''
    if len(corrected) > 0:
        if Verbose:
            sys.stderr.write("writing final corrected line" +
                             corrected + "\n")
        try:
            Output.write(corrected + "\n")  # write final data
        except IOError:
            sys.stderr.write("... incomplete output\n")
            return False
    return True


def _UnpackFile(Packed, Path, Symbols, Verbose):
    "Unpack the already-open .2bit file Packed into Path + '.fa'."
    header = Packed.readline()
    match = re.match(r'(>[^>:\s]+):\d-(\d+)\s*$', header)
    if match is None:
        sys.stderr.write("invalid header: " + header + "\n")
        return
    header = match.group(1)
    datalength = int(match.group(2))
    skip = Packed.read(1)  # this is the 'P' we put while compressing
    if skip != 'P':  # note that if it _is_ P it doesn't mean it's good
        sys.stderr.write("invalid data byte past header\n")
        return
    # start uncompressing the data
    outfile = Path + ".fa"  # makes it "file.fa.2bit.fa"
    try:
        output = open(outfile, "w")
    except IOError:
        sys.stderr.write("cannot create " + outfile + "\n")
        return
    try:
        output.write(header + "\n")
        datastart = output.tell()
        if Verbose:
            sys.stderr.write("datalength = " + str(datalength) + "\n")
        _UnpackBases(Packed, output, datalength, Symbols, Verbose)
    finally:
        output.close()
    # now correct output with mask
    # [GATC] has mask of 0; [gatc] 1; and [NnMR] 2
    try:
        output = open(outfile, "r+")
    except IOError:
        sys.stderr.write("cannot correct " + outfile + "\n")
        return
    try:
        output.seek(datastart)
        if Verbose:
            sys.stderr.write("beginning correction at offset " +
                             str(output.tell()) + "\n")
            sys.stderr.write("datalength = " + str(datalength) + "\n")
        _ApplyMask(Packed, output, datalength, Symbols, Verbose)
    finally:
        output.close()


def unpackgenome():
    """Unpack each .2bit file named on the command line into FILE.fa.

    The bases are first written out as uppercase G/A/T/C in 50-column
    lines; a second pass applies the mask data to restore lowercase and
    N/n/M/R symbols.  Problems are reported on stderr and the affected
    file is abandoned.
    """
    packstring = 'GATCgatcNnMR'
    verbose = False
    for path in sys.argv[1:]:
        try:
            packed = open(path, "rb")  # packed data is binary
        except IOError:
            sys.stderr.write("cannot open " + path + "\n")
            continue
        try:
            _UnpackFile(packed, path, packstring, verbose)
        finally:
            packed.close()


def program_name(program_path):
    """Return a match for the trailing program name in program_path.

    Group 1 is the run of lowercase letters and digits at the end of the
    path, with any ".py" suffix excluded; group 0 includes the suffix.
    Returns None if the path does not end that way.
    """
    return re.search(r'([a-z0-9]+)(\.py)?$', program_path)


def main():
    """Run the command named by the script's own file name.

    The file is meant to be installed (or linked) as fa2mid, packgenome
    or unpackgenome; any other name is reported and exits with status 1.
    """
    commands = {
        'fa2mid': fa2mid,
        'packgenome': packgenome,
        'unpackgenome': unpackgenome,
    }
    match = program_name(sys.argv[0])
    if match is None or match.group(1) not in commands:
        sys.stderr.write(sys.argv[0] + " is unrecognizable\n")
        sys.exit(1)
    # looked up in a table, not eval()'d, so a renamed script cannot
    # call anything else
    commands[match.group(1)]()


# The following is standard; it allows the script to be used as a library
# with 'import', but runs only when invoked directly
if __name__ == "__main__":
    main()