#!/usr/bin/env python # Converts USF Scripts into CMML # More information at http://trac.annodex.net/wiki/CmmlSubtitles ## Copyright (C) 2004- Commonwealth Scientific and Industrial Research ## Organisation (CSIRO) Australia ## ## Redistribution and use in source and binary forms, with or without ## modification, are permitted provided that the following conditions ## are met: ## ## - Redistributions of source code must retain the above copyright ## notice, this list of conditions and the following disclaimer. ## ## - Redistributions in binary form must reproduce the above copyright ## notice, this list of conditions and the following disclaimer in the ## documentation and/or other materials provided with the distribution. ## ## - Neither the name of CSIRO Australia nor the names of its ## contributors may be used to endorse or promote products derived from ## this software without specific prior written permission. ## ## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ## ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT ## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ## PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR ## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ## PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ## LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import re
import sys
try:
    import cgi  # not referenced below; retained from the original import list
except ImportError:  # the cgi module no longer ships with Python 3.13+
    cgi = None
from time import strptime, strftime
from xml.sax import saxutils
from xml.sax import make_parser
from xml.sax.handler import feature_namespaces
from xml.sax.handler import ContentHandler

# CMML text produced by the most recent convert_usf() call.  parseHandler
# appends to it while parsing; convert_usf() resets it and also returns it.
savecmml = ''

# Strips a leading newline plus spaces, leading spaces, and trailing spaces
# plus newline from the accumulated subtitle text.
whitespace = re.compile('(^\\n *|^ *| *\\n$)')


def clock2timestamp(str):
    """Normalise a clock value such as ``0:1:2.500`` to ``00:01:02.500``.

    The part before the first ``.`` is parsed as ``%H:%M:%S`` and re-emitted
    zero-padded; the part between the first and second ``.`` (if any) is
    appended unchanged after a ``.``.  A value with no ``.`` is returned
    without a fractional part.

    Raises ValueError (from ``strptime``) when the clock part does not match
    ``%H:%M:%S``.

    NOTE: the parameter shadows the builtin ``str``; the name is kept so
    existing keyword callers keep working.
    """
    data = str.split('.')
    t = strptime(data[0], '%H:%M:%S')
    stamp = strftime('%H:%M:%S', t)
    if len(data) > 1:
        # Appended after formatting so the raw fraction is never interpreted
        # as part of a strftime format string.
        stamp += '.' + data[1]
    return stamp


class parseHandler(ContentHandler):
    """SAX handler that appends CMML markup to the module global ``savecmml``.

    ``</metadata>`` emits a ``<head>`` built from the collected ``<title>``
    and ``<name>`` text; every ``</text>`` emits one ``<clip>`` timed by the
    enclosing ``<subtitle>`` and numbered by the module global ``id``.
    Both globals are initialised by convert_usf().
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.inDescTitle = False
        self.inDescName = False
        self.inDescText = False
        self.inDescStyle = False
        self.title = ''
        self.name = ''
        self.text = ''
        # Initialised here so a <font> seen before any <style>, or a <text>
        # seen before any <subtitle>, cannot raise AttributeError.
        self.style = None
        self.start_time = None
        self.stop_time = None

    def startElement(self, name, attrs):
        """Track which element is open and capture subtitle timing."""
        if name == 'title':
            self.inDescTitle = True
        if name == 'name':
            self.inDescName = True
        if name == 'text':
            self.inDescText = True
        if name == 'subtitle':
            self.inDescTitle = False
            self.inDescName = False
            self.inDescText = False
            self.title = ''
            self.name = ''
            self.text = ''
            start = attrs.get('start', None)
            stop = attrs.get('stop', None)
            if start is None:
                raise ValueError("<subtitle> element has no 'start' attribute")
            self.start_time = clock2timestamp(start)
            # A subtitle without 'stop' yields a clip without 'end' instead
            # of crashing on None.split().
            if stop is None:
                self.stop_time = None
            else:
                self.stop_time = clock2timestamp(stop)
        if name == 'style':
            self.style = attrs.get('name')
        if name == 'font':
            # For future CSS implementation
            if attrs.get('style') == self.style:
                self.inDescStyle = True

    def characters(self, ch):
        """Append character data to whichever field is currently open."""
        if self.inDescTitle:
            self.title = self.title + ch
        if self.inDescName:
            self.name = self.name + ch
        if self.inDescText:
            self.text = self.text + ch
            # Re-trimmed after every chunk, exactly as before.
            self.text = whitespace.sub('', self.text)

    def _clip_start_tag(self):
        """Return the opening <clip> tag for the subtitle being closed."""
        # 'subtitle' prefix: an XML ID may not begin with a digit.
        parts = ['id=%s' % saxutils.quoteattr('subtitle%d' % id)]
        if self.start_time is not None:
            parts.append(
                'start=%s' % saxutils.quoteattr('npt:' + self.start_time))
        if self.stop_time is not None:
            parts.append(
                'end=%s' % saxutils.quoteattr('npt:' + self.stop_time))
        return '<clip %s>\n' % ' '.join(parts)

    def endElement(self, name):
        """Close the tracked element and emit CMML for metadata and text."""
        global savecmml
        global id
        if name == 'title':
            self.inDescTitle = False
        if name == 'name':
            self.inDescName = False
        if name == 'text':
            self.inDescText = False
        # NOTE(review): the element and attribute names written below are
        # reconstructed; the original literals had lost their markup and two
        # of them raised TypeError when formatted.  TODO confirm against the
        # CMML DTD and the CmmlSubtitles wiki page.
        if name == 'metadata':
            savecmml += '<head>\n'
            savecmml += '  <title>%s</title>\n' % saxutils.escape(self.title)
            savecmml += ('  <meta name="author" content=%s/>\n'
                         % saxutils.quoteattr(self.name))
            savecmml += '</head>\n'
        if name == 'text':
            savecmml += self._clip_start_tag()
            savecmml += '  <desc>\n'
            # Escaped because SAX hands over '&' and '<' already decoded.
            savecmml += '    %s\n' % saxutils.escape(self.text)
            savecmml += '  </desc>\n'
            savecmml += '</clip>\n'
            id += 1
        if name == 'br':
            # NOTE(review): unchanged from the original -- the newline goes
            # to the output, not into self.text, so text on both sides of a
            # <br> is joined without a separator.  TODO confirm intent.
            savecmml += '\n'


def convert_usf(file):
    """Parse a USF document and build the corresponding CMML document.

    ``file`` is passed straight to the SAX parser's ``parse()``.  The result
    is stored in the module global ``savecmml`` (as before) and is now also
    returned.  The module global ``id`` (clip counter) is reset to 1.
    """
    global savecmml
    global id
    id = 1
    parser = make_parser()                    # Create a parser
    parser.setFeature(feature_namespaces, 0)  # Not interested in namespaces
    parser.setContentHandler(parseHandler())  # Tell parser to use the handler

    # Parse the input, wrapping the handler's output in the document root.
    savecmml = '<?xml version="1.0" encoding="UTF-8"?>\n'
    savecmml += '<cmml>\n'
    parser.parse(file)
    savecmml += '</cmml>\n'
    return savecmml


def main(argv):
    """Command-line driver: ``argv[1]`` is the USF input, ``argv[2]`` the
    optional CMML output file (stdout when omitted)."""
    # Argument count is tested explicitly; catching IndexError would also
    # swallow an IndexError raised while converting.
    if len(argv) < 2:
        print("Usage: %s [Input USF File] [Output CMML File]" % argv[0])
        return
    with open(argv[1], 'rb') as source:
        cmml = convert_usf(source)
    if len(argv) > 2:
        # Opened only after a successful conversion so a parse failure does
        # not leave a truncated output file.  Written as bytes to match the
        # UTF-8 encoding named in the XML declaration.
        with open(argv[2], 'wb') as target:
            target.write(cmml.encode('utf-8'))
    else:
        print(cmml)


if __name__ == '__main__':
    main(sys.argv)