#!/usr/bin/env python
# Converts USF Scripts into CMML
# More information at http://trac.annodex.net/wiki/CmmlSubtitles
## Copyright (C) 2004- Commonwealth Scientific and Industrial Research
## Organisation (CSIRO) Australia
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
##
## - Redistributions of source code must retain the above copyright
## notice, this list of conditions and the following disclaimer.
##
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimer in the
## documentation and/or other materials provided with the distribution.
##
## - Neither the name of CSIRO Australia nor the names of its
## contributors may be used to endorse or promote products derived from
## this software without specific prior written permission.
##
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
## ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
## PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
## PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
## LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# NOTE(review): everything from the imports through the start of
# parseHandler.endElement sits on the single physical line below, and that
# line stops inside an unterminated string literal (savecmml += '). The
# original line breaks and indentation cannot be recovered from this text,
# so the code is left untouched; restore the layout from the upstream file.
#
# Contents, in order:
#   * Imports: re, sys, cgi, time.strptime/strftime and the xml.sax pieces
#     (saxutils, make_parser, feature_namespaces, ContentHandler).
#   * `global savecmml` / `global message` / `global id` at module scope.
#     A module-level `global` statement binds nothing, so these names must
#     be assigned elsewhere before endElement() executes `savecmml += ...`.
#     `id` also shadows the builtin of the same name.
#   * `whitespace`: compiled pattern whose alternatives are a leading
#     newline followed by spaces, leading spaces, or spaces followed by a
#     newline at the end; characters() replaces its matches in self.text
#     with ''.
#   * clock2timestamp(str): splits `str` on '.', parses the first piece
#     with strptime(..., '%H:%M:%S') and returns the strftime() result for
#     the format "%H:%M:%S." followed by the second piece.
#       - The second piece is spliced into the strftime *format*, so a '%'
#         in it would be interpreted as a directive.
#       - Input without a '.' has no data[1] and raises IndexError.
#       - startElement() passes attrs.get('start', None) and
#         attrs.get('stop', None) straight in; a missing attribute yields
#         None, and None.split(...) raises AttributeError.
#       - The parameter name `str` shadows the builtin inside the function.
#   * class parseHandler(ContentHandler): __init__ sets inDescTitle,
#     inDescName and inDescText to False and title, name and text to ''.
#     startElement(), characters() and endElement() test the element name
#     or those flags, toggle the flags and append character data to
#     title/name/text. Which statements belong to which `if` cannot be read
#     off this line -- TODO confirm against the upstream file.
#     self.style and self.inDescStyle are assigned only in startElement(),
#     never in __init__, so the 'font' check reads self.style before any
#     'style' element has set it -- NOTE(review): verify that ordering is
#     guaranteed by the input.
import re import sys import cgi from time import strptime, strftime from xml.sax import saxutils from xml.sax import make_parser from xml.sax.handler import feature_namespaces from xml.sax.handler import ContentHandler global savecmml global message global id whitespace = re.compile('(^\\n *|^ *| *\\n$)') def clock2timestamp(str): data = str.split('.') t = strptime(data[0], '%H:%M:%S') return strftime ("%H:%M:%S" + '.' + data[1], t) class parseHandler(ContentHandler): def __init__(self): self.inDescTitle = False self.inDescName = False self.inDescText = False self.title = '' self.name = '' self.text = '' def startElement(self, name, attrs): if name == 'title': self.inDescTitle = True if name == 'name': self.inDescName = True if name == 'text': self.inDescText = True if name == 'subtitle': self.inDescTitle = False self.inDescName = False self.inDescText = False self.title = '' self.name = '' self.text = '' self.start_time = clock2timestamp(attrs.get('start', None)) self.stop_time = clock2timestamp(attrs.get('stop', None)) if name == 'style': self.style = attrs.get('name') if name == 'font': # For future CSS implementation if (attrs.get('style') == self.style): self.inDescStyle = True def characters(self, ch): if self.inDescTitle: self.title = self.title + ch if self.inDescName: self.name = self.name + ch if self.inDescText: self.text = self.text + ch self.text = whitespace.sub('', self.text) def endElement(self, name): global savecmml global id if name == 'title': self.inDescTitle = False if name == 'name': self.inDescName = False if name == 'text': self.inDescText = False if name == 'metadata': savecmml += '