#!/usr/bin/env python
# Converts SAMI Scripts into CMML
# xml_correct is a stub method that corrects XML parsing errors of SAMI.
# If there is a non-strict XML parser that handles syntactical errors this function can be deprecated.
# More information at http://trac.annodex.net/wiki/CmmlSubtitles

## Copyright (C) 2004- Commonwealth Scientific and Industrial Research
## Organisation (CSIRO) Australia
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
##
## - Redistributions of source code must retain the above copyright
## notice, this list of conditions and the following disclaimer.
##
## - Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimer in the
## documentation and/or other materials provided with the distribution.
##
## - Neither the name of CSIRO Australia nor the names of its
## contributors may be used to endorse or promote products derived from
## this software without specific prior written permission.
##
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
## ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
## PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ORGANISATION OR
## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
## EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
## PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
## LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
## NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
## SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import re
import sys
import math
import os
import xml.sax
from time import strptime, strftime
from xml.sax import saxutils
from xml.sax import make_parser
from xml.sax.handler import feature_namespaces
from xml.sax.handler import ContentHandler

try:
    import cgi  # unused in this file; kept so existing importers still find it
except ImportError:  # the cgi module no longer exists on recent Python 3
    cgi = None

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


# ---------------------------------------------------------------------------
# Module state shared between the SAX handler and convert_sami().
# ---------------------------------------------------------------------------
savecmml = ''      # CMML text produced by the most recent convert_sami() call
id = 1             # number of the next clip; shadows the builtin, name kept
                   # because the rest of the module addresses it as a global
fps = None         # divisor applied to each sync "Start" value
timing = False     # True between a <sync> tag and the first text that follows
errorFlag = False  # set when the XML parser reports a fatal error

# Name of the scratch file shared by xml_correct('create') and the caller.
_TEMP_FILE = 'tempfile.tmp'

middlespace = re.compile(r'( * )')
whitespace = re.compile(r'(^\n *|^ *| *\n$)')

# NOTE(review): the copy of this file under review had lost every "<...>"
# sequence inside its string literals (for example the clip line read
# "'\n' % (id, self.start_time)", which raises TypeError).  The markup below
# was reconstructed from the CMML element names (cmml/head/title/clip/desc);
# the exact attribute spelling ("clip%s", "npt:%s") and the indentation are
# assumptions -- confirm against the upstream script.
_CMML_OPEN = '<cmml>\n'
_CMML_CLOSE = '</cmml>\n'
_HEAD_TEMPLATE = '<head>\n<title>%s</title>\n</head>\n'
_CLIP_OPEN = '<clip id="clip%s" start="npt:%s">\n'
_DESC_OPEN = '  <desc>\n'
_DESC_TEXT = '    %s\n'
_CLIP_CLOSE = '  </desc>\n</clip>\n'

# Patterns used by xml_correct().  The first three were empty shells in the
# reviewed copy ("(^$)") and are reconstructed; the others are unchanged.
_SYNC_TAG = re.compile(r'^<sync(\s[^>]*)?/?>$')
_PARA_TAG = re.compile(r'^<p(\s[^>]*)?/?>$')
_BRLN_TAG = re.compile(r'^<br(\s[^>]*)?/?>$')
_LINE_END = re.compile(r'(\A<.*\Z| *<.*\Z)')
_LINE_CSS = re.compile(r'( *--$|\A--$)')
_TEXTLINE = re.compile(r'(.*<.*\Z)')

# Closing tags for elements that xml_correct() already emits self-closed;
# passing them through would leave a mismatched end tag in the output.
_ORPHAN_CLOSERS = ('</sync>', '</p>', '</br>')


def time_calculation(seconds, amt_in_sec):
    """Split *seconds* into whole units of *amt_in_sec* plus the remainder.

    Returns a ``(whole_units, remainder)`` tuple, e.g.
    ``time_calculation(7325.5, 3600) == (2, 125.5)``.
    """
    # divmod gives the floor quotient and an exact remainder; the previous
    # "val * amt - floor(val) * amt" form re-introduced rounding error.
    whole_units, remainder = divmod(seconds, amt_in_sec)
    return (whole_units, remainder)


def convert_timeperiod(frame, frames_per_second=None):
    """Format ``frame / fps`` seconds as ``HH:MM:SS.mmm``.

    frame             -- numeric value taken from a sync tag's Start attribute
    frames_per_second -- divisor to use; defaults to the module global ``fps``

    Raises ValueError when no non-zero rate is available.

    NOTE(review): SAMI is generally described as giving Start in
    milliseconds; this script treats it as a frame count.  Entering 1000 as
    the rate yields millisecond behaviour -- confirm the intended unit.
    """
    rate = fps if frames_per_second is None else frames_per_second
    if not rate:
        raise ValueError('frames-per-second must be set to a non-zero value')
    # Round once, on the total, so the millisecond field can never reach
    # 1000 (the old per-field rounding could print "00:00:01.1000").
    total_ms = int(round(frame / float(rate) * 1000))
    hours, rest = time_calculation(total_ms, 3600000)
    minutes, rest = time_calculation(rest, 60000)
    secs, millisecs = time_calculation(rest, 1000)
    return '%02d:%02d:%02d.%03d' % (hours, minutes, secs, millisecs)


def _get_attr(attrs, wanted):
    """Return attribute *wanted* from *attrs*, ignoring case; None if absent."""
    value = attrs.get(wanted, None)
    if value is not None:
        return value
    lowered = wanted.lower()
    for key in attrs.keys():
        if key.lower() == lowered:
            return attrs.get(key)
    return None


class parseHandler(ContentHandler):
    """SAX content/error handler that appends CMML to the global ``savecmml``.

    It expects the pre-processed document written by xml_correct(), in which
    sync, p and br tags are self-closed, so the text belonging to a tag
    arrives *after* that tag's end event.
    """

    def __init__(self):
        ContentHandler.__init__(self)
        self.inDescTitle = False
        self.inDescText = False
        self.title = ''
        self.text = ''

    def startElement(self, name, attrs):
        """Track title/p state and record the start time of each sync tag."""
        global timing
        if name == 'title':
            self.inDescTitle = True
        elif name == 'sync':
            self.title = ''
            # xml_correct() may lowercase attribute names, so "Start" is
            # looked up without regard to case.
            raw_start = _get_attr(attrs, 'Start')
            if raw_start is None:
                raise ValueError('sync tag without a Start attribute')
            self.start_time = convert_timeperiod(float(raw_start))
            timing = True
        elif name == 'style':
            # For future implementation of CSS Stylesheets
            self.type = attrs.get('type')
        elif name == 'p':
            self.inDescText = True

    def characters(self, ch):
        """Collect title text, and emit caption text into the current clip."""
        global savecmml
        global timing
        global id
        if self.inDescTitle:
            self.title = self.title + ch
        if not self.inDescText:
            return
        self.text = self.text + ch
        self.text = whitespace.sub('', self.text)
        self.text = middlespace.sub(' ', self.text)
        if self.text == '':
            return
        # The text is written into an XML document, so escape &, < and >.
        escaped = saxutils.escape(self.text)
        if timing:
            # First text after a sync tag: open a new clip for it.
            timing = False
            savecmml += _CLIP_OPEN % (id, self.start_time)
            savecmml += _DESC_OPEN
            savecmml += _DESC_TEXT % (escaped)
            id += 1
        else:
            # Further text for the clip that is already open.
            savecmml += _DESC_TEXT % (escaped)
        self.text = ''

    def endElement(self, name):
        """Write the head on </title> and close the previous clip on sync."""
        global savecmml
        global id
        if name == 'title':
            self.inDescTitle = False
            savecmml += _HEAD_TEMPLATE % saxutils.escape(self.title.strip())
        if name == 'p':
            # Deliberately left True: p is self-closed by xml_correct(), so
            # its caption text follows this end event.
            self.inDescText = True
        if name == 'sync' and id != 1:
            # A new sync ends the clip opened for the previous one.
            savecmml += _CLIP_CLOSE
        if name == 'br':
            savecmml += '\n'

    def error(self, ex):
        """Report a recoverable parser error on stderr."""
        sys.stderr.write('Non-Fatal Error Code: %s\n' % str(ex))

    def fatalError(self, ex):
        """Report a fatal parser error on stderr and set ``errorFlag``."""
        global errorFlag
        errorFlag = True
        # The message names the user's input file; fall back to a
        # placeholder when the module is used without command-line arguments.
        source_name = sys.argv[1] if len(sys.argv) > 1 else '<unknown>'
        sys.stderr.write('Fatal Error Code: %s\n' % str(ex))
        sys.stderr.write('The parsing error \'%s\' occurred at:\n'
                         % (ex.getMessage()))
        sys.stderr.write('Line: %s, Column: %s in File: %s\n\n'
                         % (str(ex.getLineNumber()),
                            str(ex.getColumnNumber()),
                            source_name))


def convert_sami(file, frames_per_second=None):
    """Parse a corrected SAMI document and build CMML in ``savecmml``.

    file              -- file name or file-like object accepted by the SAX
                         parser (normally the output of xml_correct)
    frames_per_second -- optional; when given it replaces the global ``fps``

    The result is stored in the module global ``savecmml`` and also
    returned.  It is the empty string when the parser reported a fatal error.
    """
    global errorFlag
    global savecmml
    global id
    global fps
    global timing
    if frames_per_second is not None:
        fps = frames_per_second
    id = 1
    timing = False  # previously unset until the first sync tag was seen
    errorFlag = False
    savecmml = _CMML_OPEN

    parser = make_parser()
    parser.setFeature(feature_namespaces, 0)  # XML namespaces not needed
    handler = parseHandler()
    parser.setContentHandler(handler)
    parser.setErrorHandler(handler)
    try:
        parser.parse(file)
    except xml.sax.SAXParseException as ex:
        errorFlag = True
        print('Error Code: %s' % (str(ex)))

    if errorFlag:
        savecmml = ''
    else:
        if id != 1:
            # Close the last clip; skipped when no clip was ever opened.
            savecmml += _CLIP_CLOSE
        savecmml += _CMML_CLOSE
    return savecmml


def _normalise_tag(raw):
    """Lowercase the tag name, keep at most one attribute, and append '>'."""
    stripped = raw.lstrip(' ')
    tokens = stripped.split(' ', 2)
    if len(tokens) >= 2:
        stripped = tokens[0].lower() + ' ' + tokens[1]
    else:
        stripped = stripped.lower()
    return stripped.strip() + '>'


def _self_close(tag, quote_attribute):
    """Turn '<sync Start=100>' into '<sync Start="100"/>'.

    Quotes are only added when the tag has an attribute, and a value that
    is already quoted or a tag that is already self-closed is not doubled.
    """
    inner = tag[:-1].rstrip('/').rstrip()
    if quote_attribute and '=' in inner:
        head, value = inner.split('=', 1)
        inner = '%s="%s"' % (head, value.strip().strip('"\''))
    return inner + '/>'


def _tag_lines(raw):
    """Return the corrected output lines (zero or one) for one raw tag."""
    tag = _normalise_tag(raw)
    if tag in _ORPHAN_CLOSERS:
        return []
    if _SYNC_TAG.match(tag) or _PARA_TAG.match(tag):
        return [_self_close(tag, True)]
    if _BRLN_TAG.match(tag):
        return [_self_close(tag, False)]
    return [tag]


def _corrected_lines(part):
    """Return the output lines for one '>'-delimited fragment of a line."""
    if _LINE_END.match(part):
        # The fragment is a tag that lost its closing '>' in the split.
        return _tag_lines(part)
    if _LINE_CSS.match(part):
        # End of the CSS comment block: '--' becomes '-->'.
        return [part.strip() + '>']
    if _TEXTLINE.match(part):
        # Text immediately followed by a tag (ie. text<br>).
        text, tag = part.split('<', 1)
        return [text.lstrip(' ')] + _tag_lines('<' + tag)
    # Anything else (CSS rules, caption text on its own line).  Blank
    # fragments, such as the newline left after the last '>', are skipped.
    leftover = part.strip('\r\n').lstrip(' ')
    if leftover.strip():
        return [leftover]
    return []


def xml_correct(file, function):
    """Create or remove the XML-corrected scratch copy of a SAMI file.

    file     -- iterable of text lines (ignored for 'cleanup')
    function -- 'create' writes the corrected document to 'tempfile.tmp' in
                the current directory; 'cleanup' deletes that file.  Any
                other value does nothing.

    The correction puts every tag on its own line, lowercases tag names,
    quotes the attribute of sync/p tags and self-closes sync, p and br.
    """
    if function == 'create':
        pieces = []
        for line in file:
            # Split the tags apart on '>' and restore it per fragment.
            for part in line.split('>'):
                pieces.extend(_corrected_lines(part))
        with open(_TEMP_FILE, 'w') as filetemp:
            filetemp.write(''.join(piece + '\n' for piece in pieces))
    elif function == 'cleanup':
        if os.path.exists(_TEMP_FILE):
            os.remove(_TEMP_FILE)


def _prompt_fps():
    """Ask on stdin until a positive frames-per-second value is entered."""
    print('Frames-per-Second of Video Required:\nEnter Frames-per-Second:')
    while True:
        try:
            value = float(_read_line())
        except ValueError:
            value = 0.0
        if value > 0:
            return value
        print('Invalid FPS Entered. Try Again.')


def main(argv=None):
    """Command-line entry point: ``script INPUT.smi [OUTPUT.cmml]``.

    Without an output file the CMML is printed to stdout.
    """
    global fps
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Check the arguments before asking the user for anything.
        print("Usage: %s [Input SAMI File] [Output CMML File]" % argv[0])
        return
    fps = _prompt_fps()
    with open(argv[1]) as source:
        xml_correct(source, 'create')
    try:
        # using temp file from xml_correct
        with open(_TEMP_FILE) as corrected:
            cmml = convert_sami(corrected)
    finally:
        xml_correct('', 'cleanup')
    if len(argv) > 2:
        with open(argv[2], 'w') as filesave:
            filesave.write(cmml)
    else:
        print(cmml)


if __name__ == '__main__':
    main()