# -*- coding: iso8859-1 -*-
#
# Copyright (C) 2006 CSIRO Australia
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs.
#
# Author: Conrad Parker

import string
import sys
from urlparse import urlparse

from anxvalidator.anxurlopener import anxURLopener
from anxvalidator.cmmlvalidate import CMMLValidate, cmml_types
from anxvalidator.oggzvalidate import OggzValidate, ogg_types
from anxvalidator.w3c import W3CRedirect
from anxvalidator.error import ErrorResponse

# Upper bound on the number of HTTP redirects followed for one request.
max_redirects = 10

# Server names that are refused because they designate the local machine.
localhosts = ['localhost', 'localhost.localdomain', '127.0.0.1']


class UrlValidate(object):
    """Fetch a URL over HTTP and pick a validator for what it serves.

    Attributes set by the constructor:
        url          -- the URL exactly as given by the caller
        http_info    -- list of message strings describing the HTTP exchange
        is_annodex   -- set to True by content_type() for an 'annodex' subtype
        nr_redirects -- number of redirects followed so far
        r            -- the result object; a plain string when the URL was
                        rejected before any request was made, an
                        ErrorResponse when the fetch failed, otherwise an
                        OggzValidate, CMMLValidate or W3CRedirect
        info         -- response headers as returned by the opened URL's
                        info(), or None when the fetch failed (not set at
                        all when the URL was rejected before fetching)
    """

    def __init__(self, url, request_cmml=False, force_media=False):
        """Validate the resource at *url*.

        url          -- URL to fetch; 'http://' is assumed when no scheme
                        is given
        request_cmml -- when true, send an 'Accept: text/x-cmml' header
        force_media  -- when true, always use OggzValidate regardless of
                        the Content-Type reported by the server
        """
        self.url = url
        self.http_info = []
        self.is_annodex = False
        self.nr_redirects = 0

        (scheme, netloc, _, _, _, _) = urlparse(url)

        if scheme == '':
            url = 'http://' + url
            # Without a scheme urlparse() puts the host into the path and
            # leaves netloc empty, so the server name must be re-parsed
            # from the completed URL.
            netloc = urlparse(url)[1]
        elif scheme == 'file' or (len(scheme) == 1 and scheme.isalpha()):
            # A one-letter "scheme" is taken to be a drive letter (c:\...).
            self._reject('Error: I cannot see your local files '
                         '(URL scheme %s)' % scheme)
            return
        elif scheme != 'http':
            self._reject('Error: Unknown URL scheme %s' % scheme)
            return

        # Drop any 'user:password@' prefix and ':port' suffix; host names
        # are case-insensitive, so compare in lower case.
        server = netloc.split('@')[-1].split(':')[0].lower()
        if server == '':
            self._reject('Error: No server specified')
            return
        elif server in localhosts:
            self._reject('Error: I cannot see your localhost')
            return

        self.uo = anxURLopener()
        if request_cmml:
            self.uo.addheader('Accept', 'text/x-cmml')

        try:
            u = self.open(url)
        except IOError:
            self.info = None
            self.r = ErrorResponse(self.uo.errcode, self.uo.errmsg)
            return

        self.info = u.info()
        if self.info:
            self.http_info += ['Content-Type: %s'
                               % self.info.get("Content-Type")]

        c = self.content_type()
        if force_media or (c in ogg_types):
            self.r = OggzValidate(u)
            self.http_info += [self.time_uri()]
        elif c in cmml_types:
            self.r = CMMLValidate(u)
            self.http_info += [self.time_uri()]
        else:
            self.r = W3CRedirect(url)

    def _reject(self, message):
        """Record *message* and mark the URL as not reachable over HTTP."""
        self.http_info += [message]
        self.r = "The given URL is not accessible via HTTP"

    def open(self, url):
        """Open *url* with self.uo, following HTTP redirects.

        Returns whatever self.uo.open() returns.  Raises IOError when the
        request fails, when more than max_redirects redirects occur, or
        when a redirect response carries no Location header.  For a
        non-redirect failure self.info is set to None and self.r to an
        ErrorResponse before raising.
        """
        try:
            return self.uo.open(url)
        except IOError:
            errcode = self.uo.errcode
            errmsg = self.uo.errmsg
            headers = self.uo.headers

            if errcode in [301, 302, 303, 307]:
                self.nr_redirects += 1
                if self.nr_redirects > max_redirects:
                    raise IOError

                redirect_url = headers.get("Location")
                if not redirect_url:
                    # Nothing to follow; fail instead of opening None.
                    self.http_info += ['Error: %d %s without a Location '
                                       'header' % (errcode, errmsg)]
                    raise IOError

                # The argument tuple must match the three placeholders;
                # a surplus argument makes the % operator raise TypeError.
                self.http_info += ['%d %s: Redirected to %s'
                                   % (errcode, errmsg, redirect_url)]
                # NOTE(review): the redirect target is not subjected to
                # the scheme/localhost checks applied in __init__ --
                # confirm whether that is acceptable for untrusted input.
                return self.open(redirect_url)
            else:
                self.info = None
                self.r = ErrorResponse(errcode, errmsg)
                raise IOError

    def content_type(self):
        """Return the response media type as 'major/minor'.

        Parameters such as the charset are removed, as is a leading 'x-'
        on the subtype.  Sets self.is_annodex when the subtype is
        'annodex'.  Returns '' when there are no headers or no
        Content-Type header, and the bare value when it has no '/'.
        """
        c = self.info.get("Content-Type") if self.info else None
        if not c:
            return ''

        # Remove encoding etc. parameters
        t = c.split(';', 1)[0].strip()
        if '/' not in t:
            return t

        (major, minor) = t.split('/', 1)
        if minor[:2] == 'x-':
            minor = minor[2:]
        if minor == 'annodex':
            self.is_annodex = True
        return '%s/%s' % (major, minor)

    def time_uri(self):
        """Return a message saying whether X-Accept-TimeURI was sent."""
        link = 'time URIs'
        t = self.info.get("X-Accept-TimeURI") if self.info else None
        if t:
            s = "Accepts " + link + ' for: %s' % (t)
        else:
            s = "Does not specify acceptance of " + link
        return s

    def __str__(self):
        """Return the HTTP messages followed by the validator's report."""
        # NOTE(review): the literals in this method look as if markup was
        # stripped from them at some point -- confirm the intended output
        # format against version control.  Bullets are plain ASCII because
        # the declared source encoding (iso8859-1) has no bullet character.
        def li(item):
            return '\n  * ' + item

        def make_list(items):
            return ''.join([li(item) for item in items]) + '\n    '

        s = '\n\n    HTTP Results:\n\n    '
        s += make_list(self.http_info)
        s += self.r.__str__() + '\n    '
        return s