Commit 1298e2d5 authored by Jannis Leidel's avatar Jannis Leidel

Refactored the compressor code a little (split in separate modules). Also...

Refactored the compressor code a little (split into separate modules). Also abstracted the file parsing and added a BeautifulSoupParser and an LxmlParser; the former is the default.
parent a54a07da
......@@ -148,6 +148,19 @@ A list of filters that will be applied to javascript.
The dotted path to a Django Storage backend to be used to save the
compressed files.
``COMPRESS_PARSER``
--------------------
:Default: ``'compressor.parser.BeautifulSoupParser'``
The backend to use when parsing the JavaScript or Stylesheet files.
The backends included in ``compressor``:
- ``compressor.parser.BeautifulSoupParser``
- ``compressor.parser.LxmlParser``
See `Dependencies`_ for more info about the packages you need for each parser.
``COMPRESS_REBUILD_TIMEOUT``
----------------------------
......@@ -175,8 +188,21 @@ modification timestamp of a file. Disabled by default. Should be smaller
than ``COMPRESS_REBUILD_TIMEOUT`` and ``COMPRESS_MINT_DELAY``.
Dependecies
***********
Dependencies
************
* BeautifulSoup_ (for the default ``compressor.parser.BeautifulSoupParser``)
::
pip install BeautifulSoup
* lxml_ (for the optional ``compressor.parser.LxmlParser``, requires libxml2_)
::
* BeautifulSoup
STATIC_DEPS=true pip install lxml
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
.. _lxml: http://codespeak.net/lxml/
.. _libxml2: http://xmlsoft.org/
import os
from BeautifulSoup import BeautifulSoup
from django import template
from django.conf import settings as django_settings
from django.template.loader import render_to_string
from django.core.cache import cache
from django.core.files.base import ContentFile
from django.core.files.storage import get_storage_class
from compressor.conf import settings
from compressor import filters
register = template.Library()
class UncompressableFileError(Exception):
    """Raised when a referenced file cannot be compressed."""
def get_hexdigest(plaintext):
    """
    Return the hexadecimal SHA-1 digest of ``plaintext``.

    ``hashlib`` is used when it can be imported; otherwise the legacy
    ``sha`` module provides the digest.
    """
    try:
        import hashlib
    except ImportError:
        import sha
        make_digest = sha.new
    else:
        make_digest = hashlib.sha1
    return make_digest(plaintext).hexdigest()
def get_mtime(filename):
    """
    Return the modification time of ``filename``.

    When ``settings.MTIME_DELAY`` is set, the value is kept in the cache for
    that many seconds so the file system is not consulted on every call.
    """
    delay = settings.MTIME_DELAY
    if not delay:
        # Caching disabled: always ask the file system.
        return os.path.getmtime(filename)
    cache_key = "django_compressor.mtime.%s" % filename
    cached = cache.get(cache_key)
    if cached is not None:
        return cached
    fresh = os.path.getmtime(filename)
    cache.set(cache_key, fresh, delay)
    return fresh
class Compressor(object):
    """
    Base class that combines the files and inline hunks found in a piece of
    markup into a single output.

    Subclasses must implement ``split_contents`` and provide the ``filters``,
    ``extension``, ``template_name`` and ``template_name_inline`` attributes
    that the methods below read.
    """

    def __init__(self, content, output_prefix="compressed"):
        self.content = content
        self.type = None
        self.output_prefix = output_prefix
        self.split_content = []

    def split_contents(self):
        """
        Return a list of ``(kind, value, elem)`` tuples where ``kind`` is
        ``'file'`` or ``'hunk'``. Must be overridden.
        """
        raise NotImplementedError('split_contents must be defined in a subclass')

    def get_filename(self, url):
        """
        Map ``url`` to a path in the storage backend.

        Raises ``UncompressableFileError`` when the URL does not start with
        the storage's base URL or the file does not exist in the storage.
        """
        storage = self.storage
        if not url.startswith(storage.base_url):
            raise UncompressableFileError('"%s" is not in COMPRESS_URL ("%s") and can not be compressed' % (url, storage.base_url))
        basename = url.replace(storage.base_url, "", 1)
        if not storage.exists(basename):
            raise UncompressableFileError('"%s" does not exist' % storage.path(basename))
        return storage.path(basename)

    @property
    def soup(self):
        """A BeautifulSoup tree built from ``self.content`` on each access."""
        return BeautifulSoup(self.content)

    @property
    def mtimes(self):
        """Modification times of every ``'file'`` entry of ``split_contents``."""
        return [get_mtime(h[1]) for h in self.split_contents() if h[0] == 'file']

    @property
    def cachekey(self):
        """Cache key derived from the content and the files' mtimes."""
        cachebits = [self.content]
        cachebits.extend([str(m) for m in self.mtimes])
        cachestr = "".join(cachebits).encode(django_settings.DEFAULT_CHARSET)
        return "django_compressor.%s" % get_hexdigest(cachestr)[:12]

    @property
    def storage(self):
        """A new instance of the storage class named by ``settings.STORAGE``."""
        return get_storage_class(settings.STORAGE)()

    @property
    def hunks(self):
        """
        The unicode contents of every file and inline hunk, after the
        ``input`` filters have run. The list is cached once it is non-empty.
        """
        if getattr(self, '_hunks', ''):
            return self._hunks
        # Collect into a local list and publish it only when complete, so an
        # exception part-way through cannot leave a truncated list cached.
        collected = []
        for kind, value, elem in self.split_contents():
            if kind == 'hunk':
                hunk = value
                if self.filters:
                    hunk = self.filter(hunk, 'input', elem=elem)
                # Let's cast BeautifulSoup element to unicode here since
                # it will try to encode using ascii internally later
                collected.append(unicode(hunk))
            elif kind == 'file':
                # TODO: wrap this in a try/except for IoErrors(?)
                fd = open(value, 'rb')
                try:
                    # try/finally closes the handle even when reading fails.
                    data = fd.read()
                finally:
                    fd.close()
                if self.filters:
                    data = self.filter(data, 'input', filename=value, elem=elem)
                charset = elem.get('charset', django_settings.DEFAULT_CHARSET)
                collected.append(unicode(data, charset))
        self._hunks = collected
        return self._hunks

    def concat(self):
        """Join all hunks with newlines, encoded with ``DEFAULT_CHARSET``."""
        # Design decision needed: either everything should be unicode up to
        # here or we encode strings as soon as we acquire them. Currently
        # concat() expects all hunks to be unicode and does the encoding
        return "\n".join([hunk.encode(django_settings.DEFAULT_CHARSET) for hunk in self.hunks])

    def filter(self, content, method, **kwargs):
        """
        Pass ``content`` through the ``method`` hook of every configured
        filter, in order, and return the result.
        """
        for path in self.filters:
            instance = filters.get_class(path)(content, filter_type=self.type)
            hook = getattr(instance, method)
            try:
                if callable(hook):
                    content = hook(**kwargs)
            except NotImplementedError:
                # A filter without this hook leaves the content untouched.
                pass
        return content

    @property
    def combined(self):
        """The concatenated hunks after the ``output`` filters; cached when non-empty."""
        if getattr(self, '_output', ''):
            return self._output
        output = self.concat()
        if self.filters:
            output = self.filter(output, 'output')
        self._output = output
        return self._output

    @property
    def hash(self):
        """First 12 hex digits of the digest of the combined output."""
        return get_hexdigest(self.combined)[:12]

    @property
    def new_filepath(self):
        """Storage path of the output file: OUTPUT_DIR/prefix/hash + extension."""
        filename = "".join([self.hash, self.extension])
        return os.path.join(
            settings.OUTPUT_DIR.strip(os.sep), self.output_prefix, filename)

    def save_file(self):
        """
        Save the combined output to the storage.

        Returns False when the file already exists, True after saving it.
        """
        filepath = self.new_filepath
        if self.storage.exists(filepath):
            return False
        self.storage.save(filepath, ContentFile(self.combined))
        return True

    def output(self):
        """
        Return the rendered template pointing at the compressed file, or the
        original content when ``settings.COMPRESS`` is off.
        """
        if not settings.COMPRESS:
            return self.content
        self.save_file()
        # Copy so that adding 'url' does not modify self.extra_context.
        context = dict(getattr(self, 'extra_context', {}))
        context['url'] = self.storage.url(self.new_filepath)
        return render_to_string(self.template_name, context)

    def output_inline(self):
        """Return the inline template rendered with the (optionally filtered) content."""
        # An explicit branch instead of ``a and b or c``: an empty filtered
        # result must not fall back to the unfiltered concatenation.
        if settings.COMPRESS:
            content = self.combined
        else:
            content = self.concat()
        context = {'content': content}
        if hasattr(self, 'extra_context'):
            context.update(self.extra_context)
        return render_to_string(self.template_name_inline, context)
class CssCompressor(Compressor):
    """
    Compressor for ``<link rel="stylesheet">`` and ``<style>`` elements,
    grouping consecutive elements that share the same ``media`` attribute.
    """

    def __init__(self, content, output_prefix="css"):
        # Initialise the base class first: its __init__ assigns
        # ``self.type = None`` and would otherwise discard the 'css' value.
        super(CssCompressor, self).__init__(content, output_prefix)
        self.extension = ".css"
        self.template_name = "compressor/css.html"
        self.template_name_inline = "compressor/css_inline.html"
        self.filters = ['compressor.filters.css_default.CssAbsoluteFilter']
        self.filters.extend(settings.COMPRESS_CSS_FILTERS)
        self.type = 'css'

    def split_contents(self):
        """
        Return ``(kind, value, elem)`` tuples for the stylesheet elements and
        build ``self.media_nodes``, a list of ``(media, CssCompressor)`` pairs.

        Files that cannot be compressed are skipped unless ``DEBUG`` is on,
        in which case the ``UncompressableFileError`` is re-raised.
        """
        if self.split_content:
            return self.split_content
        split = self.soup.findAll({'link' : True, 'style' : True})
        self.media_nodes = []
        for elem in split:
            data = None
            # ``get`` so that a <link> without a rel attribute is ignored
            # instead of raising KeyError.
            if elem.name == 'link' and elem.get('rel') == 'stylesheet':
                try:
                    data = ('file', self.get_filename(elem['href']), elem)
                except UncompressableFileError:
                    if django_settings.DEBUG:
                        raise
            elif elem.name == 'style':
                data = ('hunk', elem.string, elem)
            if data:
                self.split_content.append(data)
                media = elem.get('media', None)
                # Append to the previous node if it had the same media type,
                # otherwise create a new node.
                if self.media_nodes and self.media_nodes[-1][0] == media:
                    self.media_nodes[-1][1].split_content.append(data)
                else:
                    node = CssCompressor(content='')
                    node.split_content.append(data)
                    self.media_nodes.append((media, node))
        return self.split_content

    def output(self):
        """
        Render one output per media group and return them concatenated.

        Falls back to the base implementation when no media nodes were built,
        and returns the original content when ``settings.COMPRESS`` is off.
        """
        self.split_contents()
        if not hasattr(self, 'media_nodes'):
            return super(CssCompressor, self).output()
        if not settings.COMPRESS:
            return self.content
        ret = []
        for media, subnode in self.media_nodes:
            subnode.extra_context = {'media': media}
            ret.append(subnode.output())
        return ''.join(ret)
class JsCompressor(Compressor):
    """Compressor for ``<script>`` elements, both external and inline."""

    def __init__(self, content, output_prefix="js"):
        # Initialise the base class first: its __init__ assigns
        # ``self.type = None`` and would otherwise discard the 'js' value.
        super(JsCompressor, self).__init__(content, output_prefix)
        self.extension = ".js"
        self.template_name = "compressor/js.html"
        self.template_name_inline = "compressor/js_inline.html"
        self.filters = settings.COMPRESS_JS_FILTERS
        self.type = 'js'

    def split_contents(self):
        """
        Return ``(kind, value, elem)`` tuples for every ``<script>`` element:
        ``'file'`` when it has a ``src`` attribute, ``'hunk'`` otherwise.

        Files that cannot be compressed are skipped unless ``DEBUG`` is on,
        in which case the ``UncompressableFileError`` is re-raised.
        """
        if self.split_content:
            return self.split_content
        for elem in self.soup.findAll('script'):
            if elem.has_key('src'):
                try:
                    self.split_content.append(('file', self.get_filename(elem['src']), elem))
                except UncompressableFileError:
                    if django_settings.DEBUG:
                        raise
            else:
                self.split_content.append(('hunk', elem.string, elem))
        return self.split_content
from compressor.base import Compressor
from compressor.js import JsCompressor
from compressor.css import CssCompressor
from compressor.utils import get_hexdigest, get_mtime
from compressor.exceptions import UncompressableFileError
import os
from django.conf import settings as django_settings
from django.template.loader import render_to_string
from django.core.files.base import ContentFile
from django.core.files.storage import get_storage_class
from compressor.conf import settings
from compressor import filters
from compressor.exceptions import UncompressableFileError
from compressor.utils import get_hexdigest, get_mtime, get_class
class Compressor(object):
    """
    Base class that combines the files and inline hunks found in a piece of
    markup into a single output.

    Subclasses must implement ``split_contents`` and provide the ``filters``,
    ``extension``, ``template_name`` and ``template_name_inline`` attributes
    that the methods below read.
    """

    def __init__(self, content, output_prefix="compressed"):
        self.content = content
        self.type = None
        self.output_prefix = output_prefix
        self.split_content = []
        self._parser = None

    def split_contents(self):
        """
        Return a list of ``(kind, value, elem)`` tuples where ``kind`` is
        ``'file'`` or ``'hunk'``. Must be overridden.
        """
        raise NotImplementedError('split_contents must be defined in a subclass')

    def get_filename(self, url):
        """
        Map ``url`` to a path in the storage backend.

        Raises ``UncompressableFileError`` when the URL does not start with
        the storage's base URL or the file does not exist in the storage.
        """
        storage = self.storage
        if not url.startswith(storage.base_url):
            raise UncompressableFileError('"%s" is not in COMPRESS_URL ("%s") and can not be compressed' % (url, storage.base_url))
        basename = url.replace(storage.base_url, "", 1)
        if not storage.exists(basename):
            raise UncompressableFileError('"%s" does not exist' % storage.path(basename))
        return storage.path(basename)

    def _get_parser(self):
        # Build the parser named by settings.PARSER on first use and reuse it.
        # ``is None`` rather than truthiness, so a parser object that happens
        # to evaluate as false is not rebuilt on every access.
        if self._parser is None:
            parser_cls = get_class(settings.PARSER)
            self._parser = parser_cls(self.content)
        return self._parser

    def _set_parser(self, parser):
        self._parser = parser

    # Lazily created parser for ``self.content``; may be replaced by assignment.
    parser = property(_get_parser, _set_parser)

    @property
    def mtimes(self):
        """Modification times of every ``'file'`` entry of ``split_contents``."""
        return [get_mtime(h[1]) for h in self.split_contents() if h[0] == 'file']

    @property
    def cachekey(self):
        """Cache key derived from the content and the files' mtimes."""
        cachebits = [self.content]
        cachebits.extend([str(m) for m in self.mtimes])
        cachestr = "".join(cachebits).encode(django_settings.DEFAULT_CHARSET)
        return "django_compressor.%s" % get_hexdigest(cachestr)[:12]

    @property
    def storage(self):
        """A new instance of the storage class named by ``settings.STORAGE``."""
        return get_storage_class(settings.STORAGE)()

    @property
    def hunks(self):
        """
        The unicode contents of every file and inline hunk, after the
        ``input`` filters have run. The list is cached once it is non-empty.
        """
        if getattr(self, '_hunks', ''):
            return self._hunks
        # Collect into a local list and publish it only when complete, so an
        # exception part-way through cannot leave a truncated list cached.
        collected = []
        for kind, value, elem in self.split_contents():
            if kind == 'hunk':
                hunk = value
                if self.filters:
                    hunk = self.filter(hunk, 'input', elem=elem)
                # Let's cast BeautifulSoup element to unicode here since
                # it will try to encode using ascii internally later
                collected.append(unicode(hunk))
            elif kind == 'file':
                # TODO: wrap this in a try/except for IoErrors(?)
                fd = open(value, 'rb')
                try:
                    # try/finally closes the handle even when reading fails.
                    data = fd.read()
                finally:
                    fd.close()
                if self.filters:
                    data = self.filter(data, 'input', filename=value, elem=elem)
                # Only files need the element's attributes (for the charset).
                attribs = self.parser.elem_attribs(elem)
                charset = attribs.get('charset', django_settings.DEFAULT_CHARSET)
                collected.append(unicode(data, charset))
        self._hunks = collected
        return self._hunks

    def concat(self):
        """Join all hunks with newlines, encoded with ``DEFAULT_CHARSET``."""
        # Design decision needed: either everything should be unicode up to
        # here or we encode strings as soon as we acquire them. Currently
        # concat() expects all hunks to be unicode and does the encoding
        return "\n".join([hunk.encode(django_settings.DEFAULT_CHARSET) for hunk in self.hunks])

    def filter(self, content, method, **kwargs):
        """
        Pass ``content`` through the ``method`` hook of every configured
        filter, in order, and return the result.
        """
        for path in self.filters:
            instance = filters.get_class(path)(content, filter_type=self.type)
            hook = getattr(instance, method)
            try:
                if callable(hook):
                    content = hook(**kwargs)
            except NotImplementedError:
                # A filter without this hook leaves the content untouched.
                pass
        return content

    @property
    def combined(self):
        """The concatenated hunks after the ``output`` filters; cached when non-empty."""
        if getattr(self, '_output', ''):
            return self._output
        output = self.concat()
        if self.filters:
            output = self.filter(output, 'output')
        self._output = output
        return self._output

    @property
    def hash(self):
        """First 12 hex digits of the digest of the combined output."""
        return get_hexdigest(self.combined)[:12]

    @property
    def new_filepath(self):
        """Storage path of the output file: OUTPUT_DIR/prefix/hash + extension."""
        filename = "".join([self.hash, self.extension])
        return os.path.join(
            settings.OUTPUT_DIR.strip(os.sep), self.output_prefix, filename)

    def save_file(self):
        """
        Save the combined output to the storage.

        Returns False when the file already exists, True after saving it.
        """
        filepath = self.new_filepath
        if self.storage.exists(filepath):
            return False
        self.storage.save(filepath, ContentFile(self.combined))
        return True

    def output(self):
        """
        Return the rendered template pointing at the compressed file, or the
        original content when ``settings.COMPRESS`` is off.
        """
        if not settings.COMPRESS:
            return self.content
        self.save_file()
        # Copy so that adding 'url' does not modify self.extra_context.
        context = dict(getattr(self, 'extra_context', {}))
        context['url'] = self.storage.url(self.new_filepath)
        return render_to_string(self.template_name, context)

    def output_inline(self):
        """Return the inline template rendered with the (optionally filtered) content."""
        # An explicit branch instead of ``a and b or c``: an empty filtered
        # result must not fall back to the unfiltered concatenation.
        if settings.COMPRESS:
            content = self.combined
        else:
            content = self.concat()
        context = {'content': content}
        if hasattr(self, 'extra_context'):
            context.update(self.extra_context)
        return render_to_string(self.template_name_inline, context)
......@@ -30,3 +30,6 @@ MINT_DELAY = getattr(settings, 'COMPRESS_MINT_DELAY', 30) # 30 seconds
# Delay, in seconds, between checks for file changes. Read from
# COMPRESS_MTIME_DELAY; left as None (disabled) when that setting is absent.
MTIME_DELAY = getattr(settings, 'COMPRESS_MTIME_DELAY', None)
# Dotted path of the backend to use when parsing the JavaScript or stylesheet
# markup. Read from COMPRESS_PARSER; defaults to the BeautifulSoup parser.
PARSER = getattr(settings, 'COMPRESS_PARSER', 'compressor.parser.BeautifulSoupParser')
from django.conf import settings as django_settings
from compressor.conf import settings
from compressor.base import Compressor, UncompressableFileError
class CssCompressor(Compressor):
    """
    Compressor for ``<link rel="stylesheet">`` and ``<style>`` elements,
    grouping consecutive elements that share the same ``media`` attribute.
    """

    def __init__(self, content, output_prefix="css"):
        # Initialise the base class first: its __init__ assigns
        # ``self.type = None`` and would otherwise discard the 'css' value.
        super(CssCompressor, self).__init__(content, output_prefix)
        self.extension = ".css"
        self.template_name = "compressor/css.html"
        self.template_name_inline = "compressor/css_inline.html"
        self.filters = ['compressor.filters.css_default.CssAbsoluteFilter']
        self.filters.extend(settings.COMPRESS_CSS_FILTERS)
        self.type = 'css'

    def split_contents(self):
        """
        Return ``(kind, value, elem)`` tuples for the stylesheet elements and
        build ``self.media_nodes``, a list of ``(media, CssCompressor)`` pairs.

        Files that cannot be compressed are skipped unless ``DEBUG`` is on,
        in which case the ``UncompressableFileError`` is re-raised.
        """
        if self.split_content:
            return self.split_content
        self.media_nodes = []
        for elem in self.parser.css_elems():
            data = None
            elem_name = self.parser.elem_name(elem)
            elem_attribs = self.parser.elem_attribs(elem)
            # ``get`` so that a <link> without a rel attribute is ignored
            # instead of raising KeyError.
            if elem_name == 'link' and elem_attribs.get('rel') == 'stylesheet':
                try:
                    data = ('file', self.get_filename(elem_attribs['href']), elem)
                except UncompressableFileError:
                    if django_settings.DEBUG:
                        raise
            elif elem_name == 'style':
                data = ('hunk', self.parser.elem_content(elem), elem)
            if data:
                self.split_content.append(data)
                media = elem_attribs.get('media', None)
                # Append to the previous node if it had the same media type,
                # otherwise create a new node.
                if self.media_nodes and self.media_nodes[-1][0] == media:
                    self.media_nodes[-1][1].split_content.append(data)
                else:
                    node = CssCompressor(content='')
                    node.split_content.append(data)
                    self.media_nodes.append((media, node))
        return self.split_content

    def output(self):
        """
        Render one output per media group and return them concatenated.

        Falls back to the base implementation when no media nodes were built,
        and returns the original content when ``settings.COMPRESS`` is off.
        """
        self.split_contents()
        if not hasattr(self, 'media_nodes'):
            return super(CssCompressor, self).output()
        if not settings.COMPRESS:
            return self.content
        ret = []
        for media, subnode in self.media_nodes:
            subnode.extra_context = {'media': media}
            ret.append(subnode.output())
        return ''.join(ret)
class UncompressableFileError(Exception):
    """Raised when a file cannot be compressed."""
class FilterError(Exception):
    """Raised when a filter fails."""
class ParserError(Exception):
    """Raised when the parser fails."""
from compressor.exceptions import FilterError
from compressor.utils import get_class, get_mod_func
class FilterBase(object):
def __init__(self, content, filter_type=None, verbose=0):
self.type = filter_type
......@@ -8,38 +11,3 @@ class FilterBase(object):
raise NotImplementedError
def output(self, **kwargs):
raise NotImplementedError
class FilterError(Exception):
    """Raised when a filter fails."""
def get_class(class_string):
    """
    Convert a dotted path such as ``'package.module.ClassName'`` to the
    object it names.

    A value that already is a class (anything with ``__bases__``) is returned
    unchanged. Raises ``FilterError`` when the path has no module part, the
    module cannot be imported, or the module lacks the attribute.
    """
    if hasattr(class_string, '__bases__'):
        # Already a class: previously this fell through to ``return cls``
        # with ``cls`` never assigned.
        return class_string
    try:
        class_string = class_string.encode('ascii')
        mod_name, class_name = get_mod_func(class_string)
        if class_name == '':
            # No dot in the path, so there is nothing to import from;
            # report it through the same FilterError as other failures.
            raise ImportError(class_string)
        return getattr(__import__(mod_name, {}, {}, ['']), class_name)
    except (ImportError, AttributeError):
        raise FilterError('Failed to import filter %s' % class_string)
def get_mod_func(callback):
    """
    Split a dotted path at its last dot, e.g.
    'django.views.news.stories.story_detail' becomes
    ('django.views.news.stories', 'story_detail').

    A path without any dot is returned as ``(callback, '')``.
    """
    cut = callback.rfind('.')
    if cut < 0:
        return callback, ''
    head = callback[:cut]
    tail = callback[cut + 1:]
    return head, tail
......@@ -4,7 +4,7 @@ import posixpath
from compressor.filters import FilterBase, FilterError
from compressor.conf import settings
from compressor import get_hexdigest, get_mtime
from compressor.utils import get_hexdigest, get_mtime
class CssAbsoluteFilter(FilterBase):
def input(self, filename=None, **kwargs):
......
from django.conf import settings as django_settings
from compressor.conf import settings
from compressor.base import Compressor, UncompressableFileError
class JsCompressor(Compressor):
    """Compressor for ``<script>`` elements, both external and inline."""

    def __init__(self, content, output_prefix="js"):
        # Initialise the base class first: its __init__ assigns
        # ``self.type = None`` and would otherwise discard the 'js' value.
        super(JsCompressor, self).__init__(content, output_prefix)
        self.extension = ".js"
        self.template_name = "compressor/js.html"
        self.template_name_inline = "compressor/js_inline.html"
        self.filters = settings.COMPRESS_JS_FILTERS
        self.type = 'js'

    def split_contents(self):
        """
        Return ``(kind, value, elem)`` tuples for every element the parser
        reports: ``'file'`` when it has a ``src`` attribute, ``'hunk'``
        (with the element's content) otherwise.

        Files that cannot be compressed are skipped unless ``DEBUG`` is on,
        in which case the ``UncompressableFileError`` is re-raised.
        """
        if self.split_content:
            return self.split_content
        for elem in self.parser.js_elems():
            attribs = self.parser.elem_attribs(elem)
            if 'src' in attribs:
                try:
                    self.split_content.append(('file', self.get_filename(attribs['src']), elem))
                except UncompressableFileError:
                    if django_settings.DEBUG:
                        raise
            else:
                content = self.parser.elem_content(elem)
                self.split_content.append(('hunk', content, elem))
        return self.split_content
from django.conf import settings as django_settings
from django.utils.encoding import smart_unicode
from compressor.conf import settings
from compressor.exceptions import ParserError
from compressor.utils import get_class
class ParserBase(object):
    """
    Interface every parser backend implements. The constructor stores the
    markup to parse; every other method must be overridden.
    """

    def __init__(self, content):
        self.content = content

    def css_elems(self):
        """
        Return an iterable of the css elements to handle.
        """
        raise NotImplementedError()

    def js_elems(self):
        """
        Return an iterable of the js elements to handle.
        """
        raise NotImplementedError()

    def elem_attribs(self, elem):
        """
        Return the dictionary-like attribute store of ``elem``.
        """
        raise NotImplementedError()

    def elem_content(self, elem):
        """
        Return the content of ``elem``.
        """
        raise NotImplementedError()

    def elem_name(self, elem):
        """
        Return the name of ``elem``.
        """
        raise NotImplementedError()

    def elem_str(self, elem):
        """
        Return the string representation of ``elem``.
        """
        raise NotImplementedError()
class BeautifulSoupParser(ParserBase):
    """Parser backend built on BeautifulSoup."""

    # Parsed tree, created on first access of ``soup``.
    _soup = None

    @property
    def soup(self):
        """
        The BeautifulSoup tree for ``self.content``, parsed once and cached.

        Raises ``ParserError`` when BeautifulSoup cannot be imported.
        """
        if self._soup is None:
            try:
                from BeautifulSoup import BeautifulSoup
            except ImportError:
                # sys.exc_info() instead of binding the exception in the
                # except clause: that spelling is valid on every Python version.
                import sys
                raise ParserError("Error while initializing Parser: %s" % sys.exc_info()[1])
            self._soup = BeautifulSoup(self.content)
        return self._soup

    def css_elems(self):
        """Return all ``link`` and ``style`` elements."""
        return self.soup.findAll({'link' : True, 'style' : True})

    def js_elems(self):
        """Return all ``script`` elements."""
        return self.soup.findAll('script')

    def elem_attribs(self, elem):
        """Return the element's attributes as a new dict."""
        return dict(elem.attrs)

    def elem_content(self, elem):
        """
        Return the element's string content, or an empty string when the
        element has none.
        """
        content = elem.string
        if content is None:
            # e.g. an empty element; returning None would be turned into the
            # literal text "None" when the caller casts the hunk to unicode.
            return ''
        return content

    def elem_name(self, elem):
        """Return the element's tag name."""
        return elem.name

    def elem_str(self, elem):
        """Return the element serialised as unicode."""
        return smart_unicode(elem)
class LxmlParser(ParserBase):
    """Parser backend built on lxml."""

    # Parsed tree, created on first access of ``tree``.
    _tree = None

    @property
    def tree(self):
        """
        The lxml tree for ``self.content`` wrapped in a ``<root>`` element,
        parsed once and cached.

        Raises ``ParserError`` when lxml cannot be imported.
        """
        if self._tree is None:
            try:
                from lxml import html
                from lxml.etree import tostring
            except ImportError:
                # sys.exc_info() instead of binding the exception in the
                # except clause: that spelling is valid on every Python version.
                import sys
                raise ParserError("Error while initializing Parser: %s" % sys.exc_info()[1])
            content = '<root>%s</root>' % self.content
            tree = html.fromstring(content)
            try:
                # Result is discarded; serialising is only used to find out
                # whether the tree can be rendered as unicode.
                tostring(tree, encoding=unicode)
            except UnicodeDecodeError:
                # ``lxml.html`` does not load its soupparser submodule on its
                # own, so import it explicitly before falling back to it.
                from lxml.html import soupparser
                tree = soupparser.fromstring(content)
            self._tree = tree
        return self._tree

    def css_elems(self):
        """Return the stylesheet ``link`` elements and ``style`` elements."""
        return self.tree.xpath('link[@rel="stylesheet"]|style')

    def js_elems(self):
        """Return the ``script`` elements."""
        return self.tree.findall('script')

    def elem_attribs(self, elem):
        """Return the element's attribute mapping."""
        return elem.attrib

    def elem_content(self, elem):
        """
        Return the element's text as unicode, or an empty unicode string when
        the element has no text.
        """
        text = elem.text
        if text is None:
            # smart_unicode(None) would yield the literal text "None".
            text = ''
        return smart_unicode(text)

    def elem_name(self, elem):
        """Return the element's tag name."""
        return elem.tag

    def elem_str(self, elem):
        """Return the element serialised as unicode HTML."""
        from lxml import etree
        return smart_unicode(etree.tostring(elem, method='html', encoding=unicode))
import os
from django.core.cache import cache
from compressor.conf import settings
from compressor.exceptions import FilterError
def get_hexdigest(plaintext):
try: