Commit aef93caa authored by Andrew Roetker

(PDB-1956) Remove support for ruby18 in our terminus

This commit retires Ruby 1.8 support in our terminus.
parent f2ae20c8
source ENV['GEM_SOURCE'] || "https://rubygems.org"
puppet_branch = ENV['PUPPET_VERSION'] || "latest"
oldest_supported_puppet = "3.8.1"
oldest_supported_puppet = "4.0.0"
beaker_version = ENV['BEAKER_VERSION']
def location_for(place, fake_version = nil)
......@@ -14,16 +14,7 @@ def location_for(place, fake_version = nil)
end
gem 'facter'
case RUBY_VERSION
when '1.8.7'
gem 'rake', '<= 10.1.1'
# activesupport calls in the latest i18n, which drops 1.8.7. This pins to
# a lower version
gem 'i18n', '~> 0.6.11'
else
gem 'rake'
end
gem 'rake'
group :test do
# Pinning to work around an incompatibility with 2.14 in puppetlabs_spec_helper
......@@ -41,15 +32,8 @@ group :test do
end
gem 'mocha', '~> 1.0'
# Since newer versions of rake are not supported, we pin
case RUBY_VERSION
when '1.8.7'
# No activerecord or sqlite for you
else
gem 'activerecord', '~> 3.2'
gem 'sqlite3'
end
gem 'activerecord', '~> 3.2'
gem 'sqlite3'
end
group :acceptance do
......
......@@ -153,21 +153,13 @@ module CharEncoding
end
def self.utf8_string(str, error_context_str)
if RUBY_VERSION =~ /^1.8/
# Ruby 1.8 doesn't have String#encode and related methods, and there
# appears to be a bug in iconv that will interpret some byte sequences
# as 6-byte characters. Thus, we are forced to resort to some unfortunate
# manual chicanery.
warn_if_changed(str, ruby18_clean_utf8(str))
else
begin
coerce_to_utf8(str, error_context_str)
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
# If we got an exception, the string is either invalid or not
# convertible to UTF-8, so drop those bytes.
begin
coerce_to_utf8(str, error_context_str)
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
# If we got an exception, the string is either invalid or not
# convertible to UTF-8, so drop those bytes.
warn_if_changed(str, str.encode('UTF-8', :invalid => :replace, :undef => :replace))
end
warn_if_changed(str, str.encode('UTF-8', :invalid => :replace, :undef => :replace))
end
end
......@@ -179,137 +171,6 @@ module CharEncoding
converted_str
end
# Scrubs `str` for Ruby 1.8 by chaining both cleaning passes: the Iconv-based
# conversion runs first and its output is then fed to the manual byte scanner.
#
# Rationale (as recorded by the original authors): each approach was tried on
# its own and each has a gap. Iconv does a poor job with characters whose
# codepoints exceed the legal maximum for UTF-8; the manual scrub is slower
# and does not catch overlong encodings. Detecting overlong encodings by hand
# would be a non-trivial chunk of bitwise arithmetic (which Ruby is not fast
# at), so the two passes are simply composed instead.
#
# @param str [String] the string to scrub
# @return [String] the result of the manual pass over the Iconv output
# @api private
def self.ruby18_clean_utf8(str)
  iconv_scrubbed = iconv_to_utf8(str)
  ruby18_manually_clean_utf8(iconv_scrubbed)
end
# Iconv-based scrubbing pass: converts `str` from 'UTF-8' to
# 'UTF-8//IGNORE' and returns the converted text.
#
# A single space is appended before conversion and the last element of the
# converted string is dropped again afterwards; the linked post below explains
# the trick.
#
# NOTE(review): an earlier @todo on this method said it was no longer used and
# only kept around temporarily, but ruby18_clean_utf8 still calls it.
#
# @param str [String] the string to convert
# @return [String] the Iconv output without the trailing padding
# @api private
def self.iconv_to_utf8(str)
  converter = Iconv.new('UTF-8//IGNORE', 'UTF-8')

  # http://po-ru.com/diary/fixing-invalid-utf-8-in-ruby-revisited/
  padded_output = converter.iconv(str + " ")
  padded_output[0..-2]
end
# Looks up `byte` in the Utf8CharLens table and returns whatever is stored
# there. ruby18_manually_clean_utf8 treats the result as the length of the
# character that `byte` starts (0 for an unusable byte, 1 for a single-byte
# character, 2..4 for a multi-byte character).
#
# NOTE(review): Utf8CharLens is defined outside this view; presumably it is
# indexed by the byte's integer value (0..255) -- confirm.
#
# @param byte the first byte of a character
# @return the Utf8CharLens entry for `byte`
# @api private
def self.get_char_len(byte)
Utf8CharLens[byte]
end
# Manually cleans a string by stripping any byte sequences that are
# not valid UTF-8 characters. If you'd prefer for the invalid bytes to be
# replaced with the unicode replacement character rather than being stripped,
# you may pass `false` for the optional second parameter (`strip`, which
# defaults to `true`).
#
# The string is walked byte by byte. For each leading byte, get_char_len
# decides how it is handled:
#   * 0    - dropped (or replaced with Utf8ReplacementChar when `strip` is
#            false)
#   * 1    - copied through unchanged
#   * 2..4 - delegated to ruby18_handle_multibyte_char, after which the
#            remaining bytes of that character are skipped
#
# @param str [String] the string to scrub
# @param strip [Boolean] drop invalid bytes (true) or replace them (false)
# @return [String] a new string holding the cleaned content
# @raise [Puppet::DevError] if get_char_len returns anything outside 0..4
# @api private
def self.ruby18_manually_clean_utf8(str, strip = true)
  # This is a hack to allow this code to work with either ruby 1.8 or 1.9,
  # which is useful for debugging and benchmarking. For more info see the
  # comments in the #get_byte method.
  @has_get_byte = str.respond_to?(:getbyte)

  i = 0
  # The loop index is a *byte* offset (it is what get_byte receives), so the
  # bound has to be a byte count too. String#length counts characters on
  # encoding-aware rubies, which would end the scan early and silently
  # truncate any input containing multi-byte characters.
  len = str.respond_to?(:bytesize) ? str.bytesize : str.length
  # dup so the accumulator is always a mutable string, never a shared literal
  result = "".dup

  while i < len
    byte = get_byte(str, i)
    i += 1

    char_len = get_char_len(byte)
    case char_len
    when 0
      result.concat(Utf8ReplacementChar) unless strip
    when 1
      result << byte
    when 2..4
      ruby18_handle_multibyte_char(result, byte, str, i, char_len, strip)
      # the leading byte is already consumed; skip the rest of the character
      i += char_len - 1
    else
      raise Puppet::DevError, "Unhandled UTF8 char length: '#{char_len}'"
    end
  end

  result
end
# Reads the remaining bytes of a multi-byte character whose leading byte has
# already been consumed, and appends the whole character to `result_str` if
# is_valid_multibyte_suffix accepts it. Otherwise nothing is appended when
# `strip` is true, or Utf8ReplacementChar is appended when `strip` is false.
#
# @param result_str [String] accumulator, modified in place
# @param byte [Integer] the leading byte of the character
# @param str [String] the string being scanned
# @param i [Integer] byte offset of the first byte *after* the leading byte
# @param char_len [Integer] total length of the character, in bytes
# @param strip [Boolean] drop invalid input (true) or replace it (false)
# @api private
def self.ruby18_handle_multibyte_char(result_str, byte, str, i, char_len, strip = true)
  # `i` is a byte offset, so compare against the byte count. String#length
  # counts characters on encoding-aware rubies and would make this check
  # reject perfectly complete characters.
  byte_count = str.respond_to?(:bytesize) ? str.bytesize : str.length

  # If we don't have enough bytes left to read the full character, we
  # put on a replacement character and bail.
  if i + (char_len - 1) > byte_count
    result_str.concat(Utf8ReplacementChar) unless strip
    return
  end

  # Kept as an array of integers because the validity check does bitwise
  # math on them. We've already read the first byte, so the range runs from
  # 0 to (n-2); e.g. for a 2-byte char it is 0..0, reading 1 more byte.
  char_additional_bytes = (0..char_len - 2).map { |x| get_byte(str, i + x) }

  if is_valid_multibyte_suffix(byte, char_additional_bytes)
    # Assemble the character from raw bytes. Appending the Integer leading
    # byte with `<<` would append it as a code point (not a byte) when
    # Strings carry an encoding; "C*" is used because the values are
    # unsigned (0..255).
    raw_char = ([byte] + char_additional_bytes).pack("C*")
    # pack yields a binary string on encoding-aware rubies; retag it so the
    # concat below does not hit an encoding-compatibility error.
    raw_char.force_encoding(result_str.encoding) if raw_char.respond_to?(:force_encoding)
    result_str.concat(raw_char)
  else
    result_str.concat(Utf8ReplacementChar) unless strip
  end
end
# Decides whether `additional_bytes` form a legal tail for a multi-byte
# character that starts with `byte`.
#
# Two checks are made:
#   1. Upper bound. The UTF-8 spec says that codepoints greater than 0x10FFFF
#      are illegal. U+10FFFF is encoded as F4 8F BF BF, so when the leading
#      byte is F4 any second byte above 0x8F puts the character past the
#      limit (the first such sequence is F4 90 80 80, i.e. U+110000).
#   2. Every additional byte must be a continuation byte (bit pattern
#      10xxxxxx).
#
# Overlong encodings are not detected here.
#
# @param byte [Integer] the leading byte of the character
# @param additional_bytes [Array<Integer>] the bytes following the leading byte
# @return [Boolean] true when the suffix is acceptable
# @api private
def self.is_valid_multibyte_suffix(byte, additional_bytes)
  if byte == 0xF4
    second_byte = additional_bytes.first
    return false if second_byte && second_byte > 0x8F
  end

  additional_bytes.all? { |b| (b & 0xC0) == 0x80 }
end
# Returns the byte of `str` at byte offset `index`.
#
# This method is a hack to allow this code to work with either ruby 1.8
# or 1.9. In production this code path should never be exercised by
# 1.9 because it has a much more sane way to accomplish our goal, but
# for testing, it is useful to be able to run the 1.8 codepath in 1.9.
#
# Whether String#getbyte exists is cached in @has_get_byte.
# ruby18_manually_clean_utf8 sets that flag before scanning; if this method
# is reached without the flag having been set, it is detected here so the
# method never falls back to `str[index]` by accident (which yields a
# one-character String rather than an Integer on encoding-aware rubies).
#
# @param str [String] the string to read from
# @param index [Integer] byte offset into `str`
# @return the byte at `index` (nil from getbyte when out of range)
# @api private
def self.get_byte(str, index)
  @has_get_byte = str.respond_to?(:getbyte) if @has_get_byte.nil?

  if @has_get_byte
    str.getbyte(index)
  else
    str[index]
  end
end
end
end
end
......
......@@ -6,105 +6,8 @@ require 'spec_helper'
require 'puppet/util/puppetdb/char_encoding'
describe Puppet::Util::Puppetdb::CharEncoding do
describe "#ruby_18_clean_utf8", :if => RUBY_VERSION =~ /^1.8/ do
# Spec helper: packs `in_bytes` into a string, runs it through
# described_class.ruby18_clean_utf8, and asserts that the output equals
# `expected_bytes` packed the same way.
#
# Both arrays hold integer byte values and are packed with the 'c*'
# directive.
def test_utf8_clean(in_bytes, expected_bytes)
instr = in_bytes.pack('c*')
out = described_class.ruby18_clean_utf8(instr)
out.should == expected_bytes.pack('c*')
end
it "should recognize (and not modify) valid multi-byte characters" do
in_bytes = [0xE2, 0x9B, 0x87]
expected_bytes = [0xE2, 0x9B, 0x87]
test_utf8_clean(in_bytes, expected_bytes)
end
Utf8ReplacementChar = [0xEF, 0xBF, 0xBD]
it "should strip invalid UTF-8 characters from an invalid multi-byte sequence" do
in_bytes = [0xE2, 0xCB, 0x87]
test_utf8_clean(in_bytes, [0xCB, 0x87])
end
it "should strip incomplete multi-byte characters" do
in_bytes = [0xE2, 0x9B]
test_utf8_clean(in_bytes, [])
end
it "should replace invalid characters with the unicode replacement character" do
# This is related to ticket #14873; our utf8_string code for 1.9 is being
# much more aggressive about replacing bytes with the unicode replacement char;
# it appears to be more correct, as the way that the 1.8/IConv approach
# was handling it was causing certain strings to decode differently in
# clojure, thus causing checksum errors.
in_bytes = [0x21, 0x7F, 0xFD, 0x80, 0xBD, 0xBB, 0xB6, 0xA1]
expected_bytes = [0x21, 0x7F]
test_utf8_clean(in_bytes, expected_bytes)
end
# A multi-byte sequence beginning with any of the following bytes is
# illegal. For more info, see http://en.wikipedia.org/wiki/UTF-8
[[[0xC0, 0xC1], 2],
[[0xF5, 0xF6, 0xF7], 4],
[[0xF8, 0xF9, 0xFA, 0xFB], 5],
[[0xFC, 0xFD, 0xFE, 0xFF], 6]].each do |bytes, num_bytes|
bytes.each do |first_byte|
it "should strip the invalid bytes from a #{num_bytes}-byte character starting with 0x#{first_byte.to_s(16)}" do
in_bytes = [first_byte]
(num_bytes - 1).times { in_bytes << 0x80 }
test_utf8_clean(in_bytes, [])
end
end
end
context "when dealing with multi-byte sequences beginning with 0xF4" do
it "should accept characters that are below the 0x10FFFF limit of Unicode" do
in_bytes = [0xF4, 0x8f, 0xbf, 0xbf]
expected_bytes = [0xF4, 0x8f, 0xbf, 0xbf]
test_utf8_clean(in_bytes, expected_bytes)
end
it "should reject characters that are above the 0x10FFFF limit of Unicode" do
in_bytes = [0xF4, 0x90, 0xbf, 0xbf]
test_utf8_clean(in_bytes, [])
end
end
end
describe "#utf8_string" do
describe "on ruby 1.8", :if => RUBY_VERSION =~ /^1.8/ do
it "should convert from ascii without a warning" do
Puppet.expects(:warning).never
str = "any ascii string"
subject.utf8_string(str, nil).should == str
end
it "should strip invalid chars from non-overlapping latin-1 with a warning" do
Puppet.expects(:warning).with {|msg| msg =~ /Ignoring invalid UTF-8 byte sequences/}
str = "a latin-1 string \xd6"
subject.utf8_string(str, nil).should == "a latin-1 string "
end
it "should strip invalid chars and warn if the string is invalid UTF-8" do
Puppet.expects(:warning).with {|msg| msg =~ /Ignoring invalid UTF-8 byte sequences/}
str = "an invalid utf-8 string \xff"
subject.utf8_string(str, nil).should == "an invalid utf-8 string "
end
it "should return a valid utf-8 string without warning" do
Puppet.expects(:warning).never
str = "a valid utf-8 string \xc3\x96"
subject.utf8_string(str, nil).should == str
end
end
describe "on ruby > 1.8", :if => RUBY_VERSION !~ /^1.8/ do
describe "on ruby >= 1.9" do
it "should convert from ascii without a warning" do
Puppet.expects(:warning).never
......@@ -185,7 +88,7 @@ describe Puppet::Util::Puppetdb::CharEncoding do
end
end
describe "on ruby > 1.8", :if => RUBY_VERSION !~ /^1.8/ do
describe "on ruby >= 1.9" do
it "finds all index of a given character" do
described_class.all_indexes_of_char("a\u2192b\u2192c\u2192d\u2192", "\u2192").should == [1, 3, 5, 7]
described_class.all_indexes_of_char("abcd", "\u2192").should == []
......
......@@ -62,7 +62,7 @@ describe Puppet::Util::Puppetdb::Command do
cmd.payload.include?("\u2192").should be_true
end
describe "on ruby > 1.8", :if => RUBY_VERSION !~ /^1.8/ do
describe "on ruby >= 1.9" do
it "should warn when a command payload includes non-ascii UTF-8 characters" do
Puppet.expects(:warning).with {|msg| msg =~ /Error encoding a 'command-1' command for host 'foo.localdomain' ignoring invalid UTF-8 byte sequences/}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment