Skip to content
Commits on Source (57)
target
jcodings.iml
.idea/
language: java
jdk:
- openjdk8
- openjdk9
script: mvn test
<?xml version="1.0" ?>
<!--
  Ant build for JCodings.

  Targets:
    clean   : deletes the compiled classes and the distribution directory.
    compile : compiles the sources under ${src.dir} into ${bin.dir}.
    build   : (default) runs compile, then packages ${bin.dir} into
              ${dist.dir}/${jar.name}.
-->
<project name="JCodings" default="build">
    <!-- Locations and artifact name used by the targets below. -->
    <property name="src.dir" value="src" />
    <property name="bin.dir" value="target/classes" />
    <property name="dist.dir" value="target" />
    <property name="jar.name" value="jcodings.jar" />

    <target name="clean">
        <delete dir="${bin.dir}" />
        <delete dir="${dist.dir}" />
    </target>

    <target name="compile">
        <mkdir dir="${bin.dir}" />
        <!--
          encoding is pinned to UTF-8 so the result does not depend on the
          platform default charset. includeantruntime="false" keeps Ant's own
          libraries off the compile classpath and avoids the Ant 1.8+ warning
          emitted when the attribute is left unset.
        -->
        <javac srcdir="${src.dir}" destdir="${bin.dir}" encoding="UTF-8" includeantruntime="false" />
    </target>

    <target name="build" depends="compile">
        <mkdir dir="${dist.dir}" />
        <!-- Capture the build date and time for the manifest attributes below. -->
        <tstamp>
            <format property="buildDate" pattern="yyyy-MM-dd" />
            <format property="buildTime" pattern="HH:mm:ss" />
        </tstamp>
        <!-- MANIFEST.MF supplies the base manifest; the inline attributes are merged into it. -->
        <jar destfile="${dist.dir}/${jar.name}" manifest="MANIFEST.MF">
            <fileset dir="${bin.dir}" />
            <manifest>
                <attribute name="Built-By" value="${user.name}" />
                <attribute name="Built-Date" value="${buildDate}" />
                <attribute name="Built-Time" value="${buildTime}" />
            </manifest>
        </jar>
    </target>
</project>
<?xml version="1.0" ?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
<version>1.0.31-SNAPSHOT</version>
<version>1.0.42-SNAPSHOT</version>
<name>JCodings</name>
<description>
Byte based encoding support library for java
</description>
<description>Byte based encoding support library for java</description>
<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>7</version>
</parent>
<issueManagement>
<system>Github</system>
<url>https://github.com/jruby/jcodings/issues</url>
</issueManagement>
<scm>
<connection>scm:git:git://github.com/jruby/jcodings.git</connection>
<developerConnection>scm:git:git@github.com:jruby/jcodings.git</developerConnection>
<url>https://github.com/jruby/jcodings</url>
</scm>
<licenses>
<license>
<name>MIT License</name>
......@@ -33,15 +18,28 @@
<distribution>repo</distribution>
</license>
</licenses>
<developers>
<developer>
<id>lopex</id>
<name>Marcin Mielzynski</name>
<email>lopx@gazeta.pl</email>
</developer>
<developer>
<id>headius</id>
<name>Charles Oliver Nutter</name>
<email>headius@headius.com</email>
</developer>
</developers>
<scm>
<connection>scm:git:git://github.com/jruby/jcodings.git</connection>
<developerConnection>scm:git:git@github.com:jruby/jcodings.git</developerConnection>
<url>https://github.com/jruby/jcodings</url>
<tag>HEAD</tag>
</scm>
<issueManagement>
<system>Github</system>
<url>https://github.com/jruby/jcodings/issues</url>
</issueManagement>
<dependencies>
<dependency>
<groupId>junit</groupId>
......@@ -50,12 +48,10 @@
<scope>test</scope>
</dependency>
</dependencies>
<build>
<sourceDirectory>src</sourceDirectory>
<scriptSourceDirectory>scripts</scriptSourceDirectory>
<testSourceDirectory>test</testSourceDirectory>
<finalName>jcodings</finalName>
<extensions>
<extension>
<groupId>org.apache.maven.wagon</groupId>
......@@ -63,41 +59,30 @@
<version>2.1</version>
</extension>
</extensions>
<resources>
<resource>
<filtering>false</filtering>
<directory>resources</directory>
</resource>
</resources>
<finalName>jcodings</finalName>
<plugins>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9</version>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
<configuration>
<additionalparam>-Xdoclint:none</additionalparam>
<quiet>true</quiet>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<id>default-compile</id>
<configuration>
<source>1.7</source>
<target>1.7</target>
<excludes>
<exclude>module-info.java</exclude>
</excludes>
</configuration>
</execution>
</executions>
<configuration>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
......@@ -115,18 +100,70 @@
<version>2.6</version>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
<version>2.22.0</version>
<configuration>
<argLine>-Dfile.encoding=UTF-8</argLine>
</configuration>
</plugin>
</plugins>
<resources>
<resource>
<directory>resources</directory>
</resource>
</resources>
</build>
<profiles>
<profile>
<id>release-on-9</id>
<activation>
<jdk>[9,)</jdk>
</activation>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<execution>
<id>compile9</id>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<release>9</release>
<includes>
<include>module-info.java</include>
</includes>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.0.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
<configuration>
<additionalJOption>-Xdoclint:none</additionalJOption>
<additionalOptions>-html5</additionalOptions>
<quiet>true</quiet>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
#!/usr/bin/env ruby
# coding: utf-8
REPO_PATH = ARGV.first || '/usr/src/ruby-2.5.0' # path to ruby repo
SECTION_NAME = "rdata"
REPO_PATH = ARGV.first || '/usr/src/ruby-2.5.1' # path to ruby repo
SECTION_NAME, G_PREFIX = case RUBY_PLATFORM
when /linux/i; ["rodata", ""]
when /darwin/i; ["const_data", "g"]
else ;["rdata", ""]
end
UNICODE_VERSION = "10.0.0"
SRC_DIR = "../src/org/jcodings"
DST_BIN_DIR = "../resources/tables"
......@@ -18,8 +24,8 @@ end
def process_binary obj_name
binary = open(obj_name, "rb"){|f|f.read}
offset = `objdump -h -j .#{SECTION_NAME} #{obj_name}`[/\.#{SECTION_NAME}.*?(\w+)\s+\S+$/, 1].to_i(16)
`nm --no-sort --defined-only #{obj_name}`.split("\n").map{|s|s.split(/\s+/)}.each do |address, _, name|
offset = `#{G_PREFIX}objdump -h -j .#{SECTION_NAME} #{obj_name}`[/\.#{SECTION_NAME}.*?(\w+)\s+\S+$/, 1].to_i(16)
`#{G_PREFIX}nm --no-sort --defined-only #{obj_name}`.split("\n").map{|s|s.split(/\s+/)}.each do |address, _, name|
yield name, binary, address.to_i(16) + offset
end
end
......@@ -86,7 +92,7 @@ def generate_transcoder_list
generic_list = []
transcoder_list = []
Dir["#{REPO_PATH}/enc/trans/*.c"].reject{|f| f =~ /transdb/}.each do |trans_file|
Dir["#{REPO_PATH}/enc/trans/*.c"].reject{|f| f =~ /transdb/}.sort.each do |trans_file|
name = trans_file[/(\w+)\.c/, 1].split('_').map{|e| e.capitalize}.join("")
trans_src = open(trans_file){|f|f.read}
......@@ -113,23 +119,24 @@ def generate_transcoder_list
end
def generate_transoder_data
Dir["#{REPO_PATH}/enc/trans/*.c"].reject{|f| f =~ /transdb/}.each do |trans_file|
Dir["#{REPO_PATH}/enc/trans/*.c"].reject{|f| f =~ /transdb/}.sort.each do |trans_file|
# next unless trans_file =~ /utf8/
trans_file = trans_file[/(.*)\./, 1]
src = open("#{trans_file}.c", "rb").read
make_name = -> (name) {name.split('_').map{|e|e.capitalize}.join('')}
process_binary "#{trans_file}.o" do |name, binary, address|
case name
when /(.*)_byte_array/
name = $1
size = src[/(\w+?_byte_array)\[(\d+?)\]/m, 2].to_i
open("#{DST_BIN_DIR}/" + "Transcoder_#{name.capitalize.tr('_', '')}_ByteArray.bin", "wb") do |f|
open("#{DST_BIN_DIR}/" + "Transcoder_#{make_name.(name)}_ByteArray.bin", "wb") do |f|
f << [size].pack("N")
f << binary[address, size]
end
when /(.*)_word_array/
name = $1
size = src[/(\w+?_word_array)\[(\d+?)\]/m, 2].to_i
open("#{DST_BIN_DIR}/" + "Transcoder_#{name.capitalize.tr('_', '')}_WordArray.bin", "wb") do |f|
open("#{DST_BIN_DIR}/" + "Transcoder_#{make_name.(name)}_WordArray.bin", "wb") do |f|
f << [size].pack("N")
address.step(address + (size * 4 - 1), 4).each do |adr|
f << binary[adr, 4].unpack("l").pack("N")
......
/**
 * Module descriptor for {@code org.jruby.jcodings}.
 *
 * <p>Each {@code exports} directive below makes the public types of that
 * package accessible to code in other modules. Because the module is declared
 * {@code open}, all of its packages are additionally opened for reflective
 * access at run time.
 */
open module org.jruby.jcodings {
exports org.jcodings;
exports org.jcodings.ascii;
exports org.jcodings.constants;
exports org.jcodings.exception;
exports org.jcodings.specific;
exports org.jcodings.spi;
exports org.jcodings.transcode;
exports org.jcodings.transcode.specific;
exports org.jcodings.unicode;
exports org.jcodings.util;
}
\ No newline at end of file
......@@ -50,7 +50,7 @@ abstract class AbstractEncoding extends Encoding {
*/
@Override
public boolean isNewLine(byte[]bytes, int p, int end) {
return p < end ? bytes[p] == (byte)0x0a : false;
return p < end ? bytes[p] == Encoding.NEW_LINE : false;
}
protected final int asciiMbcCaseFold(int flag, byte[]bytes, IntHolder pp, int end, byte[]lower) {
......@@ -85,7 +85,6 @@ abstract class AbstractEncoding extends Encoding {
asciiApplyAllCaseFold(flag, fun, arg);
}
protected static final CaseFoldCodeItem[] EMPTY_FOLD_CODES = new CaseFoldCodeItem[]{};
protected final CaseFoldCodeItem[]asciiCaseFoldCodesByString(int flag, byte[]bytes, int p, int end) {
int b = bytes[p] & 0xff;
......@@ -94,7 +93,7 @@ abstract class AbstractEncoding extends Encoding {
} else if (0x61 <= b && b <= 0x7a) {
return new CaseFoldCodeItem[]{CaseFoldCodeItem.create(1, b - 0x20)};
} else {
return EMPTY_FOLD_CODES;
return CaseFoldCodeItem.EMPTY_FOLD_CODES;
}
}
......
......@@ -20,6 +20,8 @@
package org.jcodings;
public final class CaseFoldCodeItem {
public static final CaseFoldCodeItem[] EMPTY_FOLD_CODES = new CaseFoldCodeItem[]{};
public final int byteLen;
public final int code[];
......
......@@ -106,7 +106,7 @@ public abstract class CaseFoldMapEncoding extends SingleByteEncoding {
}
}
}
return EMPTY_FOLD_CODES;
return CaseFoldCodeItem.EMPTY_FOLD_CODES;
}
@Override
......
......@@ -19,7 +19,7 @@
*/
package org.jcodings;
public class CodeRange {
public final class CodeRange {
public static boolean isInCodeRange(int[]p, int code) {
return isInCodeRange(p, 0, code);
}
......
......@@ -23,7 +23,7 @@ import org.jcodings.exception.ErrorMessages;
import org.jcodings.exception.InternalException;
import org.jcodings.util.CaseInsensitiveBytesHash;
public class EncodingDB {
public final class EncodingDB {
public static final class Entry {
private static int count;
......
......@@ -85,7 +85,7 @@ public final class ISO8859_1Encoding extends ISOEncoding {
return new CaseFoldCodeItem[]{CaseFoldCodeItem.create(1, b - 0x20)};
}
}
return EMPTY_FOLD_CODES;
return CaseFoldCodeItem.EMPTY_FOLD_CODES;
}
@Override
......
......@@ -420,11 +420,11 @@ public class TranscodeFunctions {
{
long s0 = s[sStart] & 0xFF;
long s1 = s[sStart+1] & 0xFF;
long s2 = s[sStart+2] & 0xFF;
long s3 = s[sStart+3] & 0xFF;
long diff = info >> 8;
long u; /* Unicode Scalar Value */
if ((diff & 0x20000) != 0) { /* GB18030 4 bytes */
long s2 = s[sStart+2] & 0xFF;
long s3 = s[sStart+3] & 0xFF;
u = (((s0 * 10 + s1) * 126 + s2) * 10 + s3 - diff - 0x170000) & 0xFFFFFFFFL;
}
else { /* GB18030 2 bytes */
......
......@@ -407,7 +407,7 @@ public abstract class UnicodeEncoding extends MultiByteEncoding {
}
}
if (items == null || n == 0) return EMPTY_FOLD_CODES;
if (items == null || n == 0) return CaseFoldCodeItem.EMPTY_FOLD_CODES;
if (n < items.length) {
CaseFoldCodeItem [] tmp = new CaseFoldCodeItem[n];
System.arraycopy(items, 0, tmp, 0, n);
......