Skip to content
Commits on Source (65)
language: java
jdk:
- openjdk9
<?xml version="1.0" ?>
<!--
  Ant build for Joni.
  Targets:
    clean   : removes all build output
    compile : compiles the sources in src.dir into bin.dir
    build   : (default) packages the compiled classes into dist.dir/jar.name
-->
<project name="Joni" default="build">
  <!-- Source, class-output and distribution locations. -->
  <property name="src.dir" value="src" />
  <property name="bin.dir" value="target/classes" />
  <property name="dist.dir" value="target" />
  <property name="jar.name" value="joni.jar" />

  <target name="clean">
    <delete dir="${bin.dir}" />
    <delete dir="${dist.dir}" />
  </target>

  <target name="compile">
    <mkdir dir="${bin.dir}" />
    <javac srcdir="${src.dir}" destdir="${bin.dir}"/>
  </target>

  <target name="build" depends="compile">
    <mkdir dir="${dist.dir}" />
    <!--
      Define the timestamp properties used in the manifest below. Without this
      they are unset and Ant writes the placeholder text itself into the
      Built-Date and Built-Time attributes. Ant properties are immutable, so a
      value supplied on the command line (-DbuildDate=...) still takes precedence.
    -->
    <tstamp>
      <format property="buildDate" pattern="yyyy-MM-dd" />
      <format property="buildTime" pattern="HH:mm:ss" />
    </tstamp>
    <jar destfile="${dist.dir}/${jar.name}" manifest="MANIFEST.MF">
      <fileset dir="${bin.dir}" />
      <manifest>
        <attribute name="Built-By" value="${user.name}" />
        <attribute name="Built-Date" value="${buildDate}" />
        <attribute name="Built-Time" value="${buildTime}" />
      </manifest>
    </jar>
  </target>
</project>
......@@ -4,7 +4,7 @@
<groupId>org.jruby.joni</groupId>
<artifactId>joni</artifactId>
<packaging>jar</packaging>
<version>2.1.17-SNAPSHOT</version>
<version>2.1.24-SNAPSHOT</version>
<name>Joni</name>
<description>
Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma
......@@ -66,7 +66,7 @@
<dependency>
<groupId>org.jruby.jcodings</groupId>
<artifactId>jcodings</artifactId>
<version>1.0.30</version>
<version>1.0.40</version>
</dependency>
<dependency>
<groupId>junit</groupId>
......@@ -77,7 +77,7 @@
<dependency>
<groupId>org.ow2.asm</groupId>
<artifactId>asm</artifactId>
<version>5.0.3</version>
<version>6.2.1</version>
<scope>provided</scope>
</dependency>
</dependencies>
......@@ -95,16 +95,24 @@
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<version>3.8.0</version>
<executions>
<execution>
<id>default-compile</id>
<configuration>
<source>1.7</source>
<target>1.7</target>
<excludes>
<exclude>module-info.java</exclude>
</excludes>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
<version>2.22.0</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
......@@ -115,40 +123,6 @@
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.1.2</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>2.9</version>
<configuration>
<show>private</show>
<nohelp>true</nohelp>
<additionalparam>-Xdoclint:none</additionalparam>
<quiet>true</quiet>
</configuration>
<executions>
<execution>
<id>attach-sources</id>
<phase>verify</phase>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<profiles>
......@@ -204,6 +178,62 @@
</plugins>
</build>
</profile>
</profiles>
<profile>
<id>release-on-9</id>
<activation>
<jdk>[9,)</jdk>
</activation>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<execution>
<id>compile9</id>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<release>9</release>
<includes>
<include>module-info.java</include>
</includes>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-source-plugin</artifactId>
<version>2.2.1</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.0.1</version>
<executions>
<execution>
<id>attach-javadocs</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
<configuration>
<additionalJOption>-Xdoclint:none</additionalJOption>
<additionalOptions>-html5</additionalOptions>
<quiet>true</quiet>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
/*
 * Module descriptor for Joni.
 *
 * The module is declared "open", so every package in it is available for
 * deep reflection at run time, while only the packages listed below are
 * exported for compile-time use.
 */
open module org.jruby.joni {
// Exported API packages. Sub-packages such as org.joni.constants.internal
// are separate packages and are not covered by these exports.
exports org.joni;
exports org.joni.constants;
exports org.joni.exception;
// Dependencies that must be resolvable whenever this module is.
requires org.jruby.jcodings;
// NOTE(review): the Maven build declares ASM with "provided" scope; if ASM is
// meant to be optional at run time, "requires static" may be intended - confirm.
requires org.objectweb.asm;
}
\ No newline at end of file
......@@ -48,11 +48,11 @@ import org.joni.ast.ListNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.AnchorType;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
import org.joni.constants.StackPopLevel;
import org.joni.constants.TargetInfo;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.EncloseType;
import org.joni.constants.internal.NodeType;
import org.joni.constants.internal.StackPopLevel;
import org.joni.constants.internal.TargetInfo;
final class Analyser extends Parser {
......@@ -87,7 +87,6 @@ final class Analyser extends Parser {
numberedRefCheck(root);
}
}
regex.nameTable = env.nameTable;
} // USE_NAMED_GROUP
if (Config.USE_NAMED_GROUP) {
......@@ -350,7 +349,7 @@ final class Analyser extends Parser {
env.numMem = env.numNamed;
regex.numMem = env.numNamed;
env.renumberNameTable(map);
regex.renumberNameTable(map);
return root;
}
......@@ -1344,7 +1343,7 @@ final class Analyser extends Parser {
if (Config.USE_PERL_SUBEXP_CALL && cn.nameP == cn.nameEnd) {
setCallAttr(cn);
} else {
NameEntry ne = env.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd);
NameEntry ne = regex.nameToGroupNumbers(cn.name, cn.nameP, cn.nameEnd);
if (ne == null) {
newValueException(UNDEFINED_NAME_REFERENCE, cn.nameP, cn.nameEnd);
......@@ -2370,7 +2369,7 @@ final class Analyser extends Parser {
regex.setOptimizeMapInfo(opt.map);
regex.setSubAnchor(opt.map.anchor);
} else {
regex.setExactInfo(opt.exb);
regex.setOptimizeExactInfo(opt.exb);
regex.setSubAnchor(opt.exb.anchor);
}
} else if (opt.map.value > 0) {
......
......@@ -36,12 +36,12 @@ import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.AnchorType;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
import org.joni.constants.OPCode;
import org.joni.constants.OPSize;
import org.joni.constants.TargetInfo;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.EncloseType;
import org.joni.constants.internal.NodeType;
import org.joni.constants.internal.OPCode;
import org.joni.constants.internal.OPSize;
import org.joni.constants.internal.TargetInfo;
final class ArrayCompiler extends Compiler {
private int[]code;
......
......@@ -22,7 +22,7 @@ package org.joni;
import java.io.FileOutputStream;
import java.io.IOException;
import org.joni.constants.AsmConstants;
import org.joni.constants.internal.AsmConstants;
import org.objectweb.asm.ClassWriter;
import org.objectweb.asm.MethodVisitor;
import org.objectweb.asm.Opcodes;
......
......@@ -31,14 +31,15 @@ import static org.joni.Option.isPosixRegion;
import org.jcodings.CodeRange;
import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.joni.constants.OPCode;
import org.joni.constants.OPSize;
import org.joni.constants.internal.OPCode;
import org.joni.constants.internal.OPSize;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
class ByteCodeMachine extends StackMachine {
private static final int INTERRUPT_CHECK_EVERY = 30000;
int interruptCheckCounter = 0; // we modulos this to occasionally check for interrupts
volatile boolean interrupted = false;
private int bestLen; // return value
private int s = 0; // current char
......@@ -52,11 +53,15 @@ class ByteCodeMachine extends StackMachine {
private final int[]code; // byte code
private int ip; // instruction pointer
ByteCodeMachine(Regex regex, byte[]bytes, int p, int end) {
super(regex, bytes, p, end);
ByteCodeMachine(Regex regex, Region region, byte[]bytes, int p, int end) {
super(regex, region, bytes, p, end);
this.code = regex.code;
}
/**
 * Asks this machine to abandon the match that is running or about to run.
 *
 * Sets the volatile {@code interrupted} flag. The execution loops test that
 * flag on every iteration and respond by throwing
 * {@link InterruptedException}, regardless of whether thread-interrupt
 * polling was requested for the match.
 *
 * NOTE(review): nothing visible in this part of the file resets the flag, so
 * a machine that has been interrupted presumably stays interrupted - confirm.
 */
public void interrupt() {
interrupted = true;
}
protected int stkp; // a temporary
private boolean makeCaptureHistoryTree(CaptureTreeNode node) {
//CaptureTreeNode child;
......@@ -146,33 +151,7 @@ class ByteCodeMachine extends StackMachine {
return true;
}
private void debugMatchBegin() {
Config.log.println("match_at: " + "str: " + str + ", end: " + end + ", start: " + sstart + ", sprev: " + sprev);
Config.log.println("size: " + (end - str) + ", start offset: " + (sstart - str));
}
private void debugMatchLoop() {
Config.log.printf("%4d", (s - str)).print("> \"");
int q, i;
for (i = 0, q = s; i < 7 && q < end && s >= 0; i++) {
int len = enc.length(bytes, q, end);
while (len-- > 0) {
if (q < end) {
Config.log.print(new String(bytes, q++, 1));
}
}
}
String str = q < end ? "...\"" : "\"";
q += str.length();
Config.log.print(str);
for (i = 0; i < 20 - (q - s); i++)
Config.log.print(" ");
StringBuilder sb = new StringBuilder();
new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
Config.log.println(sb.toString());
}
protected final int matchAt(int _range, int _sstart, int _sprev) throws InterruptedException {
protected final int matchAt(int _range, int _sstart, int _sprev, boolean interrupt) throws InterruptedException {
range = _range;
sstart = _sstart;
sprev = _sprev;
......@@ -185,14 +164,15 @@ class ByteCodeMachine extends StackMachine {
bestLen = -1;
s = _sstart;
pkeep = _sstart;
return enc.isSingleByte() || (msaOptions & Option.CR_7_BIT) != 0 ? executeSb() : execute();
return enc.isSingleByte() || (msaOptions & Option.CR_7_BIT) != 0 ? executeSb(interrupt) : execute(interrupt);
}
private final int execute() throws InterruptedException {
private final int execute(boolean interrupt) throws InterruptedException {
Thread currentThread = Thread.currentThread();
final int[]code = this.code;
while (true) {
if (interruptCheckCounter++ % INTERRUPT_CHECK_EVERY == 0 && currentThread.isInterrupted()) {
if (interrupted ||
(interrupt && interruptCheckCounter++ % INTERRUPT_CHECK_EVERY == 0 && currentThread.isInterrupted())) {
currentThread.interrupted();
throw new InterruptedException();
}
......@@ -323,11 +303,12 @@ class ByteCodeMachine extends StackMachine {
} // main while
}
private final int executeSb() throws InterruptedException {
private final int executeSb(boolean interrupt) throws InterruptedException {
Thread currentThread = Thread.currentThread();
final int[]code = this.code;
while (true) {
if (interruptCheckCounter++ % INTERRUPT_CHECK_EVERY == 0 && currentThread.isInterrupted()) {
if (interrupted ||
(interrupt && interruptCheckCounter++ % INTERRUPT_CHECK_EVERY == 0 && currentThread.isInterrupted())) {
currentThread.interrupted();
throw new InterruptedException();
}
......@@ -384,8 +365,8 @@ class ByteCodeMachine extends StackMachine {
case OPCode.BEGIN_BUF: opBeginBuf(); continue;
case OPCode.END_BUF: opEndBuf(); continue;
case OPCode.BEGIN_LINE: opBeginLine(); continue;
case OPCode.END_LINE: opEndLine(); continue;
case OPCode.BEGIN_LINE: opBeginLineSb(); continue;
case OPCode.END_LINE: opEndLineSb(); continue;
case OPCode.SEMI_END_BUF: opSemiEndBuf(); continue;
case OPCode.BEGIN_POSITION: opBeginPosition(); continue;
......@@ -501,22 +482,12 @@ class ByteCodeMachine extends StackMachine {
}
} else {
Region region = msaRegion;
if (Config.USE_POSIX_API_REGION_OPTION) {
if (!isPosixRegion(regex.options)) {
if (region != null) {
region.clear();
} else {
msaBegin = msaEnd = 0;
}
}
} else {
if (region != null) {
region.clear();
} else {
msaBegin = msaEnd = 0;
}
} // USE_POSIX_REGION_OPTION
}
// end_best_len:
/* default behavior: return first-matching result. */
return endBestLength();
......@@ -1272,6 +1243,16 @@ class ByteCodeMachine extends StackMachine {
opFail();
}
/**
 * BEGIN_LINE handler used by the single-byte execution loop.
 *
 * Succeeds at the start of the subject unless the NOTBOL option is set, or
 * directly after a new-line byte as long as the current position is not the
 * end of the subject. Works on raw bytes instead of asking the encoding.
 * Calls {@code opFail()} when the anchor does not hold.
 */
private void opBeginLineSb() {
    final boolean anchored;
    if (s == str) {
        // Very start of the subject: only the NOTBOL option can veto it.
        anchored = !isNotBol(msaOptions);
    } else {
        // Otherwise the previous byte must be a new-line and text must follow.
        anchored = bytes[sprev] == Encoding.NEW_LINE && s != end;
    }
    if (!anchored) opFail();
}
private void opEndLine() {
if (s == end) {
if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
......@@ -1289,6 +1270,23 @@ class ByteCodeMachine extends StackMachine {
opFail();
}
/**
 * END_LINE handler used by the single-byte execution loop.
 *
 * Before the end of the subject the anchor holds when the current byte is a
 * new-line (or, if enabled, a CR-NL sequence). At the end of the subject it
 * holds unless the NOTEOL option vetoes it; with
 * USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE that veto is only consulted
 * when the subject is empty or does not end in a new-line byte.
 * Calls {@code opFail()} when the anchor does not hold.
 */
private void opEndLineSb() {
    if (s != end) {
        final boolean atTerminator = bytes[s] == Encoding.NEW_LINE
                || (Config.USE_CRNL_AS_LINE_TERMINATOR && enc.isMbcCrnl(bytes, s, end));
        if (!atTerminator) opFail();
        return;
    }

    // At the end of the subject: decide whether NOTEOL has to be consulted.
    final boolean consultNotEol = !Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
            || str == end
            || bytes[sprev] != Encoding.NEW_LINE;
    if (consultNotEol && isNotEol(msaOptions)) opFail();
}
private void opSemiEndBuf() {
if (s == end) {
if (Config.USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE) {
......@@ -1932,4 +1930,30 @@ class ByteCodeMachine extends StackMachine {
private int finish() {
return bestLen;
}
/**
 * Debug aid: writes two lines to {@code Config.log} describing the match
 * that is about to start - the raw positions, then the subject size and the
 * start offset relative to {@code str}.
 */
private void debugMatchBegin() {
    final String positions = "match_at: " + "str: " + str
            + ", end: " + end
            + ", start: " + sstart
            + ", sprev: " + sprev;
    final String extent = "size: " + (end - str)
            + ", start offset: " + (sstart - str);
    Config.log.println(positions);
    Config.log.println(extent);
}
/**
 * Debug aid: writes one trace line to {@code Config.log} for the current
 * step of the match loop. The line holds the offset of {@code s} relative to
 * {@code str}, a quoted preview of the subject at {@code s}, padding, and
 * the text that {@code ByteCodePrinter} produces for position {@code ip}.
 */
private void debugMatchLoop() {
// Offset of the current position, right-aligned in four columns, then the opening quote.
Config.log.printf("%4d", (s - str)).print("> \"");
int q, i;
// Preview at most seven characters, never reading past the end of the subject.
for (i = 0, q = s; i < 7 && q < end && s >= 0; i++) {
int len = enc.length(bytes, q, end);
// Each byte of the character is printed as its own one-byte String
// (decoded with the default charset), so multi-byte characters may look garbled.
while (len-- > 0) {
if (q < end) {
Config.log.print(new String(bytes, q++, 1));
}
}
}
// Closing quote, preceded by an ellipsis when subject text remains.
// Note: this local shadows the int field 'str' for the rest of the method.
String str = q < end ? "...\"" : "\"";
// Count the suffix in q so the padding below accounts for its width.
q += str.length();
Config.log.print(str);
// Pad with spaces until 20 columns (measured from s) have been used.
for (i = 0; i < 20 - (q - s); i++)
Config.log.print(" ");
StringBuilder sb = new StringBuilder();
new ByteCodePrinter(regex).compiledByteCodeToString(sb, ip);
Config.log.println(sb.toString());
}
}
......@@ -20,9 +20,9 @@
package org.joni;
import org.jcodings.Encoding;
import org.joni.constants.Arguments;
import org.joni.constants.OPCode;
import org.joni.constants.OPSize;
import org.joni.constants.internal.Arguments;
import org.joni.constants.internal.OPCode;
import org.joni.constants.internal.OPSize;
import org.joni.exception.InternalException;
class ByteCodePrinter {
......
......@@ -30,7 +30,7 @@ import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.NodeType;
import org.joni.constants.internal.NodeType;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
import org.joni.exception.SyntaxException;
......
......@@ -41,10 +41,12 @@ public interface Config extends org.jcodings.Config {
final boolean USE_CAPTURE_HISTORY = false;
final boolean USE_VARIABLE_META_CHARS = true;
final boolean USE_WORD_BEGIN_END = true; /* "\<": word-begin, "\>": word-end */
final boolean USE_POSIX_API_REGION_OPTION = true; /* needed for POSIX API support */
final boolean USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE = true;
final boolean USE_SUNDAY_QUICK_SEARCH = true;
final boolean USE_CEC = false;
final boolean USE_DYNAMIC_OPTION = false;
final boolean USE_BYTE_MAP = OptExactInfo.OPT_EXACT_MAXLEN <= CHAR_TABLE_SIZE;
final boolean USE_INT_MAP_BACKWARD = false;
final int NREGION = 10;
final int MAX_BACKREF_NUM = 1000;
......
......@@ -29,18 +29,20 @@ import org.jcodings.constants.CharacterType;
import org.jcodings.exception.CharacterPropertyException;
import org.jcodings.exception.EncodingError;
import org.joni.ast.QuantifierNode;
import org.joni.constants.AnchorType;
import org.joni.constants.MetaChar;
import org.joni.constants.TokenType;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.TokenType;
import org.joni.exception.ErrorMessages;
class Lexer extends ScannerSupport {
protected final Regex regex;
protected final ScanEnvironment env;
protected final Syntax syntax; // fast access to syntax
protected final Token token = new Token(); // current token
protected Lexer(Regex regex, Syntax syntax, byte[]bytes, int p, int end, WarnCallback warnings) {
super(regex.enc, bytes, p, end);
this.regex = regex;
this.env = new ScanEnvironment(regex, syntax, warnings);
this.syntax = env.syntax;
}
......@@ -945,7 +947,7 @@ class Lexer extends ScannerSupport {
token.setBackrefNum(1);
token.setBackrefRef1(backNum);
} else {
NameEntry e = env.nameToGroupNumbers(bytes, last, nameEnd);
NameEntry e = regex.nameToGroupNumbers(bytes, last, nameEnd);
if (e == null) newValueException(UNDEFINED_NAME_REFERENCE, last, nameEnd);
if (syntax.strictCheckBackref()) {
......
......@@ -26,7 +26,7 @@ import org.jcodings.Encoding;
import org.jcodings.IntHolder;
import org.jcodings.constants.CharacterType;
import org.jcodings.specific.ASCIIEncoding;
import org.joni.constants.AnchorType;
import org.joni.constants.internal.AnchorType;
public abstract class Matcher extends IntHolder {
public static final int FAILED = -1;
......@@ -48,22 +48,23 @@ public abstract class Matcher extends IntHolder {
protected int msaBegin;
protected int msaEnd;
Matcher(Regex regex, byte[]bytes, int p, int end) {
Matcher(Regex regex, Region region, byte[]bytes, int p, int end) {
this.regex = regex;
this.enc = regex.enc;
this.bytes = bytes;
this.str = p;
this.end = end;
this.msaRegion = regex.numMem == 0 ? null : new Region(regex.numMem + 1);
this.msaRegion = region;
}
// main matching method
protected abstract int matchAt(int range, int sstart, int sprev) throws InterruptedException;
protected abstract int matchAt(int range, int sstart, int sprev, boolean interrupt) throws InterruptedException;
protected abstract void stateCheckBuffInit(int strLength, int offset, int stateNum);
protected abstract void stateCheckBuffClear();
public abstract void interrupt();
public final Region getRegion() {
return msaRegion;
}
......@@ -88,13 +89,17 @@ public abstract class Matcher extends IntHolder {
public final int match(int at, int range, int option) {
try {
return matchInterruptible(at, range, option);
return matchCommon(at, range, option, false);
} catch (InterruptedException ex) {
return INTERRUPTED;
}
}
/**
 * Runs a match through {@code matchCommon} with its {@code interrupt}
 * argument set to {@code true}, and lets an {@link InterruptedException}
 * propagate to the caller.
 *
 * @param at     forwarded unchanged to {@code matchCommon}
 * @param range  forwarded unchanged to {@code matchCommon}
 * @param option forwarded unchanged to {@code matchCommon}
 * @return whatever {@code matchCommon} returns
 * @throws InterruptedException if the match is interrupted
 */
public final int matchInterruptible(int at, int range, int option) throws InterruptedException {
return matchCommon(at, range, option, true);
}
private final int matchCommon(int at, int range, int option, boolean interrupt) throws InterruptedException {
msaInit(option, at);
if (Config.USE_CEC) {
......@@ -105,9 +110,9 @@ public abstract class Matcher extends IntHolder {
int prev = enc.prevCharHead(bytes, str, at, end);
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
return matchAt(end /*range*/, at, prev);
return matchAt(end /*range*/, at, prev, interrupt);
} else {
return matchAt(range /*range*/, at, prev);
return matchAt(range /*range*/, at, prev, interrupt);
}
}
......@@ -128,7 +133,8 @@ public abstract class Matcher extends IntHolder {
}
retry:while (true) {
p = regex.searchAlgorithm.search(this, bytes, p, end, range);
if (Config.DEBUG_SEARCH) debugSearch(regex.forward.getName(), p, end, range);
p = regex.forward.search(this, bytes, p, end, range);
if (p != -1 && p < range) {
if (p - regex.dMin < s) {
......@@ -214,7 +220,7 @@ public abstract class Matcher extends IntHolder {
int p = s;
retry:while (true) {
p = regex.searchAlgorithm.searchBackward(this, bytes, range, adjrange, end, p, s, range);
p = regex.backward.search(this, bytes, range, adjrange, end, p, s, range);
if (p != -1) {
if (regex.subAnchor != 0) {
......@@ -265,26 +271,26 @@ public abstract class Matcher extends IntHolder {
}
// MATCH_AND_RETURN_CHECK
private boolean matchCheck(int upperRange, int s, int prev) throws InterruptedException {
private boolean matchCheck(int upperRange, int s, int prev, boolean interrupt) throws InterruptedException {
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
//range = upperRange;
if (matchAt(upperRange, s, prev) != -1) {
if (matchAt(upperRange, s, prev, interrupt) != -1) {
if (!isFindLongest(regex.options)) return true;
}
} else {
//range = upperRange;
if (matchAt(upperRange, s, prev) != -1) return true;
if (matchAt(upperRange, s, prev, interrupt) != -1) return true;
}
} else {
if (Config.USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE) {
if (matchAt(end, s, prev) != -1) {
if (matchAt(end, s, prev, interrupt) != -1) {
//range = upperRange;
if (!isFindLongest(regex.options)) return true;
}
} else {
//range = upperRange;
if (matchAt(end, s, prev) != -1) return true;
if (matchAt(end, s, prev, interrupt) != -1) return true;
}
}
return false;
......@@ -292,13 +298,17 @@ public abstract class Matcher extends IntHolder {
public final int search(int start, int range, int option) {
try {
return searchInterruptible(start, range, option);
return searchCommon(start, range, option, false);
} catch (InterruptedException ex) {
return INTERRUPTED;
}
}
/**
 * Runs a search through {@code searchCommon} with its {@code interrupt}
 * argument set to {@code true}, and lets an {@link InterruptedException}
 * propagate to the caller.
 *
 * @param start  forwarded unchanged to {@code searchCommon}
 * @param range  forwarded unchanged to {@code searchCommon}
 * @param option forwarded unchanged to {@code searchCommon}
 * @return whatever {@code searchCommon} returns
 * @throws InterruptedException if the search is interrupted
 */
public final int searchInterruptible(int start, int range, int option) throws InterruptedException {
return searchCommon(start, range, option, true);
}
private final int searchCommon(int start, int range, int option, boolean interrupt) throws InterruptedException {
int s, prev;
int origStart = start;
int origRange = range;
......@@ -376,7 +386,7 @@ public abstract class Matcher extends IntHolder {
if (Config.USE_CEC) stateCheckBuffClear();
if (matchCheck(end, s, prev)) return match(s);
if (matchCheck(end, s, prev, interrupt)) return match(s);
return mismatch();
}
return FAILED; // goto mismatch_no_msa;
......@@ -398,7 +408,7 @@ public abstract class Matcher extends IntHolder {
prev = 0; // -1
}
if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
if (regex.forward != null) {
int schRange = range;
if (regex.dMax != 0) {
if (regex.dMax == MinMaxLen.INFINITE_DISTANCE) {
......@@ -418,7 +428,7 @@ public abstract class Matcher extends IntHolder {
prev = value;
}
while (s <= high) {
if (matchCheck(origRange, s, prev)) return match(s); // ???
if (matchCheck(origRange, s, prev, interrupt)) return match(s); // ???
prev = s;
s += enc.length(bytes, s, end);
}
......@@ -430,7 +440,7 @@ public abstract class Matcher extends IntHolder {
if ((regex.anchor & AnchorType.ANYCHAR_STAR) != 0) {
do {
if (matchCheck(origRange, s, prev)) return match(s);
if (matchCheck(origRange, s, prev, interrupt)) return match(s);
prev = s;
s += enc.length(bytes, s, end);
......@@ -447,13 +457,13 @@ public abstract class Matcher extends IntHolder {
}
do {
if (matchCheck(origRange, s, prev)) return match(s);
if (matchCheck(origRange, s, prev, interrupt)) return match(s);
prev = s;
s += enc.length(bytes, s, end);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
if (matchCheck(origRange, s, prev)) return match(s);
if (matchCheck(origRange, s, prev, interrupt)) return match(s);
}
} else { /* backward search */
if (Config.USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE) {
......@@ -462,7 +472,7 @@ public abstract class Matcher extends IntHolder {
}
}
if (regex.searchAlgorithm != SearchAlgorithm.NONE) {
if (regex.backward != null) {
int adjrange;
if (range < end) {
adjrange = enc.leftAdjustCharHead(bytes, str, range, end);
......@@ -477,7 +487,7 @@ public abstract class Matcher extends IntHolder {
if (s > high) s = high;
while (s != -1 && s >= low) {
prev = enc.prevCharHead(bytes, str, s, end);
if (matchCheck(origStart, s, prev)) return match(s);
if (matchCheck(origStart, s, prev, interrupt)) return match(s);
s = prev;
}
} while (s >= range);
......@@ -504,7 +514,7 @@ public abstract class Matcher extends IntHolder {
do {
prev = enc.prevCharHead(bytes, str, s, end);
if (matchCheck(origStart, s, prev)) return match(s);
if (matchCheck(origStart, s, prev, interrupt)) return match(s);
s = prev;
} while (s >= range);
......@@ -603,4 +613,9 @@ public abstract class Matcher extends IntHolder {
", high: " + (high - str));
}
}
/**
 * Debug aid: writes one line to {@code Config.log} naming a search routine
 * and the text positions it is being called with.
 *
 * @param name      label printed at the start of the line
 * @param textP     value printed as "text"
 * @param textEnd   value printed as "text_end"
 * @param textRange value printed as "text_range"
 */
static void debugSearch(String name, int textP, int textEnd, int textRange) {
    final StringBuilder line = new StringBuilder();
    line.append(name)
        .append(": text: ").append(textP)
        .append(", text_end: ").append(textEnd)
        .append(", text_range: ").append(textRange);
    Config.log.println(line.toString());
}
}
......@@ -20,12 +20,12 @@
package org.joni;
abstract class MatcherFactory {
abstract Matcher create(Regex regex, byte[]bytes, int p, int end);
abstract Matcher create(Regex regex, Region region, byte[]bytes, int p, int end);
static final MatcherFactory DEFAULT = new MatcherFactory() {
@Override
Matcher create(Regex regex, byte[] bytes, int p, int end) {
return new ByteCodeMachine(regex, bytes, p, end);
Matcher create(Regex regex, Region region, byte[]bytes, int p, int end) {
return new ByteCodeMachine(regex, region, bytes, p, end);
}
};
}
......@@ -21,7 +21,7 @@ package org.joni;
abstract class NativeMachine extends Matcher {
protected NativeMachine(Regex regex, byte[]bytes, int p, int end) {
super(regex, bytes, p, end);
protected NativeMachine(Regex regex, Region region, byte[]bytes, int p, int end) {
super(regex, region, bytes, p, end);
}
}
......@@ -19,7 +19,7 @@
*/
package org.joni;
import org.joni.constants.AnchorType;
import org.joni.constants.internal.AnchorType;
final class OptAnchorInfo implements AnchorType {
int leftAnchor;
......
......@@ -45,19 +45,17 @@ import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.AnchorType;
import org.joni.constants.EncloseType;
import org.joni.constants.NodeType;
import org.joni.constants.TokenType;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.EncloseType;
import org.joni.constants.internal.NodeType;
import org.joni.constants.internal.TokenType;
class Parser extends Lexer {
protected final Regex regex;
protected int returnCode; // return code used by parser methods (they itself return parsed nodes)
// this approach will not affect recursive calls
protected Parser(Regex regex, Syntax syntax, byte[]bytes, int p, int end, WarnCallback warnings) {
super(regex, syntax, bytes, p, end, warnings);
this.regex = regex;
}
private static final int POSIX_BRACKET_NAME_MIN_LEN = 4;
......@@ -719,7 +717,7 @@ class Parser extends Lexer {
num = env.addMemEntry();
if (listCapture && num >= BitStatus.BIT_STATUS_BITS_NUM) newValueException(GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY);
env.nameAdd(bytes, nm, nameEnd, num, syntax);
regex.nameAdd(bytes, nm, nameEnd, num, syntax);
EncloseNode en = EncloseNode.newMemory(env.option, true);
en.regNum = num;
......
......@@ -20,18 +20,21 @@
package org.joni;
import static org.joni.BitStatus.bsAt;
import static org.joni.Config.USE_SUNDAY_QUICK_SEARCH;
import static org.joni.Option.isCaptureGroup;
import static org.joni.Option.isDontCaptureGroup;
import java.util.Collections;
import java.util.Iterator;
import org.jcodings.CaseFoldCodeItem;
import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.UTF8Encoding;
import org.jcodings.util.BytesHash;
import org.joni.constants.AnchorType;
import org.joni.constants.internal.AnchorType;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
import org.joni.exception.ValueException;
public final class Regex {
......@@ -61,10 +64,11 @@ public final class Regex {
Object userObject;
final int caseFoldFlag;
BytesHash<NameEntry> nameTable; // named entries
private BytesHash<NameEntry> nameTable; // named entries
/* optimization info (string search, char-map and anchors) */
SearchAlgorithm searchAlgorithm; /* optimize flag */
Search.Forward forward; /* optimize flag */
Search.Backward backward;
int thresholdLength; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
int anchorDmin; /* (SEMI_)END_BUF anchor distance */
......@@ -155,8 +159,16 @@ public final class Regex {
return matcher(bytes, 0, bytes.length);
}
/**
 * Convenience overload: creates a region-less matcher over the whole array
 * by delegating to {@code matcherNoRegion(bytes, 0, bytes.length)}.
 *
 * @param bytes the subject bytes
 * @return the matcher produced by the three-argument overload
 */
public Matcher matcherNoRegion(byte[]bytes) {
return matcherNoRegion(bytes, 0, bytes.length);
}
public Matcher matcher(byte[]bytes, int p, int end) {
return factory.create(this, bytes, p, end);
return factory.create(this, numMem == 0 ? null : new Region(numMem + 1), bytes, p, end);
}
/**
 * Creates a matcher through the configured factory, passing {@code null} as
 * the region so that no {@code Region} object is allocated for it.
 *
 * NOTE(review): presumably intended for callers that only need the overall
 * match bounds and not per-group positions - confirm against the Matcher API.
 *
 * @param bytes the subject bytes
 * @param p     forwarded to the factory as the subject start
 * @param end   forwarded to the factory as the subject end
 * @return the matcher created by the factory
 */
public Matcher matcherNoRegion(byte[]bytes, int p, int end) {
return factory.create(this, null, bytes, p, end);
}
public int numberOfCaptures() {
......@@ -175,6 +187,70 @@ public final class Regex {
}
}
/**
 * Looks up a group name in the name table.
 *
 * @param name    array holding the name bytes
 * @param nameP   start offset of the name in {@code name}
 * @param nameEnd end offset of the name in {@code name}
 * @return the entry for the name, or {@code null} when there is no name
 *         table or the table has no entry for it
 */
private NameEntry nameFind(byte[]name, int nameP, int nameEnd) {
    return nameTable == null ? null : nameTable.get(name, nameP, nameEnd);
}
/**
 * Rewrites every group number stored in the name table through {@code map}.
 * Does nothing when there is no name table.
 *
 * @param map lookup array indexed by the old group number and holding the
 *            new group number
 */
void renumberNameTable(int[]map) {
    if (nameTable == null) return;

    for (NameEntry entry : nameTable) {
        final int count = entry.backNum;
        if (count == 1) {
            // A single reference is kept in backRef1 rather than in the array.
            entry.backRef1 = map[entry.backRef1];
        } else if (count > 1) {
            final int[] refs = entry.backRefs;
            for (int idx = 0; idx < count; idx++) {
                refs[idx] = map[refs[idx]];
            }
        }
    }
}
/**
 * Registers {@code backRef} under the given group name, creating the name
 * table and the name's entry on demand.
 *
 * @param name    array holding the name bytes
 * @param nameP   start offset of the name in {@code name}
 * @param nameEnd end offset of the name in {@code name}
 * @param backRef group number to record for the name
 * @param syntax  consulted to decide whether a name may be defined more than once
 * @throws ValueException if the name is empty, or if the name already has a
 *         group and the syntax does not allow multiplex definitions
 */
void nameAdd(byte[]name, int nameP, int nameEnd, int backRef, Syntax syntax) {
    final int nameLength = nameEnd - nameP;
    if (nameLength <= 0) throw new ValueException(ErrorMessages.EMPTY_GROUP_NAME);

    NameEntry entry;
    if (nameTable != null) {
        entry = nameTable.get(name, nameP, nameEnd);
    } else {
        // First named group of this regex: a fresh table cannot contain the name yet.
        nameTable = new BytesHash<NameEntry>(); // 13, oni defaults to 5
        entry = null;
    }

    if (entry != null) {
        if (entry.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
            throw new ValueException(ErrorMessages.MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameLength));
        }
    } else {
        // The entry refers to the caller's array; the name bytes are not copied here.
        entry = new NameEntry(name, nameP, nameEnd);
        nameTable.putDirect(name, nameP, nameEnd, entry);
    }
    entry.addBackref(backRef);
}
/**
 * Returns the name-table entry for a group name by delegating to
 * {@code nameFind}.
 *
 * @param name    array holding the name bytes
 * @param nameP   start offset of the name in {@code name}
 * @param nameEnd end offset of the name in {@code name}
 * @return the entry, or {@code null} when the name is not defined
 */
NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) {
return nameFind(name, nameP, nameEnd);
}
/**
 * Resolves a group name to a single group number.
 *
 * When the name maps to several groups and a region is supplied, the
 * highest-indexed group of the name whose start position in the region is
 * set wins; otherwise the last group recorded for the name is returned.
 *
 * @param name    array holding the name bytes
 * @param nameP   start offset of the name in {@code name}
 * @param nameEnd end offset of the name in {@code name}
 * @param region  match result used to pick among several groups, may be {@code null}
 * @return the group number for the name
 * @throws ValueException    if the name is not defined
 * @throws InternalException if the name's entry has no group recorded
 */
public int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) {
    final NameEntry entry = nameToGroupNumbers(name, nameP, nameEnd);
    if (entry == null) {
        throw new ValueException(ErrorMessages.UNDEFINED_NAME_REFERENCE,
                new String(name, nameP, nameEnd - nameP));
    }

    final int count = entry.backNum;
    if (count == 0) throw new InternalException(ErrorMessages.PARSER_BUG);
    if (count == 1) return entry.backRef1;

    final int[] refs = entry.backRefs;
    if (region != null) {
        // Walk from the last definition backwards to the first one that took part in the match.
        for (int idx = count - 1; idx >= 0; idx--) {
            final int group = refs[idx];
            if (region.beg[group] != Region.REGION_NOTPOS) return group;
        }
    }
    return refs[count - 1];
}
String nameTableToString() {
StringBuilder sb = new StringBuilder();
......@@ -206,49 +282,94 @@ public final class Regex {
}
/* set skip map for Boyer-Moor search */
void setupBMSkipMap() {
boolean setupBMSkipMap(boolean ignoreCase) {
byte[]bytes = exact;
int p = exactP;
int s = exactP;
int end = exactEnd;
int len = end - p;
int len = end - s;
int clen;
CaseFoldCodeItem[]items = CaseFoldCodeItem.EMPTY_FOLD_CODES;
byte[]buf = new byte[Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM * Config.ENC_MBC_CASE_FOLD_MAXLEN];
final int ilen = USE_SUNDAY_QUICK_SEARCH ? len : len - 1;
if (Config.USE_BYTE_MAP || len < Config.CHAR_TABLE_SIZE) {
if (map == null) map = new byte[Config.CHAR_TABLE_SIZE]; // map/skip
for (int i = 0; i < Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)(USE_SUNDAY_QUICK_SEARCH ? len + 1 : len);
if (len < Config.CHAR_TABLE_SIZE) {
// map/skip
if (map == null) map = new byte[Config.CHAR_TABLE_SIZE];
for (int i = 0; i < ilen; i += clen) {
if (ignoreCase) items = enc.caseFoldCodesByString(caseFoldFlag, bytes, s + i, end);
clen = setupBMSkipMapCheck(bytes, s + i, end, items, buf);
if (clen == 0) return true;
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) map[i] = (byte)len;
for (int i=0; i<len-1; i++) map[bytes[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ??
for (int j = 0; j < clen; j++) {
map[bytes[s + i + j] & 0xff] = (byte)(ilen - i - j);
for (int k = 0; k < items.length; k++) {
map[buf[k * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM + j] & 0xff] = (byte)(ilen - i - j);
}
}
}
} else {
if (intMap == null) intMap = new int[Config.CHAR_TABLE_SIZE];
for (int i = 0; i < Config.CHAR_TABLE_SIZE; i++) intMap[i] = (USE_SUNDAY_QUICK_SEARCH ? len + 1 : len);
for (int i=0; i<len-1; i++) intMap[bytes[p + i] & 0xff] = len - 1 - i; // oxff ??
for (int i = 0; i < ilen; i += clen) {
if (ignoreCase) items = enc.caseFoldCodesByString(caseFoldFlag, bytes, s + i, end);
clen = setupBMSkipMapCheck(bytes, s + i, end, items, buf);
if (clen == 0) return true;
for (int j = 0; j < clen; j++) {
intMap[bytes[s + i + j] & 0xff] = ilen - i - j;
for (int k = 0; k < items.length; k++) {
intMap[buf[k * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM + j] & 0xff] = ilen - i - j;
}
}
}
}
return false;
}
void setExactInfo(OptExactInfo e) {
/**
 * Checks whether the character at {@code p} and all of its case-fold
 * alternatives can be used for the skip map, and encodes each alternative
 * into {@code buf}.
 *
 * Alternative number {@code j} is written at offset
 * {@code j * Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM}, the same stride the
 * skip-map setup uses when it reads {@code buf} back.
 *
 * @param bytes array holding the exact string
 * @param p     offset of the character to check
 * @param end   end offset of the exact string
 * @param items case-fold alternatives for the character (may be empty)
 * @param buf   scratch buffer that receives the encoded alternatives
 * @return the byte length of the character (clipped to {@code end}), or 0
 *         when some alternative is not a single code point of that same
 *         encoded length
 */
private int setupBMSkipMapCheck(byte[]bytes, int p, int end, CaseFoldCodeItem[]items, byte[]buf) {
    final int charLen = enc.length(bytes, p, end);
    // Never let the character extend past the end of the exact string.
    final int clen = p + charLen > end ? end - p : charLen;

    int offset = 0;
    for (CaseFoldCodeItem item : items) {
        if (item.code.length != 1 || item.byteLen != clen) return 0;
        if (enc.codeToMbc(item.code[0], buf, offset) != clen) return 0;
        offset += Config.ENC_GET_CASE_FOLD_CODES_MAX_NUM;
    }
    return clen;
}
void setOptimizeExactInfo(OptExactInfo e) {
if (e.length == 0) return;
// shall we copy that ?
exact = e.bytes;
exactP = 0;
exactEnd = e.length;
boolean allowReverse = enc.isReverseMatchAllowed(exact, exactP, exactEnd);
if (e.ignoreCase > 0) {
// encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly
searchAlgorithm = enc.toLowerCaseTable() != null ? SearchAlgorithm.SLOW_IC_SB : SearchAlgorithm.SLOW_IC;
if (e.length >= 3 || (e.length >= 2 && allowReverse)) {
if (!setupBMSkipMap(true)) {
forward = allowReverse ? Search.BM_IC_FORWARD : Search.BM_NOT_REV_IC_FORWARD;
} else {
forward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_FORWARD : Search.SLOW_IC_FORWARD;
}
} else {
forward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_FORWARD : Search.SLOW_IC_FORWARD;
}
backward = enc.toLowerCaseTable() != null ? Search.SLOW_IC_SB_BACKWARD : Search.SLOW_IC_BACKWARD;
} else {
boolean allowReverse = enc.isReverseMatchAllowed(exact, exactP, exactEnd);
if (e.length >= 3 || (e.length >= 2 && allowReverse)) {
setupBMSkipMap();
if (allowReverse) {
searchAlgorithm = SearchAlgorithm.BM;
if (!setupBMSkipMap(false)) {
forward = allowReverse ? Search.BM_FORWARD : Search.BM_NOT_REV_FORWARD;
} else {
searchAlgorithm = SearchAlgorithm.BM_NOT_REV;
forward = enc.isSingleByte() ? Search.SLOW_SB_FORWARD : Search.SLOW_FORWARD;
}
} else {
searchAlgorithm = enc.isSingleByte() ? SearchAlgorithm.SLOW_SB : SearchAlgorithm.SLOW;
forward = enc.isSingleByte() ? Search.SLOW_SB_FORWARD : Search.SLOW_FORWARD;
}
backward = enc.isSingleByte() ? Search.SLOW_SB_BACKWARD : Search.SLOW_BACKWARD;
}
dMin = e.mmd.min;
......@@ -262,7 +383,14 @@ public final class Regex {
void setOptimizeMapInfo(OptMapInfo m) {
map = m.map;
searchAlgorithm = enc.isSingleByte() ? SearchAlgorithm.MAP_SB : SearchAlgorithm.MAP;
if (enc.isSingleByte()) {
forward = Search.MAP_SB_FORWARD;
backward = Search.MAP_SB_BACKWARD;
} else {
forward = Search.MAP_FORWARD;
backward = Search.MAP_BACKWARD;
}
dMin = m.mmd.min;
dMax = m.mmd.max;
......@@ -277,7 +405,8 @@ public final class Regex {
}
void clearOptimizeInfo() {
searchAlgorithm = SearchAlgorithm.NONE;
forward = null;
backward = null;
anchor = 0;
anchorDmax = 0;
anchorDmin = 0;
......@@ -289,7 +418,7 @@ public final class Regex {
public String optimizeInfoToString() {
String s = "";
s += "optimize: " + searchAlgorithm.getName() + "\n";
s += "optimize: " + (forward != null ? forward.getName() : "NONE") + "\n";
s += " anchor: " + OptAnchorInfo.anchorToString(anchor);
if ((anchor & AnchorType.END_BUF_MASK) != 0) {
......@@ -298,7 +427,7 @@ public final class Regex {
s += "\n";
if (searchAlgorithm != SearchAlgorithm.NONE) {
if (forward != null) {
s += " sub anchor: " + OptAnchorInfo.anchorToString(subAnchor) + "\n";
}
......@@ -307,7 +436,7 @@ public final class Regex {
if (exact != null) {
s += "exact: [" + new String(exact, exactP, exactEnd - exactP) + "]: length: " + (exactEnd - exactP) + "\n";
} else if (searchAlgorithm == SearchAlgorithm.MAP || searchAlgorithm == SearchAlgorithm.MAP_SB) {
} else if (forward == Search.MAP_FORWARD || forward == Search.MAP_SB_FORWARD) {
int n=0;
for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) if (map[i] != 0) n++;
......
......@@ -20,13 +20,11 @@
package org.joni;
import org.jcodings.Encoding;
import org.jcodings.util.BytesHash;
import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.constants.SyntaxProperties;
import org.joni.exception.ErrorMessages;
import org.joni.exception.InternalException;
import org.joni.exception.ValueException;
public final class ScanEnvironment {
public int option;
......@@ -45,7 +43,6 @@ public final class ScanEnvironment {
public int numMem;
int numNamed; // USE_NAMED_GROUP
BytesHash<NameEntry> nameTable;
public EncloseNode memNodes[];
......@@ -88,69 +85,6 @@ public final class ScanEnvironment {
}
}
/**
 * Looks up the entry registered for the group name held in
 * {@code name[nameP..nameEnd)}.
 *
 * @return the entry returned by the name table, or {@code null} when no name
 *         table has been created yet
 */
NameEntry nameFind(byte[]name, int nameP, int nameEnd) {
    return nameTable == null ? null : nameTable.get(name, nameP, nameEnd);
}
/**
 * Rewrites every back reference stored in the name table through {@code map},
 * replacing each stored number {@code n} with {@code map[n]}.
 * Does nothing when no name table exists.
 *
 * @param map translation table indexed by the currently stored group number
 */
void renumberNameTable(int[]map) {
    if (nameTable == null) return;

    for (NameEntry entry : nameTable) {
        final int count = entry.backNum;
        if (count == 1) {
            // a single reference lives in backRef1 rather than in the array
            entry.backRef1 = map[entry.backRef1];
        } else if (count > 1) {
            for (int idx = 0; idx < count; idx++) {
                entry.backRefs[idx] = map[entry.backRefs[idx]];
            }
        }
    }
}
/**
 * Registers {@code backRef} under the group name held in
 * {@code name[nameP..nameEnd)}, creating the name table and/or the entry on
 * first use.
 *
 * @param backRef group number to associate with the name
 * @param syntax  consulted to decide whether a name may be defined more than once
 * @throws ValueException if the name is empty, or if the name already has at
 *         least one back reference and the syntax does not allow multiplex
 *         definitions
 */
void nameAdd(byte[]name, int nameP, int nameEnd, int backRef, Syntax syntax) {
    if (nameEnd - nameP <= 0) throw new ValueException(ErrorMessages.EMPTY_GROUP_NAME);

    NameEntry entry;
    if (nameTable != null) {
        entry = nameFind(name, nameP, nameEnd);
    } else {
        nameTable = new BytesHash<NameEntry>(); // 13, oni defaults to 5
        entry = null;
    }

    if (entry != null) {
        if (entry.backNum >= 1 && !syntax.allowMultiplexDefinitionName()) {
            throw new ValueException(ErrorMessages.MULTIPLEX_DEFINED_NAME, new String(name, nameP, nameEnd - nameP));
        }
    } else {
        // the entry keeps a reference to the caller's name bytes; no copy is made here
        entry = new NameEntry(name, nameP, nameEnd);
        nameTable.putDirect(name, nameP, nameEnd, entry);
    }

    entry.addBackref(backRef);
}
/**
 * Returns the name-table entry for the group name held in
 * {@code name[nameP..nameEnd)}; simply delegates to {@link #nameFind}.
 *
 * @return the matching entry, or {@code null} if {@code nameFind} finds none
 */
NameEntry nameToGroupNumbers(byte[]name, int nameP, int nameEnd) {
    final NameEntry entry = nameFind(name, nameP, nameEnd);
    return entry;
}
/**
 * Resolves the group name held in {@code name[nameP..nameEnd)} to a single
 * group number.
 *
 * With one back reference that number is returned. With several, the
 * references are scanned from last to first and the first one whose
 * {@code region.beg} slot is not {@code Region.REGION_NOTPOS} is returned;
 * when {@code region} is {@code null} or no such slot exists, the last
 * reference is returned.
 *
 * @param region match region used to pick among several references; may be {@code null}
 * @return the resolved group number
 * @throws ValueException    if the name has no entry
 * @throws InternalException if the entry exists but holds no back reference
 */
int nameToBackrefNumber(byte[]name, int nameP, int nameEnd, Region region) {
    final NameEntry entry = nameToGroupNumbers(name, nameP, nameEnd);
    if (entry == null) {
        throw new ValueException(ErrorMessages.UNDEFINED_NAME_REFERENCE,
                new String(name, nameP, nameEnd - nameP));
    }

    final int count = entry.backNum;
    if (count == 0) throw new InternalException(ErrorMessages.PARSER_BUG);
    if (count == 1) return entry.backRef1;

    if (region != null) {
        for (int idx = count - 1; idx >= 0; idx--) {
            final int group = entry.backRefs[idx];
            if (region.beg[group] != Region.REGION_NOTPOS) return group;
        }
    }
    return entry.backRefs[count - 1];
}
void pushPrecReadNotNode(Node node) {
numPrecReadNotNodes++;
......