From 3475f2b0b0ecf8b1aa58e14ac23032e599f06b72 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Tue, 7 Jan 2020 19:34:08 +1030 Subject: [PATCH 01/11] start with a README --- benchmark/README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 benchmark/README.md diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 000000000..3841c5424 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,41 @@ +Benchmarking JFlex +================== + +This directory is work in progress on a small performance benchmarking +suite for JFlex. + +Main ideas: + + * use [JMH][1] as the benchmarking framework. There are good [technical articles][2] on the subtleties, and accessible [short][3] and slightly [longer tutorials][4]. + + * main goal is to gather performance numbers on the scanning engine. There are multiple options for this: + + * micro benchmark on just the generated JFlex code + skeleton, as tightly as possible, without action code + + * macro end-to-end benchmark for a full scanner on realistic input (somehow eliminating file reading overhead etc, although it might be interesting to see if anything we do makes any difference once IO is present) + + * anything in between these two + + * run on current development snapshot + + * add generated scanners from previous versions of JFlex to track development over time + + * use something like java.util.regex and maybe JLex as baseline. Unclear if we can get a theoretical maximum performance. + + * at some point automate and auto-publish results + +We could also benchmark various aspects of the generator itself, but so far +that is lower priority. + +The plan is to start with a small micro benchmark and incrementally add from +there. This should eventually include profiling to at least be informed about +what we're actually measuring. + +Open to ideas on any of this. 
Please comment on github issue [#689][github-issue] if you have opinions or would like to contribute. + +[1]: https://openjdk.java.net/projects/code-tools/jmh/ +[2]: https://www.oracle.com/technical-resources/articles/java/architect-benchmarking.html +[3]: https://www.mkyong.com/java/java-jmh-benchmark-tutorial/ +[4]: http://tutorials.jenkov.com/java-performance/jmh.html + +[github-issue]: https://github.com/jflex-de/jflex/issues/698 From 81a78086e63c8e04f1801d7d0dbc59dfae79260f Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Tue, 7 Jan 2020 20:24:24 +1030 Subject: [PATCH 02/11] initial skeleton --- benchmark/README.md | 17 ++- benchmark/pom.xml | 100 ++++++++++++++++++ .../java/de/jflex/benchmark/JFlexBench.java | 33 ++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 benchmark/pom.xml create mode 100644 benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java diff --git a/benchmark/README.md b/benchmark/README.md index 3841c5424..b79781635 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -4,7 +4,8 @@ Benchmarking JFlex This directory is work in progress on a small performance benchmarking suite for JFlex. -Main ideas: +Main ideas +---------- * use [JMH][1] as the benchmarking framework. There are good [technical articles][2] on the subtleties, and accessible [short][3] and slightly [longer tutorials][4]. @@ -33,6 +34,20 @@ what we're actually measuring. Open to ideas on any of this. Please comment on github issue [#689][github-issue] if you have opinions or would like to contribute. + +Building and Running +--------------------- + + mvn package + +will build the benchmark and + + java -jar target/benchmark-full-1.8.0-SNAPSHOT.jar + +will run it. 
+ + + [1]: https://openjdk.java.net/projects/code-tools/jmh/ [2]: https://www.oracle.com/technical-resources/articles/java/architect-benchmarking.html [3]: https://www.mkyong.com/java/java-jmh-benchmark-tutorial/ diff --git a/benchmark/pom.xml b/benchmark/pom.xml new file mode 100644 index 000000000..07547ce85 --- /dev/null +++ b/benchmark/pom.xml @@ -0,0 +1,100 @@ + + + 4.0.0 + + de.jflex + jflex-parent + 1.8.0-SNAPSHOT + ../pom.xml + + benchmark + JFlex Benchmark + A small performance benchmark suite for JFlex. + + + junit + junit + 4.12 + test + + + com.google.truth + truth + 0.36 + test + + + org.openjdk.jmh + jmh-core + ${jmh.version} + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.0 + + 1.8 + 1.8 + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + + + de.jflex + jflex-maven-plugin + 1.8.0-SNAPSHOT + + + + generate + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + ${project.artifactId}-full-${project.version} + + + de.jflex.benchmark.JFlexBench + + + + + + + + + + + + + + UTF-8 + 1.22 + + diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java new file mode 100644 index 000000000..4ed014081 --- /dev/null +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -0,0 +1,33 @@ +package de.jflex.benchmark; + +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +// @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) +@BenchmarkMode(Mode.AverageTime) 
+@OutputTimeUnit(TimeUnit.MILLISECONDS) +public class JFlexBench { + + @Benchmark + public int dummy() { + // do anything silly + int x = 0; + for (int i = 0; i < 1000; i++) { + x += i; + } + return x; + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder().include(JFlexBench.class.getSimpleName()).forks(1).build(); + + new Runner(opt).run(); + } +} From 6cf442c39558723194df16cc15abb8eb66a5e6e0 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Tue, 7 Jan 2020 20:27:58 +1030 Subject: [PATCH 03/11] add benchmarking module --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 378ee11a1..7862b56e7 100644 --- a/pom.xml +++ b/pom.xml @@ -59,6 +59,7 @@ jflex jflex-maven-plugin testsuite + benchmark From 9e65628d3341d55fdfbd5a200ffd5db43700a583 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Wed, 8 Jan 2020 17:02:23 +1030 Subject: [PATCH 04/11] proposal for baseline --- benchmark/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmark/README.md b/benchmark/README.md index b79781635..9e7457ae4 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -21,7 +21,12 @@ Main ideas * add generated scanners from previous versions of JFlex to track development over time - * use something like java.util.regex and maybe JLex as baseline. Unclear if we can get a theoretical maximum performance. + * as baseline (= can do no better than this), use a method that reads a + Reader into a buffer (at least as long as the input) and touches each + character once, sequentially. This should be the minimum a matcher with a + Reader interface must do if it is supposed to consume the entire input. 
+ + * use something like java.util.regex and maybe JLex as comparison * at some point automate and auto-publish results From cdc9ba977cb60582b19c4f745ea84930e5a0cad4 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Thu, 9 Jan 2020 13:03:27 +1030 Subject: [PATCH 05/11] add baseline and a lexer with minimal action code --- benchmark/pom.xml | 2 +- .../java/de/jflex/benchmark/JFlexBench.java | 71 ++- .../java/de/jflex/benchmark/NoAction17.java | 565 ++++++++++++++++++ benchmark/src/main/jflex/no-action.flex | 29 + 4 files changed, 655 insertions(+), 12 deletions(-) create mode 100644 benchmark/src/main/java/de/jflex/benchmark/NoAction17.java create mode 100644 benchmark/src/main/jflex/no-action.flex diff --git a/benchmark/pom.xml b/benchmark/pom.xml index 07547ce85..4f50eedb7 100644 --- a/benchmark/pom.xml +++ b/benchmark/pom.xml @@ -80,7 +80,7 @@ ${project.artifactId}-full-${project.version} - de.jflex.benchmark.JFlexBench + org.openjdk.jmh.Main diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java index 4ed014081..e46c3dcba 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -1,10 +1,11 @@ package de.jflex.benchmark; +import java.io.IOException; +import java.io.Reader; +import java.io.StringReader; import java.util.concurrent.TimeUnit; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; import org.openjdk.jmh.runner.RunnerException; import org.openjdk.jmh.runner.options.Options; @@ -12,17 +13,65 @@ // @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) @BenchmarkMode(Mode.AverageTime) -@OutputTimeUnit(TimeUnit.MILLISECONDS) 
+@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Warmup(iterations = 1, time = 1) // in benchmarking dev, should increase later +@Fork(value = 1) // in benchmarking dev, should increase later public class JFlexBench { + @State(Scope.Benchmark) + public static class LexerState { + /** + * Factor by which to scale the input size. We should see a benchmark time roughly linear in the + * factor, i.e. the first time times 10 and 100. + */ + @Param({"100", "1000", "10000"}) + public int factor; + + /** The length of the input for the benchmark. We give this to the baseline, but not JFlex. */ + public int length; + + /** The reader the input will be read from. Must support {@code reset()}. */ + public Reader reader; + + /** Create input and populate state fields. Runs once per entire benchmark. */ + @Setup + public void setup() { + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 10 * factor; i++) { + // TODO: better input + builder.append("aaa"); + builder.append("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + builder.append(" "); + } + length = builder.length(); + reader = new StringReader(builder.toString()); + } + } + + @Benchmark + public int noActionLexer(LexerState state) throws IOException { + state.reader.reset(); + return new NoAction(state.reader).yylex(); + } + + @Benchmark + public int noAction17Lexer(LexerState state) throws IOException { + state.reader.reset(); + return new NoAction17(state.reader).yylex(); + } + + /** + * The base line: a single continuous pass accessing each character once, through a buffer filled + * by a reader in one single reader invocation. 
+ */ @Benchmark - public int dummy() { - // do anything silly - int x = 0; - for (int i = 0; i < 1000; i++) { - x += i; + public void baselineReader(LexerState state, Blackhole bh) throws IOException { + char[] buffer = new char[state.length]; + state.reader.reset(); + state.reader.read(buffer, 0, buffer.length); + for (int i = 0; i < buffer.length; i++) { + bh.consume(buffer[i]); } - return x; } public static void main(String[] args) throws RunnerException { diff --git a/benchmark/src/main/java/de/jflex/benchmark/NoAction17.java b/benchmark/src/main/java/de/jflex/benchmark/NoAction17.java new file mode 100644 index 000000000..413db02df --- /dev/null +++ b/benchmark/src/main/java/de/jflex/benchmark/NoAction17.java @@ -0,0 +1,565 @@ +/* The following code was generated by JFlex 1.7.0 */ + +package de.jflex.benchmark; + +/* + A scanner with minimal action code, to measure inner matching loop + performance. +*/ + + +/** + * This class is a scanner generated by + * JFlex 1.7.0 + * from the specification file src/main/jflex/no-action.flex + */ +public class NoAction17 { + + /** This character denotes the end of file */ + public static final int YYEOF = -1; + + /** initial size of the lookahead buffer */ + private static final int ZZ_BUFFERSIZE = 16384; + + /** lexical states */ + public static final int YYINITIAL = 0; + + /** + * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l + * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l + * at the beginning of a line + * l is of the form l = 2*k, k a non negative integer + */ + private static final int ZZ_LEXSTATE[] = { + 0, 0 + }; + + /** + * Translates characters to character classes + */ + private static final String ZZ_CMAP_PACKED = + "\141\0\1\1\1\2\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffad\0"; + + /** + * Translates characters to character classes + */ + private static final char [] ZZ_CMAP = 
zzUnpackCMap(ZZ_CMAP_PACKED); + + /** + * Translates DFA states to action switch labels. + */ + private static final int [] ZZ_ACTION = zzUnpackAction(); + + private static final String ZZ_ACTION_PACKED_0 = + "\1\0\1\1\2\2"; + + private static int [] zzUnpackAction() { + int [] result = new int[4]; + int offset = 0; + offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackAction(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + do result[j++] = value; while (--count > 0); + } + return j; + } + + + /** + * Translates a state to a row index in the transition table + */ + private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); + + private static final String ZZ_ROWMAP_PACKED_0 = + "\0\0\0\3\0\3\0\6"; + + private static int [] zzUnpackRowMap() { + int [] result = new int[4]; + int offset = 0; + offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackRowMap(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int high = packed.charAt(i++) << 16; + result[j++] = high | packed.charAt(i++); + } + return j; + } + + /** + * The transition table of the DFA + */ + private static final int [] ZZ_TRANS = zzUnpackTrans(); + + private static final String ZZ_TRANS_PACKED_0 = + "\1\2\1\3\1\4\5\0\1\4"; + + private static int [] zzUnpackTrans() { + int [] result = new int[9]; + int offset = 0; + offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackTrans(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l 
= packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + value--; + do result[j++] = value; while (--count > 0); + } + return j; + } + + + /* error codes */ + private static final int ZZ_UNKNOWN_ERROR = 0; + private static final int ZZ_NO_MATCH = 1; + private static final int ZZ_PUSHBACK_2BIG = 2; + + /* error messages for the codes above */ + private static final String ZZ_ERROR_MSG[] = { + "Unknown internal scanner error", + "Error: could not match input", + "Error: pushback value was too large" + }; + + /** + * ZZ_ATTRIBUTE[aState] contains the attributes of state aState + */ + private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); + + private static final String ZZ_ATTRIBUTE_PACKED_0 = + "\1\0\2\11\1\1"; + + private static int [] zzUnpackAttribute() { + int [] result = new int[4]; + int offset = 0; + offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); + return result; + } + + private static int zzUnpackAttribute(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ + int l = packed.length(); + while (i < l) { + int count = packed.charAt(i++); + int value = packed.charAt(i++); + do result[j++] = value; while (--count > 0); + } + return j; + } + + /** the input device */ + private java.io.Reader zzReader; + + /** the current state of the DFA */ + private int zzState; + + /** the current lexical state */ + private int zzLexicalState = YYINITIAL; + + /** this buffer contains the current text to be matched and is + the source of the yytext() string */ + private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; + + /** the textposition at the last accepting state */ + private int zzMarkedPos; + + /** the current text position in the buffer */ + private int zzCurrentPos; + + /** startRead marks the beginning of the yytext() string in the buffer */ + private int zzStartRead; + + /** endRead marks the last character in the 
buffer, that has been read + from input */ + private int zzEndRead; + + /** number of newlines encountered up to the start of the matched text */ + private int yyline; + + /** the number of characters up to the start of the matched text */ + private int yychar; + + /** + * the number of characters from the last newline up to the start of the + * matched text + */ + private int yycolumn; + + /** + * zzAtBOL == true iff the scanner is currently at the beginning of a line + */ + private boolean zzAtBOL = true; + + /** zzAtEOF == true iff the scanner is at the EOF */ + private boolean zzAtEOF; + + /** denotes if the user-EOF-code has already been executed */ + private boolean zzEOFDone; + + /** + * The number of occupied positions in zzBuffer beyond zzEndRead. + * When a lead/high surrogate has been read from the input stream + * into the final zzBuffer position, this will have a value of 1; + * otherwise, it will have a value of 0. + */ + private int zzFinalHighSurrogate = 0; + + /* user code: */ + private int matches; + + + /** + * Creates a new scanner + * + * @param in the java.io.Reader to read input from. + */ + public NoAction17(java.io.Reader in) { + this.zzReader = in; + } + + + /** + * Unpacks the compressed character translation table. + * + * @param packed the packed character translation table + * @return the unpacked character translation table + */ + private static char [] zzUnpackCMap(String packed) { + char [] map = new char[0x110000]; + int i = 0; /* index in packed string */ + int j = 0; /* index in unpacked array */ + while (i < 40) { + int count = packed.charAt(i++); + char value = packed.charAt(i++); + do map[j++] = value; while (--count > 0); + } + return map; + } + + + /** + * Refills the input buffer. + * + * @return false, iff there was new input. 
+ * + * @exception java.io.IOException if any I/O-Error occurs + */ + private boolean zzRefill() throws java.io.IOException { + + /* first: make room (if you can) */ + if (zzStartRead > 0) { + zzEndRead += zzFinalHighSurrogate; + zzFinalHighSurrogate = 0; + System.arraycopy(zzBuffer, zzStartRead, + zzBuffer, 0, + zzEndRead-zzStartRead); + + /* translate stored positions */ + zzEndRead-= zzStartRead; + zzCurrentPos-= zzStartRead; + zzMarkedPos-= zzStartRead; + zzStartRead = 0; + } + + /* is the buffer big enough? */ + if (zzCurrentPos >= zzBuffer.length - zzFinalHighSurrogate) { + /* if not: blow it up */ + char newBuffer[] = new char[zzBuffer.length*2]; + System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); + zzBuffer = newBuffer; + zzEndRead += zzFinalHighSurrogate; + zzFinalHighSurrogate = 0; + } + + /* fill the buffer with new input */ + int requested = zzBuffer.length - zzEndRead; + int numRead = zzReader.read(zzBuffer, zzEndRead, requested); + + /* not supposed to occur according to specification of java.io.Reader */ + if (numRead == 0) { + throw new java.io.IOException("Reader returned 0 characters. See JFlex examples for workaround."); + } + if (numRead > 0) { + zzEndRead += numRead; + /* If numRead == requested, we might have requested too few chars to + encode a full Unicode character. We assume that a Reader would + otherwise never return half characters. */ + if (numRead == requested) { + if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) { + --zzEndRead; + zzFinalHighSurrogate = 1; + } + } + /* potentially more input available */ + return false; + } + + /* numRead < 0 ==> end of stream */ + return true; + } + + + /** + * Closes the input stream. + */ + public final void yyclose() throws java.io.IOException { + zzAtEOF = true; /* indicate end of file */ + zzEndRead = zzStartRead; /* invalidate buffer */ + + if (zzReader != null) + zzReader.close(); + } + + + /** + * Resets the scanner to read from a new input stream.
+ * Does not close the old reader. + * + * All internal variables are reset, the old input stream + * cannot be reused (internal buffer is discarded and lost). + * Lexical state is set to YYINITIAL. + * + * Internal scan buffer is resized down to its initial length, if it has grown. + * + * @param reader the new input stream + */ + public final void yyreset(java.io.Reader reader) { + zzReader = reader; + zzAtBOL = true; + zzAtEOF = false; + zzEOFDone = false; + zzEndRead = zzStartRead = 0; + zzCurrentPos = zzMarkedPos = 0; + zzFinalHighSurrogate = 0; + yyline = yychar = yycolumn = 0; + zzLexicalState = YYINITIAL; + if (zzBuffer.length > ZZ_BUFFERSIZE) + zzBuffer = new char[ZZ_BUFFERSIZE]; + } + + + /** + * Returns the current lexical state. + */ + public final int yystate() { + return zzLexicalState; + } + + + /** + * Enters a new lexical state + * + * @param newState the new lexical state + */ + public final void yybegin(int newState) { + zzLexicalState = newState; + } + + + /** + * Returns the text matched by the current regular expression. + */ + public final String yytext() { + return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead ); + } + + + /** + * Returns the character at position pos from the + * matched text. + * + * It is equivalent to yytext().charAt(pos), but faster + * + * @param pos the position of the character to fetch. + * A value from 0 to yylength()-1. + * + * @return the character at position pos + */ + public final char yycharat(int pos) { + return zzBuffer[zzStartRead+pos]; + } + + + /** + * Returns the length of the matched text region. + */ + public final int yylength() { + return zzMarkedPos-zzStartRead; + } + + + /** + * Reports an error that occurred while scanning. + * + * In a well-formed scanner (no or only correct usage of + * yypushback(int) and a match-all fallback rule) this method + * will only be called with things that "Can't Possibly Happen". + * If this method is called, something is seriously wrong + * (e.g.
a JFlex bug producing a faulty scanner etc.). + * + * Usual syntax/scanner level error handling should be done + * in error fallback rules. + * + * @param errorCode the code of the error message to display + */ + private void zzScanError(int errorCode) { + String message; + try { + message = ZZ_ERROR_MSG[errorCode]; + } + catch (ArrayIndexOutOfBoundsException e) { + message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; + } + + throw new Error(message); + } + + + /** + * Pushes the specified amount of characters back into the input stream. + * + * They will be read again by the next call of the scanning method + * + * @param number the number of characters to be read again. + * This number must not be greater than yylength()! + */ + public void yypushback(int number) { + if ( number > yylength() ) + zzScanError(ZZ_PUSHBACK_2BIG); + + zzMarkedPos -= number; + } + + + /** + * Resumes scanning until the next regular expression is matched, + * the end of input is encountered or an I/O-Error occurs. + * + * @return the next token + * @exception java.io.IOException if any I/O-Error occurs + */ + public int yylex() throws java.io.IOException { + int zzInput; + int zzAction; + + // cached fields: + int zzCurrentPosL; + int zzMarkedPosL; + int zzEndReadL = zzEndRead; + char [] zzBufferL = zzBuffer; + char [] zzCMapL = ZZ_CMAP; + + int [] zzTransL = ZZ_TRANS; + int [] zzRowMapL = ZZ_ROWMAP; + int [] zzAttrL = ZZ_ATTRIBUTE; + + while (true) { + zzMarkedPosL = zzMarkedPos; + + zzAction = -1; + + zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; + + zzState = ZZ_LEXSTATE[zzLexicalState]; + + // set up zzAction for empty match case: + int zzAttributes = zzAttrL[zzState]; + if ( (zzAttributes & 1) == 1 ) { + zzAction = zzState; + } + + + zzForAction: { + while (true) { + + if (zzCurrentPosL < zzEndReadL) { + zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); + zzCurrentPosL += Character.charCount(zzInput); + } + else if (zzAtEOF) { + zzInput = YYEOF; + break
zzForAction; + } + else { + // store back cached positions + zzCurrentPos = zzCurrentPosL; + zzMarkedPos = zzMarkedPosL; + boolean eof = zzRefill(); + // get translated positions and possibly new buffer + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer; + zzEndReadL = zzEndRead; + if (eof) { + zzInput = YYEOF; + break zzForAction; + } + else { + zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL); + zzCurrentPosL += Character.charCount(zzInput); + } + } + int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ]; + if (zzNext == -1) break zzForAction; + zzState = zzNext; + + zzAttributes = zzAttrL[zzState]; + if ( (zzAttributes & 1) == 1 ) { + zzAction = zzState; + zzMarkedPosL = zzCurrentPosL; + if ( (zzAttributes & 8) == 8 ) break zzForAction; + } + + } + } + + // store back cached position + zzMarkedPos = zzMarkedPosL; + + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { + zzAtEOF = true; + { + return matches; + } + } + else { + switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { + case 1: + { /* nothing */ + } + // fall through + case 3: break; + case 2: + { matches++; + } + // fall through + case 4: break; + default: + zzScanError(ZZ_NO_MATCH); + } + } + } + } + + +} diff --git a/benchmark/src/main/jflex/no-action.flex b/benchmark/src/main/jflex/no-action.flex new file mode 100644 index 000000000..fa87e5509 --- /dev/null +++ b/benchmark/src/main/jflex/no-action.flex @@ -0,0 +1,29 @@ +package de.jflex.benchmark; + +/* + A scanner with minimal action code, to measure inner matching loop + performance. 
+*/ + +%% + +%public +%class NoAction + +%int + +%{ + private int matches; +%} + +SHORT = "a" +LONG = "b"+ + +%% + +{SHORT} { matches++; } +{LONG} { matches++; } + +[^] { /* nothing */ } + +<> { return matches; } From ced4c2eaf3ddaf1b1d3be45d110c8054d42658b9 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Thu, 9 Jan 2020 13:29:38 +1030 Subject: [PATCH 06/11] towards production settings Fork=1 actually looks fine, should just not be 0. --- .../src/main/java/de/jflex/benchmark/JFlexBench.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java index e46c3dcba..d4dbdbd53 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -11,11 +11,10 @@ import org.openjdk.jmh.runner.options.Options; import org.openjdk.jmh.runner.options.OptionsBuilder; -// @BenchmarkMode({Mode.AverageTime, Mode.SampleTime}) -@BenchmarkMode(Mode.AverageTime) +// @BenchmarkMode({Mode.AverageTime, Mode.SampleTime, Mode.SingleShotTime}) +@BenchmarkMode({Mode.AverageTime, Mode.SingleShotTime}) @OutputTimeUnit(TimeUnit.MICROSECONDS) -@Warmup(iterations = 1, time = 1) // in benchmarking dev, should increase later -@Fork(value = 1) // in benchmarking dev, should increase later +@Fork(value=1) public class JFlexBench { @State(Scope.Benchmark) @@ -75,7 +74,7 @@ public void baselineReader(LexerState state, Blackhole bh) throws IOException { } public static void main(String[] args) throws RunnerException { - Options opt = new OptionsBuilder().include(JFlexBench.class.getSimpleName()).forks(1).build(); + Options opt = new OptionsBuilder().include(JFlexBench.class.getSimpleName()).build(); new Runner(opt).run(); } From 2c212967e1de1627a7154fb558335da1ed0ef1f5 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Fri, 10 Jan 2020 18:00:52 +1030 Subject: [PATCH 07/11] use milliseconds; code 
format --- benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java index d4dbdbd53..82a3fb826 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -13,8 +13,8 @@ // @BenchmarkMode({Mode.AverageTime, Mode.SampleTime, Mode.SingleShotTime}) @BenchmarkMode({Mode.AverageTime, Mode.SingleShotTime}) -@OutputTimeUnit(TimeUnit.MICROSECONDS) -@Fork(value=1) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@Fork(value = 1) public class JFlexBench { @State(Scope.Benchmark) From 7fb9bcd87550e873b516cbc4cfba11d1d1c0c29f Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Fri, 10 Jan 2020 20:07:35 +1030 Subject: [PATCH 08/11] move pre-generated code out of code-fmt scope --- benchmark/pom.xml | 20 +++++++++++++++++++ .../java/de/jflex/benchmark/JFlexBench.java | 1 + .../benchmark => pregen}/NoAction17.java | 2 +- 3 files changed, 22 insertions(+), 1 deletion(-) rename benchmark/src/main/{java/de/jflex/benchmark => pregen}/NoAction17.java (99%) diff --git a/benchmark/pom.xml b/benchmark/pom.xml index 4f50eedb7..839d33281 100644 --- a/benchmark/pom.xml +++ b/benchmark/pom.xml @@ -66,6 +66,26 @@ + + maven-resources-plugin + + + copy-pregen + generate-sources + + copy-resources + + + target/generated-sources/jflex/de/jflex/benchmark/pregen + + + src/main/pregen + + + + + + org.apache.maven.plugins diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java index 82a3fb826..bfc547963 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -1,5 +1,6 @@ package de.jflex.benchmark; +import de.jflex.benchmark.pregen.NoAction17; import java.io.IOException; import 
java.io.Reader; import java.io.StringReader; diff --git a/benchmark/src/main/java/de/jflex/benchmark/NoAction17.java b/benchmark/src/main/pregen/NoAction17.java similarity index 99% rename from benchmark/src/main/java/de/jflex/benchmark/NoAction17.java rename to benchmark/src/main/pregen/NoAction17.java index 413db02df..3dde69cc7 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/NoAction17.java +++ b/benchmark/src/main/pregen/NoAction17.java @@ -1,6 +1,6 @@ /* The following code was generated by JFlex 1.7.0 */ -package de.jflex.benchmark; +package de.jflex.benchmark.pregen; /* A scanner with minimal action code, to measure inner matching loop From 224fbf480c2c86fe615b96d3c8d716ccff0aee8a Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Fri, 10 Jan 2020 20:24:33 +1030 Subject: [PATCH 09/11] add test input with higher code points --- .../java/de/jflex/benchmark/JFlexBench.java | 21 ++++++++++++--- benchmark/src/main/jflex/no-action.flex | 3 +++ benchmark/src/main/pregen/NoAction17.java | 26 ++++++++++++------- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java index bfc547963..5293fe184 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java @@ -27,6 +27,9 @@ public static class LexerState { @Param({"100", "1000", "10000"}) public int factor; + @Param({"1", "2"}) + public int input; + /** The length of the input for the benchmark. We give this to the baseline, but not JFlex. 
*/ public int length; @@ -38,10 +41,20 @@ public static class LexerState { public void setup() { StringBuilder builder = new StringBuilder(); for (int i = 0; i < 10 * factor; i++) { - // TODO: better input - builder.append("aaa"); - builder.append("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); - builder.append(" "); + switch (input) { + case 1: + builder.append("aaa"); + builder.append("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + builder.append(" "); + break; + case 2: + builder.append("😎a"); + builder.append("このマニュアルについてbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + builder.append(" "); + break; + default: + assert false : "reached unreachable default case"; + } } length = builder.length(); reader = new StringReader(builder.toString()); diff --git a/benchmark/src/main/jflex/no-action.flex b/benchmark/src/main/jflex/no-action.flex index fa87e5509..528353374 100644 --- a/benchmark/src/main/jflex/no-action.flex +++ b/benchmark/src/main/jflex/no-action.flex @@ -24,6 +24,9 @@ LONG = "b"+ {SHORT} { matches++; } {LONG} { matches++; } +"このマニュアルについて" { matches++; } +"😎" { matches++; } + [^] { /* nothing */ } <> { return matches; } diff --git a/benchmark/src/main/pregen/NoAction17.java b/benchmark/src/main/pregen/NoAction17.java index 3dde69cc7..9ce4af2de 100644 --- a/benchmark/src/main/pregen/NoAction17.java +++ b/benchmark/src/main/pregen/NoAction17.java @@ -38,7 +38,9 @@ public class NoAction17 { * Translates characters to character classes */ private static final String ZZ_CMAP_PACKED = - "\141\0\1\1\1\2\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffad\0"; + "\141\0\1\1\1\2\u2fe1\0\1\14\16\0\1\3\20\0\1\13\1\0"+ + "\1\15\4\0\1\12\2\0\1\4\63\0\1\10\50\0\1\6\22\0"+ + "\1\5\6\0\1\7\5\0\1\11\uffff\0\uc523\0\1\1\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u0a00\0"; /** * Translates characters to 
character classes @@ -51,10 +53,10 @@ public class NoAction17 { private static final int [] ZZ_ACTION = zzUnpackAction(); private static final String ZZ_ACTION_PACKED_0 = - "\1\0\1\1\2\2"; + "\1\0\1\1\2\2\1\1\11\0"; private static int [] zzUnpackAction() { - int [] result = new int[4]; + int [] result = new int[14]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -79,10 +81,11 @@ private static int zzUnpackAction(String packed, int offset, int [] result) { private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final String ZZ_ROWMAP_PACKED_0 = - "\0\0\0\3\0\3\0\6"; + "\0\0\0\16\0\16\0\34\0\52\0\70\0\106\0\124"+ + "\0\142\0\160\0\176\0\214\0\232\0\250"; private static int [] zzUnpackRowMap() { - int [] result = new int[4]; + int [] result = new int[14]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -105,10 +108,13 @@ private static int zzUnpackRowMap(String packed, int offset, int [] result) { private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final String ZZ_TRANS_PACKED_0 = - "\1\2\1\3\1\4\5\0\1\4"; + "\1\2\1\3\1\4\1\5\12\2\20\0\1\4\17\0"+ + "\1\6\16\0\1\7\16\0\1\10\16\0\1\11\16\0"+ + "\1\12\16\0\1\13\16\0\1\14\16\0\1\15\16\0"+ + "\1\16\16\0\1\3"; private static int [] zzUnpackTrans() { - int [] result = new int[9]; + int [] result = new int[182]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; @@ -146,10 +152,10 @@ private static int zzUnpackTrans(String packed, int offset, int [] result) { private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final String ZZ_ATTRIBUTE_PACKED_0 = - "\1\0\2\11\1\1"; + "\1\0\2\11\2\1\11\0"; private static int [] zzUnpackAttribute() { - int [] result = new int[4]; + int [] result = new int[14]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -248,7 +254,7 @@ public 
NoAction17(java.io.Reader in) { char [] map = new char[0x110000]; int i = 0; /* index in packed string */ int j = 0; /* index in unpacked array */ - while (i < 40) { + while (i < 88) { int count = packed.charAt(i++); char value = packed.charAt(i++); do map[j++] = value; while (--count > 0); From 2d209105868b1b671e9cb28d2897493e496ddf57 Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Sun, 19 Jan 2020 17:26:54 +1030 Subject: [PATCH 10/11] stay consistent with jflex.* package names in rest of repo --- .../src/main/java/{de => }/jflex/benchmark/JFlexBench.java | 4 ++-- benchmark/src/main/jflex/no-action.flex | 2 +- benchmark/src/main/pregen/NoAction17.java | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) rename benchmark/src/main/java/{de => }/jflex/benchmark/JFlexBench.java (97%) diff --git a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java b/benchmark/src/main/java/jflex/benchmark/JFlexBench.java similarity index 97% rename from benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java rename to benchmark/src/main/java/jflex/benchmark/JFlexBench.java index 5293fe184..c488c7f53 100644 --- a/benchmark/src/main/java/de/jflex/benchmark/JFlexBench.java +++ b/benchmark/src/main/java/jflex/benchmark/JFlexBench.java @@ -1,10 +1,10 @@ -package de.jflex.benchmark; +package jflex.benchmark; -import de.jflex.benchmark.pregen.NoAction17; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.concurrent.TimeUnit; +import jflex.benchmark.pregen.NoAction17; import org.openjdk.jmh.annotations.*; import org.openjdk.jmh.infra.Blackhole; import org.openjdk.jmh.runner.Runner; diff --git a/benchmark/src/main/jflex/no-action.flex b/benchmark/src/main/jflex/no-action.flex index 528353374..40ae3beea 100644 --- a/benchmark/src/main/jflex/no-action.flex +++ b/benchmark/src/main/jflex/no-action.flex @@ -1,4 +1,4 @@ -package de.jflex.benchmark; +package jflex.benchmark; /* A scanner with minimal action code, to measure inner matching 
loop diff --git a/benchmark/src/main/pregen/NoAction17.java b/benchmark/src/main/pregen/NoAction17.java index 9ce4af2de..974507c1d 100644 --- a/benchmark/src/main/pregen/NoAction17.java +++ b/benchmark/src/main/pregen/NoAction17.java @@ -1,6 +1,6 @@ /* The following code was generated by JFlex 1.7.0 */ -package de.jflex.benchmark.pregen; +package jflex.benchmark.pregen; /* A scanner with minimal action code, to measure inner matching loop From 681d3254b6cc7ed3d5f246a0f6a20e1009aade8d Mon Sep 17 00:00:00 2001 From: Gerwin Klein Date: Sun, 19 Jan 2020 17:28:01 +1030 Subject: [PATCH 11/11] benchmark not part of aggregated sources --- scripts/preparare-deploy-source-code.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/preparare-deploy-source-code.sh b/scripts/preparare-deploy-source-code.sh index f3cff2eac..1fe6492de 100755 --- a/scripts/preparare-deploy-source-code.sh +++ b/scripts/preparare-deploy-source-code.sh @@ -41,6 +41,7 @@ update_source() { jar -xf ../../target/jflex-*-sources.jar logi "Remove unrelated sources" rm -rf jflex/maven + rm -rf jflex/benchmark rm $(find . -name 'BUILD.bazel') logi "Checking licenses"