Skip to content

Commit

Permalink
Continue unrolling and inlining value parser. Make targeted use of ByteBuffer.getInt() instead of ByteBuffer.get(). Switch from GraalVM CE to GraalVM. (gunnarmorling#201)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebarlas authored Jan 7, 2024
1 parent aa0395d commit c13997c
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 44 deletions.
2 changes: 1 addition & 1 deletion calculate_average_ebarlas.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@
#

# Load SDKMAN's init script so the `sdk` command is available in this shell.
source "$HOME/.sdkman/bin/sdkman-init.sh"
# NOTE(review): both `sdk use` lines below are shown because this text is a diff view
# (1 addition & 1 deletion) without +/- markers. Per the commit title ("Switch from
# GraalVM CE to GraalVM") the graalce line is the removed one and the graal line replaces it.
# `1>&2` redirects the command's stdout to stderr.
sdk use java 21.0.1-graalce 1>&2
sdk use java 21.0.1-graal 1>&2
# No extra JVM options are passed.
JAVA_OPTS=""
# Run the entry class from the built jar under `time`, passing "measurements.txt" and "8" as program arguments.
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas measurements.txt 8
121 changes: 78 additions & 43 deletions src/main/java/dev/morling/onebrc/CalculateAverage_ebarlas.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
Expand All @@ -28,7 +29,7 @@

public class CalculateAverage_ebarlas {

private static final int MAX_KEY_SIZE = 100 * 4; // max 4 bytes per UTF-8 char
private static final int MAX_KEY_SIZE = 100;
private static final int HASH_FACTOR = 433;
private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive

Expand All @@ -52,7 +53,7 @@ public static void main(String[] args) throws IOException, InterruptedException
var pSize = pEnd - pStart;
Runnable r = () -> {
try {
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize);
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize).order(ByteOrder.LITTLE_ENDIAN);
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
}
catch (IOException e) {
Expand Down Expand Up @@ -113,7 +114,7 @@ private static void foldFootersAndHeaders(List<Partition> partitions) { // fold
var merged = mergeFooterAndHeader(pPrev.footer, pNext.header);
if (merged != null) {
if (merged[merged.length - 1] == '\n') { // fold into prev partition
doProcessBuffer(ByteBuffer.wrap(merged), true, pPrev.stats);
doProcessBuffer(ByteBuffer.wrap(merged).order(ByteOrder.LITTLE_ENDIAN), true, pPrev.stats);
}
else { // no newline appeared in partition, carry forward
pNext.footer = merged;
Expand Down Expand Up @@ -142,56 +143,90 @@ private static Partition processBuffer(ByteBuffer buffer, boolean first) {
/**
 * Processes one buffer into a {@code Partition}: reads a header via {@code readHeader}
 * unless {@code first} is true, runs {@code reallyDoProcessBuffer} on the buffer, and
 * captures any bytes left after the returned line start as a footer.
 *
 * @param buffer input bytes to process
 * @param first  when true, no header is read and the partition's header is {@code null}
 * @param stats  table handed to {@code reallyDoProcessBuffer} and stored in the returned partition
 * @return a partition holding the header (or {@code null}), the footer (or {@code null}) and {@code stats}
 */
private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) {
var header = first ? null : readHeader(buffer);
// Value returned by reallyDoProcessBuffer; used below as the start index of the footer
// (presumably the start of the last line that was not fully processed - confirm in that method).
var keyStart = reallyDoProcessBuffer(buffer, stats);
// NOTE(review): the next two lines declare the same variable. This looks like a diff view whose
// +/- markers were lost: the position() form is presumably the removed line and the limit() form
// its replacement - confirm against the commit before treating either as the live code.
var footer = keyStart < buffer.position() ? readFooter(buffer, keyStart) : null;
var footer = keyStart < buffer.limit() ? readFooter(buffer, keyStart) : null;
return new Partition(header, footer, stats);
}

private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) {
var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key
var keyPos = 0; // current position in key buffer
var keyHash = 0; // accumulating hash of key
var keyStart = buffer.position(); // start of key in buffer used for footer calc
try { // abort with exception to avoid hasRemaining() calls
while (true) {
var b = buffer.get();
if (b != ';') {
keyHash = HASH_FACTOR * keyHash + b;
keyBuf[keyPos++] = b;
}
else {
var idx = keyHash & HASH_TBL_SIZE;
var st = stats[idx];
if (st == null) { // nothing in table, eagerly claim spot
st = stats[idx] = newStats(keyBuf, keyPos, keyHash);
int keyStart = 0; // start of key in buffer used for footer calc
try { // abort with exception to allow optimistic line processing
while (true) { // one line per iteration
keyStart = buffer.position(); // preserve line start
int n = buffer.getInt(); // first four bytes of key
byte b1 = (byte) (n & 0xFF);
byte b2 = (byte) ((n >> 8) & 0xFF);
byte b3 = (byte) ((n >> 16) & 0xFF);
byte b = (byte) ((n >> 24) & 0xFF);
int keyPos;
int keyHash = keyBuf[0] = b1;
if (b2 != ';' && b3 != ';') { // true for keys of length 3 or more
keyBuf[1] = b2;
keyBuf[2] = b3;
keyHash = HASH_FACTOR * (HASH_FACTOR * keyHash + b2) + b3;
keyPos = 3;
while (b != ';') {
keyHash = HASH_FACTOR * keyHash + b;
keyBuf[keyPos++] = b;
b = buffer.get();
}
else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
st = findInTable(stats, keyHash, keyBuf, keyPos);
}
else { // slow path, rewind and consume byte-by-byte
buffer.position(keyStart + 1);
keyPos = 1;
while ((b = buffer.get()) != ';') {
keyHash = HASH_FACTOR * keyHash + b;
keyBuf[keyPos++] = b;
}
var negative = false;
b = buffer.get(); // digit or dash
if (b == '-') {
negative = true;
b = buffer.get(); // digit after neg
}
var idx = keyHash & HASH_TBL_SIZE;
var st = stats[idx];
if (st == null) { // nothing in table, eagerly claim spot
st = stats[idx] = newStats(keyBuf, keyPos, keyHash);
}
else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
st = findInTable(stats, keyHash, keyBuf, keyPos);
}
var value = buffer.getInt();
b = (byte) (value & 0xFF); // digit or dash
int val;
if (b == '-') { // dash branch
val = ((byte) ((value >> 8) & 0xFF)) - '0'; // digit after dash
b = (byte) ((value >> 16) & 0xFF); // second digit or decimal
if (b != '.') { // second digit
val = val * 10 + (b - '0'); // calc second digit
// skip decimal (at >> 24)
b = buffer.get(); // digit after decimal
val = val * 10 + (b - '0'); // calc digit after decimal
}
var val = b - '0';
b = buffer.get(); // second digit or decimal
if (b != '.') {
val = val * 10 + (b - '0');
buffer.get(); // decimal
else { // decimal branch
// skip decimal (at >> 16)
b = (byte) ((value >> 24) & 0xFF); // digit after decimal
val = val * 10 + (b - '0'); // calc digit after decimal
}
val = val * 10 + (buffer.get() - '0'); // digit after decimal
buffer.get(); // newline
var v = negative ? -val : val;
st.min = Math.min(st.min, v);
st.max = Math.max(st.max, v);
st.sum += v;
st.count++;
keyStart = buffer.position(); // preserve line start
b = buffer.get(); // first byte of key
keyHash = b;
keyBuf[0] = b;
keyPos = 1;
val = -val;
}
else { // first digit branch
val = b - '0'; // calc first digit
b = (byte) ((value >> 8) & 0xFF); // second digit or decimal
if (b != '.') { // second digit branch
val = val * 10 + (b - '0'); // calc second digit
// skip decimal (at >> 16)
b = (byte) ((value >> 24) & 0xFF); // digit after decimal
val = val * 10 + (b - '0'); // calc digit after decimal
buffer.get(); // newline
}
else { // decimal branch
b = (byte) ((value >> 16) & 0xFF); // digit after decimal
val = val * 10 + (b - '0'); // calc digit after decimal
// skip newline (at >> 24)
}
}
st.min = Math.min(st.min, val);
st.max = Math.max(st.max, val);
st.sum += val;
st.count++;
}
}
catch (BufferUnderflowException ignore) {
Expand Down Expand Up @@ -220,7 +255,7 @@ private static Stats newStats(byte[] buffer, int len, int hash) {
}

/**
 * Copies the trailing bytes of {@code buffer}, starting at {@code lineStart}, into a new array.
 *
 * @param buffer    buffer to copy from
 * @param lineStart absolute index in {@code buffer} of the first footer byte
 * @return a newly allocated array containing the footer bytes
 */
private static byte[] readFooter(ByteBuffer buffer, int lineStart) { // read from line start to current pos (end-of-input)
// NOTE(review): the next two lines declare the same variable. This looks like a diff view whose
// +/- markers were lost: the position() form is presumably the removed line and the limit() form
// its replacement (footer running to the buffer's limit rather than its current position) - confirm.
var footer = new byte[buffer.position() - lineStart];
var footer = new byte[buffer.limit() - lineStart];
// Absolute bulk get: copies footer.length bytes starting at index lineStart; the buffer's position is not changed.
buffer.get(lineStart, footer, 0, footer.length);
return footer;
}
Expand Down

0 comments on commit c13997c

Please sign in to comment.