From f6cd361fff081276b52b4bd9db07058dee077920 Mon Sep 17 00:00:00 2001 From: Szum123321 Date: Sun, 27 Nov 2022 13:55:34 +0100 Subject: [PATCH] Selected hashing algorithm. It's a custom job which merges SeaHash with Xoroshift64*. Should be fast and correct enough for this use case. I hope I will be able to speed it up with SIMD, as Java is scheduled to soon include Vector API (a part of project Panama) --- .../szum123321/textile_backup/Globals.java | 8 +- .../core/FileTreeHashBuilder.java | 28 ++--- .../szum123321/textile_backup/core/Hash.java | 30 +++++ .../textile_backup/core/XorSeaHash.java | 103 ++++++++++++++++++ .../core/create/FileInputStreamSupplier.java | 3 +- .../core/create/HashingInputStream.java | 9 +- .../compressors/AbstractCompressor.java | 2 +- .../core/restore/HashingOutputStream.java | 12 +- .../decompressors/GenericTarDecompressor.java | 2 +- .../decompressors/ZipDecompressor.java | 2 +- 10 files changed, 161 insertions(+), 38 deletions(-) create mode 100644 src/main/java/net/szum123321/textile_backup/core/Hash.java create mode 100644 src/main/java/net/szum123321/textile_backup/core/XorSeaHash.java diff --git a/src/main/java/net/szum123321/textile_backup/Globals.java b/src/main/java/net/szum123321/textile_backup/Globals.java index b906076..7c779ce 100644 --- a/src/main/java/net/szum123321/textile_backup/Globals.java +++ b/src/main/java/net/szum123321/textile_backup/Globals.java @@ -20,7 +20,9 @@ package net.szum123321.textile_backup; import net.minecraft.server.MinecraftServer; +import net.szum123321.textile_backup.core.Hash; import net.szum123321.textile_backup.core.Utilities; +import net.szum123321.textile_backup.core.XorSeaHash; import net.szum123321.textile_backup.core.create.MakeBackupRunnable; import net.szum123321.textile_backup.core.restore.AwaitThread; import org.apache.commons.io.FileUtils; @@ -34,11 +36,13 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; 
import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; public class Globals { public static final Globals INSTANCE = new Globals(); - private final static TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME); - public final static DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss"); + private static final TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME); + public static final DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss"); + public static final Supplier CHECKSUM_SUPPLIER = XorSeaHash::new; private ExecutorService executorService = null;// = Executors.newSingleThreadExecutor(); public final AtomicBoolean globalShutdownBackupFlag = new AtomicBoolean(true); diff --git a/src/main/java/net/szum123321/textile_backup/core/FileTreeHashBuilder.java b/src/main/java/net/szum123321/textile_backup/core/FileTreeHashBuilder.java index a6084e6..7af0f61 100644 --- a/src/main/java/net/szum123321/textile_backup/core/FileTreeHashBuilder.java +++ b/src/main/java/net/szum123321/textile_backup/core/FileTreeHashBuilder.java @@ -18,35 +18,26 @@ package net.szum123321.textile_backup.core; +import net.szum123321.textile_backup.Globals; + import java.io.IOException; -import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.function.Supplier; -import java.util.zip.Checksum; - public class FileTreeHashBuilder { - private final static ThreadLocal buff = - ThreadLocal.withInitial(() -> new byte[Long.BYTES]); private final Object lock = new Object(); - private final Supplier hasherProvider; private long hash = 0, filesProcessed = 0, filesTotalSize = 0; - public FileTreeHashBuilder(Supplier provider) { hasherProvider = provider; } - public void update(Path path, long newHash) throws IOException { - byte[] raw = buff.get(); - var hasher = hasherProvider.get(); + var 
hasher = Globals.CHECKSUM_SUPPLIER.get(); long size = Files.size(path); - hasher.update(ByteBuffer.wrap(raw).putLong(size).array()); hasher.update(path.toString().getBytes(StandardCharsets.UTF_8)); - hasher.update(ByteBuffer.wrap(raw).putLong(hash).array()); + hasher.update(newHash); synchronized (lock) { - //This way exact order of files processed doesn't matter. + //This way, the exact order of files processed doesn't matter. this.hash ^= hasher.getValue(); filesProcessed++; filesTotalSize += size; @@ -54,12 +45,11 @@ public class FileTreeHashBuilder { } public long getValue() { - var hasher = hasherProvider.get(); - byte[] raw = buff.get(); + var hasher = Globals.CHECKSUM_SUPPLIER.get(); - hasher.update(ByteBuffer.wrap(raw).putLong(hash).array()); - hasher.update(ByteBuffer.wrap(raw).putLong(filesProcessed).array()); - hasher.update(ByteBuffer.wrap(raw).putLong(filesTotalSize).array()); + hasher.update(hash); + hasher.update(filesProcessed); + hasher.update(filesTotalSize); return hasher.getValue(); } diff --git a/src/main/java/net/szum123321/textile_backup/core/Hash.java b/src/main/java/net/szum123321/textile_backup/core/Hash.java new file mode 100644 index 0000000..e562cb4 --- /dev/null +++ b/src/main/java/net/szum123321/textile_backup/core/Hash.java @@ -0,0 +1,30 @@ +/* + * A simple backup mod for Fabric + * Copyright (C) 2022 Szum123321 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
/**
 * Minimal streaming contract for a 64-bit (non-cryptographic) hash.
 *
 * Implementations accumulate data via the update() overloads and produce the
 * digest through {@link #getValue()}. Whether getValue() is repeatable or
 * finalizes internal state is implementation-defined — callers should treat an
 * instance as single-use unless documented otherwise.
 */
public interface Hash {
    /** Feeds a single byte into the hash. */
    void update(byte b);

    /** Feeds one long (8 bytes) into the hash. */
    void update(long b);

    /** Feeds a whole array; delegates to {@link #update(byte[], int, int)}. */
    default void update(byte[] b) {
        update(b, 0, b.length);
    }

    /**
     * Feeds {@code len} bytes of {@code b}, starting at index {@code off}.
     *
     * @param b   source array
     * @param off index of the first byte to consume
     * @param len number of bytes to consume
     */
    void update(byte[] b, int off, int len);

    /** Returns the 64-bit hash of everything fed in so far. */
    long getValue();
}
+ */ +public class XorSeaHash implements Hash { + //SeaHash IV + private final long[] state = { 0x16f11fe89b0d677cL, 0xb480a793d8e6c86cL, 0x6fe2e5aaf078ebc9L, 0x14f994a4c5259381L}; + private final int buffer_size = (state.length + 1) * Long.BYTES; + private final int buffer_limit = state.length * Long.BYTES; + private final byte[] _byte_buffer = new byte[buffer_size]; + //Enforce endianness + private final ByteBuffer buffer = ByteBuffer.wrap(_byte_buffer).order(ByteOrder.LITTLE_ENDIAN); + + private long hashed_data_length = 0; + + @Override + public void update(byte b) { + buffer.put(b); + hashed_data_length += 1; + if (buffer.position() >= buffer_limit) round(); + } + + @Override + public void update(long b) { + buffer.putLong(b); + hashed_data_length += Long.BYTES; + if(buffer.position() >= buffer_limit) round(); + } + + public void update(byte [] data) { update(data, 0, data.length); } + + public void update(byte[] data, int off, int len) { + int pos = off; + while(pos < len) { + int n = Math.min(len - pos, buffer_limit - buffer.position()); + System.arraycopy(data, pos, _byte_buffer, buffer.position(), n); + pos += n; + buffer.position(buffer.position() + n); + if(buffer.position() >= buffer_limit) round(); + } + + hashed_data_length += len; + } + + @Override + public long getValue() { + if(buffer.position() != 0) round(); + + long result = state[0]; + result ^= state[1]; + result ^= state[2]; + result ^= state[3]; + result ^= hashed_data_length; + + return xorshift64star(result); + } + + private void round() { + while(buffer.position() < buffer_limit) buffer.put((byte)0); + int p = buffer.position(); + buffer.rewind(); + + for(int i = 0; i < 4; i++) state[i] ^= buffer.getLong(); + for(int i = 0; i < 4; i++) state[i] = xorshift64star(state[i]); + + if(p > buffer_limit) { + System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, buffer.limit() - p); + buffer.position(buffer.limit() - p); + } + } + + long xorshift64star(long s) { + s ^= (s >> 12); + s ^= (s 
<< 25); + s ^= (s >> 27); + return s * 0x2545F4914F6CDD1DL; + } +} diff --git a/src/main/java/net/szum123321/textile_backup/core/create/FileInputStreamSupplier.java b/src/main/java/net/szum123321/textile_backup/core/create/FileInputStreamSupplier.java index 695294e..2ee1795 100644 --- a/src/main/java/net/szum123321/textile_backup/core/create/FileInputStreamSupplier.java +++ b/src/main/java/net/szum123321/textile_backup/core/create/FileInputStreamSupplier.java @@ -34,8 +34,7 @@ public record FileInputStreamSupplier(Path path, String name, FileTreeHashBuilde @Override public InputStream getInputStream() throws IOException { try { - //TODO: select hashing algorithm! - return new HashingInputStream(Files.newInputStream(path), path, null, hashTreeBuilder, brokenFileHandler); + return new HashingInputStream(Files.newInputStream(path), path, hashTreeBuilder, brokenFileHandler); } catch (IOException e) { brokenFileHandler.handle(path, e); throw e; diff --git a/src/main/java/net/szum123321/textile_backup/core/create/HashingInputStream.java b/src/main/java/net/szum123321/textile_backup/core/create/HashingInputStream.java index 532abca..8577de8 100644 --- a/src/main/java/net/szum123321/textile_backup/core/create/HashingInputStream.java +++ b/src/main/java/net/szum123321/textile_backup/core/create/HashingInputStream.java @@ -18,27 +18,26 @@ package net.szum123321.textile_backup.core.create; +import net.szum123321.textile_backup.Globals; import net.szum123321.textile_backup.core.DataLeftException; import net.szum123321.textile_backup.core.FileTreeHashBuilder; +import net.szum123321.textile_backup.core.Hash; import org.jetbrains.annotations.NotNull; import java.io.*; import java.nio.file.Path; -import java.util.zip.Checksum; //This class calculates a hash of the file on the input stream, submits it to FileTreeHashBuilder. 
//In case the whole underlying stream hasn't been read, also puts it into BrokeFileHandler public class HashingInputStream extends FilterInputStream { - private final Path path; - private final Checksum hasher; + private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); private final FileTreeHashBuilder hashBuilder; private final BrokenFileHandler brokenFileHandler; - public HashingInputStream(InputStream in, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) { + public HashingInputStream(InputStream in, Path path, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) { super(in); this.path = path; - this.hasher = hasher; this.hashBuilder = hashBuilder; this.brokenFileHandler = brokenFileHandler; } diff --git a/src/main/java/net/szum123321/textile_backup/core/create/compressors/AbstractCompressor.java b/src/main/java/net/szum123321/textile_backup/core/create/compressors/AbstractCompressor.java index b689ba7..3d6eee8 100644 --- a/src/main/java/net/szum123321/textile_backup/core/create/compressors/AbstractCompressor.java +++ b/src/main/java/net/szum123321/textile_backup/core/create/compressors/AbstractCompressor.java @@ -44,7 +44,7 @@ public abstract class AbstractCompressor { public void createArchive(Path inputFile, Path outputFile, BackupContext ctx, int coreLimit) throws IOException, ExecutionException, InterruptedException { Instant start = Instant.now(); - FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder(() -> null); //TODO: select hashing algorithm + FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder(); BrokenFileHandler brokenFileHandler = new BrokenFileHandler(); try (OutputStream outStream = Files.newOutputStream(outputFile); diff --git a/src/main/java/net/szum123321/textile_backup/core/restore/HashingOutputStream.java b/src/main/java/net/szum123321/textile_backup/core/restore/HashingOutputStream.java index 6f9b3c8..e8375f5 100644 --- 
a/src/main/java/net/szum123321/textile_backup/core/restore/HashingOutputStream.java +++ b/src/main/java/net/szum123321/textile_backup/core/restore/HashingOutputStream.java @@ -18,29 +18,27 @@ package net.szum123321.textile_backup.core.restore; +import net.szum123321.textile_backup.Globals; import net.szum123321.textile_backup.core.FileTreeHashBuilder; +import net.szum123321.textile_backup.core.Hash; import org.jetbrains.annotations.NotNull; import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Path; -import java.util.zip.Checksum; public class HashingOutputStream extends FilterOutputStream { private final Path path; - private final Checksum hasher; - + private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); private final FileTreeHashBuilder hashBuilder; - public HashingOutputStream(OutputStream out, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder) { + public HashingOutputStream(OutputStream out, Path path, FileTreeHashBuilder hashBuilder) { super(out); this.path = path; - this.hasher = hasher; this.hashBuilder = hashBuilder; } - @Override public void write(int b) throws IOException { super.write(b); @@ -48,7 +46,7 @@ public class HashingOutputStream extends FilterOutputStream { } @Override - public void write(byte[] @NotNull b, int off, int len) throws IOException { + public void write(byte @NotNull [] b, int off, int len) throws IOException { super.write(b, off, len); hasher.update(b, off, len); } diff --git a/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/GenericTarDecompressor.java b/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/GenericTarDecompressor.java index c6fe061..216c2e8 100644 --- a/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/GenericTarDecompressor.java +++ b/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/GenericTarDecompressor.java @@ -40,7 +40,7 @@ public class 
GenericTarDecompressor { public static long decompress(Path input, Path target) throws IOException { Instant start = Instant.now(); - FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder(() -> null); + FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder(); try (InputStream fileInputStream = Files.newInputStream(input); InputStream bufferedInputStream = new BufferedInputStream(fileInputStream); diff --git a/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/ZipDecompressor.java b/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/ZipDecompressor.java index ad1e9be..9ad58cf 100644 --- a/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/ZipDecompressor.java +++ b/src/main/java/net/szum123321/textile_backup/core/restore/decompressors/ZipDecompressor.java @@ -40,7 +40,7 @@ public class ZipDecompressor { public static long decompress(Path inputFile, Path target) throws IOException { Instant start = Instant.now(); - FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder(() -> null); + FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder(); try(ZipFile zipFile = new ZipFile(inputFile.toFile())) { for (Iterator it = zipFile.getEntries().asIterator(); it.hasNext(); ) {