Selected a hashing algorithm. It's a custom design which merges SeaHash with xorshift64*. It should be fast and correct enough for this use case. I hope to be able to speed it up with SIMD, as Java is scheduled to soon include the Vector API (a part of Project Panama).
							parent
							
								
									9c37affacd
								
							
						
					
					
						commit
						f6cd361fff
					
				|  | @ -20,7 +20,9 @@ | |||
| package net.szum123321.textile_backup; | ||||
| 
 | ||||
| import net.minecraft.server.MinecraftServer; | ||||
| import net.szum123321.textile_backup.core.Hash; | ||||
| import net.szum123321.textile_backup.core.Utilities; | ||||
| import net.szum123321.textile_backup.core.XorSeaHash; | ||||
| import net.szum123321.textile_backup.core.create.MakeBackupRunnable; | ||||
| import net.szum123321.textile_backup.core.restore.AwaitThread; | ||||
| import org.apache.commons.io.FileUtils; | ||||
|  | @ -34,11 +36,13 @@ import java.util.concurrent.ExecutorService; | |||
| import java.util.concurrent.Executors; | ||||
| import java.util.concurrent.TimeUnit; | ||||
| import java.util.concurrent.atomic.AtomicBoolean; | ||||
| import java.util.function.Supplier; | ||||
| 
 | ||||
| public class Globals { | ||||
|     public static final Globals INSTANCE = new Globals(); | ||||
|     private final static TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME); | ||||
|     public final static DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss"); | ||||
|     private static final TextileLogger log = new TextileLogger(TextileBackup.MOD_NAME); | ||||
|     public static final DateTimeFormatter defaultDateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd_HH.mm.ss"); | ||||
|     public static final Supplier<Hash> CHECKSUM_SUPPLIER = XorSeaHash::new; | ||||
| 
 | ||||
|     private ExecutorService executorService = null;// = Executors.newSingleThreadExecutor();
 | ||||
|     public final AtomicBoolean globalShutdownBackupFlag = new AtomicBoolean(true); | ||||
|  |  | |||
|  | @ -18,35 +18,26 @@ | |||
| 
 | ||||
| package net.szum123321.textile_backup.core; | ||||
| 
 | ||||
| import net.szum123321.textile_backup.Globals; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.nio.ByteBuffer; | ||||
| import java.nio.charset.StandardCharsets; | ||||
| import java.nio.file.Files; | ||||
| import java.nio.file.Path; | ||||
| import java.util.function.Supplier; | ||||
| import java.util.zip.Checksum; | ||||
| 
 | ||||
| public class FileTreeHashBuilder { | ||||
|     private final static ThreadLocal<byte[]> buff = | ||||
|             ThreadLocal.withInitial(() -> new byte[Long.BYTES]); | ||||
|     private final Object lock = new Object(); | ||||
|     private final Supplier<Checksum> hasherProvider; | ||||
|     private long hash = 0, filesProcessed = 0, filesTotalSize = 0; | ||||
| 
 | ||||
|     public FileTreeHashBuilder(Supplier<Checksum> provider) { hasherProvider = provider; } | ||||
| 
 | ||||
|     public void update(Path path, long newHash) throws IOException { | ||||
|         byte[] raw = buff.get(); | ||||
|         var hasher = hasherProvider.get(); | ||||
|         var hasher = Globals.CHECKSUM_SUPPLIER.get(); | ||||
| 
 | ||||
|         long size = Files.size(path); | ||||
| 
 | ||||
|         hasher.update(ByteBuffer.wrap(raw).putLong(size).array()); | ||||
|         hasher.update(path.toString().getBytes(StandardCharsets.UTF_8)); | ||||
|         hasher.update(ByteBuffer.wrap(raw).putLong(hash).array()); | ||||
|         hasher.update(newHash); | ||||
| 
 | ||||
|         synchronized (lock) { | ||||
|             //This way exact order of files processed doesn't matter.
 | ||||
|             //This way, the exact order of files processed doesn't matter.
 | ||||
|             this.hash ^= hasher.getValue(); | ||||
|             filesProcessed++; | ||||
|             filesTotalSize += size; | ||||
|  | @ -54,12 +45,11 @@ public class FileTreeHashBuilder { | |||
|     } | ||||
| 
 | ||||
|     public long getValue() { | ||||
|         var hasher = hasherProvider.get(); | ||||
|         byte[] raw = buff.get(); | ||||
|         var hasher = Globals.CHECKSUM_SUPPLIER.get(); | ||||
| 
 | ||||
|         hasher.update(ByteBuffer.wrap(raw).putLong(hash).array()); | ||||
|         hasher.update(ByteBuffer.wrap(raw).putLong(filesProcessed).array()); | ||||
|         hasher.update(ByteBuffer.wrap(raw).putLong(filesTotalSize).array()); | ||||
|         hasher.update(hash); | ||||
|         hasher.update(filesProcessed); | ||||
|         hasher.update(filesTotalSize); | ||||
| 
 | ||||
|         return hasher.getValue(); | ||||
|     } | ||||
|  |  | |||
|  | @ -0,0 +1,30 @@ | |||
| /* | ||||
|  * A simple backup mod for Fabric | ||||
|  * Copyright (C)  2022   Szum123321 | ||||
|  * | ||||
|  * This program is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | ||||
|  */ | ||||
| 
 | ||||
| package net.szum123321.textile_backup.core; | ||||
| 
 | ||||
/**
 * A streaming hash that accumulates input and yields a 64-bit value.
 *
 * <p>Input may be supplied as single bytes, as whole {@code long} values, or as
 * (slices of) byte arrays; the accumulated result is read with {@link #getValue()}.
 */
public interface Hash {
    /**
     * Feeds a single byte into the hash.
     *
     * @param b the byte to absorb
     */
    void update(byte b);

    /**
     * Feeds one {@code long} value into the hash.
     *
     * @param b the value to absorb
     */
    void update(long b);

    /**
     * Feeds {@code len} bytes of {@code b}, starting at index {@code off}, into the hash.
     *
     * @param b   the array holding the data
     * @param off index of the first byte to absorb
     * @param len number of bytes to absorb
     */
    void update(byte[] b, int off, int len);

    /**
     * Feeds an entire byte array into the hash.
     *
     * <p>The default implementation forwards to {@link #update(byte[], int, int)}
     * with offset {@code 0} and length {@code b.length}.
     *
     * @param b the array to absorb in full
     */
    default void update(byte[] b) {
        update(b, 0, b.length);
    }

    /**
     * Returns the hash of the data supplied so far.
     *
     * @return the 64-bit hash value
     */
    long getValue();
}
|  | @ -0,0 +1,103 @@ | |||
| /* | ||||
|  * A simple backup mod for Fabric | ||||
|  * Copyright (C)  2022   Szum123321 | ||||
|  * | ||||
|  * This program is free software: you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation, either version 3 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 | ||||
|  */ | ||||
| 
 | ||||
| package net.szum123321.textile_backup.core; | ||||
| 
 | ||||
| import java.nio.ByteBuffer; | ||||
| import java.nio.ByteOrder; | ||||
| 
 | ||||
| /* | ||||
|     This algorithm copies construction of SeaHash (https://ticki.github.io/blog/seahash-explained/) including its IV
 | ||||
|     What it differs in is that it uses Xoroshift64* instead of PCG. Although it might lower the output quality, | ||||
|     I don't think it matters that much, honestly. One advantage the xoroshift has is that it should be | ||||
|     easier to implement with AVX. Java should soon ship its vector api by default. | ||||
|  */ | ||||
| public class XorSeaHash implements Hash { | ||||
|     //SeaHash IV
 | ||||
|     private final long[] state = { 0x16f11fe89b0d677cL, 0xb480a793d8e6c86cL, 0x6fe2e5aaf078ebc9L, 0x14f994a4c5259381L}; | ||||
|     private final int buffer_size = (state.length + 1) * Long.BYTES; | ||||
|     private final int buffer_limit = state.length * Long.BYTES; | ||||
|     private final byte[] _byte_buffer = new byte[buffer_size]; | ||||
|     //Enforce endianness
 | ||||
|     private final ByteBuffer buffer = ByteBuffer.wrap(_byte_buffer).order(ByteOrder.LITTLE_ENDIAN); | ||||
| 
 | ||||
|     private long hashed_data_length = 0; | ||||
| 
 | ||||
|     @Override | ||||
|     public void update(byte b) { | ||||
|         buffer.put(b); | ||||
|         hashed_data_length += 1; | ||||
|         if (buffer.position() >= buffer_limit) round(); | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public void update(long b) { | ||||
|         buffer.putLong(b); | ||||
|         hashed_data_length += Long.BYTES; | ||||
|         if(buffer.position() >= buffer_limit) round(); | ||||
|     } | ||||
| 
 | ||||
|     public void update(byte [] data) { update(data, 0, data.length); } | ||||
| 
 | ||||
|     public void update(byte[] data, int off, int len) { | ||||
|         int pos = off; | ||||
|         while(pos < len) { | ||||
|             int n = Math.min(len - pos, buffer_limit - buffer.position()); | ||||
|             System.arraycopy(data, pos, _byte_buffer, buffer.position(), n); | ||||
|             pos += n; | ||||
|             buffer.position(buffer.position() + n); | ||||
|             if(buffer.position() >= buffer_limit) round(); | ||||
|         } | ||||
| 
 | ||||
|         hashed_data_length += len; | ||||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public long getValue() { | ||||
|         if(buffer.position() != 0) round(); | ||||
| 
 | ||||
|         long result = state[0]; | ||||
|         result ^= state[1]; | ||||
|         result ^= state[2]; | ||||
|         result ^= state[3]; | ||||
|         result ^= hashed_data_length; | ||||
| 
 | ||||
|         return xorshift64star(result); | ||||
|     } | ||||
| 
 | ||||
|     private void round() { | ||||
|         while(buffer.position() < buffer_limit) buffer.put((byte)0); | ||||
|         int p = buffer.position(); | ||||
|         buffer.rewind(); | ||||
| 
 | ||||
|         for(int i = 0; i < 4; i++) state[i] ^= buffer.getLong(); | ||||
|         for(int i = 0; i < 4; i++) state[i] = xorshift64star(state[i]); | ||||
| 
 | ||||
|         if(p > buffer_limit) { | ||||
|             System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, buffer.limit() - p); | ||||
|             buffer.position(buffer.limit() - p); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     long xorshift64star(long s) { | ||||
|         s ^= (s >> 12); | ||||
|         s ^= (s << 25); | ||||
|         s ^= (s >> 27); | ||||
|         return s * 0x2545F4914F6CDD1DL; | ||||
|     } | ||||
| } | ||||
|  | @ -34,8 +34,7 @@ public record FileInputStreamSupplier(Path path, String name, FileTreeHashBuilde | |||
|     @Override | ||||
|     public InputStream getInputStream() throws IOException { | ||||
|         try { | ||||
|             //TODO: select hashing algorithm!
 | ||||
|             return new HashingInputStream(Files.newInputStream(path), path, null, hashTreeBuilder, brokenFileHandler); | ||||
|             return new HashingInputStream(Files.newInputStream(path), path, hashTreeBuilder, brokenFileHandler); | ||||
|         } catch (IOException e) { | ||||
|             brokenFileHandler.handle(path, e); | ||||
|             throw e; | ||||
|  |  | |||
|  | @ -18,27 +18,26 @@ | |||
| 
 | ||||
| package net.szum123321.textile_backup.core.create; | ||||
| 
 | ||||
| import net.szum123321.textile_backup.Globals; | ||||
| import net.szum123321.textile_backup.core.DataLeftException; | ||||
| import net.szum123321.textile_backup.core.FileTreeHashBuilder; | ||||
| import net.szum123321.textile_backup.core.Hash; | ||||
| import org.jetbrains.annotations.NotNull; | ||||
| 
 | ||||
| import java.io.*; | ||||
| import java.nio.file.Path; | ||||
| import java.util.zip.Checksum; | ||||
| 
 | ||||
| //This class calculates a hash of the file on the input stream, submits it to FileTreeHashBuilder.
 | ||||
| //In case the whole underlying stream hasn't been read, also puts it into BrokenFileHandler
 | ||||
| public class HashingInputStream extends FilterInputStream { | ||||
| 
 | ||||
|     private final Path path; | ||||
|     private final Checksum hasher; | ||||
|     private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); | ||||
|     private final FileTreeHashBuilder hashBuilder; | ||||
|     private final BrokenFileHandler brokenFileHandler; | ||||
| 
 | ||||
|     public HashingInputStream(InputStream in, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) { | ||||
|     public HashingInputStream(InputStream in, Path path, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) { | ||||
|         super(in); | ||||
|         this.path = path; | ||||
|         this.hasher = hasher; | ||||
|         this.hashBuilder = hashBuilder; | ||||
|         this.brokenFileHandler = brokenFileHandler; | ||||
|     } | ||||
|  |  | |||
|  | @ -44,7 +44,7 @@ public abstract class AbstractCompressor { | |||
|     public void createArchive(Path inputFile, Path outputFile, BackupContext ctx, int coreLimit) throws IOException, ExecutionException, InterruptedException { | ||||
|         Instant start = Instant.now(); | ||||
| 
 | ||||
|         FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder(() -> null); //TODO: select hashing algorithm
 | ||||
|         FileTreeHashBuilder fileHashBuilder = new FileTreeHashBuilder(); | ||||
|         BrokenFileHandler brokenFileHandler = new BrokenFileHandler(); | ||||
| 
 | ||||
|         try (OutputStream outStream = Files.newOutputStream(outputFile); | ||||
|  |  | |||
|  | @ -18,29 +18,27 @@ | |||
| 
 | ||||
| package net.szum123321.textile_backup.core.restore; | ||||
| 
 | ||||
| import net.szum123321.textile_backup.Globals; | ||||
| import net.szum123321.textile_backup.core.FileTreeHashBuilder; | ||||
| import net.szum123321.textile_backup.core.Hash; | ||||
| import org.jetbrains.annotations.NotNull; | ||||
| 
 | ||||
| import java.io.FilterOutputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.OutputStream; | ||||
| import java.nio.file.Path; | ||||
| import java.util.zip.Checksum; | ||||
| 
 | ||||
| public class HashingOutputStream extends FilterOutputStream { | ||||
|     private final Path path; | ||||
|     private final Checksum hasher; | ||||
| 
 | ||||
|     private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); | ||||
|     private final FileTreeHashBuilder hashBuilder; | ||||
| 
 | ||||
|     public HashingOutputStream(OutputStream out, Path path, Checksum hasher, FileTreeHashBuilder hashBuilder) { | ||||
|     public HashingOutputStream(OutputStream out, Path path, FileTreeHashBuilder hashBuilder) { | ||||
|         super(out); | ||||
|         this.path = path; | ||||
|         this.hasher = hasher; | ||||
|         this.hashBuilder = hashBuilder; | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     @Override | ||||
|     public void write(int b) throws IOException { | ||||
|         super.write(b); | ||||
|  | @ -48,7 +46,7 @@ public class HashingOutputStream extends FilterOutputStream { | |||
|     } | ||||
| 
 | ||||
|     @Override | ||||
|     public void write(byte[] @NotNull b, int off, int len) throws IOException { | ||||
|     public void write(byte @NotNull [] b, int off, int len) throws IOException { | ||||
|         super.write(b, off, len); | ||||
|         hasher.update(b, off, len); | ||||
|     } | ||||
|  |  | |||
|  | @ -40,7 +40,7 @@ public class GenericTarDecompressor { | |||
| 
 | ||||
|     public static long decompress(Path input, Path target) throws IOException { | ||||
|         Instant start = Instant.now(); | ||||
|         FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder(() -> null); | ||||
|         FileTreeHashBuilder treeBuilder = new FileTreeHashBuilder(); | ||||
| 
 | ||||
|         try (InputStream fileInputStream = Files.newInputStream(input); | ||||
|              InputStream bufferedInputStream = new BufferedInputStream(fileInputStream); | ||||
|  |  | |||
|  | @ -40,7 +40,7 @@ public class ZipDecompressor { | |||
|     public static long decompress(Path inputFile, Path target) throws IOException { | ||||
|         Instant start = Instant.now(); | ||||
| 
 | ||||
|         FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder(() -> null); | ||||
|         FileTreeHashBuilder hashBuilder = new FileTreeHashBuilder(); | ||||
| 
 | ||||
|         try(ZipFile zipFile = new ZipFile(inputFile.toFile())) { | ||||
|             for (Iterator<ZipArchiveEntry> it = zipFile.getEntries().asIterator(); it.hasNext(); ) { | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue