FileTreeHashBuilder::update now also takes the number of bytes as an argument. The hashing I/O streams (HashingInputStream/HashingOutputStream) count the number of bytes written.

2.x
Szum123321 2023-05-12 18:36:39 +02:00
parent b9af3b3777
commit 41934c32cf
7 changed files with 28 additions and 117 deletions

View File

@ -38,9 +38,9 @@ public record FileInputStreamSupplier(Path path, String name, FileTreeHashBuilde
try { try {
return new HashingInputStream(Files.newInputStream(path), path, hashTreeBuilder, brokenFileHandler); return new HashingInputStream(Files.newInputStream(path), path, hashTreeBuilder, brokenFileHandler);
} catch (IOException e) { } catch (IOException e) {
//Probably good idea to just put it here. In the case an exception is thrown here, it could be possble //Probably good idea to just put it here. In the case an exception is thrown here, it could be possible
//The latch would have never been lifted //The latch would have never been lifted
hashTreeBuilder.update(path, 0); hashTreeBuilder.update(path, 0, 0);
brokenFileHandler.handle(path, e); brokenFileHandler.handle(path, e);
throw e; throw e;
} }

View File

@ -73,7 +73,7 @@ public abstract class AbstractCompressor {
); );
} catch (IOException e) { } catch (IOException e) {
brokenFileHandler.handle(file, e); brokenFileHandler.handle(file, e);
fileHashBuilder.update(file, 0); fileHashBuilder.update(file, 0, 0);
//In Permissive mode we allow partial backups //In Permissive mode we allow partial backups
if (ConfigHelper.INSTANCE.get().integrityVerificationMode.isStrict()) throw e; if (ConfigHelper.INSTANCE.get().integrityVerificationMode.isStrict()) throw e;
else log.sendErrorAL(ctx, "An exception occurred while trying to compress: {}", else log.sendErrorAL(ctx, "An exception occurred while trying to compress: {}",

View File

@ -23,7 +23,7 @@ import java.nio.ByteOrder;
import java.util.Arrays; import java.util.Arrays;
/** /**
* This algorithm copies construction of <a href="https://ticki.github.io/blog/seahash-explained/">SeaHash</a> including its IV. * This algorithm copies the construction of <a href="https://ticki.github.io/blog/seahash-explained/">SeaHash</a> including its IV.
* What it differs in is that it uses Xoroshift64* instead of PCG as its pseudo-random function. Although it might lower * What it differs in is that it uses Xoroshift64* instead of PCG as its pseudo-random function. Although it might lower
* the output quality, I don't think it matters that much, honestly. One advantage of xoroshift is that it should be * the output quality, I don't think it matters that much, honestly. One advantage of xoroshift is that it should be
* easier to implement with AVX. Java should soon ship its vector api by default. * easier to implement with AVX. Java should soon ship its vector api by default.

View File

@ -1,96 +0,0 @@
/*
* A simple backup mod for Fabric
* Copyright (C) 2022 Szum123321
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package net.szum123321.textile_backup.core.digest;
//import jdk.incubator.vector.*;
import net.szum123321.textile_backup.core.digest.BalticHash;
/**
* Mostly working XorSeaHash impl using SIMD. Should speed up calculation on most systems currently in use
<br>...<br>
* It's actually slower. I tested it by comparing runtimes while hashing a directly opened FileInputStream.
* My cpu is AMD Ryzen 5 3500U
* There are two reasons I can think of: either vector construction simply takes too much time, or the JVM auto-vectorizes better than I do. * There are two reasons I can think of: either vector construction simply takes too much time, or the JVM auto-vectorizes better than I do.
* It's still probably far from being the slowest part of code, so I don't expect any major slowdowns
* I will keep this code here for future work perhaps
*/
// NOTE(review): the entire SIMD implementation below is deliberately commented out.
// Per the class javadoc, benchmarks showed it running slower than the scalar
// BalticHash on the tested hardware (AMD Ryzen 5 3500U), and it depends on the
// incubator module jdk.incubator.vector. Kept only as a reference for future work;
// the class is a no-op shell that inherits all behavior from BalticHash.
public class BalticHashSIMD extends BalticHash {/*
public BalticHashSIMD() { throw new UnsupportedOperationException(); } //For safety
private LongVector state = LongVector.fromArray(LongVector.SPECIES_256, IV, 0);
@Override
public long getValue() {
if (buffer.position() != 0) {
while (buffer.position() < buffer_limit) buffer.put((byte) 0);
round();
}
long result = state.reduceLanesToLong(VectorOperators.XOR);
result ^= hashed_data_length;
return xorshift64star(result);
}
//NOTE(review): this update() is known to be incorrect — the buffered (n != 32)
//path mishandles offsets; it must be fixed before this class is ever re-enabled.
@Override
public void update(byte[] data, int off, int len) {
int pos = off; //should be = 0
while (pos < len) {
int n = Math.min(len - pos, buffer_limit - buffer.position());
if (n == 32) {
var v = ByteVector.fromArray(ByteVector.SPECIES_256, data, pos).reinterpretAsLongs();
state = state.lanewise(VectorOperators.XOR, v);
state = xorshift64star(state);
} else {
System.arraycopy(data, pos, _byte_buffer, buffer.position(), n);
buffer.position(buffer.position() + n);
if (buffer.position() == buffer_limit) round();
}
pos += n;
}
hashed_data_length += len;
}
@Override
protected void round() {
var s = ByteVector.fromArray(ByteVector.SPECIES_256, _byte_buffer, 0).reinterpretAsLongs();
state = state.lanewise(VectorOperators.XOR, s);
state = xorshift64star(state);
int p = buffer.position();
if (p > buffer_limit) {
System.arraycopy(_byte_buffer, buffer_limit, _byte_buffer, 0, buffer.limit() - p);
buffer.position(buffer.limit() - p);
} else buffer.rewind();
}
//xorshift64* applied lane-wise: xor-shift by 12/25/27, then multiply by the
//standard 0x2545F4914F6CDD1D constant (same PRF as the scalar BalticHash).
LongVector xorshift64star(LongVector v) {
v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.ASHR, 12));
v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.LSHL, 25));
v = v.lanewise(VectorOperators.XOR, v.lanewise(VectorOperators.ASHR, 27));
v = v.lanewise(VectorOperators.MUL, 0x2545F4914F6CDD1DL);
return v;
}*/
}

View File

@ -44,16 +44,14 @@ public class FileTreeHashBuilder {
latch = new CountDownLatch(filesToProcess); latch = new CountDownLatch(filesToProcess);
} }
public void update(Path path, long newHash) throws IOException { public void update(Path path, long newHash, long bytes) throws IOException {
if(path.getFileName().toString().equals(CompressionStatus.DATA_FILENAME)) return; if(path.getFileName().toString().equals(CompressionStatus.DATA_FILENAME)) return;
latch.countDown(); latch.countDown();
long size = Files.size(path);
synchronized (lock) { synchronized (lock) {
this.hash ^= newHash; this.hash ^= newHash;
filesTotalSize += size; filesTotalSize += bytes;
filesProcessed++; filesProcessed++;
} }
} }

View File

@ -19,8 +19,6 @@
package net.szum123321.textile_backup.core.digest; package net.szum123321.textile_backup.core.digest;
import net.szum123321.textile_backup.Globals; import net.szum123321.textile_backup.Globals;
import net.szum123321.textile_backup.TextileBackup;
import net.szum123321.textile_backup.TextileLogger;
import net.szum123321.textile_backup.core.DataLeftException; import net.szum123321.textile_backup.core.DataLeftException;
import net.szum123321.textile_backup.core.create.BrokenFileHandler; import net.szum123321.textile_backup.core.create.BrokenFileHandler;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
@ -39,10 +37,12 @@ import java.nio.file.Path;
*/ */
public class HashingInputStream extends FilterInputStream { public class HashingInputStream extends FilterInputStream {
private final Path path; private final Path path;
private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); private final Hash hash = Globals.CHECKSUM_SUPPLIER.get();
private final FileTreeHashBuilder hashBuilder; private final FileTreeHashBuilder hashBuilder;
private final BrokenFileHandler brokenFileHandler; private final BrokenFileHandler brokenFileHandler;
private long bytesWritten = 0;
public HashingInputStream(InputStream in, Path path, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) { public HashingInputStream(InputStream in, Path path, FileTreeHashBuilder hashBuilder, BrokenFileHandler brokenFileHandler) {
super(in); super(in);
this.path = path; this.path = path;
@ -53,14 +53,20 @@ public class HashingInputStream extends FilterInputStream {
@Override @Override
public int read(byte @NotNull [] b, int off, int len) throws IOException { public int read(byte @NotNull [] b, int off, int len) throws IOException {
int i = in.read(b, off, len); int i = in.read(b, off, len);
if(i != -1) hasher.update(b, off, i); if(i != -1) {
hash.update(b, off, i);
bytesWritten += i;
}
return i; return i;
} }
@Override @Override
public int read() throws IOException { public int read() throws IOException {
int i = in.read(); int i = in.read();
if(i != -1) hasher.update((byte)i); if(i != -1) {
hash.update(i);
bytesWritten++;
}
return i; return i;
} }
@ -71,9 +77,9 @@ public class HashingInputStream extends FilterInputStream {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
hasher.update(path.getFileName().toString().getBytes(StandardCharsets.UTF_8)); hash.update(path.getFileName().toString().getBytes(StandardCharsets.UTF_8));
hashBuilder.update(path, hasher.getValue()); hashBuilder.update(path, hash.getValue(), bytesWritten);
if(in.available() != 0) brokenFileHandler.handle(path, new DataLeftException(in.available())); if(in.available() != 0) brokenFileHandler.handle(path, new DataLeftException(in.available()));

View File

@ -29,9 +29,11 @@ import java.nio.file.Path;
public class HashingOutputStream extends FilterOutputStream { public class HashingOutputStream extends FilterOutputStream {
private final Path path; private final Path path;
private final Hash hasher = Globals.CHECKSUM_SUPPLIER.get(); private final Hash hash = Globals.CHECKSUM_SUPPLIER.get();
private final FileTreeHashBuilder hashBuilder; private final FileTreeHashBuilder hashBuilder;
private long bytesWritten = 0;
public HashingOutputStream(OutputStream out, Path path, FileTreeHashBuilder hashBuilder) { public HashingOutputStream(OutputStream out, Path path, FileTreeHashBuilder hashBuilder) {
super(out); super(out);
this.path = path; this.path = path;
@ -41,20 +43,21 @@ public class HashingOutputStream extends FilterOutputStream {
@Override @Override
public void write(int b) throws IOException { public void write(int b) throws IOException {
out.write(b); out.write(b);
hasher.update(b); hash.update(b);
bytesWritten++;
} }
@Override @Override
public void write(byte @NotNull [] b, int off, int len) throws IOException { public void write(byte @NotNull [] b, int off, int len) throws IOException {
out.write(b, off, len); out.write(b, off, len);
hasher.update(b, off, len); hash.update(b, off, len);
bytesWritten += len;
} }
@Override @Override
public void close() throws IOException { public void close() throws IOException {
hasher.update(path.getFileName().toString().getBytes(StandardCharsets.UTF_8)); hash.update(path.getFileName().toString().getBytes(StandardCharsets.UTF_8));
long h = hasher.getValue(); hashBuilder.update(path, hash.getValue(), bytesWritten);
hashBuilder.update(path, h);
super.close(); super.close();
} }
} }