Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>

<packaging>pom</packaging>

Expand Down
4 changes: 2 additions & 2 deletions qendpoint-backend/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-backend</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>

<packaging>jar</packaging>

Expand All @@ -15,7 +15,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>
</parent>

<licenses>
Expand Down
4 changes: 2 additions & 2 deletions qendpoint-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-cli</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>

<name>qendpoint package</name>
<description>Package of the qendpoint.</description>
Expand All @@ -11,7 +11,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>
</parent>

<dependencies>
Expand Down
10 changes: 8 additions & 2 deletions qendpoint-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>qendpoint-core</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>

<packaging>jar</packaging>

Expand All @@ -27,7 +27,7 @@
<parent>
<groupId>com.the-qa-company</groupId>
<artifactId>qendpoint-parent</artifactId>
<version>2.5.1</version>
<version>2.5.2</version>
</parent>

<licenses>
Expand All @@ -52,6 +52,7 @@
<jena.version>4.3.2</jena.version>
<slf4j.version>1.7.30</slf4j.version>
<lz4.version>1.8.0</lz4.version>
<zstd.version>1.5.7-3</zstd.version>

<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
Expand Down Expand Up @@ -126,5 +127,10 @@
<artifactId>lz4-java</artifactId>
<version>${lz4.version}</version>
</dependency>
<dependency>
<groupId>com.github.luben</groupId>
<artifactId>zstd-jni</artifactId>
<version>${zstd.version}</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import com.the_qa_company.qendpoint.core.util.io.Closer;
import com.the_qa_company.qendpoint.core.util.io.CountInputStream;
import com.the_qa_company.qendpoint.core.util.io.IOUtil;
import com.the_qa_company.qendpoint.core.util.io.IntegrityObject;

import java.io.BufferedInputStream;
import java.io.Closeable;
Expand All @@ -51,7 +52,7 @@
/**
* @author mario.arias
*/
public class SequenceLog64Map implements Sequence, Closeable {
public class SequenceLog64Map implements Sequence, Closeable, IntegrityObject {
private static final byte W = 64;
private static final long LONGS_PER_BUFFER = 128 * 1024 * 1024; // 128*8 =
// 1Gb per
Expand All @@ -62,6 +63,7 @@ public class SequenceLog64Map implements Sequence, Closeable {
private final long numentries;
private long lastword;
private final long numwords;
private final long crc;

public SequenceLog64Map(File f) throws IOException {
// Read from the beginning of the file
Expand Down Expand Up @@ -100,7 +102,7 @@ private SequenceLog64Map(CountInputStream in, File f, boolean closeInput) throws
lastword = BitUtil.readLowerBitsByteAligned(lastWordUsed, in);
// System.out.println("LastWord0: "+Long.toHexString(lastword));
}
IOUtil.skip(in, 4); // CRC
crc = IOUtil.readInt(in) & 0xFFFFFFFFL;

mapFiles(f, base);

Expand All @@ -114,6 +116,8 @@ public SequenceLog64Map(int numbits, long numentries, File f) throws IOException
this.numentries = numentries;
this.numwords = SequenceLog64.numWordsFor(numbits, numentries);

crc = 0;

mapFiles(f, 0);
}

Expand Down Expand Up @@ -287,4 +291,24 @@ public void close() throws IOException {
buffers = null;
}
}

@Override
public void checkIntegrity(ProgressListener listener) throws IOException {
CRC32 crc = new CRC32();

ProgressListener il = ProgressListener.ofNullable(listener);
for (int i = 0; i < buffers.length; i++) {
CloseMappedByteBuffer buffer = buffers[i];
if (buffer == null)
continue;
il.notifyProgress((float) i / buffers.length, "load sequence buffers " + i + "/" + buffers.length);
crc.update(buffer, 0, buffer.capacity());
}

long crcVal = crc.getValue();
if (crcVal != this.crc) {
throw new CRCException(
"Invalid sequence crc: 0x" + Long.toHexString(crcVal) + " != 0x" + Long.toHexString(this.crc));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.the_qa_company.qendpoint.core.dictionary;

import com.the_qa_company.qendpoint.core.listener.ProgressListener;

import java.io.IOException;

public interface WriteDictionarySectionPrivate extends DictionarySectionPrivate {
WriteDictionarySectionPrivateAppender createAppender(long size, ProgressListener listener) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.the_qa_company.qendpoint.core.dictionary;

import com.the_qa_company.qendpoint.core.util.string.ByteString;

import java.io.Closeable;
import java.io.IOException;

public interface WriteDictionarySectionPrivateAppender extends Closeable {
void append(ByteString str) throws IOException;

long getNumberElements();
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.dictionary.TempDictionary;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WritePFCDictionarySection;
import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException;
import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary;
import com.the_qa_company.qendpoint.core.header.Header;
Expand All @@ -24,7 +24,7 @@
import java.nio.file.Path;

/**
* Version of four section dictionary with {@link WriteDictionarySection}
* Version of four section dictionary with {@link WritePFCDictionarySection}
*
* @author Antoine Willerval
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.dictionary.TempDictionary;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WritePFCDictionarySection;
import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException;
import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary;
import com.the_qa_company.qendpoint.core.header.Header;
Expand Down Expand Up @@ -35,7 +35,7 @@
import java.util.TreeMap;

/**
* Version of mutli-section dictionary with {@link WriteDictionarySection}
* Version of mutli-section dictionary with {@link WritePFCDictionarySection}
*
* @author Antoine Willerval
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.dictionary.TempDictionary;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WritePFCDictionarySection;
import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException;
import com.the_qa_company.qendpoint.core.iterator.utils.PeekIteratorImpl;
import com.the_qa_company.qendpoint.core.iterator.utils.PipedCopyIterator;
Expand Down Expand Up @@ -35,7 +35,7 @@
import java.util.TreeMap;

/**
* Version of mutli-section dictionary with {@link WriteDictionarySection}
* Version of mutli-section dictionary with {@link WritePFCDictionarySection}
*
* @author Antoine Willerval
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.dictionary.TempDictionary;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.DictionarySectionFactory;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WriteDictionarySection;
import com.the_qa_company.qendpoint.core.dictionary.impl.section.WritePFCDictionarySection;
import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException;
import com.the_qa_company.qendpoint.core.hdt.HDTVocabulary;
import com.the_qa_company.qendpoint.core.iterator.utils.PeekIteratorImpl;
Expand Down Expand Up @@ -37,7 +37,7 @@
import java.util.TreeMap;

/**
* Version of mutli-section dictionary with {@link WriteDictionarySection}
* Version of mutli-section dictionary with {@link WritePFCDictionarySection}
*
* @author Antoine Willerval
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public TripleID next() {
}
}

private static long firstSubjectTripleId(HDT hdt) {
public static long firstSubjectTripleId(HDT hdt) {
if (hdt.getDictionary().getSubjects().getNumberOfElements() == 0) {
// no subjects
return -1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import com.the_qa_company.qendpoint.core.triples.IteratorTripleID;
import com.the_qa_company.qendpoint.core.triples.TripleID;
import com.the_qa_company.qendpoint.core.triples.Triples;
import com.the_qa_company.qendpoint.core.triples.TriplesFactory;
import com.the_qa_company.qendpoint.core.triples.TriplesPrivate;
import com.the_qa_company.qendpoint.core.triples.impl.BitmapTriples;
import com.the_qa_company.qendpoint.core.triples.impl.OneReadTempTriples;
import com.the_qa_company.qendpoint.core.triples.impl.WriteBitmapTriples;
import com.the_qa_company.qendpoint.core.triples.impl.StreamTriples;
import com.the_qa_company.qendpoint.core.util.Profiler;
import com.the_qa_company.qendpoint.core.util.io.CloseSuppressPath;
import com.the_qa_company.qendpoint.core.util.io.Closer;
Expand Down Expand Up @@ -111,10 +113,13 @@ private static HDT loadOrMapHDT(Object obj, ProgressListener listener, HDTOption

private static TripleComponentOrder getOrder(HDT hdt) {
Triples triples = hdt.getTriples();
if (!(triples instanceof BitmapTriples bt)) {
throw new IllegalArgumentException("HDT Triples can't be BitmapTriples");
if (triples instanceof BitmapTriples bt) {
return bt.getOrder();
}
return bt.getOrder();
if (triples instanceof StreamTriples st) {
return st.getOrder();
}
throw new IllegalArgumentException("Unknown HDT Triples implementation");
}

private final String baseURI;
Expand Down Expand Up @@ -384,14 +389,15 @@ public HDT cat() throws IOException {
// stream
Iterator<TripleID> tripleIterator = GroupBySubjectMapIterator.fromHDTs(merger, hdts, deleteBitmaps);
long quads = quad ? dictionary.getNgraphs() : -1;
try (WriteBitmapTriples triples = new WriteBitmapTriples(hdtFormat, location.resolve("triples"),
try (TriplesPrivate triples = TriplesFactory.createWriteTriples(hdtFormat, location.resolve("triples"),
bufferSize, quads)) {
long count = Arrays.stream(hdts).mapToLong(h -> h.getTriples().getNumberOfElements()).sum();

il.setRange(40, 80);
il.setPrefix("Merge triples: ");
il.notifyProgress(0, "start");
triples.load(new OneReadTempTriples(tripleIterator, order, count, quads), il);
triples.load(new OneReadTempTriples(tripleIterator, order, count, quads, merger.getCountShared()),
il);
profiler.popSection();

WriteHDTImpl writeHDT = new WriteHDTImpl(hdtFormat, location, dictionary, triples,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,10 @@ private boolean add(LocatedIndexedNode node) {
// we can't have more than buffer size because a source HDT
// wouldn't be
// without duplicated or a so/sh conflict
if (used == buffer.length) {
throw new ArrayIndexOutOfBoundsException(
"More than " + used + " nodes for string " + node.getNode());
}
buffer[used++] = node;
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@
import java.nio.file.Path;

import com.the_qa_company.qendpoint.core.dictionary.DictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.exceptions.NotImplementedException;
import com.the_qa_company.qendpoint.core.dictionary.WriteDictionarySectionPrivate;
import com.the_qa_company.qendpoint.core.listener.ProgressListener;
import com.the_qa_company.qendpoint.core.options.HDTOptions;
import com.the_qa_company.qendpoint.core.options.HDTOptionsKeys;
import com.the_qa_company.qendpoint.core.options.HDTSpecification;
import com.the_qa_company.qendpoint.core.util.io.CountInputStream;

/**
Expand All @@ -40,11 +39,12 @@ public class DictionarySectionFactory {
private DictionarySectionFactory() {
}

public static DictionarySectionPrivate createWriteSection(HDTOptions spec, Path filename, int bufferSize) {
public static WriteDictionarySectionPrivate createWriteSection(HDTOptions spec, Path filename, int bufferSize) {
String type = spec.get(HDTOptionsKeys.DISK_WRITE_SECTION_TYPE_KEY,
HDTOptionsKeys.DISK_WRITE_SECTION_TYPE_VALUE_PFC);
return switch (type) {
case HDTOptionsKeys.DISK_WRITE_SECTION_TYPE_VALUE_PFC -> new WriteDictionarySection(spec, filename, bufferSize);
case HDTOptionsKeys.DISK_WRITE_SECTION_TYPE_VALUE_PFC ->
new WritePFCDictionarySection(spec, filename, bufferSize);
case HDTOptionsKeys.DISK_WRITE_SECTION_TYPE_VALUE_STREAM ->
new WriteStreamDictionarySection(spec, filename, bufferSize);
default -> throw new IllegalArgumentException("No write implementation for type " + type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import com.the_qa_company.qendpoint.core.util.io.BigByteBuffer;
import com.the_qa_company.qendpoint.core.util.io.IOUtil;
import com.the_qa_company.qendpoint.core.util.string.ByteString;
import com.the_qa_company.qendpoint.core.util.string.CompactString;
import com.the_qa_company.qendpoint.core.util.string.ReplazableString;

import java.io.Closeable;
Expand All @@ -28,6 +29,7 @@

public class StreamDictionarySection implements DictionarySectionPrivate, Closeable {
public static final int TYPE_INDEX = 0x30;
public static final int STREAM_SECTION_END_COOKIE = 0x48535324;
BigByteBuffer data = BigByteBuffer.allocate(0);
private long numstrings;
private long bufferSize;
Expand Down Expand Up @@ -84,6 +86,7 @@ public void save(OutputStream output, ProgressListener listener) throws IOExcept
out.setCRC(new CRC32());
data.writeStream(out, 0, bufferSize, listener);
out.writeCRC();
IOUtil.writeInt(out, STREAM_SECTION_END_COOKIE);
}

@Override
Expand Down Expand Up @@ -119,6 +122,11 @@ public void load(InputStream input, ProgressListener listener) throws IOExceptio
if (!in.readCRCAndCheck()) {
throw new CRCException("CRC Error while reading Dictionary Section Plain Front Coding Data.");
}

int cookie = IOUtil.readInt(in);
if (cookie != STREAM_SECTION_END_COOKIE) {
throw new IOException("Can't read stream triples end cookie, found 0x" + Integer.toHexString(cookie));
}
}

@Override
Expand Down Expand Up @@ -146,7 +154,7 @@ protected ByteString getNext() {
offset += current.replace2(data, offset, delta);
idx++;

return current;
return new CompactString(current);
}
}

Expand Down
Loading
Loading