Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions lib/trino-parquet/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
<artifactId>aircompressor</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>configuration</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>log</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ public interface ParquetDataSource

Slice readFully(long position, int length);

void readFully(long position, byte[] buffer);

void readFully(long position, byte[] buffer, int bufferOffset, int bufferLength);

<K> ListMultimap<K, ChunkReader> planRead(ListMultimap<K, DiskRange> diskRanges);

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.trino.parquet.cache;

import com.google.common.cache.Cache;
import com.google.common.util.concurrent.UncheckedExecutionException;
import io.trino.parquet.ParquetDataSource;
import io.trino.parquet.ParquetDataSourceId;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

import static com.google.common.base.Throwables.throwIfInstanceOf;
import static java.util.Objects.requireNonNull;

/**
 * A {@link ParquetMetadataSource} that caches the metadata produced by a delegate
 * source, keyed by {@link ParquetDataSourceId}.
 * <p>
 * A cached entry is only returned when its modification time equals the modification
 * time supplied by the caller; otherwise the entry is treated as stale, replaced with
 * freshly read metadata, and the fresh metadata is returned.
 */
public class CachingParquetMetadataSource
        implements ParquetMetadataSource
{
    private Cache<ParquetDataSourceId, ParquetFileMetadata> cache;
    private ParquetMetadataSource delegate;

    /**
     * Creates an instance with neither a cache nor a delegate assigned.
     * Both fields stay {@code null}, so {@link #getParquetMetadata} cannot be used on
     * an instance built this way; prefer the two-argument constructor.
     */
    public CachingParquetMetadataSource()
    {
    }

    /**
     * @param cache cache holding previously read metadata, keyed by data source id; must not be null
     * @param delegate source used to read metadata that is not (validly) cached; must not be null
     * @throws NullPointerException if either argument is null
     */
    public CachingParquetMetadataSource(Cache<ParquetDataSourceId, ParquetFileMetadata> cache, ParquetMetadataSource delegate)
    {
        this.cache = requireNonNull(cache, "cache is null");
        this.delegate = requireNonNull(delegate, "delegate is null");
    }

    /**
     * Returns the metadata for {@code parquetDataSource}, consulting the cache when
     * {@code cacheable} is true and going straight to the delegate otherwise.
     *
     * @param parquetDataSource data source whose metadata is requested; its id is the cache key
     * @param fileSize passed through unchanged to the delegate
     * @param cacheable whether the cache may be read and populated for this request
     * @param modificationTime modification time expected by the caller; a cached entry
     * with a different modification time is considered stale
     * @return cached metadata when a matching entry exists, otherwise metadata read from the delegate
     * @throws IOException if the delegate fails with an {@code IOException}, or if loading
     * through the cache fails for any other reason (the original cause is attached)
     */
    @Override
    public ParquetFileMetadata getParquetMetadata(
            ParquetDataSource parquetDataSource,
            long fileSize,
            boolean cacheable,
            long modificationTime)
            throws IOException
    {
        try {
            if (cacheable) {
                ParquetDataSourceId id = parquetDataSource.getId();
                ParquetFileMetadata cached = cache.get(
                        id,
                        () -> delegate.getParquetMetadata(parquetDataSource, fileSize, cacheable, modificationTime));
                if (cached.getModificationTime() == modificationTime) {
                    return cached;
                }

                // The cached entry was read for a different modification time: drop it and
                // store the freshly read metadata so that the next request for this file
                // is served from the cache instead of reading the metadata a second time.
                cache.invalidate(id);
                ParquetFileMetadata refreshed = delegate.getParquetMetadata(parquetDataSource, fileSize, cacheable, modificationTime);
                cache.put(id, refreshed);
                return refreshed;
            }
            return delegate.getParquetMetadata(parquetDataSource, fileSize, cacheable, modificationTime);
        }
        catch (ExecutionException | UncheckedExecutionException e) {
            // Failures raised inside the cache loader arrive wrapped; surface an
            // IOException cause as-is and wrap anything else.
            throwIfInstanceOf(e.getCause(), IOException.class);
            throw new IOException("Unexpected error in parquet metadata reading after cache miss", e.getCause());
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.cache;

import io.airlift.configuration.Config;
import io.airlift.configuration.ConfigDescription;
import io.airlift.units.DataSize;
import io.airlift.units.Duration;
import io.airlift.units.MinDataSize;
import io.airlift.units.MinDuration;

import static io.airlift.units.DataSize.Unit.BYTE;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * Configuration bean for the parquet metadata cache.
 * <p>
 * Defaults, as initialised below: the cache is disabled, its size is 0 bytes and
 * its time-to-live since last access is 0 seconds.
 */
public class ParquetCacheConfig
{
    // Bound to "parquet.metadata-cache-enabled"
    private boolean metadataCacheEnabled;

    // Bound to "parquet.metadata-cache-size"
    private DataSize metadataCacheSize = DataSize.of(0, BYTE);

    // Bound to "parquet.metadata-cache-ttl-since-last-access"
    private Duration metadataCacheTtlSinceLastAccess = new Duration(0, SECONDS);

    /**
     * @return whether the parquet metadata cache is enabled
     */
    public boolean isMetadataCacheEnabled()
    {
        return metadataCacheEnabled;
    }

    /**
     * @param metadataCacheEnabled whether the parquet metadata cache is enabled
     * @return this config, for chaining
     */
    @Config("parquet.metadata-cache-enabled")
    @ConfigDescription("Enable cache for parquet metadata")
    public ParquetCacheConfig setMetadataCacheEnabled(boolean metadataCacheEnabled)
    {
        this.metadataCacheEnabled = metadataCacheEnabled;
        return this;
    }

    /**
     * @return configured size of the parquet metadata cache (annotated with a minimum of 0B)
     */
    @MinDataSize("0B")
    public DataSize getMetadataCacheSize()
    {
        return metadataCacheSize;
    }

    /**
     * @param metadataCacheSize size of the parquet metadata cache
     * @return this config, for chaining
     */
    @Config("parquet.metadata-cache-size")
    @ConfigDescription("Size of the parquet metadata cache")
    public ParquetCacheConfig setMetadataCacheSize(DataSize metadataCacheSize)
    {
        this.metadataCacheSize = metadataCacheSize;
        return this;
    }

    /**
     * @return configured time-to-live of a cache entry after its last access
     * (annotated with a minimum of 0s)
     */
    @MinDuration("0s")
    public Duration getMetadataCacheTtlSinceLastAccess()
    {
        return metadataCacheTtlSinceLastAccess;
    }

    /**
     * @param metadataCacheTtlSinceLastAccess time-to-live of a cache entry after its last access
     * @return this config, for chaining
     */
    @Config("parquet.metadata-cache-ttl-since-last-access")
    @ConfigDescription("Time-to-live for parquet metadata cache entry after last access")
    public ParquetCacheConfig setMetadataCacheTtlSinceLastAccess(Duration metadataCacheTtlSinceLastAccess)
    {
        this.metadataCacheTtlSinceLastAccess = metadataCacheTtlSinceLastAccess;
        return this;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package io.trino.parquet.cache;

/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.parquet.hadoop.metadata.ParquetMetadata;

import static java.util.Objects.requireNonNull;

/**
 * Immutable holder pairing a file's {@link ParquetMetadata} with the size of that
 * metadata and the modification time it was recorded with.
 */
public class ParquetFileMetadata
{
    private final ParquetMetadata parquetMetadata;
    // NOTE(review): unit is not established in this class (presumably bytes) - confirm
    private final int metadataSize;
    // NOTE(review): unit/epoch is not established in this class - confirm with callers
    private final long modificationTime;

    /**
     * @param parquetMetadata the parquet metadata; must not be null
     * @param metadataSize size of the metadata
     * @param modificationTime modification time associated with the metadata
     * @throws NullPointerException if {@code parquetMetadata} is null
     */
    public ParquetFileMetadata(ParquetMetadata parquetMetadata, int metadataSize, long modificationTime)
    {
        requireNonNull(parquetMetadata, "parquetMetadata is null");
        this.parquetMetadata = parquetMetadata;
        this.metadataSize = metadataSize;
        this.modificationTime = modificationTime;
    }

    /**
     * @return the parquet metadata, never null
     */
    public ParquetMetadata getParquetMetadata()
    {
        return parquetMetadata;
    }

    /**
     * @return the metadata size given at construction
     */
    public int getMetadataSize()
    {
        return metadataSize;
    }

    /**
     * @return the modification time given at construction
     */
    public long getModificationTime()
    {
        return modificationTime;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.parquet.cache;

import io.trino.parquet.ParquetDataSource;

import java.io.IOException;

/**
 * Supplies {@link ParquetFileMetadata} for a {@link ParquetDataSource}.
 */
public interface ParquetMetadataSource
{
    /**
     * Returns the metadata of the given parquet data source.
     *
     * @param parquetDataSource data source whose metadata is requested
     * @param fileSize size of the file (NOTE(review): presumably in bytes - confirm with implementations)
     * @param cacheable whether the result may be served from or stored in a cache;
     * how this is honoured is up to the implementation
     * @param modificationTime modification time of the file as known to the caller
     * @return the metadata for the data source
     * @throws IOException if the metadata cannot be read
     */
    ParquetFileMetadata getParquetMetadata(ParquetDataSource parquetDataSource, long fileSize, boolean cacheable, long modificationTime)
            throws IOException;
}
Loading