Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions datafusion/core/src/datasource/listing_table_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ impl TableProviderFactory for ListingTableFactory {
))?
.create(session_state, &cmd.options)?;

let file_extension = get_extension(cmd.location.as_str());

let (provided_schema, table_partition_cols) = if cmd.schema.fields().is_empty() {
(
None,
Expand Down Expand Up @@ -109,9 +107,16 @@ impl TableProviderFactory for ListingTableFactory {
};

let mut table_path = ListingTableUrl::parse(&cmd.location)?;

let file_extension = match table_path.is_collection() {
// Setting the extension to be empty instead of allowing the default extension seems
// odd, but was done to ensure existing behavior isn't modified. It seems like this
// could be refactored to either use the default extension or set the fully expected
// extension when compression is included (e.g. ".csv.gz")
true => "",
false => &get_extension(cmd.location.as_str()),
};
let options = ListingOptions::new(file_format)
.with_file_extension(&file_extension)
.with_file_extension(file_extension)
.with_session_config_options(session_state.config())
.with_table_partition_cols(table_partition_cols);

Expand Down Expand Up @@ -189,6 +194,8 @@ fn get_extension(path: &str) -> String {
mod tests {
use glob::Pattern;
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;

use super::*;
use crate::{
Expand Down Expand Up @@ -375,4 +382,41 @@ mod tests {
Pattern::new("*.csv").unwrap()
);
}

/// Creating an external table over a directory whose path components contain
/// dots (`odd.v1/odd.v2`) must produce listing options with an empty file
/// extension.
#[tokio::test]
async fn test_odd_directory_names() {
    // Materialize `<tmp>/odd.v1/odd.v2` on disk so the location is a real directory.
    let tmp_dir = tempfile::tempdir().unwrap();
    let nested_dir: PathBuf = tmp_dir.path().join("odd.v1").join("odd.v2");
    fs::create_dir_all(&nested_dir).unwrap();

    let ctx = SessionContext::new();
    let session_state = ctx.state();

    // Schema-less parquet table pointing at the dotted directory.
    let create_table = CreateExternalTable {
        name: TableReference::bare("foo"),
        location: nested_dir.to_str().unwrap().to_owned(),
        file_type: String::from("parquet"),
        schema: Arc::new(DFSchema::empty()),
        table_partition_cols: vec![],
        if_not_exists: false,
        temporary: false,
        definition: None,
        order_exprs: vec![],
        unbounded: false,
        options: HashMap::new(),
        constraints: Constraints::default(),
        column_defaults: HashMap::new(),
    };

    let provider = ListingTableFactory::new()
        .create(&session_state, &create_table)
        .await
        .unwrap();
    let table = provider.as_any().downcast_ref::<ListingTable>().unwrap();

    // The dotted directory names must not be picked up as a file extension.
    assert_eq!("", table.options().file_extension);
}
}
35 changes: 35 additions & 0 deletions datafusion/sqllogictest/test_files/insert_to_external.slt
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,41 @@ select * from directory_test;
1 2
3 4

# Create an external parquet table whose directory location has a dot in its
# final path component (`external_versioned_parquet_table.v0/`).
statement count 0
CREATE EXTERNAL TABLE
directory_with_dots_test(a bigint, b bigint)
STORED AS parquet
LOCATION 'test_files/scratch/insert_to_external/external_versioned_parquet_table.v0/';

# Insert two rows into the dotted directory; the query reports the row count.
query I
INSERT INTO directory_with_dots_test values (1, 2), (3, 4);
----
2

# The inserted rows are readable through the same table.
query II
select * from directory_with_dots_test;
----
1 2
3 4

# Register a second table over the same location without declaring a schema.
statement count 0
CREATE EXTERNAL TABLE
directory_with_dots_readback
STORED AS parquet
LOCATION 'test_files/scratch/insert_to_external/external_versioned_parquet_table.v0/';

# The schema-less table reports columns `a` and `b` as nullable Int64.
query TTT
describe directory_with_dots_readback
----
a Int64 YES
b Int64 YES

# The schema-less table returns the rows written via the first table.
query II
select * from directory_with_dots_readback
----
1 2
3 4

statement ok
CREATE EXTERNAL TABLE
table_without_values(field1 BIGINT NULL, field2 BIGINT NULL)
Expand Down