From e192a746dd8c5b2ebc13b93e98994a9b772e4366 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Fri, 29 Apr 2022 15:16:53 +0200 Subject: [PATCH 1/4] use the externally hosted DelimitedFiles.jl --- .../md5 | 1 + .../sha512 | 1 + stdlib/.gitignore | 2 + stdlib/DelimitedFiles.version | 4 + stdlib/DelimitedFiles/Project.toml | 12 - stdlib/DelimitedFiles/docs/src/index.md | 13 - stdlib/DelimitedFiles/src/DelimitedFiles.jl | 832 ------------------ stdlib/DelimitedFiles/test/runtests.jl | 332 ------- stdlib/Makefile | 2 +- 9 files changed, 9 insertions(+), 1190 deletions(-) create mode 100644 deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 create mode 100644 deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 create mode 100644 stdlib/DelimitedFiles.version delete mode 100644 stdlib/DelimitedFiles/Project.toml delete mode 100644 stdlib/DelimitedFiles/docs/src/index.md delete mode 100644 stdlib/DelimitedFiles/src/DelimitedFiles.jl delete mode 100644 stdlib/DelimitedFiles/test/runtests.jl diff --git a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 new file mode 100644 index 0000000000000..7251ab83e0d5c --- /dev/null +++ b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 @@ -0,0 +1 @@ +38ef69e1e66ead7f99f7776b023fbc77 diff --git a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 new file mode 100644 index 0000000000000..0d81c8969e657 --- /dev/null +++ b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 @@ -0,0 +1 @@ +554f291c7f6c58bd8ef40e3554a1af575929ad034e1a6e3d72327115e93af1e3bd8431996100d9083880929e550a344a9921e396afa39138ad55a38b422548af diff --git a/stdlib/.gitignore b/stdlib/.gitignore index ffbc2f12f52da..038b2d9602b2a 100644 --- a/stdlib/.gitignore +++ b/stdlib/.gitignore @@ -5,6 +5,8 @@ /Statistics /LibCURL-* /LibCURL +/DelimitedFiles-* +/DelimitedFiles /Downloads-* /Downloads /ArgTools-* diff --git a/stdlib/DelimitedFiles.version b/stdlib/DelimitedFiles.version new file mode 100644 index 0000000000000..3055926a220ce --- /dev/null +++ b/stdlib/DelimitedFiles.version @@ -0,0 +1,4 @@ +DELIMITEDFILES_BRANCH = main +DELIMITEDFILES_SHA1 = 79f7865b7f009f2ca5917096276a01b11eeac90d +DELIMITEDFILES_GIT_URL := https://github.com/JuliaData/DelimitedFiles.jl.git +DELIMITEDFILES_TAR_URL = https://api.github.com/repos/JuliaData/DelimitedFiles.jl/tarball/$1 diff --git a/stdlib/DelimitedFiles/Project.toml b/stdlib/DelimitedFiles/Project.toml deleted file mode 100644 index 7b774ec3ba035..0000000000000 --- a/stdlib/DelimitedFiles/Project.toml +++ /dev/null @@ -1,12 +0,0 @@ -name = "DelimitedFiles" -uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" - -[deps] -Mmap = "a63ad114-7e13-5084-954f-fe012c677804" - -[extras] -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" - -[targets] -test = ["Test", "Random"] diff --git a/stdlib/DelimitedFiles/docs/src/index.md b/stdlib/DelimitedFiles/docs/src/index.md deleted file mode 100644 index a0ce8d61e342e..0000000000000 --- a/stdlib/DelimitedFiles/docs/src/index.md +++ /dev/null @@ -1,13 +0,0 @@ -# Delimited Files - -Utilities for reading and writing delimited files, for example ".csv". - -```@docs -DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type, ::AbstractChar) -DelimitedFiles.readdlm(::Any, ::AbstractChar, ::AbstractChar) -DelimitedFiles.readdlm(::Any, ::AbstractChar, ::Type) -DelimitedFiles.readdlm(::Any, ::AbstractChar) -DelimitedFiles.readdlm(::Any, ::Type) -DelimitedFiles.readdlm(::Any) -DelimitedFiles.writedlm -``` diff --git a/stdlib/DelimitedFiles/src/DelimitedFiles.jl b/stdlib/DelimitedFiles/src/DelimitedFiles.jl deleted file mode 100644 index 7c0e3e39b6b86..0000000000000 --- a/stdlib/DelimitedFiles/src/DelimitedFiles.jl +++ /dev/null @@ -1,832 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -""" -Utilities for reading and writing delimited files, for example ".csv". -See [`readdlm`](@ref) and [`writedlm`](@ref). -""" -module DelimitedFiles - -using Mmap - -import Base: tryparse_internal, show - -export readdlm, writedlm - -invalid_dlm(::Type{Char}) = reinterpret(Char, 0xfffffffe) -invalid_dlm(::Type{UInt8}) = 0xfe -invalid_dlm(::Type{UInt16}) = 0xfffe -invalid_dlm(::Type{UInt32}) = 0xfffffffe - -const offs_chunk_size = 5000 - -""" - readdlm(source, T::Type; options...) - -The columns are assumed to be separated by one or more whitespaces. The end of line -delimiter is taken as `\\n`. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = [5; 6; 7; 8]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y]) - end; - -julia> readdlm("delim_file.txt", Int64) -4×2 Matrix{Int64}: - 1 5 - 2 6 - 3 7 - 4 8 - -julia> readdlm("delim_file.txt", Float64) -4×2 Matrix{Float64}: - 1.0 5.0 - 2.0 6.0 - 3.0 7.0 - 4.0 8.0 - -julia> rm("delim_file.txt") -``` -""" -readdlm(input, T::Type; opts...) = readdlm(input, invalid_dlm(Char), T, '\n'; opts...) - -""" - readdlm(source, delim::AbstractChar, T::Type; options...) - -The end of line delimiter is taken as `\\n`. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = [1.1; 2.2; 3.3; 4.4]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y], ',') - end; - -julia> readdlm("delim_file.txt", ',', Float64) -4×2 Matrix{Float64}: - 1.0 1.1 - 2.0 2.2 - 3.0 3.3 - 4.0 4.4 - -julia> rm("delim_file.txt") -``` -""" -readdlm(input, dlm::AbstractChar, T::Type; opts...) = readdlm(input, dlm, T, '\n'; opts...) - -""" - readdlm(source; options...) - -The columns are assumed to be separated by one or more whitespaces. The end of line -delimiter is taken as `\\n`. If all data is numeric, the result will be a numeric array. If -some elements cannot be parsed as numbers, a heterogeneous array of numbers and strings -is returned. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = ["a"; "b"; "c"; "d"]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y]) - end; - -julia> readdlm("delim_file.txt") -4×2 Matrix{Any}: - 1 "a" - 2 "b" - 3 "c" - 4 "d" - -julia> rm("delim_file.txt") -``` -""" -readdlm(input; opts...) = readdlm(input, invalid_dlm(Char), '\n'; opts...) - -""" - readdlm(source, delim::AbstractChar; options...) - -The end of line delimiter is taken as `\\n`. If all data is numeric, the result will be a -numeric array. If some elements cannot be parsed as numbers, a heterogeneous array of -numbers and strings is returned. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = [1.1; 2.2; 3.3; 4.4]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y], ',') - end; - -julia> readdlm("delim_file.txt", ',') -4×2 Matrix{Float64}: - 1.0 1.1 - 2.0 2.2 - 3.0 3.3 - 4.0 4.4 - -julia> z = ["a"; "b"; "c"; "d"]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x z], ',') - end; - -julia> readdlm("delim_file.txt", ',') -4×2 Matrix{Any}: - 1 "a" - 2 "b" - 3 "c" - 4 "d" - -julia> rm("delim_file.txt") -``` -""" -readdlm(input, dlm::AbstractChar; opts...) = readdlm(input, dlm, '\n'; opts...) - -""" - readdlm(source, delim::AbstractChar, eol::AbstractChar; options...) - -If all data is numeric, the result will be a numeric array. If some elements cannot be -parsed as numbers, a heterogeneous array of numbers and strings is returned. -""" -readdlm(input, dlm::AbstractChar, eol::AbstractChar; opts...) = - readdlm_auto(input, dlm, Float64, eol, true; opts...) - -""" - readdlm(source, delim::AbstractChar, T::Type, eol::AbstractChar; header=false, skipstart=0, skipblanks=true, use_mmap, quotes=true, dims, comments=false, comment_char='#') - -Read a matrix from the source where each line (separated by `eol`) gives one row, with -elements separated by the given delimiter. The source can be a text file, stream or byte -array. Memory mapped files can be used by passing the byte array representation of the -mapped segment as source. - -If `T` is a numeric type, the result is an array of that type, with any non-numeric elements -as `NaN` for floating-point types, or zero. Other useful values of `T` include -`String`, `AbstractString`, and `Any`. - -If `header` is `true`, the first row of data will be read as header and the tuple -`(data_cells, header_cells)` is returned instead of only `data_cells`. - -Specifying `skipstart` will ignore the corresponding number of initial lines from the input. - -If `skipblanks` is `true`, blank lines in the input will be ignored. - -If `use_mmap` is `true`, the file specified by `source` is memory mapped for potential -speedups if the file is large. Default is `false`. On a Windows filesystem, `use_mmap` should not be set -to `true` unless the file is only read once and is also not written to. -Some edge cases exist where an OS is Unix-like but the filesystem is Windows-like. - -If `quotes` is `true`, columns enclosed within double-quote (\") characters are allowed to -contain new lines and column delimiters. Double-quote characters within a quoted field must -be escaped with another double-quote. Specifying `dims` as a tuple of the expected rows and -columns (including header, if any) may speed up reading of large files. If `comments` is -`true`, lines beginning with `comment_char` and text following `comment_char` in any line -are ignored. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = [5; 6; 7; 8]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y]) - end - -julia> readdlm("delim_file.txt", '\\t', Int, '\\n') -4×2 Matrix{Int64}: - 1 5 - 2 6 - 3 7 - 4 8 - -julia> rm("delim_file.txt") -``` -""" -readdlm(input, dlm::AbstractChar, T::Type, eol::AbstractChar; opts...) = - readdlm_auto(input, dlm, T, eol, false; opts...) - -readdlm_auto(input::Vector{UInt8}, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool; opts...) = - readdlm_string(String(copyto!(Base.StringVector(length(input)), input)), dlm, T, eol, auto, val_opts(opts)) -readdlm_auto(input::IO, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool; opts...) = - readdlm_string(read(input, String), dlm, T, eol, auto, val_opts(opts)) -function readdlm_auto(input::AbstractString, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool; opts...) - isfile(input) || throw(ArgumentError("Cannot open \'$input\': not a file")) - optsd = val_opts(opts) - use_mmap = get(optsd, :use_mmap, false) - fsz = filesize(input) - if use_mmap && fsz > 0 && fsz < typemax(Int) - a = open(input, "r") do f - mmap(f, Vector{UInt8}, (Int(fsz),)) - end - # TODO: It would be nicer to use String(a) without making a copy, - # but because the mmap'ed array is not NUL-terminated this causes - # jl_try_substrtod to segfault below. - return readdlm_string(GC.@preserve(a, unsafe_string(pointer(a),length(a))), dlm, T, eol, auto, optsd) - else - return readdlm_string(read(input, String), dlm, T, eol, auto, optsd) - end -end - -# -# Handlers act on events generated by the parser. -# Parser calls store_cell on the handler to pass events. -# -# DLMOffsets: Keep offsets (when result dimensions are not known) -# DLMStore: Store values directly into a result store (when result dimensions are known) -abstract type DLMHandler end - -mutable struct DLMOffsets <: DLMHandler - oarr::Vector{Vector{Int}} - offidx::Int - thresh::Int - bufflen::Int - - function DLMOffsets(sbuff::String) - offsets = Vector{Vector{Int}}(undef, 1) - offsets[1] = Vector{Int}(undef, offs_chunk_size) - thresh = ceil(min(typemax(UInt), Base.Sys.total_memory()) / sizeof(Int) / 5) - new(offsets, 1, thresh, sizeof(sbuff)) - end -end - -function store_cell(dlmoffsets::DLMOffsets, row::Int, col::Int, - quoted::Bool, startpos::Int, endpos::Int) - offidx = dlmoffsets.offidx - (offidx == 0) && return # offset collection stopped to avoid choking on memory - - oarr = dlmoffsets.oarr - offsets = oarr[end] - if length(offsets) < offidx - offlen = offs_chunk_size * length(oarr) - if (offlen + offs_chunk_size) > dlmoffsets.thresh - est_tot = round(Int, offlen * dlmoffsets.bufflen / endpos) - if (est_tot - offlen) > offs_chunk_size # allow another chunk - # abandon offset collection - dlmoffsets.oarr = Vector{Int}[] - dlmoffsets.offidx = 0 - return - end - end - offsets = Vector{Int}(undef, offs_chunk_size) - push!(oarr, offsets) - offidx = 1 - end - offsets[offidx] = row - offsets[offidx+1] = col - offsets[offidx+2] = Int(quoted) - offsets[offidx+3] = startpos - offsets[offidx+4] = endpos - dlmoffsets.offidx = offidx + 5 - nothing -end - -function result(dlmoffsets::DLMOffsets) - trimsz = (dlmoffsets.offidx-1) % offs_chunk_size - ((trimsz > 0) || (dlmoffsets.offidx == 1)) && resize!(dlmoffsets.oarr[end], trimsz) - dlmoffsets.oarr -end - -mutable struct DLMStore{T} <: DLMHandler - hdr::Array{AbstractString, 2} - data::Array{T, 2} - - nrows::Int - ncols::Int - lastrow::Int - lastcol::Int - hdr_offset::Int - sbuff::String - auto::Bool - eol::Char -end - -function DLMStore(::Type{T}, dims::NTuple{2,Integer}, - has_header::Bool, sbuff::String, auto::Bool, eol::AbstractChar) where T - (nrows,ncols) = dims - nrows <= 0 && throw(ArgumentError("number of rows in dims must be > 0, got $nrows")) - ncols <= 0 && throw(ArgumentError("number of columns in dims must be > 0, got $ncols")) - hdr_offset = has_header ? 1 : 0 - DLMStore{T}(fill(SubString(sbuff,1,0), 1, ncols), Matrix{T}(undef, nrows-hdr_offset, ncols), - nrows, ncols, 0, 0, hdr_offset, sbuff, auto, Char(eol)) -end - -_chrinstr(sbuff::String, chr::UInt8, startpos::Int, endpos::Int) = - GC.@preserve sbuff (endpos >= startpos) && (C_NULL != ccall(:memchr, Ptr{UInt8}, - (Ptr{UInt8}, Int32, Csize_t), pointer(sbuff)+startpos-1, chr, endpos-startpos+1)) - -function store_cell(dlmstore::DLMStore{T}, row::Int, col::Int, - quoted::Bool, startpos::Int, endpos::Int) where T - drow = row - dlmstore.hdr_offset - - ncols = dlmstore.ncols - lastcol = dlmstore.lastcol - lastrow = dlmstore.lastrow - cells::Matrix{T} = dlmstore.data - sbuff = dlmstore.sbuff - - endpos = prevind(sbuff, nextind(sbuff,endpos)) - if (endpos > 0) && ('\n' == dlmstore.eol) && ('\r' == Char(sbuff[endpos])) - endpos = prevind(sbuff, endpos) - end - if quoted - startpos += 1 - endpos = prevind(sbuff, endpos) - end - - if drow > 0 - # fill missing elements - while ((drow - lastrow) > 1) || ((drow > lastrow > 0) && (lastcol < ncols)) - if (lastcol == ncols) || (lastrow == 0) - lastcol = 0 - lastrow += 1 - end - for cidx in (lastcol+1):ncols - if (T <: AbstractString) || (T == Any) - cells[lastrow, cidx] = SubString(sbuff, 1, 0) - elseif ((T <: Number) || (T <: AbstractChar)) && dlmstore.auto - throw(TypeError(:store_cell, "", Any, T)) - else - error("missing value at row $lastrow column $cidx") - end - end - lastcol = ncols - end - - # fill data - if quoted && _chrinstr(sbuff, UInt8('"'), startpos, endpos) - unescaped = replace(SubString(sbuff, startpos, endpos), r"\"\"" => "\"") - fail = colval(unescaped, 1, lastindex(unescaped), cells, drow, col) - else - fail = colval(sbuff, startpos, endpos, cells, drow, col) - end - if fail - sval = SubString(sbuff, startpos, endpos) - if (T <: Number) && dlmstore.auto - throw(TypeError(:store_cell, "", Any, T)) - else - error("file entry \"$(sval)\" cannot be converted to $T") - end - end - - dlmstore.lastrow = drow - dlmstore.lastcol = col - else - # fill header - if quoted && _chrinstr(sbuff, UInt8('"'), startpos, endpos) - unescaped = replace(SubString(sbuff, startpos, endpos), r"\"\"" => "\"") - colval(unescaped, 1, lastindex(unescaped), dlmstore.hdr, 1, col) - else - colval(sbuff, startpos, endpos, dlmstore.hdr, 1, col) - end - end - - nothing -end - -function result(dlmstore::DLMStore{T}) where T - nrows = dlmstore.nrows - dlmstore.hdr_offset - ncols = dlmstore.ncols - lastcol = dlmstore.lastcol - lastrow = dlmstore.lastrow - cells = dlmstore.data - sbuff = dlmstore.sbuff - - if (nrows > 0) && ((lastcol < ncols) || (lastrow < nrows)) - while lastrow <= nrows - (lastcol == ncols) && (lastcol = 0; lastrow += 1) - for cidx in (lastcol+1):ncols - if (T <: AbstractString) || (T == Any) - cells[lastrow, cidx] = SubString(sbuff, 1, 0) - elseif ((T <: Number) || (T <: AbstractChar)) && dlmstore.auto - throw(TypeError(:store_cell, "", Any, T)) - else - error("missing value at row $lastrow column $cidx") - end - end - lastcol = ncols - (lastrow == nrows) && break - end - dlmstore.lastrow = lastrow - dlmstore.lastcol = ncols - end - (dlmstore.hdr_offset > 0) ? (dlmstore.data, dlmstore.hdr) : dlmstore.data -end - - -function readdlm_string(sbuff::String, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool, optsd::Dict) - ign_empty = (dlm == invalid_dlm(Char)) - quotes = get(optsd, :quotes, true) - comments = get(optsd, :comments, false) - comment_char = get(optsd, :comment_char, '#') - dims = get(optsd, :dims, nothing) - - has_header = get(optsd, :header, get(optsd, :has_header, false)) - haskey(optsd, :has_header) && (optsd[:has_header] != has_header) && throw(ArgumentError("conflicting values for header and has_header")) - - skipstart = get(optsd, :skipstart, 0) - (skipstart >= 0) || throw(ArgumentError("skipstart must be ≥ 0, got $skipstart")) - - skipblanks = get(optsd, :skipblanks, true) - - offset_handler = (dims === nothing) ? DLMOffsets(sbuff) : DLMStore(T, dims, has_header, sbuff, auto, eol) - - for retry in 1:2 - try - dims = dlm_parse(sbuff, eol, dlm, '"', comment_char, ign_empty, quotes, comments, skipstart, skipblanks, offset_handler) - break - catch ex - if isa(ex, TypeError) && (ex.func === :store_cell) - T = ex.expected - else - rethrow() - end - offset_handler = (dims === nothing) ? DLMOffsets(sbuff) : DLMStore(T, dims, has_header, sbuff, auto, eol) - end - end - - isa(offset_handler, DLMStore) && (return result(offset_handler)) - - offsets = result(offset_handler) - !isempty(offsets) && (return dlm_fill(T, offsets, dims, has_header, sbuff, auto, eol)) - - optsd[:dims] = dims - return readdlm_string(sbuff, dlm, T, eol, auto, optsd) -end - -const valid_opts = [:header, :has_header, :use_mmap, :quotes, :comments, :dims, :comment_char, :skipstart, :skipblanks] -const valid_opt_types = [Bool, Bool, Bool, Bool, Bool, NTuple{2,Integer}, Char, Integer, Bool] - -function val_opts(opts) - d = Dict{Symbol, Union{Bool, NTuple{2, Integer}, Char, Integer}}() - for (opt_name, opt_val) in opts - in(opt_name, valid_opts) || - throw(ArgumentError("unknown option $opt_name")) - opt_typ = valid_opt_types[findfirst(isequal(opt_name), valid_opts)::Int] - isa(opt_val, opt_typ) || - throw(ArgumentError("$opt_name should be of type $opt_typ, got $(typeof(opt_val))")) - d[opt_name] = opt_val - end - return d -end - -function dlm_fill(T::DataType, offarr::Vector{Vector{Int}}, dims::NTuple{2,Integer}, has_header::Bool, sbuff::String, auto::Bool, eol::AbstractChar) - idx = 1 - offidx = 1 - offsets = offarr[1] - row = 0 - col = 0 - try - dh = DLMStore(T, dims, has_header, sbuff, auto, eol) - while idx <= length(offsets) - row = offsets[idx] - col = offsets[idx+1] - quoted = offsets[idx+2] != 0 - startpos = offsets[idx+3] - endpos = offsets[idx+4] - - ((idx += 5) > offs_chunk_size) && (offidx < length(offarr)) && (idx = 1; offsets = offarr[offidx += 1]) - - store_cell(dh, row, col, quoted, startpos, endpos) - end - return result(dh) - catch ex - isa(ex, TypeError) && (ex.func === :store_cell) && (return dlm_fill(ex.expected, offarr, dims, has_header, sbuff, auto, eol)) - error("at row $row, column $col : $ex") - end -end - -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{Bool,2}, row::Int, col::Int) - n = tryparse_internal(Bool, sbuff, startpos, endpos, 0, false) - n === nothing || (cells[row, col] = n) - n === nothing -end -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{T,2}, row::Int, col::Int) where T<:Integer - n = tryparse_internal(T, sbuff, startpos, endpos, 0, false) - n === nothing || (cells[row, col] = n) - n === nothing -end -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{T,2}, row::Int, col::Int) where T<:Union{Real,Complex} - n = tryparse_internal(T, sbuff, startpos, endpos, false) - n === nothing || (cells[row, col] = n) - n === nothing -end -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{<:AbstractString,2}, row::Int, col::Int) - cells[row, col] = SubString(sbuff, startpos, endpos) - return false -end -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{Any,2}, row::Int, col::Int) - # if array is of Any type, attempt parsing only the most common types: Int, Bool, Float64 and fallback to SubString - len = endpos-startpos+1 - if len > 0 - # check Inteter - ni64 = tryparse_internal(Int, sbuff, startpos, endpos, 0, false) - ni64 === nothing || (cells[row, col] = ni64; return false) - - # check Bool - nb = tryparse_internal(Bool, sbuff, startpos, endpos, 0, false) - nb === nothing || (cells[row, col] = nb; return false) - - # check float64 - hasvalue, valf64 = ccall(:jl_try_substrtod, Tuple{Bool, Float64}, - (Ptr{UInt8}, Csize_t, Csize_t), sbuff, startpos-1, endpos-startpos+1) - hasvalue && (cells[row, col] = valf64; return false) - end - cells[row, col] = SubString(sbuff, startpos, endpos) - false -end -function colval(sbuff::String, startpos::Int, endpos::Int, cells::Array{<:AbstractChar,2}, row::Int, col::Int) - if startpos == endpos - cells[row, col] = iterate(sbuff, startpos)[1] - return false - else - return true - end -end -colval(sbuff::String, startpos::Int, endpos::Int, cells::Array, row::Int, col::Int) = true - -function dlm_parse(dbuff::String, eol::D, dlm::D, qchar::D, cchar::D, - ign_adj_dlm::Bool, allow_quote::Bool, allow_comments::Bool, - skipstart::Int, skipblanks::Bool, dh::DLMHandler) where D - ncols = nrows = col = 0 - is_default_dlm = (dlm == invalid_dlm(D)) - error_str = "" - # 0: begin field, 1: quoted field, 2: unquoted field, - # 3: second quote (could either be end of field or escape character), - # 4: comment, 5: skipstart - state = (skipstart > 0) ? 5 : 0 - is_eol = is_dlm = is_cr = is_quote = is_comment = expct_col = false - idx = 1 - try - slen = sizeof(dbuff) - col_start_idx = 1 - was_cr = false - while idx <= slen - val,idx = iterate(dbuff, idx) - if (is_eol = (Char(val) == Char(eol))) - is_dlm = is_comment = is_cr = is_quote = false - elseif (is_dlm = (is_default_dlm ? isspace(Char(val)) : (Char(val) == Char(dlm)))) - is_comment = is_cr = is_quote = false - elseif (is_quote = (Char(val) == Char(qchar))) - is_comment = is_cr = false - elseif (is_comment = (Char(val) == Char(cchar))) - is_cr = false - else - is_cr = (Char(eol) == '\n') && (Char(val) == '\r') - end - - if 2 == state # unquoted field - if is_dlm - state = 0 - col += 1 - store_cell(dh, nrows+1, col, false, col_start_idx, idx-2) - col_start_idx = idx - !ign_adj_dlm && (expct_col = true) - elseif is_eol - nrows += 1 - col += 1 - store_cell(dh, nrows, col, false, col_start_idx, idx - (was_cr ? 3 : 2)) - col_start_idx = idx - ncols = max(ncols, col) - col = 0 - state = 0 - elseif (is_comment && allow_comments) - nrows += 1 - col += 1 - store_cell(dh, nrows, col, false, col_start_idx, idx - 2) - ncols = max(ncols, col) - col = 0 - state = 4 - end - elseif 1 == state # quoted field - is_quote && (state = 3) - elseif 4 == state # comment line - if is_eol - col_start_idx = idx - state = 0 - end - elseif 0 == state # begin field - if is_quote - state = (allow_quote && !was_cr) ? 1 : 2 - expct_col = false - elseif is_dlm - if !ign_adj_dlm - expct_col = true - col += 1 - store_cell(dh, nrows+1, col, false, col_start_idx, idx-2) - end - col_start_idx = idx - elseif is_eol - if (col > 0) || !skipblanks - nrows += 1 - if expct_col - col += 1 - store_cell(dh, nrows, col, false, col_start_idx, idx - (was_cr ? 3 : 2)) - end - ncols = max(ncols, col) - col = 0 - end - col_start_idx = idx - expct_col = false - elseif is_comment && allow_comments - if col > 0 - nrows += 1 - if expct_col - col += 1 - store_cell(dh, nrows, col, false, col_start_idx, idx - 2) - end - ncols = max(ncols, col) - col = 0 - end - expct_col = false - state = 4 - elseif !is_cr - state = 2 - expct_col = false - end - elseif 3 == state # second quote - if is_quote && !was_cr - state = 1 - elseif is_dlm && !was_cr - state = 0 - col += 1 - store_cell(dh, nrows+1, col, true, col_start_idx, idx-2) - col_start_idx = idx - !ign_adj_dlm && (expct_col = true) - elseif is_eol - nrows += 1 - col += 1 - store_cell(dh, nrows, col, true, col_start_idx, idx - (was_cr ? 3 : 2)) - col_start_idx = idx - ncols = max(ncols, col) - col = 0 - state = 0 - elseif is_comment && allow_comments && !was_cr - nrows += 1 - col += 1 - store_cell(dh, nrows, col, true, col_start_idx, idx - 2) - ncols = max(ncols, col) - col = 0 - state = 4 - elseif (is_cr && was_cr) || !is_cr - error_str = escape_string("unexpected character '$(Char(val))' after quoted field at row $(nrows+1) column $(col+1)") - break - end - elseif 5 == state # skip start - if is_eol - col_start_idx = idx - skipstart -= 1 - (0 == skipstart) && (state = 0) - end - end - was_cr = is_cr - end - - if isempty(error_str) - if 1 == state # quoted field - error_str = "truncated column at row $(nrows+1) column $(col+1)" - elseif (2 == state) || (3 == state) || ((0 == state) && is_dlm) # unquoted field, second quote, or begin field with last character as delimiter - col += 1 - nrows += 1 - store_cell(dh, nrows, col, (3 == state), col_start_idx, idx-1) - ncols = max(ncols, col) - end - end - catch ex - if isa(ex, TypeError) && (ex.func === :store_cell) - rethrow() - else - error("at row $(nrows+1), column $col : $ex)") - end - end - !isempty(error_str) && error(error_str) - - return (nrows, ncols) -end - -# todo: keyword argument for # of digits to print -writedlm_cell(io::IO, elt::AbstractFloat, dlm, quotes) = print(io, elt) -function writedlm_cell(io::IO, elt::AbstractString, dlm::T, quotes::Bool) where T - if quotes && !isempty(elt) && (('"' in elt) || ('\n' in elt) || ((T <: AbstractChar) ? (dlm in elt) : occursin(dlm, elt))) - print(io, '"', replace(elt, r"\"" => "\"\""), '"') - else - print(io, elt) - end -end -writedlm_cell(io::IO, elt, dlm, quotes) = print(io, elt) -function writedlm(io::IO, a::AbstractMatrix, dlm; opts...) - optsd = val_opts(opts) - quotes = get(optsd, :quotes, true) - pb = PipeBuffer() - lastc = last(axes(a, 2)) - for i = axes(a, 1) - for j = axes(a, 2) - writedlm_cell(pb, a[i, j], dlm, quotes) - j == lastc ? print(pb,'\n') : print(pb,dlm) - end - (bytesavailable(pb) > (16*1024)) && write(io, take!(pb)) - end - write(io, take!(pb)) - nothing -end - -writedlm(io::IO, a::AbstractArray{<:Any,0}, dlm; opts...) = writedlm(io, reshape(a,1), dlm; opts...) - -# write an iterable row as dlm-separated items -function writedlm_row(io::IO, row, dlm, quotes) - y = iterate(row) - while y !== nothing - (x, state) = y - y = iterate(row, state) - writedlm_cell(io, x, dlm, quotes) - y === nothing ? print(io,'\n') : print(io,dlm) - end -end - -# If the row is a single string, write it as a string rather than -# iterating over characters. Also, include the common case of -# a Number (handled correctly by the generic writedlm_row above) -# purely as an optimization. -function writedlm_row(io::IO, row::Union{Number,AbstractString}, dlm, quotes) - writedlm_cell(io, row, dlm, quotes) - print(io, '\n') -end - -# write an iterable collection of iterable rows -function writedlm(io::IO, itr, dlm; opts...) - optsd = val_opts(opts) - quotes = get(optsd, :quotes, true) - pb = PipeBuffer() - for row in itr - writedlm_row(pb, row, dlm, quotes) - (bytesavailable(pb) > (16*1024)) && write(io, take!(pb)) - end - write(io, take!(pb)) - nothing -end - -function writedlm(fname::AbstractString, a, dlm; opts...) - open(fname, "w") do io - writedlm(io, a, dlm; opts...) - end -end - -""" - writedlm(f, A, delim='\\t'; opts) - -Write `A` (a vector, matrix, or an iterable collection of iterable rows) as text to `f` -(either a filename string or an `IO` stream) using the given delimiter -`delim` (which defaults to tab, but can be any printable Julia object, typically a `Char` or -`AbstractString`). - -For example, two vectors `x` and `y` of the same length can be written as two columns of -tab-delimited text to `f` by either `writedlm(f, [x y])` or by `writedlm(f, zip(x, y))`. - -# Examples -```jldoctest -julia> using DelimitedFiles - -julia> x = [1; 2; 3; 4]; - -julia> y = [5; 6; 7; 8]; - -julia> open("delim_file.txt", "w") do io - writedlm(io, [x y]) - end - -julia> readdlm("delim_file.txt", '\\t', Int, '\\n') -4×2 Matrix{Int64}: - 1 5 - 2 6 - 3 7 - 4 8 - -julia> rm("delim_file.txt") -``` -""" -writedlm(io, a; opts...) = writedlm(io, a, '\t'; opts...) - -show(io::IO, ::MIME"text/csv", a) = writedlm(io, a, ',') -show(io::IO, ::MIME"text/tab-separated-values", a) = writedlm(io, a, '\t') - -end # module DelimitedFiles diff --git a/stdlib/DelimitedFiles/test/runtests.jl b/stdlib/DelimitedFiles/test/runtests.jl deleted file mode 100644 index 3bb8381354c55..0000000000000 --- a/stdlib/DelimitedFiles/test/runtests.jl +++ /dev/null @@ -1,332 +0,0 @@ -# This file is a part of Julia. License is MIT: https://julialang.org/license - -using Test, Random -using DelimitedFiles - -isequaldlm(m1, m2, t) = isequal(m1, m2) && (eltype(m1) == eltype(m2) == t) - -@testset "readdlm" begin - @test isequaldlm(readdlm(IOBuffer("1\t2\n3\t4\n5\t6\n")), [1. 2; 3 4; 5 6], Float64) - @test isequaldlm(readdlm(IOBuffer("1\t2\n3\t4\n5\t6\n"), Int), [1 2; 3 4; 5 6], Int) - @test isequaldlm(readdlm(IOBuffer("1,22222222222222222222222222222222222222,0x3,10e6\n2000.1,true,false,-10.34"), ',', Any), - reshape(Any[1,2000.1,Float64(22222222222222222222222222222222222222),true,0x3,false,10e6,-10.34], 2, 4), Any) - - @test isequaldlm(readdlm(IOBuffer("-9223355253176920979,9223355253176920979"), ',', Int64), Int64[-9223355253176920979 9223355253176920979], Int64) - - @test size(readdlm(IOBuffer("1,2,3,4"), ',')) == (1,4) - @test size(readdlm(IOBuffer("1,2,3,"), ',')) == (1,4) - @test size(readdlm(IOBuffer("1,2,3,4\n"), ',')) == (1,4) - @test size(readdlm(IOBuffer("1,2,3,\n"), ',')) == (1,4) - @test size(readdlm(IOBuffer("1,2,3,4\n1,2,3,4"), ',')) == (2,4) - @test size(readdlm(IOBuffer("1,2,3,4\n1,2,3,"), ',')) == (2,4) - @test size(readdlm(IOBuffer("1,2,3,4\n1,2,3"), ',')) == (2,4) - - @test size(readdlm(IOBuffer("1,2,3,4\r\n"), ',')) == (1,4) - @test size(readdlm(IOBuffer("1,2,3,4\r\n1,2,3\r\n"), ',')) == (2,4) - @test size(readdlm(IOBuffer("1,2,3,4\r\n1,2,3,4\r\n"), ',')) == (2,4) - @test size(readdlm(IOBuffer("1,2,3,\"4\"\r\n1,2,3,4\r\n"), ',')) == (2,4) - - @test size(readdlm(IOBuffer("1 2 3 4\n1 2 3"))) == (2,4) - @test size(readdlm(IOBuffer("1\t2 3 4\n1 2 3"))) == (2,4) - @test size(readdlm(IOBuffer("1\t 2 3 4\n1 2 3"))) == (2,4) - @test size(readdlm(IOBuffer("1\t 2 3 4\n1 2 3\n"))) == (2,4) - @test size(readdlm(IOBuffer("1,,2,3,4\n1,2,3\n"), ',')) == (2,5) - - let result1 = reshape(Any["", "", "", "", "", "", 1.0, 1.0, "", "", "", "", "", 1.0, 2.0, "", 3.0, "", "", "", "", "", 4.0, "", "", ""], 2, 13), - result2 = reshape(Any[1.0, 1.0, 2.0, 1.0, 3.0, "", 4.0, ""], 2, 4) - - @test isequaldlm(readdlm(IOBuffer(",,,1,,,,2,3,,,4,\n,,,1,,,1\n"), ','), result1, Any) - @test isequaldlm(readdlm(IOBuffer(" 1 2 3 4 \n 1 1\n")), result2, Any) - @test isequaldlm(readdlm(IOBuffer(" 1 2 3 4 \n 1 1\n"), ' '), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1 2\n3 4 \n")), [[1.0, 3.0] [2.0, 4.0]], Float64) - end - - let result1 = reshape(Any["", "", "", "", "", "", "भारत", 1.0, "", "", "", "", "", 1.0, 2.0, "", 3.0, "", "", "", "", "", 4.0, "", "", ""], 2, 13) - @test isequaldlm(readdlm(IOBuffer(",,,भारत,,,,2,3,,,4,\n,,,1,,,1\n"), ',') , result1, Any) - end - - let result1 = reshape(Any[1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, ""], 2, 4) - @test isequaldlm(readdlm(IOBuffer("1\t 2 3 4\n1 2 3")), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1\t 2 3 4\n1 2 3 ")), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1\t 2 3 4\n1 2 3\n")), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1,2,3,4\n1,2,3\n"), ','), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1,2,3,4\n1,2,3"), ','), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1,2,3,4\r\n1,2,3\r\n"), ','), result1, Any) - @test isequaldlm(readdlm(IOBuffer("1,2,3,\"4\"\r\n1,2,3\r\n"), ','), result1, Any) - end - - let result1 = reshape(Any["abc", "hello", "def,ghi", " \"quote\" ", "new\nline", "world"], 2, 3), - result2 = reshape(Any["abc", "line\"", "\"hello\"", "\"def", "", "\" \"\"quote\"\" \"", "ghi\"", "", "world", "\"new", "", ""], 3, 4) - - @test isequaldlm(readdlm(IOBuffer("abc,\"def,ghi\",\"new\nline\"\n\"hello\",\" \"\"quote\"\" \",world"), ','), result1, Any) - @test isequaldlm(readdlm(IOBuffer("abc,\"def,ghi\",\"new\nline\"\n\"hello\",\" \"\"quote\"\" \",world"), ',', quotes=false), result2, Any) - end - - let result1 = reshape(Any["t", "c", "", "c"], 2, 2), - result2 = reshape(Any["t", "\"c", "t", "c"], 2, 2) - @test isequaldlm(readdlm(IOBuffer("t \n\"c\" c")), result1, Any) - @test isequaldlm(readdlm(IOBuffer("t t \n\"\"\"c\" c")), result2, Any) - end - - @test isequaldlm(readdlm(IOBuffer("\n1,2,3\n4,5,6\n\n\n"), ',', skipblanks=false), - reshape(Any["",1.0,4.0,"","","",2.0,5.0,"","","",3.0,6.0,"",""], 5, 3), Any) - @test isequaldlm(readdlm(IOBuffer("\n1,2,3\n4,5,6\n\n\n"), ',', skipblanks=true), reshape([1.0,4.0,2.0,5.0,3.0,6.0], 2, 3), Float64) - @test isequaldlm(readdlm(IOBuffer("1,2\n\n4,5"), ',', skipblanks=false), reshape(Any[1.0,"",4.0,2.0,"",5.0], 3, 2), Any) - @test isequaldlm(readdlm(IOBuffer("1,2\n\n4,5"), ',', skipblanks=true), reshape([1.0,4.0,2.0,5.0], 2, 2), Float64) - - let x = bitrand(5, 10), io = IOBuffer() - writedlm(io, x) - seek(io, 0) - @test readdlm(io, Bool) == x - end - - let x = [1,2,3], y = [4,5,6], io = IOBuffer() - writedlm(io, zip(x,y), ", ") - seek(io, 0) - @test readdlm(io, ',') == [x y] - end - - let x = [0.1 0.3 0.5], io = IOBuffer() - writedlm(io, x, ", ") - seek(io, 0) - @test read(io, String) == "0.1, 0.3, 0.5\n" - end - - let x = [0.1 0.3 0.5], io = IOBuffer() - writedlm(io, x, ", ") - seek(io, 0) - @test readdlm(io, ',') == [0.1 0.3 0.5] - end - - let x = ["abc", "def\"ghi", "jk\nl"], y = [1, ",", "\"quoted\""], io = IOBuffer() - writedlm(io, zip(x,y), ',') - seek(io, 0) - @test readdlm(io, ',') == [x y] - end - - let x = ["a" "b"; "d" ""], io = IOBuffer() - writedlm(io, x) - seek(io, 0) - @test readdlm(io) == x - end - - let x = ["\"hello\"", "world\""], io = IOBuffer() - writedlm(io, x, quotes=false) - @test String(take!(io)) == "\"hello\"\nworld\"\n" - - writedlm(io, x) - @test String(take!(io)) == "\"\"\"hello\"\"\"\n\"world\"\"\"\n" - end -end - -@testset "comments" begin - @test isequaldlm(readdlm(IOBuffer("#this is comment\n1,2,3\n#one more comment\n4,5,6"), ',', comments=true), [1. 2. 3.;4. 5. 6.], Float64) - @test isequaldlm(readdlm(IOBuffer("#this is \n#comment\n1,2,3\n#one more \n#comment\n4,5,6"), ',', comments=true), [1. 2. 3.;4. 5. 6.], Float64) - @test isequaldlm(readdlm(IOBuffer("1,2,#3\n4,5,6"), ',', comments=true), [1. 2. "";4. 5. 6.], Any) - @test isequaldlm(readdlm(IOBuffer("1#,2,3\n4,5,6"), ',', comments=true), [1. "" "";4. 5. 6.], Any) - @test isequaldlm(readdlm(IOBuffer("1,2,\"#3\"\n4,5,6"), ',', comments=true), [1. 2. "#3";4. 5. 6.], Any) - @test isequaldlm(readdlm(IOBuffer("1,2,3\n #with leading whitespace\n4,5,6"), ',', comments=true), [1. 2. 3.;" " "" "";4. 5. 6.], Any) -end - -@testset "without comments" begin - @test isequaldlm(readdlm(IOBuffer("1,2,#3\n4,5,6"), ','), [1. 2. "#3";4. 5. 6.], Any) - @test isequaldlm(readdlm(IOBuffer("1#,2,3\n4,5,6"), ','), ["1#" 2. 3.;4. 5. 6.], Any) - @test isequaldlm(readdlm(IOBuffer("1,2,\"#3\"\n4,5,6"), ','), [1. 2. "#3";4. 5. 6.], Any) -end - -@testset "skipstart" begin - x = ["a" "b" "c"; "d" "e" "f"; "g" "h" "i"; "A" "B" "C"; 1 2 3; 4 5 6; 7 8 9] - io = IOBuffer() - - writedlm(io, x, quotes=false) - seek(io, 0) - (data, hdr) = readdlm(io, header=true, skipstart=3) - @test data == [1 2 3; 4 5 6; 7 8 9] - @test hdr == ["A" "B" "C"] - - x = ["a" "b" "\nc"; "d" "\ne" "f"; "g" "h" "i\n"; "A" "B" "C"; 1 2 3; 4 5 6; 7 8 9] - io = IOBuffer() - - writedlm(io, x, quotes=true) - seek(io, 0) - (data, hdr) = readdlm(io, header=true, skipstart=6) - @test data == [1 2 3; 4 5 6; 7 8 9] - @test hdr == ["A" "B" "C"] - - io = IOBuffer() - writedlm(io, x, quotes=false) - seek(io, 0) - (data, hdr) = readdlm(io, header=true, skipstart=6) - @test data == [1 2 3; 4 5 6; 7 8 9] - @test hdr == ["A" "B" "C"] -end - -@testset "i18n" begin - # source: http://www.i18nguy.com/unicode/unicode-example-utf8.zip - let i18n_data = ["Origin (English)", "Name (English)", "Origin (Native)", "Name (Native)", - "Australia", "Nicole Kidman", "Australia", "Nicole Kidman", - "Austria", "Johann Strauss", "Österreich", "Johann Strauß", - "Belgium (Flemish)", "Rene Magritte", "België", "René Magritte", - "Belgium (French)", "Rene Magritte", "Belgique", "René Magritte", - "Belgium (German)", "Rene Magritte", "Belgien", "René Magritte", - "Bhutan", "Gonpo Dorji", "འབྲུག་ཡུལ།", "མགོན་པོ་རྡོ་རྗེ།", - "Canada", "Celine Dion", "Canada", "Céline Dion", - "Canada - Nunavut (Inuktitut)", "Susan Aglukark", "ᓄᓇᕗᒻᒥᐅᑦ", "ᓱᓴᓐ ᐊᒡᓗᒃᑲᖅ", - "Democratic People's Rep. of Korea", "LEE Sol-Hee", "조선 민주주의 인민 공화국", "이설희", - "Denmark", "Soren Hauch-Fausboll", "Danmark", "Søren Hauch-Fausbøll", - "Denmark", "Soren Kierkegaard", "Danmark", "Søren Kierkegård", - "Egypt", "Abdel Halim Hafez", "ﻣﺼﺮ", "ﻋﺑﺪﺍﻠﺣﻟﻳﻢ ﺤﺎﻓﻅ", - "Egypt", "Om Kolthoum", "ﻣﺼﺮ", "ﺃﻡ ﻛﻟﺛﻭﻡ", - "Eritrea", "Berhane Zeray", "ብርሃነ ዘርኣይ", "ኤርትራ", - "Ethiopia", "Haile Gebreselassie", "ኃይሌ ገብረሥላሴ", "ኢትዮጵያ", - "France", "Gerard Depardieu", "France", "Gérard Depardieu", - "France", "Jean Reno", "France", "Jean Réno", - "France", "Camille Saint-Saens", "France", "Camille Saint-Saëns", - "France", "Mylene Demongeot", "France", "Mylène Demongeot", - "France", "Francois Truffaut", "France", "François Truffaut", - "France (Braille)", "Louis Braille", "⠋⠗⠁⠝⠉⠑", "⠇⠕⠥⠊⠎⠀
⠃⠗⠁⠊⠇⠇⠑", - "Georgia", "Eduard Shevardnadze", "საქართველო", "ედუარდ შევარდნაძე", - "Germany", "Rudi Voeller", "Deutschland", "Rudi Völler", - "Germany", "Walter Schultheiss", "Deutschland", "Walter Schultheiß", - "Greece", "Giorgos Dalaras", "Ελλάς", "Γιώργος Νταλάρας", - "Iceland", "Bjork Gudmundsdottir", "Ísland", "Björk Guðmundsdóttir", - "India (Hindi)", "Madhuri Dixit", "भारत", "माधुरी दिछित", - "Ireland", "Sinead O'Connor", "Éire", "Sinéad O'Connor", - "Israel", "Yehoram Gaon", "ישראל", "יהורם גאון", - "Italy", "Fabrizio DeAndre", "Italia", "Fabrizio De André", - "Japan", "KUBOTA Toshinobu", "日本", "久保田 利伸", - "Japan", "HAYASHIBARA Megumi", "日本", "林原 めぐみ", - "Japan", "Mori Ogai", "日本", "森鷗外", - "Japan", "Tex Texin", "日本", "テクス テクサン", - "Norway", "Tor Age Bringsvaerd", "Noreg", "Tor Åge Bringsværd", - "Pakistan (Urdu)", "Nusrat Fatah Ali Khan", "پاکستان", "نصرت فتح علی خان", - "People's Rep. of China", "ZHANG Ziyi", "中国", "章子怡", - "People's Rep. of China", "WONG Faye", "中国", "王菲", - "Poland", "Lech Walesa", "Polska", "Lech Wałęsa", - "Puerto Rico", "Olga Tanon", "Puerto Rico", "Olga Tañón", - "Rep. of China", "Hsu Chi", "臺灣", "舒淇", - "Rep. of China", "Ang Lee", "臺灣", "李安", - "Rep. of Korea", "AHN Sung-Gi", "대한민국", "안성기", - "Rep. of Korea", "SHIM Eun-Ha", "대한민국", "심은하", - "Russia", "Mikhail Gorbachev", "Россия", "Михаил Горбачёв", - "Russia", "Boris Grebenshchikov", "Россия", "Борис Гребенщиков", - "Slovenia", "\"Frane \"\"Jezek\"\" Milcinski", "Slovenija", "Frane Milčinski - Ježek", - "Syracuse (Sicily)", "Archimedes", "Συρακούσα", "Ἀρχιμήδης", - "Thailand", "Thongchai McIntai", "ประเทศไทย", "ธงไชย แม็คอินไตย์", - "U.S.A.", "Brad Pitt", "U.S.A.", "Brad Pitt", - "Yugoslavia (Cyrillic)", "Djordje Balasevic", "Југославија", "Ђорђе Балашевић", - "Yugoslavia (Latin)", "Djordje Balasevic", "Jugoslavija", "Đorđe Balašević"] - - i18n_arr = permutedims(reshape(i18n_data, 4, Int(floor(length(i18n_data)/4))), [2, 1]) - i18n_buff = PipeBuffer() - writedlm(i18n_buff, i18n_arr, ',') - @test i18n_arr == readdlm(i18n_buff, ',') - - hdr = i18n_arr[1:1, :] - data = i18n_arr[2:end, :] - writedlm(i18n_buff, i18n_arr, ',') - @test (data, hdr) == readdlm(i18n_buff, ',', header=true) - - writedlm(i18n_buff, i18n_arr, '\t') - @test (data, hdr) == readdlm(i18n_buff, '\t', header=true) - end -end - -@testset "issue #13028" begin - for data in ["A B C", "A B C\n"] - data,hdr = readdlm(IOBuffer(data), header=true) - @test hdr == AbstractString["A" "B" "C"] - @test data == Matrix{Float64}(undef, 0, 3) - end -end - -# fix #13179 parsing unicode lines with default delmiters -@test isequaldlm(readdlm(IOBuffer("# Should ignore this π\n1\tα\n2\tβ\n"), comments=true), Any[1 "α"; 2 "β"], Any) - -# BigInt parser -let data = "1 2 3" - readdlm(IOBuffer(data), ' ', BigInt) == BigInt[1 2 3] -end - -@testset "show with MIME types" begin - @test sprint(show, "text/csv", [1 2; 3 4]) == "1,2\n3,4\n" - @test sprint(show, "text/tab-separated-values", [1 2; 3 4]) == "1\t2\n3\t4\n" - - for writefunc in ((io,x) -> show(io, "text/csv", x), - (io,x) -> invoke(writedlm, Tuple{IO,Any,Any}, io, x, ",")) - # iterable collections of iterable rows: - let x = [(1,2), (3,4)], io = IOBuffer() - writefunc(io, x) - seek(io, 0) - @test readdlm(io, ',') == [1 2; 3 4] - end - # vectors of strings: - let x = ["foo", "bar"], io = IOBuffer() - writefunc(io, x) - seek(io, 0) - @test vec(readdlm(io, ',')) == x - end - end - - for writefunc in ((io,x) -> show(io, "text/tab-separated-values", x), - (io,x) -> invoke(writedlm, Tuple{IO,Any,Any}, io, x, "\t")) - # iterable collections of iterable rows: - let x = [(1,2), (3,4)], io = IOBuffer() - writefunc(io, x) - seek(io, 0) - @test readdlm(io, '\t') == [1 2; 3 4] - end - # vectors of strings: - let x = ["foo", "bar"], io = IOBuffer() - writefunc(io, x) - seek(io, 0) - @test vec(readdlm(io, '\t')) == x - end - end -end - -# Test that we can read a write protected file -let fn = tempname() - open(fn, "w") do f - write(f, "Julia") - end - chmod(fn, 0o444) - readdlm(fn)[] == "Julia" - rm(fn) -end - -# test writedlm with a filename instead of io input -let fn = tempname(), x = ["a" "b"; "d" ""] - writedlm(fn, x, ',') - @test readdlm(fn, ',') == x - rm(fn) -end - -# issue #21180 -let data = "\"721\",\"1438\",\"1439\",\"…\",\"1\"" - @test readdlm(IOBuffer(data), ',') == Any[721 1438 1439 "…" 1] -end - -# issue #21207 -let data = "\"1\",\"灣\"\"灣灣灣灣\",\"3\"" - @test readdlm(IOBuffer(data), ',') == Any[1 "灣\"灣灣灣灣" 3] -end - -# reading from a byte array (#16731) -let data = Vector{UInt8}("1,2,3\n4,5,6"), origdata = copy(data) - @test readdlm(data, ',') == [1 2 3; 4 5 6] - @test data == origdata -end - -# issue #11484: useful error message for invalid readdlm filepath arguments -@test_throws ArgumentError readdlm(tempdir()) - -# showing as text/csv -let d = TextDisplay(PipeBuffer()) - show(d.io, "text/csv", [3 1 4]) - @test read(d.io, String) == "3,1,4\n" -end - -@testset "complex" begin - @test readdlm(IOBuffer("3+4im, 4+5im"), ',', Complex{Int}) == [3+4im 4+5im] -end diff --git a/stdlib/Makefile b/stdlib/Makefile index 9c18fa261b985..44c3b97e2fb0f 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -44,7 +44,7 @@ STDLIBS = Artifacts Base64 CRC32c Dates DelimitedFiles Distributed FileWatching SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \ $(JLL_NAMES) -STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA +STDLIBS_EXT = Pkg Statistics LibCURL DelimitedFiles Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z)))) From ee2c6e4537fa54fa418842b3ab6ea9edd8dfa491 Mon Sep 17 00:00:00 2001 From: KristofferC Date: Fri, 29 Apr 2022 15:17:14 +0200 Subject: [PATCH 2/4] remove tests in Base relying on DelimitedFiles --- test/offsetarray.jl | 6 ------ test/read.jl | 8 +------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/test/offsetarray.jl b/test/offsetarray.jl index 515e0491ee994..15fca5483f343 100644 --- a/test/offsetarray.jl +++ b/test/offsetarray.jl @@ -3,7 +3,6 @@ isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl") using .Main.OffsetArrays import .Main.OffsetArrays: IdOffsetRange -using DelimitedFiles using Random using LinearAlgebra using Statistics @@ -494,11 +493,6 @@ B92 = view(A92, :, :, Base.IdentityUnitRange(-1:0)) end end -io = IOBuffer() -writedlm(io, A) -seek(io, 0) -@test readdlm(io, eltype(A)) == parent(A) - amin, amax = extrema(parent(A)) @test clamp.(A, (amax+amin)/2, amax).parent == clamp.(parent(A), (amax+amin)/2, amax) diff --git a/test/read.jl b/test/read.jl index 7a5acbcca969e..b8060a023333f 100644 --- a/test/read.jl +++ b/test/read.jl @@ -1,6 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license -using DelimitedFiles, Random, Sockets +using Random, Sockets mktempdir() do dir @@ -312,12 +312,6 @@ for (name, f) in l verbose && println("$name countlines...") @test countlines(io()) == countlines(IOBuffer(text)) - - verbose && println("$name readdlm...") - @test readdlm(io(), ',') == readdlm(IOBuffer(text), ',') - @test readdlm(io(), ',') == readdlm(filename, ',') - - cleanup() end text = old_text From 262d2dee991cd898efeb3211a065f1b8aa91878b Mon Sep 17 00:00:00 2001 From: KristofferC Date: Fri, 29 Apr 2022 15:17:23 +0200 Subject: [PATCH 3/4] move DelimitedFiles out of the sysimage --- base/sysimg.jl | 1 - test/precompile.jl | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/base/sysimg.jl b/base/sysimg.jl index b58df76c63dc3..c68d9f9c82bff 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -38,7 +38,6 @@ let :Unicode, # 1-depth packages - :DelimitedFiles, :LinearAlgebra, :Markdown, :Printf, diff --git a/test/precompile.jl b/test/precompile.jl index fb38f08dad93b..583f728078297 100644 --- a/test/precompile.jl +++ b/test/precompile.jl @@ -359,7 +359,7 @@ precompile_test_harness(false) do dir Dict(let m = Base.root_module(Base, s) Base.PkgId(m) => Base.module_build_id(m) end for s in - [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, :DelimitedFiles, + [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, :Distributed, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll, :LazyArtifacts, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra, :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf, From 0b07de7c4418301b325556f17d7a880e38fa0d6b Mon Sep 17 00:00:00 2001 From: Kristoffer Date: Wed, 18 May 2022 09:59:11 +0200 Subject: [PATCH 4/4] update version --- .../md5 | 1 - .../sha512 | 1 - .../md5 | 1 + .../sha512 | 1 + stdlib/DelimitedFiles.version | 2 +- 5 files changed, 3 insertions(+), 3 deletions(-) delete mode 100644 deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 delete mode 100644 deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 create mode 100644 deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 create mode 100644 deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 diff --git a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 deleted file mode 100644 index 7251ab83e0d5c..0000000000000 --- a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -38ef69e1e66ead7f99f7776b023fbc77 diff --git a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 b/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 deleted file mode 100644 index 0d81c8969e657..0000000000000 --- a/deps/checksums/DelimitedFiles-79f7865b7f009f2ca5917096276a01b11eeac90d.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -554f291c7f6c58bd8ef40e3554a1af575929ad034e1a6e3d72327115e93af1e3bd8431996100d9083880929e550a344a9921e396afa39138ad55a38b422548af diff --git a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 new file mode 100644 index 0000000000000..93a2d414cff7d --- /dev/null +++ b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/md5 @@ -0,0 +1 @@ +ba99caf3dbe9c1c40e67033898ccea2d diff --git a/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 new file mode 100644 index 0000000000000..99c68c413c411 --- /dev/null +++ b/deps/checksums/DelimitedFiles-f520e069d2eb8282e8a07dcb384fe0e0c6293bc3.tar.gz/sha512 @@ -0,0 +1 @@ +c39a90233d3d47431ac7bcbcc47cea9502a9e3a778caf1a67d8bd8364e273ccbe34c9c53f01ba4cfec97ca87b5e7bf9b7901889385061f6dd609413192635b40 diff --git a/stdlib/DelimitedFiles.version b/stdlib/DelimitedFiles.version index 3055926a220ce..972918a83b75e 100644 --- a/stdlib/DelimitedFiles.version +++ b/stdlib/DelimitedFiles.version @@ -1,4 +1,4 @@ DELIMITEDFILES_BRANCH = main -DELIMITEDFILES_SHA1 = 79f7865b7f009f2ca5917096276a01b11eeac90d +DELIMITEDFILES_SHA1 = f520e069d2eb8282e8a07dcb384fe0e0c6293bc3 DELIMITEDFILES_GIT_URL := https://github.com/JuliaData/DelimitedFiles.jl.git DELIMITEDFILES_TAR_URL = https://api.github.com/repos/JuliaData/DelimitedFiles.jl/tarball/$1