From 7853e9dd1069a3099a2d0ddf361e020499b1bb8b Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 07:42:51 +0000 Subject: [PATCH 01/27] Refactor compress_graphs.jl + is_trivial_left + is_trivial_right + has_identity_lincomb + has_trivial_nodes --- src/compress_graph.jl | 42 ++++++++++++------------------------------ 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/src/compress_graph.jl b/src/compress_graph.jl index 78562e4..83c38c0 100644 --- a/src/compress_graph.jl +++ b/src/compress_graph.jl @@ -110,24 +110,19 @@ end # Check if node is trivial because of an identity on the left (:mult or :ldiv). function is_trivial_left(graph, node) - op = graph.operations[node] - if (op == :lincomb) - return false; - else - return graph.parents[node][1] == :I && - (op == :mult || op == :ldiv) - end + return graph.operations[node] ∈ [:mult, :ldiv] && + graph.parents[node][1] == :I end # Check if node is trivial because of an identity on the right (:mult). function is_trivial_right(graph, node) + return graph.operations[node] == :mult && + graph.parents[node][2] == :I +end - op = graph.operations[node] - if (op == :lincomb) - return false; - else - return (graph.parents[node][2] == :I && op == :mult) - end +# Check if node is trivial. +function is_trivial(graph, node) + return is_trivial_left(graph, node) || is_trivial_right(graph, node) end # Return a parent that can replace a node. @@ -166,15 +161,9 @@ Checks whether the graph has a node which is a linear combination of identity matrices. """ function has_identity_lincomb(graph) - has_identity_lincomb = false - for (key, parents) in graph.parents - if graph.operations[key] == :lincomb && - all(parents .== :I) - has_identity_lincomb = true - break - end - end - return has_identity_lincomb + lincombs = [key for (key, value) in graph.operations if value == :lincomb] + lincomb_parents = [graph.parents[key] for key in lincombs] + return any(x -> all(x .== :I), lincomb_parents) end """ @@ -184,14 +173,7 @@ Checks whether the graph has trivial nodes, that is, multiplications by the identity or linear systems whose coefficient is the identity matrix. """ function has_trivial_nodes(graph) - has_trivial_nodes = false - for (key, parents) in graph.parents - if is_trivial_left(graph, key) || is_trivial_right(graph, key) - has_trivial_nodes = true - break - end - end - return has_trivial_nodes + return any(is_trivial(graph, key) for (key, value) in graph.parents) end """ From fa93b891f28ff1342baa37d8478248ac052dc089 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 07:55:47 +0000 Subject: [PATCH 02/27] Add function to check if graph has lincomb with identity --- src/compress_graph.jl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/compress_graph.jl b/src/compress_graph.jl index 83c38c0..6b11314 100644 --- a/src/compress_graph.jl +++ b/src/compress_graph.jl @@ -3,6 +3,7 @@ export compress_graph_dangling! export compress_graph_zero_coeff! +export has_lincomb_with_identity export has_identity_lincomb export has_trivial_nodes export compress_graph_trivial! @@ -154,6 +155,18 @@ function replace_node!(graph, node, replacement, cref) return delete_crefs!(cref, node) end +""" + has_lincomb_with_identity(graph) -> Bool + +Checks whether the graph has a linear combination that includes the identity +matrix. +""" +function has_lincomb_with_identity(graph) + lincombs = [key for (key, value) in graph.operations if value == :lincomb] + lincomb_parents = [graph.parents[key] for key in lincombs] + return any(x -> :I in x, lincomb_parents) +end + """ has_identity_lincomb(graph) -> Bool From b623cd29a18e77f350725cbcf2bf1b6136a1caa8 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 08:03:45 +0000 Subject: [PATCH 03/27] Add tests for graph compression + has_lincomb_with_identity + has_identity_lincomb --- test/compress.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/compress.jl b/test/compress.jl index d16575e..6327843 100644 --- a/test/compress.jl +++ b/test/compress.jl @@ -34,13 +34,25 @@ using LinearAlgebra # Check that we actually removed something @test size(get_all_cref(graph2), 1) < size(get_all_cref(graph1), 1) + ## Test function to detect if graph has linear combination with identities ## Test function to detect if graph has linear combination of identities graph = Compgraph() add_lincomb!(graph, :P1, 1.0, :A, 1.0, :I) add_lincomb!(graph, :P2, 1.0, :I, 1.0, :I) add_mult!(graph, :P0, :P1, :P2) + @test has_lincomb_with_identity(graph) == true @test has_identity_lincomb(graph) == true + graph = Compgraph() + add_lincomb!(graph, :P, 1.0, :A, 1.0, :I) + @test has_lincomb_with_identity(graph) == true + @test has_identity_lincomb(graph) == false + + graph = Compgraph() + add_mult!(graph, :P, :A, :A) + @test has_lincomb_with_identity(graph) == false + @test has_identity_lincomb(graph) == false + ## Test trivial node detection and removal graph = Compgraph() add_mult!(graph, :AI, :A, :I) From 3f2027c8e1e10a1a71636db60dbce37a946d9f90 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 10:14:33 +0000 Subject: [PATCH 04/27] Fix spacing in generated C code --- src/code_gen/gen_c_code.jl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index fdb7e10..b686082 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -145,7 +145,7 @@ function init_mem(lang::LangC, max_nof_nodes, precomputed_nodes) end function function_init(lang::LangC, T, mem, graph, precomputed_nodes) - (blas_type, blas_prefix) = get_blas_type(lang, T) + (blas_t, blas_prefix) = get_blas_type(lang, T) code = init_code(lang) max_nodes = size(mem.slots, 1) @@ -181,7 +181,7 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) end # Array ipiv for pivots of GEPP (needed only if graph has linear systems). if :ldiv in graph_ops - push_code!(code, "lapack_int *ipiv = malloc(n*sizeof(*ipiv));") + push_code!(code, "lapack_int *ipiv = malloc(n * sizeof(*ipiv));") end push_code!(code, "size_t j;") push_code!(code, "") @@ -241,7 +241,7 @@ function function_end(lang::LangC, graph, mem) retval = get_slot_name(mem, retval_node) push_code!(code, "") push_comment!(code, "Prepare output.") - push_code!(code, "memcpy(output, $retval, n*n*sizeof(*output));") + push_code!(code, "memcpy(output, $retval, n * n * sizeof(*output));") if :ldiv in values(graph.operations) push_code!(code, "free(ipiv);") end @@ -535,7 +535,7 @@ function scalar_to_string(::LangC, z) return "$z" end function scalar_to_string(::LangC_OpenBLAS, z::T) where {T<:Complex} - return "$(real(z)) + $(imag(z))*I" + return "$(real(z)) + $(imag(z)) * I" end function scalar_to_string(::LangC_MKL, z::T) where {T<:Complex} return "{$(real(z)), $(imag(z))}" @@ -596,15 +596,15 @@ function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) n = A push_code!(code, "size_t n = $n;") push_code!(code, "srand(0);") - push_code!(code, "blas_type *A = malloc(n*n*sizeof(*A));") - push_code!(code, "for(i=0; i Date: Fri, 12 Jan 2024 10:16:15 +0000 Subject: [PATCH 05/27] Fix spacing in generated Julia code --- src/code_gen/gen_c_code.jl | 2 +- src/code_gen/gen_julia_code.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index b686082..3031cec 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -81,7 +81,7 @@ reference_constant(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" # Code generation. # Return MKL/OpenBLAS-specific includes. function get_blas_includes(::LangC_MKL) - return "#include" + return "#include" end function get_blas_includes(::LangC_OpenBLAS) return "#include\n#include" diff --git a/src/code_gen/gen_julia_code.jl b/src/code_gen/gen_julia_code.jl index 50ab7de..70a7887 100644 --- a/src/code_gen/gen_julia_code.jl +++ b/src/code_gen/gen_julia_code.jl @@ -200,7 +200,7 @@ function function_init(lang::LangJulia, T, mem, graph, precomputed_nodes) push_code!(code,"$thisslotname = "*lang.alloc_function(i)); end - push_comment!(code,"Assign precomputed nodes memslots "); + push_comment!(code,"Assign precomputed nodes memslots"); precomp_nodes_string = join(repeat("A",jj),","); for (i, n) in enumerate(precomputed_nodes) From f34a262766623b7794c394ca3c6dbd9a0924f44e Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 10:16:50 +0000 Subject: [PATCH 06/27] Add partial support for lincomb with multiple parents in C + Full support for OpenBLAS + Support for real types for MKL --- src/code_gen/gen_c_code.jl | 273 ++++++++++++++----------------------- 1 file changed, 100 insertions(+), 173 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 3031cec..6dea5d5 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -42,13 +42,13 @@ function assign_coeff(::LangC_MKL, val::T, i) where {T<:Complex} "coeff$i.real = " * string(real(val)) * ";", "coeff$i.imag = " * string(imag(val)) * ";", ] - variable_string = "&coeff$i" + variable_string = "coeff$i" return (variable_string, assignment_string) end function assign_coeff(::LangC_OpenBLAS, val::T, i) where {T<:Complex} assignment_string = - ["coeff$i = " * string(real(val)) * " + " * string(imag(val)) * "*I;"] - variable_string = "&coeff$i" + ["coeff$i = " * string(real(val)) * " + " * string(imag(val)) * " * I;"] + variable_string = "coeff$i" return (variable_string, assignment_string) end function assign_coeff(::LangC, val::T, i) where {T<:Real} @@ -58,25 +58,18 @@ function assign_coeff(::LangC, val::T, i) where {T<:Real} end # Constant declaration and reference. -function declare_constant(::LangC_MKL, val::T, id, type) where {T<:Complex} - return "const $type $id = {.real = " * - string(real(val)) * - ", " * - ".imag = " * - string(imag(val)) * - "};" +function declare_const(::LangC_MKL, val::T, id, type) where {T<:Complex} + return "const $type $id = {.real = " * string(real(val)) * ", " * + ".imag = " * string(imag(val)) * "};" end -function declare_constant(::LangC_OpenBLAS, val::T, id, type) where {T<:Complex} - return "const $type $id = " * - string(real(val)) * - " + " * - string(imag(val)) * - "*I;" +function declare_const(::LangC_OpenBLAS, val::T, id, type) where {T<:Complex} + return "const $type $id = " * string(real(val)) * " + " * + string(imag(val)) * "*I;" end -function declare_constant(::LangC, val::T, id, type) where {T<:Real} +function declare_const(::LangC, val::T, id, type) where {T<:Real} return "const $type $id = $val;" end -reference_constant(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" +reference_const(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" # Code generation. # Return MKL/OpenBLAS-specific includes. @@ -107,10 +100,8 @@ function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float64}}) return ("openblas_complex_double", "z") end - function preprocess_codegen(graph, lang::LangC) - (g,_)=graph_bigraph(graph) - return g # Lang C only supports bigraphs + return graph end function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) @@ -164,20 +155,17 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) push_comment!(code, "Declarations and initializations.") # Allocate coefficients for linear combinations, if needed. if :lincomb in graph_ops - push_code!(code, "$blas_type coeff1, coeff2;") + num_coeffs = maximum(length.(values(graph.coeffs))) + min_coeff = has_lincomb_with_identity(graph) ? 0 : 1 + coefficients = join(["coeff$i" for i in min_coeff:num_coeffs], ", ") + push_code!(code, "$blas_t " .* "$coefficients;") end # Declare constants ZERO and ONE, if needed. if :mult in graph_ops - push_code!( - code, - declare_constant(lang, convert(T, 0.0), "ZERO", blas_type), - ) + push_code!(code, declare_const(lang, convert(T, 0.0), "ZERO", blas_t)) end if :lincomb in graph_ops || :mult in graph_ops - push_code!( - code, - declare_constant(lang, convert(T, 1.0), "ONE", blas_type), - ) + push_code!(code, declare_const(lang, convert(T, 1.0), "ONE", blas_t)) end # Array ipiv for pivots of GEPP (needed only if graph has linear systems). if :ldiv in graph_ops @@ -189,12 +177,12 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) push_comment!(code, "Memory management.") push_code!( code, - "$blas_type *master_mem = malloc(n*n*max_memalloc" * - "*sizeof(*master_mem));", + "$blas_t *master_mem = malloc(n * n * max_memalloc" * + " * sizeof(*master_mem));", ) push_code!( code, - "$blas_type *memslots[max_memslots]; " * + "$blas_t *memslots[max_memslots]; " * comment(lang, "As many slots as nodes"), ) @@ -211,26 +199,18 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) # Otherwise make a copy push_code!( code, - "memcpy($Ak_slot_name,$n,n*n*sizeof(*master_mem));", + "memcpy($Ak_slot_name, $n, n * n * sizeof(*master_mem));", ) end end # Initialize pointers. push_comment!(code, "The other slots are pointers to allocated memory.") - push_code!(code, "for (j=$(num_preallocated_slots-1); j Date: Fri, 12 Jan 2024 14:47:02 +0000 Subject: [PATCH 07/27] Complete support for lincomb with multiple parents in C + Support for complex types for MKL --- src/code_gen/gen_c_code.jl | 128 +++++++++++++++++++++++++++++-------- 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 6dea5d5..1ee9f5e 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -58,18 +58,20 @@ function assign_coeff(::LangC, val::T, i) where {T<:Real} end # Constant declaration and reference. -function declare_const(::LangC_MKL, val::T, id, type) where {T<:Complex} - return "const $type $id = {.real = " * string(real(val)) * ", " * - ".imag = " * string(imag(val)) * "};" +function initialization_string(::LangC, val::T) where {T<:Real} + return "$val" end -function declare_const(::LangC_OpenBLAS, val::T, id, type) where {T<:Complex} - return "const $type $id = " * string(real(val)) * " + " * - string(imag(val)) * "*I;" +function initialization_string(::LangC_MKL, val::T) where {T<:Complex} + return "{.real = " * string(real(val)) * ", " * + ".imag = " * string(imag(val)) * "}" end -function declare_const(::LangC, val::T, id, type) where {T<:Real} - return "const $type $id = $val;" +function initialization_string(::LangC_OpenBLAS, val::T) where {T<:Complex} + return string(real(val)) * " + " * string(imag(val)) * "*I" end -reference_const(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" +function declare_const(lang::LangC, val, id, type) + return "const $type $id = " * initialization_string(lang, val) * ";" +end +reference_value(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" # Code generation. # Return MKL/OpenBLAS-specific includes. @@ -100,6 +102,43 @@ function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float64}}) return ("openblas_complex_double", "z") end +# Add functions to work with MKL complex types. +function add_auxiliary_functions(code, ::LangC, T) + return # In general, nothing is needed. +end + +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) + push_code!(code, +"void fma_MKL_Complex8(MKL_Complex8 *acc, + const MKL_Complex8 *a, + const MKL_Complex8 *b) { + acc->real += a->real * b->real - a->imag * b->imag; + acc->imag += a->real * b->imag + a->imag * b->real; +} + +void acc_MKL_Complex8(MKL_Complex8 *acc, + const MKL_Complex16 *a) { + acc->real += a->real; + acc->imag += a->imag; +}", ind_lvl = 0) +end + +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) + push_code!(code, +"void fma_MKL_Complex16(MKL_Complex16 *acc, + const MKL_Complex16 *a, + const MKL_Complex16 *b) { + acc->real += a->real * a->real - b->imag * b->imag; + acc->imag += a->real * b->imag + a->imag * b->real; +} + +void acc_MKL_Complex16(MKL_Complex16 *acc, + const MKL_Complex16 *a) { + acc->real += a->real; + acc->imag += a->imag; +}", ind_lvl = 0) +end + function preprocess_codegen(graph, lang::LangC) return graph end @@ -112,6 +151,8 @@ function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "") + add_auxiliary_functions(code, lang, T) + push_code!(code, "") push_comment!(code, "Code for matrix function evaluation.", ind_lvl = 0) input_variables = join(map(x -> "$blas_type *" * string(x), precomputed_nodes), ", ") @@ -207,7 +248,7 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) # Initialize pointers. push_comment!(code, "The other slots are pointers to allocated memory.") push_code!( - code, + code, "for (j = $(num_preallocated_slots-1); j < max_memalloc; j++)" ) push_code!(code, "memslots[j+1] = master_mem + j * n * n;", ind_lvl = 2) @@ -232,6 +273,45 @@ function function_end(lang::LangC, graph, mem) return code end +function add_lincomb_body(code, lang::LangC, T, nodemem, coeff_names, parent_mems) + println(T) + rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems.* " + i)"), " + ") + for_body = "*(" * nodemem * " + i) = " * (isempty(rhs) ? "0" : rhs) * ";" + push_code!(code, for_body; ind_lvl = 2) +end + +fma_function_name(T::Type{Complex{Float32}}) = "fma_MKL_Complex8" +fma_function_name(T::Type{Complex{Float64}}) = "fma_MKL_Complex16" +function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, + nodemem, coeff_names, parent_mems) where S <: Real + if isempty(coeff_names) + lhs = "*(" * nodemem * " + i)" + push_code!(code, lhs * ".real = 0;", ind_lvl = 2) + push_code!(code, lhs * ".imag = 0;", ind_lvl = 2) + else + for (it, (coeff, parent)) in enumerate(zip(coeff_names, parent_mems)) + statement = fma_function_name(T) * + "(" * nodemem * " + i, " * + reference_value(lang, T, coeff) * ", " * + parent * " + i);" + push_code!(code, statement; ind_lvl = 2) + end + end +end + +function add_lincomb_identity_body(code, lang::LangC, T, nodemem, coeff_id) + push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) +end + +acc_function_name(T::Type{Complex{Float32}}) = "acc_MKL_Complex8" +acc_function_name(T::Type{Complex{Float64}}) = "acc_MKL_Complex16" +function add_lincomb_identity_body(code, lang::LangC_MKL, T::Type{Complex{S}}, + nodemem, coeff_id) where S <: Real + statement = acc_function_name(T) * "(" * nodemem * " + i, " * + reference_value(lang, T, coeff_id) * ");" + push_code!(code, statement, ind_lvl = 2) +end + function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) (blas_type, blas_prefix) = get_blas_type(lang, T) @@ -263,8 +343,8 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) parent2mem = get_slot_name(mem, parent2) end - rzero = reference_const(lang, T, "ZERO") - rone = reference_const(lang, T, "ONE") + rzero = reference_value(lang, T, "ZERO") + rone = reference_value(lang, T, "ONE") code = init_code(lang) push_code!(code, "") @@ -392,12 +472,6 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) id_coefficient += v end end - if id_coefficient != 0 - (coeff_id, coeff_id_code) = assign_coeff(lang, id_coefficient, 0) - for statement in coeff_id_code - push_code!(code, statement) - end - end # Determine a target mem: nodemem nodemem = nothing @@ -417,20 +491,18 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) end # Write the linear combination. - push_code!(code, "for (size_t i = 0; i < n*n; i++) {") - rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems.* "+i)"), " + ") - for_body = "*(" * nodemem * " + i) = " - if !isempty(rhs) - for_body *= rhs * ";" - else - for_body *= "0;" - end - push_code!(code, for_body; ind_lvl = 2) + push_code!(code, "for (size_t i = 0; i < n * n; i++) {") + add_lincomb_body(code, lang, T, nodemem, coeff_names, parent_mems) push_code!(code, "}") + push_code!(code, "") if id_coefficient != 0 + (coeff_id, coeff_id_code) = assign_coeff(lang, id_coefficient, 0) + for statement in coeff_id_code + push_code!(code, statement) + end push_code!(code, "for (size_t i = 0; i < n*n; i += n + 1) {") - push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) + add_lincomb_identity_body(code, lang, T, nodemem, coeff_id) push_code!(code, "}") end From 0e4e0c9bd98f6043edfc8e575ad86182e3c0804d Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 17:18:51 +0000 Subject: [PATCH 08/27] Fix error with lincomb of identity in C code generators --- src/code_gen/gen_c_code.jl | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 1ee9f5e..88932c2 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -253,6 +253,17 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) ) push_code!(code, "memslots[j+1] = master_mem + j * n * n;", ind_lvl = 2) + # Store identity explicitly only if graph has a linear combination of I. + if has_identity_lincomb(graph) + push_comment!(code, "Graph has linear combination of identities.") + push_comment!(code, "The matrix I is explicitly allocated.") + alloc_slot!(mem, num_precomputed_nodes + 1, :I) + nodemem = get_slot_name(mem, :I) + push_code!(code, "memset($nodemem, 0, n * n * sizeof(*master_mem));") + push_code!(code, "for(j = 0; j < n * n; j += n + 1)") + push_code!(code, "*($nodemem+j) = ONE;", ind_lvl = 2) + end + return code end @@ -317,14 +328,14 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) op = graph.operations[node] - # TODO: The following should have been done better. - parent1 = graph.parents[node][1] - parent2 = graph.parents[node][2] - # Keep deallocation list (used for smart memory management). dealloc_list = deepcopy(dealloc_list) setdiff!(dealloc_list, keys(mem.special_names)) + + parent1 = graph.parents[node][1] + parent2 = graph.parents[node][2] + # TODO: Should be simplified. # Check if parents have a memory slot. if has_identity_lincomb(graph) # Linear combination of I. From 0fc78b78d801716bb1960903fbc8300505cad171 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 17:39:34 +0000 Subject: [PATCH 09/27] Remove repeated code --- src/code_gen/gen_c_code.jl | 53 +++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 88932c2..c6fcc3b 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -4,7 +4,7 @@ export LangC_MKL, LangC_OpenBLAS """ LangC_MKL(gen_main::Bool) -Code generation in C using MKL implementation of BLAS. +Code generation in C using the oneAPI MKL implementation of BLAS. """ struct LangC_MKL gen_main::Any @@ -14,7 +14,7 @@ end """ LangC_OpenBLAS(gen_main::Bool) -Code generation in C using OpenBLAS implementation of BLAS. +Code generation in C using the OpenBLAS implementation of BLAS. """ struct LangC_OpenBLAS gen_main::Any @@ -32,7 +32,6 @@ end # Language specific operations. comment(::LangC, s) = "/* $s */" - slotname(::LangC, i) = "memslots[$(i-1)]" # Variable declaration, initialization, and reference. @@ -151,7 +150,10 @@ function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "") - add_auxiliary_functions(code, lang, T) + # Add auxiliary functions to work with MKL complex types in lincomb. + if :lincomb ∈ values(graph.operations) + add_auxiliary_functions(code, lang, T) + end push_code!(code, "") push_comment!(code, "Code for matrix function evaluation.", ind_lvl = 0) input_variables = @@ -328,39 +330,24 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) op = graph.operations[node] + code = init_code(lang) + push_code!(code, "") + push_comment!(code, "Computing $node with operation: $op") + # Keep deallocation list (used for smart memory management). dealloc_list = deepcopy(dealloc_list) setdiff!(dealloc_list, keys(mem.special_names)) - - parent1 = graph.parents[node][1] - parent2 = graph.parents[node][2] - - # TODO: Should be simplified. - # Check if parents have a memory slot. - if has_identity_lincomb(graph) # Linear combination of I. - p1_is_identity = p2_is_identity = false - else - p1_is_identity = parent1 == :I - p2_is_identity = parent2 == :I - setdiff!(dealloc_list, [:I]) - end - - # Get memory slots of parents, if explicitly stored. - if !p1_is_identity - parent1mem = get_slot_name(mem, parent1) - end - if !p2_is_identity - parent2mem = get_slot_name(mem, parent2) - end - rzero = reference_value(lang, T, "ZERO") rone = reference_value(lang, T, "ONE") - code = init_code(lang) - push_code!(code, "") - push_comment!(code, "Computing $node with operation: $op") if op == :mult + # The graph has been compressed, thus we can assume that neither parent1 + # is the identity. + parent1 = graph.parents[node][1] + parent2 = graph.parents[node][2] + parent1mem = get_slot_name(mem, parent1) + parent2mem = get_slot_name(mem, parent2) (nodemem_i, nodemem) = get_free_slot(mem) alloc_slot!(mem, nodemem_i, node) push_code!( @@ -372,6 +359,11 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) " $rzero, $nodemem, n);", ) elseif op == :ldiv + # Left parent cannot be the identity. + parent1 = graph.parents[node][1] + parent1mem = get_slot_name(mem, parent1) + # Parent 2 can be the identity, but this is dealt with below. + parent2 = graph.parents[node][2] # Find a memory slot for the LU factors. # As ?detrs computes the LU decomposition of parent1 in-place, we need # to make a copy of $parent1mem, unless parent1 is on the deallocation @@ -389,7 +381,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) alloc_slot!(mem, lhsmem_i, :dummy) push_code!( code, - "memcpy($lhsmem, $parent1mem, " * "n*n*sizeof(*master_mem));", + "memcpy($lhsmem, $parent1mem, " * "n * n * sizeof(*master_mem));", ) end @@ -430,6 +422,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) setdiff!(dealloc_list, [parent2]) set_slot_number!(mem, get_slot_number(mem, parent2), node) else + parent2mem = get_slot_name(mem, parent2) (nodemem_i, nodemem) = get_free_slot(mem) alloc_slot!(mem, nodemem_i, node) push_code!( From f3829a83a849c8b586e805dcbe5a6d02a47d3913 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 19:13:20 +0000 Subject: [PATCH 10/27] Use per-lincomb coefficient declaration --- src/code_gen/gen_c_code.jl | 77 ++++++++++++++------------------------ 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index c6fcc3b..a89cf2f 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -36,27 +36,6 @@ slotname(::LangC, i) = "memslots[$(i-1)]" # Variable declaration, initialization, and reference. # In C, complex types are structures and are passed by reference. -function assign_coeff(::LangC_MKL, val::T, i) where {T<:Complex} - assignment_string = [ - "coeff$i.real = " * string(real(val)) * ";", - "coeff$i.imag = " * string(imag(val)) * ";", - ] - variable_string = "coeff$i" - return (variable_string, assignment_string) -end -function assign_coeff(::LangC_OpenBLAS, val::T, i) where {T<:Complex} - assignment_string = - ["coeff$i = " * string(real(val)) * " + " * string(imag(val)) * " * I;"] - variable_string = "coeff$i" - return (variable_string, assignment_string) -end -function assign_coeff(::LangC, val::T, i) where {T<:Real} - assignment_string = ["coeff$i = $val;"] - variable_string = "coeff$i" - return (variable_string, assignment_string) -end - -# Constant declaration and reference. function initialization_string(::LangC, val::T) where {T<:Real} return "$val" end @@ -67,8 +46,18 @@ end function initialization_string(::LangC_OpenBLAS, val::T) where {T<:Complex} return string(real(val)) * " + " * string(imag(val)) * "*I" end +function declare_var(lang::LangC, val, id, type) + return "$type $id = " * initialization_string(lang, val) * ";" +end +function declare_coeff(lang::LangC, val, id, type) + variable_string = "coeff_$id" + dec_init_string = declare_var(lang, val, variable_string, type) + return (variable_string, dec_init_string) +end + +# Constant declaration and reference. function declare_const(lang::LangC, val, id, type) - return "const $type $id = " * initialization_string(lang, val) * ";" + return "const " * declare_var(lang::LangC, val, id, type) end reference_value(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" @@ -196,18 +185,9 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) push_code!(code, "") push_comment!(code, "Declarations and initializations.") - # Allocate coefficients for linear combinations, if needed. - if :lincomb in graph_ops - num_coeffs = maximum(length.(values(graph.coeffs))) - min_coeff = has_lincomb_with_identity(graph) ? 0 : 1 - coefficients = join(["coeff$i" for i in min_coeff:num_coeffs], ", ") - push_code!(code, "$blas_t " .* "$coefficients;") - end # Declare constants ZERO and ONE, if needed. if :mult in graph_ops push_code!(code, declare_const(lang, convert(T, 0.0), "ZERO", blas_t)) - end - if :lincomb in graph_ops || :mult in graph_ops push_code!(code, declare_const(lang, convert(T, 1.0), "ONE", blas_t)) end # Array ipiv for pivots of GEPP (needed only if graph has linear systems). @@ -450,30 +430,28 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) elseif op == :lincomb - # Don't overwrite the list - dealloc_list = deepcopy(dealloc_list) - setdiff!(dealloc_list, keys(mem.special_names)) setdiff!(dealloc_list, [:I]) fused_sum = (join("x*" .* string.(graph.parents[node]), " + ")) push_comment!(code, "Computing $node = $fused_sum") - # Write the coeffs into appropriate vectors + # Set coefficients. coeff_names = Vector() - coeff_list = graph.coeffs[node] parent_mems = Vector() id_coefficient = 0 - for (i, v) in enumerate(coeff_list) + for (i, v) in enumerate(graph.coeffs[node]) n = graph.parents[node][i] - if (n != :I) - (coeff_i, coeff_i_code) = assign_coeff(lang, v, i) - for statement in coeff_i_code - push_code!(code, statement) - end + if (n == :I) + # Coefficient of identities. + id_coefficient += v + else + # Coefficient of other nodes. + (blas_t, blas_prefix) = get_blas_type(lang, T) + (coeff_i, coeff_i_code) = declare_coeff(lang, v, + "$node" * "_" * "$i", blas_t) + push_code!(code, coeff_i_code) push!(coeff_names, coeff_i) push!(parent_mems, get_slot_name(mem, n)) - else # The identites are added in a different vector - id_coefficient += v end end @@ -501,10 +479,13 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) push_code!(code, "") if id_coefficient != 0 - (coeff_id, coeff_id_code) = assign_coeff(lang, id_coefficient, 0) - for statement in coeff_id_code - push_code!(code, statement) - end + (blas_t, blas_prefix) = get_blas_type(lang, T) + (coeff_id, coeff_id_code) = declare_coeff(lang, + id_coefficient, "$node" * "_0", blas_t) + push_code!(code, coeff_id_code) + # for statement in coeff_id_code + # push_code!(code, statement) + # end push_code!(code, "for (size_t i = 0; i < n*n; i += n + 1) {") add_lincomb_identity_body(code, lang, T, nodemem, coeff_id) push_code!(code, "}") From faa52895d6cd3b56f808f5a9e8310da3c92b6250 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 19:23:55 +0000 Subject: [PATCH 11/27] Remove now unused allocation in C code generation --- src/code_gen/gen_c_code.jl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index a89cf2f..35859f0 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -235,17 +235,6 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) ) push_code!(code, "memslots[j+1] = master_mem + j * n * n;", ind_lvl = 2) - # Store identity explicitly only if graph has a linear combination of I. - if has_identity_lincomb(graph) - push_comment!(code, "Graph has linear combination of identities.") - push_comment!(code, "The matrix I is explicitly allocated.") - alloc_slot!(mem, num_precomputed_nodes + 1, :I) - nodemem = get_slot_name(mem, :I) - push_code!(code, "memset($nodemem, 0, n * n * sizeof(*master_mem));") - push_code!(code, "for(j = 0; j < n * n; j += n + 1)") - push_code!(code, "*($nodemem+j) = ONE;", ind_lvl = 2) - end - return code end From e0f63a7b5387e791dd9f5fe5aa6a318b66877531 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 19:24:27 +0000 Subject: [PATCH 12/27] Improve formatting of generated code --- src/code_gen/gen_c_code.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 35859f0..ce2a3b2 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -256,8 +256,8 @@ function function_end(lang::LangC, graph, mem) end function add_lincomb_body(code, lang::LangC, T, nodemem, coeff_names, parent_mems) - println(T) - rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems.* " + i)"), " + ") + rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems .* " + i)"), + " + \n ") for_body = "*(" * nodemem * " + i) = " * (isempty(rhs) ? "0" : rhs) * ";" push_code!(code, for_body; ind_lvl = 2) end @@ -397,7 +397,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) push_code!( code, "memcpy($nodemem, $parent2mem, " * - "n*n*sizeof(*master_mem));", + "n * n*sizeof(*master_mem));", ) end @@ -475,7 +475,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) # for statement in coeff_id_code # push_code!(code, statement) # end - push_code!(code, "for (size_t i = 0; i < n*n; i += n + 1) {") + push_code!(code, "for (size_t i = 0; i < n * n; i += n + 1) {") add_lincomb_identity_body(code, lang, T, nodemem, coeff_id) push_code!(code, "}") end @@ -562,7 +562,7 @@ function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) if isa(A, Matrix) n = LinearAlgebra.checksquare(A) push_code!(code, "size_t n = $n;") - push_code!(code, "blas_type A[$(n*n)] = {") + push_code!(code, "blas_type A[$(n * n)] = {") print_indented_matrix(lang, code, A, ind_lvl = 2) push_code!(code, "};") else # A is an integer From cba0accbd55cb19a61f9a6086f1ad89aba418d29 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 21:26:11 +0000 Subject: [PATCH 13/27] Reduce number of operations if coefficients of lincomb are real --- src/code_gen/gen_c_code.jl | 83 ++++++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index ce2a3b2..1948d36 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -49,10 +49,14 @@ end function declare_var(lang::LangC, val, id, type) return "$type $id = " * initialization_string(lang, val) * ";" end -function declare_coeff(lang::LangC, val, id, type) +function declare_coeff(lang::LangC, val::T, id) where T + # Use real type regardless of T. + is_real_val = isreal(val) + actual_val = is_real_val ? real(val) : val + (blas_t, blas_prefix) = get_blas_type(lang, typeof(actual_val)) variable_string = "coeff_$id" - dec_init_string = declare_var(lang, val, variable_string, type) - return (variable_string, dec_init_string) + dec_init_string = declare_var(lang, actual_val, variable_string, blas_t) + return (is_real_val, variable_string, dec_init_string) end # Constant declaration and reference. @@ -97,7 +101,19 @@ end function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) push_code!(code, -"void fma_MKL_Complex8(MKL_Complex8 *acc, +"void fma_MKL_Complex8_float(MKL_Complex8 *acc, + const float a, + const MKL_Complex8 *b) { + acc->real += a * b->real; + acc->imag += a * b->imag; +} + +void acc_MKL_Complex8_float(MKL_Complex8 *acc, + const float a) { + acc->real += a; +} + +void fma_MKL_Complex8(MKL_Complex8 *acc, const MKL_Complex8 *a, const MKL_Complex8 *b) { acc->real += a->real * b->real - a->imag * b->imag; @@ -113,7 +129,19 @@ end function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) push_code!(code, -"void fma_MKL_Complex16(MKL_Complex16 *acc, +"void fma_MKL_Complex16_double(MKL_Complex16 *acc, + const double a, + const MKL_Complex16 *b) { + acc->real += a * b->real; + acc->imag += a * b->imag; +} + +void acc_MKL_Complex16_float(MKL_Complex16 *acc, + const double a) { + acc->real += a; +} + +void fma_MKL_Complex16(MKL_Complex16 *acc, const MKL_Complex16 *a, const MKL_Complex16 *b) { acc->real += a->real * a->real - b->imag * b->imag; @@ -255,24 +283,27 @@ function function_end(lang::LangC, graph, mem) return code end -function add_lincomb_body(code, lang::LangC, T, nodemem, coeff_names, parent_mems) +function add_lincomb_body(code, lang::LangC, T, nodemem, + coeff_names, coeff_is_real, parent_mems) rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems .* " + i)"), " + \n ") for_body = "*(" * nodemem * " + i) = " * (isempty(rhs) ? "0" : rhs) * ";" push_code!(code, for_body; ind_lvl = 2) end -fma_function_name(T::Type{Complex{Float32}}) = "fma_MKL_Complex8" -fma_function_name(T::Type{Complex{Float64}}) = "fma_MKL_Complex16" -function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, - nodemem, coeff_names, parent_mems) where S <: Real +fma_function_name(T::Type{Complex{Float32}}, coeff_real) = + coeff_real ? "fma_MKL_Complex8_float" : "fma_MKL_Complex8" +fma_function_name(T::Type{Complex{Float64}}, coeff_real) = + coeff_real ? "fma_MKL_Complex16_double" : "fma_MKL_Complex16" +function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, + coeff_names, coeff_is_real, parent_mems) where S <: Real if isempty(coeff_names) lhs = "*(" * nodemem * " + i)" push_code!(code, lhs * ".real = 0;", ind_lvl = 2) push_code!(code, lhs * ".imag = 0;", ind_lvl = 2) else - for (it, (coeff, parent)) in enumerate(zip(coeff_names, parent_mems)) - statement = fma_function_name(T) * + for (it, (coeff, coeff_real, parent)) in enumerate(zip(coeff_names, coeff_is_real, parent_mems)) + statement = fma_function_name(T, coeff_real) * "(" * nodemem * " + i, " * reference_value(lang, T, coeff) * ", " * parent * " + i);" @@ -281,15 +312,17 @@ function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, end end -function add_lincomb_identity_body(code, lang::LangC, T, nodemem, coeff_id) +function add_lincomb_identity_body(code, lang::LangC, T, nodemem, coeff_id, coeff_real) push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) end -acc_function_name(T::Type{Complex{Float32}}) = "acc_MKL_Complex8" -acc_function_name(T::Type{Complex{Float64}}) = "acc_MKL_Complex16" +acc_function_name(T::Type{Complex{Float32}}, coeff_real) = + coeff_real ? "acc_MKL_Complex8_float" : "acc_MKL_Complex8" +acc_function_name(T::Type{Complex{Float64}}, coeff_real) = + coeff_real ? "acc_MKL_Complex16_double" : "acc_MKL_Complex16" function add_lincomb_identity_body(code, lang::LangC_MKL, T::Type{Complex{S}}, - nodemem, coeff_id) where S <: Real - statement = acc_function_name(T) * "(" * nodemem * " + i, " * + nodemem, coeff_id, coeff_real) where S <: Real + statement = acc_function_name(T, coeff_real) * "(" * nodemem * " + i, " * reference_value(lang, T, coeff_id) * ");" push_code!(code, statement, ind_lvl = 2) end @@ -426,6 +459,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) # Set coefficients. coeff_names = Vector() + coeff_is_real = Vector() parent_mems = Vector() id_coefficient = 0 for (i, v) in enumerate(graph.coeffs[node]) @@ -435,11 +469,11 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) id_coefficient += v else # Coefficient of other nodes. - (blas_t, blas_prefix) = get_blas_type(lang, T) - (coeff_i, coeff_i_code) = declare_coeff(lang, v, - "$node" * "_" * "$i", blas_t) + (coeff_real, coeff_i, coeff_i_code) = declare_coeff(lang, v, + "$node" * "_" * "$i") push_code!(code, coeff_i_code) push!(coeff_names, coeff_i) + push!(coeff_is_real, coeff_real) push!(parent_mems, get_slot_name(mem, n)) end end @@ -463,20 +497,19 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) # Write the linear combination. push_code!(code, "for (size_t i = 0; i < n * n; i++) {") - add_lincomb_body(code, lang, T, nodemem, coeff_names, parent_mems) + add_lincomb_body(code, lang, T, nodemem, coeff_names, coeff_is_real, parent_mems) push_code!(code, "}") push_code!(code, "") if id_coefficient != 0 - (blas_t, blas_prefix) = get_blas_type(lang, T) - (coeff_id, coeff_id_code) = declare_coeff(lang, - id_coefficient, "$node" * "_0", blas_t) + (coeff_real, coeff_id, coeff_id_code) = declare_coeff(lang, + id_coefficient, "$node" * "_0") push_code!(code, coeff_id_code) # for statement in coeff_id_code # push_code!(code, statement) # end push_code!(code, "for (size_t i = 0; i < n * n; i += n + 1) {") - add_lincomb_identity_body(code, lang, T, nodemem, coeff_id) + add_lincomb_identity_body(code, lang, T, nodemem, coeff_id, coeff_real) push_code!(code, "}") end From c1e0dc756d262c3776bab24a6da7a9852361eed8 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 21:47:29 +0000 Subject: [PATCH 14/27] Use memset to initialize empty matrices --- src/code_gen/gen_c_code.jl | 62 ++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 1948d36..56db70e 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -102,15 +102,15 @@ end function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) push_code!(code, "void fma_MKL_Complex8_float(MKL_Complex8 *acc, - const float a, + const float *a, const MKL_Complex8 *b) { - acc->real += a * b->real; - acc->imag += a * b->imag; + acc->real += *a * b->real; + acc->imag += *a * b->imag; } void acc_MKL_Complex8_float(MKL_Complex8 *acc, - const float a) { - acc->real += a; + const float *a) { + acc->real += *a; } void fma_MKL_Complex8(MKL_Complex8 *acc, @@ -130,15 +130,15 @@ end function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) push_code!(code, "void fma_MKL_Complex16_double(MKL_Complex16 *acc, - const double a, + const double *a, const MKL_Complex16 *b) { - acc->real += a * b->real; - acc->imag += a * b->imag; + acc->real += *a * b->real; + acc->imag += *a * b->imag; } -void acc_MKL_Complex16_float(MKL_Complex16 *acc, - const double a) { - acc->real += a; +void acc_MKL_Complex16_double(MKL_Complex16 *acc, + const double *a) { + acc->real += *a; } void fma_MKL_Complex16(MKL_Complex16 *acc, @@ -285,30 +285,25 @@ end function add_lincomb_body(code, lang::LangC, T, nodemem, coeff_names, coeff_is_real, parent_mems) - rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems .* " + i)"), + rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems .* " + i)"), " + \n ") for_body = "*(" * nodemem * " + i) = " * (isempty(rhs) ? "0" : rhs) * ";" push_code!(code, for_body; ind_lvl = 2) end -fma_function_name(T::Type{Complex{Float32}}, coeff_real) = +fma_function_name(T::Type{Complex{Float32}}, coeff_real) = coeff_real ? "fma_MKL_Complex8_float" : "fma_MKL_Complex8" -fma_function_name(T::Type{Complex{Float64}}, coeff_real) = +fma_function_name(T::Type{Complex{Float64}}, coeff_real) = coeff_real ? "fma_MKL_Complex16_double" : "fma_MKL_Complex16" function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, coeff_names, coeff_is_real, parent_mems) where S <: Real - if isempty(coeff_names) - lhs = "*(" * nodemem * " + i)" - push_code!(code, lhs * ".real = 0;", ind_lvl = 2) - push_code!(code, lhs * ".imag = 0;", ind_lvl = 2) - else - for (it, (coeff, coeff_real, parent)) in enumerate(zip(coeff_names, coeff_is_real, parent_mems)) - statement = fma_function_name(T, coeff_real) * - "(" * nodemem * " + i, " * - reference_value(lang, T, coeff) * ", " * - parent * " + i);" - push_code!(code, statement; ind_lvl = 2) - end + for (it, (coeff, coeff_real, parent)) in + enumerate(zip(coeff_names, coeff_is_real, parent_mems)) + statement = fma_function_name(T, coeff_real) * + "(" * nodemem * " + i, " * + reference_value(lang, T, coeff) * ", " * + parent * " + i);" + push_code!(code, statement; ind_lvl = 2) end end @@ -316,9 +311,9 @@ function add_lincomb_identity_body(code, lang::LangC, T, nodemem, coeff_id, coef push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) end -acc_function_name(T::Type{Complex{Float32}}, coeff_real) = +acc_function_name(T::Type{Complex{Float32}}, coeff_real) = coeff_real ? "acc_MKL_Complex8_float" : "acc_MKL_Complex8" -acc_function_name(T::Type{Complex{Float64}}, coeff_real) = +acc_function_name(T::Type{Complex{Float64}}, coeff_real) = coeff_real ? "acc_MKL_Complex16_double" : "acc_MKL_Complex16" function add_lincomb_identity_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, coeff_id, coeff_real) where S <: Real @@ -496,10 +491,13 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) end # Write the linear combination. - push_code!(code, "for (size_t i = 0; i < n * n; i++) {") - add_lincomb_body(code, lang, T, nodemem, coeff_names, coeff_is_real, parent_mems) - push_code!(code, "}") - push_code!(code, "") + if isempty(coeff_names) + push_code!(code, "memset($nodemem, 0, n * n * sizeof(*master_mem));") + else + push_code!(code, "for (size_t i = 0; i < n * n; i++) {") + add_lincomb_body(code, lang, T, nodemem, coeff_names, coeff_is_real, parent_mems) + push_code!(code, "}") + end if id_coefficient != 0 (coeff_real, coeff_id, coeff_id_code) = declare_coeff(lang, From 950d92ad9db13b48cdd84076d310a70c1691e02e Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 22:10:24 +0000 Subject: [PATCH 15/27] Add tests to increase line coverage --- test/code_gen_test.jl | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/test/code_gen_test.jl b/test/code_gen_test.jl index 8ff8085..235f5c6 100644 --- a/test/code_gen_test.jl +++ b/test/code_gen_test.jl @@ -70,7 +70,9 @@ using LinearAlgebra, StaticArrays for i = 1:4 #Not high-precision test for Matlab and C (graph, crefs) = graph_ps_degopt([3 4 2 a[i] 1 0]) - add_ldiv!(graph, :R0, :B4, graph.outputs[1]) + add_ldiv!(graph, :R1, :B4, graph.outputs[1]) + add_lincomb!(graph,:Q, [1, 2, 3], [:I, :I, :I]) + add_lincomb!(graph,:R0, [2, 3, 4], [:I, :Q, :R1]) clear_outputs!(graph) add_output!(graph, :R0) @@ -80,20 +82,16 @@ using LinearAlgebra, StaticArrays rm(fname) # Test C code generation - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_MKL()) - rm(fname) - - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_MKL(true)) - rm(fname) - - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_OpenBLAS()) - rm(fname) + for gen_main in (true, false) + for overwrite_input in (true, false) + fname = tempname() * ".c" + gen_code(fname, graph, lang = LangC_MKL(gen_main, overwrite_input)) + rm(fname) - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_OpenBLAS(true)) - rm(fname) + fname = tempname() * ".c" + gen_code(fname, graph, lang = LangC_OpenBLAS(gen_main, overwrite_input)) + rm(fname) + end + end end end From 26544b17a292c2996e888368d4da8dfc9e0f99ac Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Fri, 12 Jan 2024 22:20:03 +0000 Subject: [PATCH 16/27] Add tests to increase coverage of matrix inversion --- test/code_gen_test.jl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/code_gen_test.jl b/test/code_gen_test.jl index 235f5c6..3db5d8d 100644 --- a/test/code_gen_test.jl +++ b/test/code_gen_test.jl @@ -71,10 +71,12 @@ using LinearAlgebra, StaticArrays for i = 1:4 #Not high-precision test for Matlab and C (graph, crefs) = graph_ps_degopt([3 4 2 a[i] 1 0]) add_ldiv!(graph, :R1, :B4, graph.outputs[1]) - add_lincomb!(graph,:Q, [1, 2, 3], [:I, :I, :I]) - add_lincomb!(graph,:R0, [2, 3, 4], [:I, :Q, :R1]) + add_ldiv!(graph, :R2, :R1, :I) + add_ldiv!(graph, :R3, :B4, :A) + add_lincomb!(graph,:R4, [1., 2., 3.], [:I, :I, :I]) + add_lincomb!(graph,:R5, [2., 3., 4., 5., 6.], [:I, :R1, :R2, :R2, :R4]) clear_outputs!(graph) - add_output!(graph, :R0) + add_output!(graph, :R5) # Test Matlab code generation (not execution) fname = tempname() * ".m" From 27749c0f5c04dd4289fc744bf60efb52c0a7ba16 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Sun, 14 Jan 2024 16:40:39 +0000 Subject: [PATCH 17/27] Add test to check if node with single lincomb is trivial --- src/compress_graph.jl | 3 ++- test/compress.jl | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/compress_graph.jl b/src/compress_graph.jl index 6b11314..93c9f3e 100644 --- a/src/compress_graph.jl +++ b/src/compress_graph.jl @@ -357,7 +357,8 @@ end """ compress_graph_passthrough!(graph,cref=[];verbose=false); -Identifies lincombs lincomb of length one with coeff equal to one. The node has no effect. It redirect appropriately. +Identifies lincombs of length one with coeff equal to one. +The node has no effect and is redirected appropriately. """ function compress_graph_passthrough!(graph, diff --git a/test/compress.jl b/test/compress.jl index 6327843..e326003 100644 --- a/test/compress.jl +++ b/test/compress.jl @@ -70,10 +70,15 @@ using LinearAlgebra @test has_trivial_nodes(graph) == true graph = Compgraph() - add_mult!(graph, :I2, :I, :I) # This node is trivial - add_mult!(graph, :AI, :A, :I2) # This node is trivial + add_lincomb!(graph, :A2, [2], [:A]) + add_output!(graph, :A2) + @test has_trivial_nodes(graph) == false + + graph = Compgraph() + add_mult!(graph, :I2, :I, :I) # This node is trivial + add_mult!(graph, :AI, :A, :I2) # This node is trivial add_mult!(graph, :IA, :I2, :AI) # This node is trivial - add_ldiv!(graph, :P0, :I, :IA) # This node is trivial + add_ldiv!(graph, :P0, :I, :IA) # This node is trivial add_output!(graph, :P0) graph1 = graph graph2 = deepcopy(graph) From 1af899006b1592e07dc7b3e0e539bbd08fc4902c Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Sun, 14 Jan 2024 16:54:11 +0000 Subject: [PATCH 18/27] Increase test coverage of Julia code generator --- test/code_gen_test.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/code_gen_test.jl b/test/code_gen_test.jl index 3db5d8d..fa2d52f 100644 --- a/test/code_gen_test.jl +++ b/test/code_gen_test.jl @@ -11,8 +11,11 @@ using LinearAlgebra, StaticArrays # Test julia code generation for i = 1:length(a) (graph, crefs) = graph_ps_degopt([3 4 2 -1 2 a[i]]) - add_lincomb!(graph,:Q,[2.0],graph.outputs) # Check - add_ldiv!(graph, :R0, :B4, :Q) + add_lincomb!(graph, :Q, [2.0], graph.outputs)# Check + add_ldiv!(graph, :R2, :B4, :Q) + add_lincomb!(graph, :Is, [2., 3., 5.], [:I, :I, :I]) + add_lincomb!(graph, :R1, [2., 2.], [:Is, :R2]) + add_ldiv!(graph, :R0, :R1, :I) clear_outputs!(graph) add_output!(graph, :R0) From ee327327adb0948159ae5538890a4cf328c328c2 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Mon, 15 Jan 2024 17:27:33 +0000 Subject: [PATCH 19/27] Improve coefficients of lincomb nodes in C generated code + Use consecutive indices starting from 0 for non-identity nodes + Do not generate code for combinations with coefficient 0 --- src/code_gen/gen_c_code.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 56db70e..32cc6fb 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -457,15 +457,20 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) coeff_is_real = Vector() parent_mems = Vector() id_coefficient = 0 + counter = 1 for (i, v) in enumerate(graph.coeffs[node]) n = graph.parents[node][i] + if v == 0 + continue + end if (n == :I) # Coefficient of identities. id_coefficient += v else # Coefficient of other nodes. (coeff_real, coeff_i, coeff_i_code) = declare_coeff(lang, v, - "$node" * "_" * "$i") + "$node" * "_" * "$counter") + counter += 1 push_code!(code, coeff_i_code) push!(coeff_names, coeff_i) push!(coeff_is_real, coeff_real) From b8e774f99d6662d4eba008ee745d928a8b97f788 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Mon, 15 Jan 2024 21:19:40 +0000 Subject: [PATCH 20/27] Remove unnecessary operations in generated C code --- src/code_gen/gen_c_code.jl | 60 ++++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 32cc6fb..ed18a51 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -108,6 +108,13 @@ function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) acc->imag += *a * b->imag; } +void prod_MKL_Complex8_float(MKL_Complex8 *acc, + const float *a, + const MKL_Complex8 *b) { + acc->real = *a * b->real; + acc->imag = *a * b->imag; +} + void acc_MKL_Complex8_float(MKL_Complex8 *acc, const float *a) { acc->real += *a; @@ -120,6 +127,13 @@ void fma_MKL_Complex8(MKL_Complex8 *acc, acc->imag += a->real * b->imag + a->imag * b->real; } +void prod_MKL_Complex8(MKL_Complex8 *acc, + const MKL_Complex8 *a, + const MKL_Complex8 *b) { + acc->real = a->real * b->real - a->imag * b->imag; + acc->imag = a->real * b->imag + a->imag * b->real; +} + void acc_MKL_Complex8(MKL_Complex8 *acc, const MKL_Complex16 *a) { acc->real += a->real; @@ -136,6 +150,13 @@ function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) acc->imag += *a * b->imag; } +void prod_MKL_Complex16_double(MKL_Complex16 *acc, + const double *a, + const MKL_Complex16 *b) { + acc->real = *a * b->real; + acc->imag = *a * b->imag; +} + void acc_MKL_Complex16_double(MKL_Complex16 *acc, const double *a) { acc->real += *a; @@ -148,6 +169,13 @@ void fma_MKL_Complex16(MKL_Complex16 *acc, acc->imag += a->real * b->imag + a->imag * b->real; } +void prod_MKL_Complex16(MKL_Complex16 *acc, + const MKL_Complex16 *a, + const MKL_Complex16 *b) { + acc->real = a->real * b->real - a->imag * b->imag; + acc->imag = a->real * b->imag + a->imag * b->real; +} + void acc_MKL_Complex16(MKL_Complex16 *acc, const MKL_Complex16 *a) { acc->real += a->real; @@ -291,15 +319,22 @@ function add_lincomb_body(code, lang::LangC, T, nodemem, push_code!(code, for_body; ind_lvl = 2) end +prod_function_name(T::Type{Complex{Float32}}, coeff_real) = + coeff_real ? "prod_MKL_Complex8_float" : "prod_MKL_Complex8" +prod_function_name(T::Type{Complex{Float64}}, coeff_real) = + coeff_real ? "prod_MKL_Complex16_double" : "prod_MKL_Complex16" fma_function_name(T::Type{Complex{Float32}}, coeff_real) = coeff_real ? "fma_MKL_Complex8_float" : "fma_MKL_Complex8" fma_function_name(T::Type{Complex{Float64}}, coeff_real) = coeff_real ? "fma_MKL_Complex16_double" : "fma_MKL_Complex16" function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, - coeff_names, coeff_is_real, parent_mems) where S <: Real + coeff_names, coeff_is_real, + parent_mems) where S <: Real for (it, (coeff, coeff_real, parent)) in enumerate(zip(coeff_names, coeff_is_real, parent_mems)) - statement = fma_function_name(T, coeff_real) * + statement = (it == 1 ? + prod_function_name(T, coeff_real) : + fma_function_name(T, coeff_real)) * "(" * nodemem * " + i, " * reference_value(lang, T, coeff) * ", " * parent * " + i);" @@ -307,7 +342,8 @@ function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, end end -function add_lincomb_identity_body(code, lang::LangC, T, nodemem, coeff_id, coeff_real) +function add_lincomb_identity_body(code, lang::LangC, T, + nodemem, coeff_id, coeff_real) push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) end @@ -482,7 +518,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) nodemem = nothing recycle_parent = nothing for n in graph.parents[node] - if (n in dealloc_list) # If it's about to deallocated + if (n in dealloc_list) # If it's about to be deallocated nodemem = get_slot_name(mem, n) recycle_parent = n break @@ -493,6 +529,19 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) alloc_slot!(mem, nodemem_i, node) else push_comment!(code, "Smart lincomb recycle $recycle_parent.") + # Move parent to be recycled to the beginning of the vector. + index = findfirst(x -> occursin(nodemem, x), parent_mems) + + splice!(parent_mems, index) + pushfirst!(parent_mems, nodemem) + + coeff_name = coeff_names[index] + splice!(coeff_names, index) + pushfirst!(coeff_names, coeff_name) + + coeff_real = coeff_is_real[index] + splice!(coeff_is_real, index) + pushfirst!(coeff_is_real, coeff_real) end # Write the linear combination. @@ -500,7 +549,8 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) push_code!(code, "memset($nodemem, 0, n * n * sizeof(*master_mem));") else push_code!(code, "for (size_t i = 0; i < n * n; i++) {") - add_lincomb_body(code, lang, T, nodemem, coeff_names, coeff_is_real, parent_mems) + add_lincomb_body(code, lang, T, nodemem, + coeff_names, coeff_is_real, parent_mems) push_code!(code, "}") end From 6e5fd67cd747173beee487f9afe15ca84c125776 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Mon, 15 Jan 2024 21:19:53 +0000 Subject: [PATCH 21/27] Fix bug in generated C code --- src/code_gen/gen_c_code.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index ed18a51..1abd1f5 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -165,7 +165,7 @@ void acc_MKL_Complex16_double(MKL_Complex16 *acc, void fma_MKL_Complex16(MKL_Complex16 *acc, const MKL_Complex16 *a, const MKL_Complex16 *b) { - acc->real += a->real * a->real - b->imag * b->imag; + acc->real += a->real * b->real - a->imag * b->imag; acc->imag += a->real * b->imag + a->imag * b->real; } From 0b3915c55074f8f1d09f8caefe90ea5c720ec7e8 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Tue, 16 Jan 2024 09:46:35 +0000 Subject: [PATCH 22/27] Add minimal documentation for fma_, prod_, acc_ functions --- src/code_gen/gen_c_code.jl | 120 +++++++++++++++---------------------- 1 file changed, 47 insertions(+), 73 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 1abd1f5..ba3673b 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -99,92 +99,66 @@ function add_auxiliary_functions(code, ::LangC, T) return # In general, nothing is needed. end -function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) - push_code!(code, -"void fma_MKL_Complex8_float(MKL_Complex8 *acc, - const float *a, - const MKL_Complex8 *b) { - acc->real += *a * b->real; - acc->imag += *a * b->imag; +function create_auxiliary_functions(complex_type, real_type) + return "// Compute c <- c + a * b. +void fma_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a, + const $(complex_type) *b) { + c->real += *a * b->real; + c->imag += *a * b->imag; } -void prod_MKL_Complex8_float(MKL_Complex8 *acc, - const float *a, - const MKL_Complex8 *b) { - acc->real = *a * b->real; - acc->imag = *a * b->imag; +// Compute c <- a * b. +void prod_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a, + const $(complex_type) *b) { + c->real = *a * b->real; + c->imag = *a * b->imag; } -void acc_MKL_Complex8_float(MKL_Complex8 *acc, - const float *a) { - acc->real += *a; +// Compute c <- c + a. +void acc_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a) { + c->real += *a; } -void fma_MKL_Complex8(MKL_Complex8 *acc, - const MKL_Complex8 *a, - const MKL_Complex8 *b) { - acc->real += a->real * b->real - a->imag * b->imag; - acc->imag += a->real * b->imag + a->imag * b->real; +// Compute c <- c + a * b. +void fma_$(complex_type)($(complex_type) *c, + const $(complex_type) *a, + const $(complex_type) *b) { + c->real += a->real * b->real - a->imag * b->imag; + c->imag += a->real * b->imag + a->imag * b->real; } -void prod_MKL_Complex8(MKL_Complex8 *acc, - const MKL_Complex8 *a, - const MKL_Complex8 *b) { - acc->real = a->real * b->real - a->imag * b->imag; - acc->imag = a->real * b->imag + a->imag * b->real; +// Compute c <- a * b. +void prod_$(complex_type)($(complex_type) *c, + const $(complex_type) *a, + const $(complex_type) *b) { + c->real = a->real * b->real - a->imag * b->imag; + c->imag = a->real * b->imag + a->imag * b->real; } -void acc_MKL_Complex8(MKL_Complex8 *acc, - const MKL_Complex16 *a) { - acc->real += a->real; - acc->imag += a->imag; -}", ind_lvl = 0) +// Compute c <- c + a. +void acc_$(complex_type)($(complex_type) *c, + const $(complex_type) *a) { + c->real += a->real; + c->imag += a->imag; +}" end - -function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) - push_code!(code, -"void fma_MKL_Complex16_double(MKL_Complex16 *acc, - const double *a, - const MKL_Complex16 *b) { - acc->real += *a * b->real; - acc->imag += *a * b->imag; -} - -void prod_MKL_Complex16_double(MKL_Complex16 *acc, - const double *a, - const MKL_Complex16 *b) { - acc->real = *a * b->real; - acc->imag = *a * b->imag; -} - -void acc_MKL_Complex16_double(MKL_Complex16 *acc, - const double *a) { - acc->real += *a; -} - -void fma_MKL_Complex16(MKL_Complex16 *acc, - const MKL_Complex16 *a, - const MKL_Complex16 *b) { - acc->real += a->real * b->real - a->imag * b->imag; - acc->imag += a->real * b->imag + a->imag * b->real; -} - -void prod_MKL_Complex16(MKL_Complex16 *acc, - const MKL_Complex16 *a, - const MKL_Complex16 *b) { - acc->real = a->real * b->real - a->imag * b->imag; - acc->imag = a->real * b->imag + a->imag * b->real; -} - -void acc_MKL_Complex16(MKL_Complex16 *acc, - const MKL_Complex16 *a) { - acc->real += a->real; - acc->imag += a->imag; -}", ind_lvl = 0) +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) + push_code!( + code, + create_auxiliary_functions("MKL_Complex8", "float"), + ind_lvl = 0 + ) end -function preprocess_codegen(graph, lang::LangC) - return graph +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) + push_code!( + code, + create_auxiliary_functions("MKL_Complex16", "double"), + ind_lvl = 0 + ) end function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) From db33faf0f719f37da0be5a5d0a9cb9550b225420 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Tue, 16 Jan 2024 10:45:55 +0000 Subject: [PATCH 23/27] Fix bug in generated MKL C code --- src/code_gen/gen_c_code.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index ba3673b..efa2ad0 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -126,16 +126,20 @@ void acc_$(complex_type)_$(real_type)($(complex_type) *c, void fma_$(complex_type)($(complex_type) *c, const $(complex_type) *a, const $(complex_type) *b) { - c->real += a->real * b->real - a->imag * b->imag; - c->imag += a->real * b->imag + a->imag * b->real; + $(real_type) real_part = a->real * b->real - a->imag * b->imag; + $(real_type) imag_part = a->real * b->imag + a->imag * b->real; + c->real += real_part; + c->imag += imag_part; } // Compute c <- a * b. void prod_$(complex_type)($(complex_type) *c, const $(complex_type) *a, const $(complex_type) *b) { - c->real = a->real * b->real - a->imag * b->imag; - c->imag = a->real * b->imag + a->imag * b->real; + $(real_type) real_part = a->real * b->real - a->imag * b->imag; + $(real_type) imag_part = a->real * b->imag + a->imag * b->real; + c->real = real_part; + c->imag = imag_part; } // Compute c <- c + a. From 4cec625178fb24142ac693bab1ca6c8471ee63b9 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Tue, 16 Jan 2024 17:31:33 +0000 Subject: [PATCH 24/27] Fix generation of random matrix for MKL complex types --- src/code_gen/gen_c_code.jl | 45 ++++++++++++++++++++++++++++---------- src/code_gen/gen_code.jl | 4 ++-- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index efa2ad0..19e31e7 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -35,17 +35,21 @@ comment(::LangC, s) = "/* $s */" slotname(::LangC, i) = "memslots[$(i-1)]" # Variable declaration, initialization, and reference. -# In C, complex types are structures and are passed by reference. -function initialization_string(::LangC, val::T) where {T<:Real} - return "$val" +# In MKL, complex types are structures and are passed by reference. +function initialization_string(::LangC_MKL, real_part, imag_part) + return "{.real = " * string(real_part) * ", " * + ".imag = " * string(imag_part) * "}" +end +function initialization_string(::LangC_OpenBLAS, real_part, imag_part) + return string(real_part) * " + " * string(imag_part) * "*I" end -function initialization_string(::LangC_MKL, val::T) where {T<:Complex} - return "{.real = " * string(real(val)) * ", " * - ".imag = " * string(imag(val)) * "}" +function initialization_string(lang::LangC, val::T) where {T<:Complex} + return initialization_string(lang, real(val), imag(val)) end -function initialization_string(::LangC_OpenBLAS, val::T) where {T<:Complex} - return string(real(val)) * " + " * string(imag(val)) * "*I" +function initialization_string(lang::LangC, val::T) where {T<:Real} + return "$val" end + function declare_var(lang::LangC, val, id, type) return "$type $id = " * initialization_string(lang, val) * ";" end @@ -59,6 +63,18 @@ function declare_coeff(lang::LangC, val::T, id) where T return (is_real_val, variable_string, dec_init_string) end +function assignment_string(::LangC, var, real_part) + return "$var = $(real_part)" +end +function assignment_string(::LangC_MKL, var, real_part, imag_part) + return "$(string(var)).real = $(real_part); " * + "$(string(var)).imag = $(imag_part);" +end +function assignment_string(lang::LangC_OpenBLAS, var, real_part, imag_part) + return "$(string(var)) = " * + initialization_string(lang, real_part, imag_part) * ";" +end + # Constant declaration and reference. function declare_const(lang::LangC, val, id, type) return "const " * declare_var(lang::LangC, val, id, type) @@ -98,7 +114,6 @@ end function add_auxiliary_functions(code, ::LangC, T) return # In general, nothing is needed. end - function create_auxiliary_functions(complex_type, real_type) return "// Compute c <- c + a * b. void fma_$(complex_type)_$(real_type)($(complex_type) *c, @@ -568,6 +583,14 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) return (code, nodemem) end +random_string() = "rand() / (1.0 * RAND_MAX)"; +function generate_random_entry(lang::LangC, ::Type{T}) where {T<:Real} + return assignment_string(lang, "A[i]", random_string()) +end +function generate_random_entry(lang::LangC, ::Type{T}) where {T<:Complex} + return assignment_string(lang, "A[i]", random_string(), random_string()) +end + function scalar_to_string(::LangC, z) return "$z" end @@ -597,7 +620,7 @@ function compilation_string(::LangC_MKL, fname) return "gcc -o main_compiled $fname -lmkl_rt" end -function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) +function gen_main(lang::LangC, T, fname, funname, graph; A = 10::Union{Integer,Matrix}) code = init_code(lang) if (lang.gen_main) (blas_type, blas_prefix) = get_blas_type(lang, T) @@ -635,7 +658,7 @@ function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) push_code!(code, "srand(0);") push_code!(code, "blas_type *A = malloc(n * n * sizeof(*A));") push_code!(code, "for(i = 0; i < n * n; i++){") - push_code!(code, "A[i] = rand() / (1.0 * RAND_MAX);", ind_lvl = 2) + push_code!(code, generate_random_entry(lang, eltype(graph)), ind_lvl = 2) push_code!(code, "}") end diff --git a/src/code_gen/gen_code.jl b/src/code_gen/gen_code.jl index ecc49ad..d0a6ebf 100644 --- a/src/code_gen/gen_code.jl +++ b/src/code_gen/gen_code.jl @@ -19,7 +19,7 @@ export gen_code # dealloc_list, mem) # Fallback. Default to no main function -function gen_main(lang, T, fname, funname) +function gen_main(lang, T, fname, funname, graph) return init_code(lang) end @@ -137,7 +137,7 @@ function _gen_code(fname, graph, lang, priohelp, funname, precomputed_nodes) println(file, to_string(function_end(lang, graph, mem))) # Generate main function, if necessary. - exec_code = gen_main(lang, T, fname, funname) + exec_code = gen_main(lang, T, fname, funname, graph) println(file, to_string(exec_code)) if (fname isa String) From fac0e489812b61d02b3caf04087040ceab554c53 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Tue, 16 Jan 2024 17:56:54 +0000 Subject: [PATCH 25/27] Simplify data passed across functions --- src/code_gen/gen_c_code.jl | 53 +++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 19e31e7..790b7a8 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -55,12 +55,11 @@ function declare_var(lang::LangC, val, id, type) end function declare_coeff(lang::LangC, val::T, id) where T # Use real type regardless of T. - is_real_val = isreal(val) - actual_val = is_real_val ? real(val) : val + actual_val = isreal(val) ? real(val) : val (blas_t, blas_prefix) = get_blas_type(lang, typeof(actual_val)) variable_string = "coeff_$id" dec_init_string = declare_var(lang, actual_val, variable_string, blas_t) - return (is_real_val, variable_string, dec_init_string) + return (val, variable_string, dec_init_string) end function assignment_string(::LangC, var, real_part) @@ -305,7 +304,7 @@ function function_end(lang::LangC, graph, mem) end function add_lincomb_body(code, lang::LangC, T, nodemem, - coeff_names, coeff_is_real, parent_mems) + coeff_names, coeff_values, parent_mems) rhs = join((coeff_names .* " * ") .* ("*(" .* parent_mems .* " + i)"), " + \n ") for_body = "*(" * nodemem * " + i) = " * (isempty(rhs) ? "0" : rhs) * ";" @@ -321,13 +320,14 @@ fma_function_name(T::Type{Complex{Float32}}, coeff_real) = fma_function_name(T::Type{Complex{Float64}}, coeff_real) = coeff_real ? "fma_MKL_Complex16_double" : "fma_MKL_Complex16" function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, - coeff_names, coeff_is_real, + coeff_names, coeff_values, parent_mems) where S <: Real - for (it, (coeff, coeff_real, parent)) in - enumerate(zip(coeff_names, coeff_is_real, parent_mems)) - statement = (it == 1 ? - prod_function_name(T, coeff_real) : - fma_function_name(T, coeff_real)) * + for (it, (coeff, coeff_value, parent)) in + enumerate(zip(coeff_names, coeff_values, parent_mems)) + operation = it == 1 ? + prod_function_name(T, isreal(coeff_value)) : + fma_function_name(T, isreal(coeff_value)) + statement = operation * "(" * nodemem * " + i, " * reference_value(lang, T, coeff) * ", " * parent * " + i);" @@ -336,8 +336,12 @@ function add_lincomb_body(code, lang::LangC_MKL, T::Type{Complex{S}}, nodemem, end function add_lincomb_identity_body(code, lang::LangC, T, - nodemem, coeff_id, coeff_real) - push_code!(code, "*(" * nodemem * " + i) += " * coeff_id * ";", ind_lvl = 2) + nodemem, coeff_id, coeff_value) + push_code!( + code, + "*(" * nodemem * " + i) += " * coeff_id * ";", + ind_lvl = 2 + ) end acc_function_name(T::Type{Complex{Float32}}, coeff_real) = @@ -345,8 +349,8 @@ acc_function_name(T::Type{Complex{Float32}}, coeff_real) = acc_function_name(T::Type{Complex{Float64}}, coeff_real) = coeff_real ? "acc_MKL_Complex16_double" : "acc_MKL_Complex16" function add_lincomb_identity_body(code, lang::LangC_MKL, T::Type{Complex{S}}, - nodemem, coeff_id, coeff_real) where S <: Real - statement = acc_function_name(T, coeff_real) * "(" * nodemem * " + i, " * + nodemem, coeff_id, coeff_value) where S <: Real + statement = acc_function_name(T, isreal(coeff_value)) * "(" * nodemem * " + i, " * reference_value(lang, T, coeff_id) * ");" push_code!(code, statement, ind_lvl = 2) end @@ -483,7 +487,7 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) # Set coefficients. coeff_names = Vector() - coeff_is_real = Vector() + coeff_values = Vector() parent_mems = Vector() id_coefficient = 0 counter = 1 @@ -497,12 +501,12 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) id_coefficient += v else # Coefficient of other nodes. - (coeff_real, coeff_i, coeff_i_code) = declare_coeff(lang, v, + (coeff_value, coeff_i, coeff_i_code) = declare_coeff(lang, v, "$node" * "_" * "$counter") counter += 1 push_code!(code, coeff_i_code) push!(coeff_names, coeff_i) - push!(coeff_is_real, coeff_real) + push!(coeff_values, coeff_value) push!(parent_mems, get_slot_name(mem, n)) end end @@ -532,9 +536,9 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) splice!(coeff_names, index) pushfirst!(coeff_names, coeff_name) - coeff_real = coeff_is_real[index] - splice!(coeff_is_real, index) - pushfirst!(coeff_is_real, coeff_real) + coeff_value = coeff_values[index] + splice!(coeff_values, index) + pushfirst!(coeff_values, coeff_value) end # Write the linear combination. @@ -543,19 +547,16 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) else push_code!(code, "for (size_t i = 0; i < n * n; i++) {") add_lincomb_body(code, lang, T, nodemem, - coeff_names, coeff_is_real, parent_mems) + coeff_names, coeff_value, parent_mems) push_code!(code, "}") end if id_coefficient != 0 - (coeff_real, coeff_id, coeff_id_code) = declare_coeff(lang, + (coeff_value, coeff_id, coeff_id_code) = declare_coeff(lang, id_coefficient, "$node" * "_0") push_code!(code, coeff_id_code) - # for statement in coeff_id_code - # push_code!(code, statement) - # end push_code!(code, "for (size_t i = 0; i < n * n; i += n + 1) {") - add_lincomb_identity_body(code, lang, T, nodemem, coeff_id, coeff_real) + add_lincomb_identity_body(code, lang, T, nodemem, coeff_id, coeff_value) push_code!(code, "}") end From 68a2e882763d311874c737ac797dc29bef80dc19 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Wed, 17 Jan 2024 11:16:22 +0000 Subject: [PATCH 26/27] Switch to `lapack_complex_` types for OpenBLAS generated code --- src/code_gen/gen_c_code.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 790b7a8..41e0c4b 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -103,10 +103,10 @@ function get_blas_type(::LangC_MKL, T::Type{Complex{Float64}}) return ("MKL_Complex16", "z") end function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float32}}) - return ("openblas_complex_float", "c") + return ("lapack_complex_float", "c") end function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float64}}) - return ("openblas_complex_double", "z") + return ("lapack_complex_double", "z") end # Add functions to work with MKL complex types. From 78753f948934df1347860b982d9e2f8bca1a7123 Mon Sep 17 00:00:00 2001 From: Massimiliano Fasi Date: Wed, 17 Jan 2024 11:21:14 +0000 Subject: [PATCH 27/27] Remove unreachable matrix generation functionality in C generator --- src/code_gen/gen_c_code.jl | 46 +++++++------------------------------- 1 file changed, 8 insertions(+), 38 deletions(-) diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index 41e0c4b..a1410e9 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -592,28 +592,6 @@ function generate_random_entry(lang::LangC, ::Type{T}) where {T<:Complex} return assignment_string(lang, "A[i]", random_string(), random_string()) end -function scalar_to_string(::LangC, z) - return "$z" -end -function scalar_to_string(::LangC_OpenBLAS, z::T) where {T<:Complex} - return "$(real(z)) + $(imag(z)) * I" -end -function scalar_to_string(::LangC_MKL, z::T) where {T<:Complex} - return "{$(real(z)), $(imag(z))}" -end -function print_indented_matrix(lang::LangC, code, A; ind_lvl = 1) - (m, n) = size(A) - for j = 1:n - column_string = "" - for i = 1:m - column_string *= - scalar_to_string(lang, A[i, j]) * (i != m ? ", " : "") - end - column_string *= (j != n ? "," : "") - push_code!(code, column_string, ind_lvl = ind_lvl) - end -end - function compilation_string(::LangC_OpenBLAS, fname) return "gcc -o main_compiled $fname -lblas -llapacke" end @@ -621,7 +599,7 @@ function compilation_string(::LangC_MKL, fname) return "gcc -o main_compiled $fname -lmkl_rt" end -function gen_main(lang::LangC, T, fname, funname, graph; A = 10::Union{Integer,Matrix}) +function gen_main(lang::LangC, T, fname, funname, graph) code = init_code(lang) if (lang.gen_main) (blas_type, blas_prefix) = get_blas_type(lang, T) @@ -647,21 +625,13 @@ function gen_main(lang::LangC, T, fname, funname, graph; A = 10::Union{Integer,M push_code!(code, "size_t i;") # Generate matrix. - if isa(A, Matrix) - n = LinearAlgebra.checksquare(A) - push_code!(code, "size_t n = $n;") - push_code!(code, "blas_type A[$(n * n)] = {") - print_indented_matrix(lang, code, A, ind_lvl = 2) - push_code!(code, "};") - else # A is an integer - n = A - push_code!(code, "size_t n = $n;") - push_code!(code, "srand(0);") - push_code!(code, "blas_type *A = malloc(n * n * sizeof(*A));") - push_code!(code, "for(i = 0; i < n * n; i++){") - push_code!(code, generate_random_entry(lang, eltype(graph)), ind_lvl = 2) - push_code!(code, "}") - end + n = 4 + push_code!(code, "size_t n = $n;") + push_code!(code, "srand(0);") + push_code!(code, "blas_type *A = malloc(n * n * sizeof(*A));") + push_code!(code, "for(i = 0; i < n * n; i++){") + push_code!(code, generate_random_entry(lang, eltype(graph)), ind_lvl = 2) + push_code!(code, "}") # Call polynomial evaluation function. push_code!(code, "blas_type *B = malloc(n * n * sizeof(*A));")