diff --git a/src/code_gen/gen_c_code.jl b/src/code_gen/gen_c_code.jl index fdb7e10..a1410e9 100644 --- a/src/code_gen/gen_c_code.jl +++ b/src/code_gen/gen_c_code.jl @@ -4,7 +4,7 @@ export LangC_MKL, LangC_OpenBLAS """ LangC_MKL(gen_main::Bool) -Code generation in C using MKL implementation of BLAS. +Code generation in C using the oneAPI MKL implementation of BLAS. """ struct LangC_MKL gen_main::Any @@ -14,7 +14,7 @@ end """ LangC_OpenBLAS(gen_main::Bool) -Code generation in C using OpenBLAS implementation of BLAS. +Code generation in C using the OpenBLAS implementation of BLAS. """ struct LangC_OpenBLAS gen_main::Any @@ -32,56 +32,58 @@ end # Language specific operations. comment(::LangC, s) = "/* $s */" - slotname(::LangC, i) = "memslots[$(i-1)]" # Variable declaration, initialization, and reference. -# In C, complex types are structures and are passed by reference. -function assign_coeff(::LangC_MKL, val::T, i) where {T<:Complex} - assignment_string = [ - "coeff$i.real = " * string(real(val)) * ";", - "coeff$i.imag = " * string(imag(val)) * ";", - ] - variable_string = "&coeff$i" - return (variable_string, assignment_string) -end -function assign_coeff(::LangC_OpenBLAS, val::T, i) where {T<:Complex} - assignment_string = - ["coeff$i = " * string(real(val)) * " + " * string(imag(val)) * "*I;"] - variable_string = "&coeff$i" - return (variable_string, assignment_string) -end -function assign_coeff(::LangC, val::T, i) where {T<:Real} - assignment_string = ["coeff$i = $val;"] - variable_string = "coeff$i" - return (variable_string, assignment_string) +# In MKL, complex types are structures and are passed by reference. +function initialization_string(::LangC_MKL, real_part, imag_part) + return "{.real = " * string(real_part) * ", " * + ".imag = " * string(imag_part) * "}" +end +function initialization_string(::LangC_OpenBLAS, real_part, imag_part) + return string(real_part) * " + " * string(imag_part) * "*I" +end +function initialization_string(lang::LangC, val::T) where {T<:Complex} + return initialization_string(lang, real(val), imag(val)) +end +function initialization_string(lang::LangC, val::T) where {T<:Real} + return "$val" +end + +function declare_var(lang::LangC, val, id, type) + return "$type $id = " * initialization_string(lang, val) * ";" +end +function declare_coeff(lang::LangC, val::T, id) where T + # Use real type regardless of T. + actual_val = isreal(val) ? real(val) : val + (blas_t, blas_prefix) = get_blas_type(lang, typeof(actual_val)) + variable_string = "coeff_$id" + dec_init_string = declare_var(lang, actual_val, variable_string, blas_t) + return (val, variable_string, dec_init_string) +end + +function assignment_string(::LangC, var, real_part) + return "$var = $(real_part)" +end +function assignment_string(::LangC_MKL, var, real_part, imag_part) + return "$(string(var)).real = $(real_part); " * + "$(string(var)).imag = $(imag_part);" +end +function assignment_string(lang::LangC_OpenBLAS, var, real_part, imag_part) + return "$(string(var)) = " * + initialization_string(lang, real_part, imag_part) * ";" end # Constant declaration and reference. -function declare_constant(::LangC_MKL, val::T, id, type) where {T<:Complex} - return "const $type $id = {.real = " * - string(real(val)) * - ", " * - ".imag = " * - string(imag(val)) * - "};" -end -function declare_constant(::LangC_OpenBLAS, val::T, id, type) where {T<:Complex} - return "const $type $id = " * - string(real(val)) * - " + " * - string(imag(val)) * - "*I;" -end -function declare_constant(::LangC, val::T, id, type) where {T<:Real} - return "const $type $id = $val;" -end -reference_constant(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" +function declare_const(lang::LangC, val, id, type) + return "const " * declare_var(lang::LangC, val, id, type) +end +reference_value(::LangC, T, id) = (T <: Complex) ? "&$id" : "$id" # Code generation. # Return MKL/OpenBLAS-specific includes. function get_blas_includes(::LangC_MKL) - return "#include" + return "#include" end function get_blas_includes(::LangC_OpenBLAS) return "#include\n#include" @@ -101,16 +103,80 @@ function get_blas_type(::LangC_MKL, T::Type{Complex{Float64}}) return ("MKL_Complex16", "z") end function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float32}}) - return ("openblas_complex_float", "c") + return ("lapack_complex_float", "c") end function get_blas_type(::LangC_OpenBLAS, T::Type{Complex{Float64}}) - return ("openblas_complex_double", "z") + return ("lapack_complex_double", "z") end +# Add functions to work with MKL complex types. +function add_auxiliary_functions(code, ::LangC, T) + return # In general, nothing is needed. +end +function create_auxiliary_functions(complex_type, real_type) + return "// Compute c <- c + a * b. +void fma_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a, + const $(complex_type) *b) { + c->real += *a * b->real; + c->imag += *a * b->imag; +} + +// Compute c <- a * b. +void prod_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a, + const $(complex_type) *b) { + c->real = *a * b->real; + c->imag = *a * b->imag; +} + +// Compute c <- c + a. +void acc_$(complex_type)_$(real_type)($(complex_type) *c, + const $(real_type) *a) { + c->real += *a; +} + +// Compute c <- c + a * b. +void fma_$(complex_type)($(complex_type) *c, + const $(complex_type) *a, + const $(complex_type) *b) { + $(real_type) real_part = a->real * b->real - a->imag * b->imag; + $(real_type) imag_part = a->real * b->imag + a->imag * b->real; + c->real += real_part; + c->imag += imag_part; +} + +// Compute c <- a * b. +void prod_$(complex_type)($(complex_type) *c, + const $(complex_type) *a, + const $(complex_type) *b) { + $(real_type) real_part = a->real * b->real - a->imag * b->imag; + $(real_type) imag_part = a->real * b->imag + a->imag * b->real; + c->real = real_part; + c->imag = imag_part; +} + +// Compute c <- c + a. +void acc_$(complex_type)($(complex_type) *c, + const $(complex_type) *a) { + c->real += a->real; + c->imag += a->imag; +}" +end +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float32}}) + push_code!( + code, + create_auxiliary_functions("MKL_Complex8", "float"), + ind_lvl = 0 + ) +end -function preprocess_codegen(graph, lang::LangC) - (g,_)=graph_bigraph(graph) - return g # Lang C only supports bigraphs +function add_auxiliary_functions(code, ::LangC_MKL, T::Type{Complex{Float64}}) + push_code!( + code, + create_auxiliary_functions("MKL_Complex16", "double"), + ind_lvl = 0 + ) end function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) @@ -121,6 +187,11 @@ function function_definition(lang::LangC, graph, T, funname, precomputed_nodes) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "#include", ind_lvl = 0) push_code!(code, "") + # Add auxiliary functions to work with MKL complex types in lincomb. + if :lincomb ∈ values(graph.operations) + add_auxiliary_functions(code, lang, T) + end + push_code!(code, "") push_comment!(code, "Code for matrix function evaluation.", ind_lvl = 0) input_variables = join(map(x -> "$blas_type *" * string(x), precomputed_nodes), ", ") @@ -145,7 +216,7 @@ function init_mem(lang::LangC, max_nof_nodes, precomputed_nodes) end function function_init(lang::LangC, T, mem, graph, precomputed_nodes) - (blas_type, blas_prefix) = get_blas_type(lang, T) + (blas_t, blas_prefix) = get_blas_type(lang, T) code = init_code(lang) max_nodes = size(mem.slots, 1) @@ -162,26 +233,14 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) push_code!(code, "") push_comment!(code, "Declarations and initializations.") - # Allocate coefficients for linear combinations, if needed. - if :lincomb in graph_ops - push_code!(code, "$blas_type coeff1, coeff2;") - end # Declare constants ZERO and ONE, if needed. if :mult in graph_ops - push_code!( - code, - declare_constant(lang, convert(T, 0.0), "ZERO", blas_type), - ) - end - if :lincomb in graph_ops || :mult in graph_ops - push_code!( - code, - declare_constant(lang, convert(T, 1.0), "ONE", blas_type), - ) + push_code!(code, declare_const(lang, convert(T, 0.0), "ZERO", blas_t)) + push_code!(code, declare_const(lang, convert(T, 1.0), "ONE", blas_t)) end # Array ipiv for pivots of GEPP (needed only if graph has linear systems). if :ldiv in graph_ops - push_code!(code, "lapack_int *ipiv = malloc(n*sizeof(*ipiv));") + push_code!(code, "lapack_int *ipiv = malloc(n * sizeof(*ipiv));") end push_code!(code, "size_t j;") push_code!(code, "") @@ -189,12 +248,12 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) push_comment!(code, "Memory management.") push_code!( code, - "$blas_type *master_mem = malloc(n*n*max_memalloc" * - "*sizeof(*master_mem));", + "$blas_t *master_mem = malloc(n * n * max_memalloc" * + " * sizeof(*master_mem));", ) push_code!( code, - "$blas_type *memslots[max_memslots]; " * + "$blas_t *memslots[max_memslots]; " * comment(lang, "As many slots as nodes"), ) @@ -211,26 +270,18 @@ function function_init(lang::LangC, T, mem, graph, precomputed_nodes) # Otherwise make a copy push_code!( code, - "memcpy($Ak_slot_name,$n,n*n*sizeof(*master_mem));", + "memcpy($Ak_slot_name, $n, n * n * sizeof(*master_mem));", ) end end # Initialize pointers. push_comment!(code, "The other slots are pointers to allocated memory.") - push_code!(code, "for (j=$(num_preallocated_slots-1); j occursin(nodemem, x), parent_mems) + + splice!(parent_mems, index) + pushfirst!(parent_mems, nodemem) + + coeff_name = coeff_names[index] + splice!(coeff_names, index) + pushfirst!(coeff_names, coeff_name) + + coeff_value = coeff_values[index] + splice!(coeff_values, index) + pushfirst!(coeff_values, coeff_value) + end + + # Write the linear combination. + if isempty(coeff_names) + push_code!(code, "memset($nodemem, 0, n * n * sizeof(*master_mem));") + else + push_code!(code, "for (size_t i = 0; i < n * n; i++) {") + add_lincomb_body(code, lang, T, nodemem, + coeff_names, coeff_value, parent_mems) + push_code!(code, "}") + end + + if id_coefficient != 0 + (coeff_value, coeff_id, coeff_id_code) = declare_coeff(lang, + id_coefficient, "$node" * "_0") + push_code!(code, coeff_id_code) + push_code!(code, "for (size_t i = 0; i < n * n; i += n + 1) {") + add_lincomb_identity_body(code, lang, T, nodemem, coeff_id, coeff_value) + push_code!(code, "}") + end - # Remove output from deallocation list. + if !(isnothing(recycle_parent)) + # Avoid deallocate recycled parent setdiff!(dealloc_list, [recycle_parent]) - # Update CodeMem. + # Set the memory pointer j = get_slot_number(mem, recycle_parent) set_slot_number!(mem, j, node) - else - # Allocate new slot for result. - (nodemem_i, nodemem) = get_free_slot(mem) - alloc_slot!(mem, nodemem_i, node) - - # Compute linear combination. - if p1_is_identity - push_code!( - code, - "memcpy($nodemem, $parent2mem, " * - "n*n*sizeof(*master_mem));", - ) - push_code!( - code, - "cblas_$blas_prefix" * - "scal(n*n, $coeff2, " * - "$nodemem, 1);", - ) - push_code!( - code, - "cblas_$blas_prefix" * - "axpby(n, " * - "$coeff1, &ONE, 0,\n" * - " $rone, $nodemem, n+1);", - ) - elseif p2_is_identity - push_code!( - code, - "memcpy($nodemem, $parent1mem, " * - "n*n*sizeof(*master_mem));", - ) - push_code!( - code, - "cblas_$blas_prefix" * - "scal(n*n, $coeff1, " * - "$nodemem, 1);", - ) - push_code!( - code, - "cblas_$blas_prefix" * - "axpby(n*n, " * - "$coeff2, &ONE, 0,\n" * - " $rone, $nodemem, n+1);", - ) - else - push_code!( - code, - "memcpy($nodemem, $parent2mem, " * - "n*n*sizeof(*master_mem));", - ) - push_code!( - code, - "cblas_$blas_prefix" * - "axpby(n*n, " * - "$coeff1, $parent1mem, 1,\n" * - " $coeff2, $nodemem, 1);", - ) - end end + else error("Unknown operation") end @@ -531,36 +584,22 @@ function execute_operation!(lang::LangC, T, graph, node, dealloc_list, mem) return (code, nodemem) end -function scalar_to_string(::LangC, z) - return "$z" -end -function scalar_to_string(::LangC_OpenBLAS, z::T) where {T<:Complex} - return "$(real(z)) + $(imag(z))*I" +random_string() = "rand() / (1.0 * RAND_MAX)"; +function generate_random_entry(lang::LangC, ::Type{T}) where {T<:Real} + return assignment_string(lang, "A[i]", random_string()) end -function scalar_to_string(::LangC_MKL, z::T) where {T<:Complex} - return "{$(real(z)), $(imag(z))}" -end -function print_indented_matrix(lang::LangC, code, A; ind_lvl = 1) - (m, n) = size(A) - for j = 1:n - column_string = "" - for i = 1:m - column_string *= - scalar_to_string(lang, A[i, j]) * (i != m ? ", " : "") - end - column_string *= (j != n ? "," : "") - push_code!(code, column_string, ind_lvl = ind_lvl) - end +function generate_random_entry(lang::LangC, ::Type{T}) where {T<:Complex} + return assignment_string(lang, "A[i]", random_string(), random_string()) end function compilation_string(::LangC_OpenBLAS, fname) - return "gcc -o main_compiled $fname -labials -llapacke" + return "gcc -o main_compiled $fname -lblas -llapacke" end function compilation_string(::LangC_MKL, fname) return "gcc -o main_compiled $fname -lmkl_rt" end -function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) +function gen_main(lang::LangC, T, fname, funname, graph) code = init_code(lang) if (lang.gen_main) (blas_type, blas_prefix) = get_blas_type(lang, T) @@ -586,25 +625,17 @@ function gen_main(lang::LangC, T, fname, funname; A = 10::Union{Integer,Matrix}) push_code!(code, "size_t i;") # Generate matrix. - if isa(A, Matrix) - n = LinearAlgebra.checksquare(A) - push_code!(code, "size_t n = $n;") - push_code!(code, "blas_type A[$(n*n)] = {") - print_indented_matrix(lang, code, A, ind_lvl = 2) - push_code!(code, "};") - else # A is an integer - n = A - push_code!(code, "size_t n = $n;") - push_code!(code, "srand(0);") - push_code!(code, "blas_type *A = malloc(n*n*sizeof(*A));") - push_code!(code, "for(i=0; i Bool + +Checks whether the graph has a linear combination that includes the identity +matrix. +""" +function has_lincomb_with_identity(graph) + lincombs = [key for (key, value) in graph.operations if value == :lincomb] + lincomb_parents = [graph.parents[key] for key in lincombs] + return any(x -> :I in x, lincomb_parents) +end + """ has_identity_lincomb(graph) -> Bool @@ -166,15 +174,9 @@ Checks whether the graph has a node which is a linear combination of identity matrices. """ function has_identity_lincomb(graph) - has_identity_lincomb = false - for (key, parents) in graph.parents - if graph.operations[key] == :lincomb && - all(parents .== :I) - has_identity_lincomb = true - break - end - end - return has_identity_lincomb + lincombs = [key for (key, value) in graph.operations if value == :lincomb] + lincomb_parents = [graph.parents[key] for key in lincombs] + return any(x -> all(x .== :I), lincomb_parents) end """ @@ -184,14 +186,7 @@ Checks whether the graph has trivial nodes, that is, multiplications by the identity or linear systems whose coefficient is the identity matrix. """ function has_trivial_nodes(graph) - has_trivial_nodes = false - for (key, parents) in graph.parents - if is_trivial_left(graph, key) || is_trivial_right(graph, key) - has_trivial_nodes = true - break - end - end - return has_trivial_nodes + return any(is_trivial(graph, key) for (key, value) in graph.parents) end """ @@ -362,7 +357,8 @@ end """ compress_graph_passthrough!(graph,cref=[];verbose=false); -Identifies lincombs lincomb of length one with coeff equal to one. The node has no effect. It redirect appropriately. +Identifies lincombs of length one with coeff equal to one. +The node has no effect and is redirected appropriately. """ function compress_graph_passthrough!(graph, diff --git a/test/code_gen_test.jl b/test/code_gen_test.jl index 8ff8085..fa2d52f 100644 --- a/test/code_gen_test.jl +++ b/test/code_gen_test.jl @@ -11,8 +11,11 @@ using LinearAlgebra, StaticArrays # Test julia code generation for i = 1:length(a) (graph, crefs) = graph_ps_degopt([3 4 2 -1 2 a[i]]) - add_lincomb!(graph,:Q,[2.0],graph.outputs) # Check - add_ldiv!(graph, :R0, :B4, :Q) + add_lincomb!(graph, :Q, [2.0], graph.outputs)# Check + add_ldiv!(graph, :R2, :B4, :Q) + add_lincomb!(graph, :Is, [2., 3., 5.], [:I, :I, :I]) + add_lincomb!(graph, :R1, [2., 2.], [:Is, :R2]) + add_ldiv!(graph, :R0, :R1, :I) clear_outputs!(graph) add_output!(graph, :R0) @@ -70,9 +73,13 @@ using LinearAlgebra, StaticArrays for i = 1:4 #Not high-precision test for Matlab and C (graph, crefs) = graph_ps_degopt([3 4 2 a[i] 1 0]) - add_ldiv!(graph, :R0, :B4, graph.outputs[1]) + add_ldiv!(graph, :R1, :B4, graph.outputs[1]) + add_ldiv!(graph, :R2, :R1, :I) + add_ldiv!(graph, :R3, :B4, :A) + add_lincomb!(graph,:R4, [1., 2., 3.], [:I, :I, :I]) + add_lincomb!(graph,:R5, [2., 3., 4., 5., 6.], [:I, :R1, :R2, :R2, :R4]) clear_outputs!(graph) - add_output!(graph, :R0) + add_output!(graph, :R5) # Test Matlab code generation (not execution) fname = tempname() * ".m" @@ -80,20 +87,16 @@ using LinearAlgebra, StaticArrays rm(fname) # Test C code generation - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_MKL()) - rm(fname) - - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_MKL(true)) - rm(fname) - - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_OpenBLAS()) - rm(fname) + for gen_main in (true, false) + for overwrite_input in (true, false) + fname = tempname() * ".c" + gen_code(fname, graph, lang = LangC_MKL(gen_main, overwrite_input)) + rm(fname) - fname = tempname() * ".c" - gen_code(fname, graph, lang = LangC_OpenBLAS(true)) - rm(fname) + fname = tempname() * ".c" + gen_code(fname, graph, lang = LangC_OpenBLAS(gen_main, overwrite_input)) + rm(fname) + end + end end end diff --git a/test/compress.jl b/test/compress.jl index d16575e..e326003 100644 --- a/test/compress.jl +++ b/test/compress.jl @@ -34,13 +34,25 @@ using LinearAlgebra # Check that we actually removed something @test size(get_all_cref(graph2), 1) < size(get_all_cref(graph1), 1) + ## Test function to detect if graph has linear combination with identities ## Test function to detect if graph has linear combination of identities graph = Compgraph() add_lincomb!(graph, :P1, 1.0, :A, 1.0, :I) add_lincomb!(graph, :P2, 1.0, :I, 1.0, :I) add_mult!(graph, :P0, :P1, :P2) + @test has_lincomb_with_identity(graph) == true @test has_identity_lincomb(graph) == true + graph = Compgraph() + add_lincomb!(graph, :P, 1.0, :A, 1.0, :I) + @test has_lincomb_with_identity(graph) == true + @test has_identity_lincomb(graph) == false + + graph = Compgraph() + add_mult!(graph, :P, :A, :A) + @test has_lincomb_with_identity(graph) == false + @test has_identity_lincomb(graph) == false + ## Test trivial node detection and removal graph = Compgraph() add_mult!(graph, :AI, :A, :I) @@ -58,10 +70,15 @@ using LinearAlgebra @test has_trivial_nodes(graph) == true graph = Compgraph() - add_mult!(graph, :I2, :I, :I) # This node is trivial - add_mult!(graph, :AI, :A, :I2) # This node is trivial + add_lincomb!(graph, :A2, [2], [:A]) + add_output!(graph, :A2) + @test has_trivial_nodes(graph) == false + + graph = Compgraph() + add_mult!(graph, :I2, :I, :I) # This node is trivial + add_mult!(graph, :AI, :A, :I2) # This node is trivial add_mult!(graph, :IA, :I2, :AI) # This node is trivial - add_ldiv!(graph, :P0, :I, :IA) # This node is trivial + add_ldiv!(graph, :P0, :I, :IA) # This node is trivial add_output!(graph, :P0) graph1 = graph graph2 = deepcopy(graph)