Skip to content

Commit d062060

Browse files
committed
System image compression with zstd
Co-authored-by: Gabriel Baraldi <[email protected]>
- madvise with MADV_WILLNEED before accessing sysimage pages
- MADV_WILLNEED sysimage pages before checksumming or decompressing. Even without compression, this gives about an 8% improvement in load times.
- Fix using jl_page_size before it is initialized
- Add zstd to ANALYSIS_DEPS now that it is used in staticdata.c
- Fix jl_page_size being uninitialized in jl_prefetch_system_image
- Add --compress-sysimage flag; use zstd-15 when enabled
1 parent 56a6065 commit d062060

File tree

8 files changed

+155
-42
lines changed

8 files changed

+155
-42
lines changed

Make.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1398,7 +1398,7 @@ JLDFLAGS += -Wl,--stack,8388608 --disable-auto-import --disable-runtime-pseudo-r
13981398
ifeq ($(ARCH),i686)
13991399
JLDFLAGS += -Wl,--large-address-aware
14001400
endif
1401-
JCPPFLAGS += -D_WIN32_WINNT=0x0502
1401+
JCPPFLAGS += -D_WIN32_WINNT=_WIN32_WINNT_WIN8
14021402
UNTRUSTED_SYSTEM_LIBM := 1
14031403
# Use hard links for files on windows, rather than soft links
14041404
# https://stackoverflow.com/questions/3648819/how-to-make-a-symbolic-link-with-cygwin-in-windows-7

base/options.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ struct JLOptions
5757
strip_ir::Int8
5858
permalloc_pkgimg::Int8
5959
heap_size_hint::UInt64
60+
compress_sysimage::Int8
6061
end
6162

6263
# This runs early in the sysimage != is not defined yet

base/util.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,9 @@ function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()); cpu_target::Unio
245245
if opts.use_sysimage_native_code == 0
246246
push!(addflags, "--sysimage-native-code=no")
247247
end
248+
if opts.compress_sysimage == 1
249+
push!(addflags, "--compress-sysimage=yes")
250+
end
248251
return `$julia -C $cpu_target -J$image_file $addflags`
249252
end
250253

src/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ LIBJULIA_PATH_REL := libjulia
167167
endif
168168

169169
COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
170-
RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI)
171-
CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI)
170+
RT_LIBS := $(WHOLE_ARCHIVE) $(LIBUV) $(WHOLE_ARCHIVE) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -lzstd
171+
CG_LIBS := $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS) $(LIBTRACYCLIENT) $(LIBITTAPI) -lzstd
172172
RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
173173
CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
174174
RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
@@ -471,7 +471,7 @@ $(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG
471471
# before attempting this static analysis, so that all necessary headers
472472
# and dependencies are properly installed:
473473
# make -C src install-analysis-deps
474-
ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc
474+
ANALYSIS_DEPS := llvm clang llvm-tools libuv utf8proc zstd
475475
ifeq ($(OS),Darwin)
476476
ANALYSIS_DEPS += llvmunwind
477477
else ifneq ($(OS),WINNT)

src/aotcompile.cpp

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,10 @@
5757
#include <llvm/Support/FormatAdapters.h>
5858
#include <llvm/Linker/Linker.h>
5959

60-
6160
using namespace llvm;
6261

62+
#include <zstd.h>
63+
6364
#include "jitlayers.h"
6465
#include "serialize.h"
6566
#include "julia_assert.h"
@@ -1659,27 +1660,53 @@ void jl_dump_native_impl(void *native_code,
16591660
sysimgM.setDataLayout(DL);
16601661
sysimgM.setStackProtectorGuard(StackProtectorGuard);
16611662
sysimgM.setOverrideStackAlignment(OverrideStackAlignment);
1662-
Constant *data = ConstantDataArray::get(Context,
1663-
ArrayRef<uint8_t>((const unsigned char*)z->buf, z->size));
1663+
1664+
int compression = jl_options.compress_sysimage ? 15 : 0;
1665+
ArrayRef<char> sysimg_data{z->buf, (size_t)z->size};
1666+
SmallVector<char, 0> compressed_data;
1667+
if (compression) {
1668+
compressed_data.resize(ZSTD_compressBound(z->size));
1669+
size_t comp_size = ZSTD_compress(compressed_data.data(), compressed_data.size(),
1670+
z->buf, z->size, compression);
1671+
compressed_data.resize(comp_size);
1672+
sysimg_data = compressed_data;
1673+
ios_close(z);
1674+
free(z);
1675+
}
1676+
1677+
Constant *data = ConstantDataArray::get(Context, sysimg_data);
16641678
auto sysdata = new GlobalVariable(sysimgM, data->getType(), false,
16651679
GlobalVariable::ExternalLinkage,
16661680
data, "jl_system_image_data");
1667-
sysdata->setAlignment(Align(64));
1681+
sysdata->setAlignment(Align(jl_page_size));
16681682
#if JL_LLVM_VERSION >= 180000
16691683
sysdata->setCodeModel(CodeModel::Large);
16701684
#else
16711685
if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
16721686
sysdata->setSection(".ldata");
16731687
#endif
16741688
addComdat(sysdata, TheTriple);
1675-
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), z->size);
1689+
Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), sysimg_data.size());
16761690
addComdat(new GlobalVariable(sysimgM, len->getType(), true,
16771691
GlobalVariable::ExternalLinkage,
16781692
len, "jl_system_image_size"), TheTriple);
1679-
// Free z here, since we've copied out everything into data
1680-
// Results in serious memory savings
1681-
ios_close(z);
1682-
free(z);
1693+
1694+
const char *unpack_func = compression ? "jl_image_unpack_zstd" : "jl_image_unpack_uncomp";
1695+
auto unpack = new GlobalVariable(sysimgM, DL.getIntPtrType(Context), true,
1696+
GlobalVariable::ExternalLinkage, nullptr,
1697+
unpack_func);
1698+
addComdat(new GlobalVariable(sysimgM, PointerType::getUnqual(Context), true,
1699+
GlobalVariable::ExternalLinkage, unpack,
1700+
"jl_image_unpack"),
1701+
TheTriple);
1702+
1703+
if (!compression) {
1704+
// Free z here, since we've copied out everything into data
1705+
// Results in serious memory savings
1706+
ios_close(z);
1707+
free(z);
1708+
}
1709+
compressed_data.clear();
16831710
// Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
16841711
// to function as expected
16851712
// no need to free the module/context, destructor handles that

src/jloptions.c

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ JL_DLLEXPORT void jl_init_options(void)
100100
0, // strip-ir
101101
0, // permalloc_pkgimg
102102
0, // heap-size-hint
103+
0, // compress_sysimage
103104
};
104105
jl_options_initialized = 1;
105106
}
@@ -216,10 +217,13 @@ static const char opts_hidden[] =
216217
" Enable or disable JIT compiler, or request exhaustive or minimal compilation\n\n"
217218

218219
// compiler output options
219-
" --output-o <name> Generate an object file (including system image data)\n"
220-
" --output-ji <name> Generate a system image data file (.ji)\n"
221-
" --strip-metadata Remove docstrings and source location info from system image\n"
222-
" --strip-ir Remove IR (intermediate representation) of compiled functions\n\n"
220+
" --output-o <name> Generate an object file (including system image data)\n"
221+
" --output-ji <name> Generate a system image data file (.ji)\n"
222+
" --strip-metadata Remove docstrings and source location info from system image\n"
223+
" --strip-ir Remove IR (intermediate representation) of compiled functions\n"
224+
" --compress-sysimage={yes|no*} Compress the sys/pkgimage heap at the expense of\n"
225+
" slightly increased load time.\n"
226+
"\n"
223227

224228
// compiler debugging (see the devdocs for tips on using these options)
225229
" --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
@@ -276,7 +280,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
276280
opt_strip_ir,
277281
opt_heap_size_hint,
278282
opt_gc_threads,
279-
opt_permalloc_pkgimg
283+
opt_permalloc_pkgimg,
284+
opt_compress_sysimage,
280285
};
281286
static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
282287
static const struct option longopts[] = {
@@ -338,6 +343,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
338343
{ "strip-ir", no_argument, 0, opt_strip_ir },
339344
{ "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg },
340345
{ "heap-size-hint", required_argument, 0, opt_heap_size_hint },
346+
{ "compress-sysimage", required_argument, 0, opt_compress_sysimage },
341347
{ 0, 0, 0, 0 }
342348
};
343349

@@ -889,6 +895,12 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
889895
else
890896
jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg);
891897
break;
898+
case opt_compress_sysimage:
899+
if (!strcmp(optarg,"yes"))
900+
jl_options.compress_sysimage = 1;
901+
else if (!strcmp(optarg,"no"))
902+
jl_options.compress_sysimage = 0;
903+
break;
892904
default:
893905
jl_errorf("julia: unhandled option -- %c\n"
894906
"This is a bug, please report it.", c);

src/jloptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ typedef struct {
6161
int8_t strip_ir;
6262
int8_t permalloc_pkgimg;
6363
uint64_t heap_size_hint;
64+
int8_t compress_sysimage;
6465
} jl_options_t;
6566

6667
#endif

src/staticdata.c

Lines changed: 93 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -74,15 +74,20 @@ External links:
7474
#include <stdio.h> // printf
7575
#include <inttypes.h> // PRIxPTR
7676

77+
#include <zstd.h>
78+
7779
#include "julia.h"
7880
#include "julia_internal.h"
7981
#include "julia_gcext.h"
8082
#include "builtin_proto.h"
8183
#include "processor.h"
8284
#include "serialize.h"
8385

84-
#ifndef _OS_WINDOWS_
86+
#ifdef _OS_WINDOWS_
87+
#include <memoryapi.h>
88+
#else
8589
#include <dlfcn.h>
90+
#include <sys/mman.h>
8691
#endif
8792

8893
#include "valgrind.h"
@@ -3082,14 +3087,75 @@ JL_DLLEXPORT jl_image_buf_t jl_preload_sysimg(const char *fname)
30823087
}
30833088
}
30843089

3085-
// From a shared library handle, verify consistency and return a jl_image_buf_t
3086-
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
3090+
typedef void jl_image_unpack_func_t(void *handle, jl_image_buf_t *image);
3091+
3092+
static void jl_prefetch_system_image(const char *data, size_t size)
3093+
{
3094+
size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
3095+
void *start = (void *)((uintptr_t)data & ~(page_size - 1));
3096+
size_t size_aligned = LLT_ALIGN(size, page_size);
3097+
#ifdef _OS_WINDOWS_
3098+
WIN32_MEMORY_RANGE_ENTRY entry = {start, size_aligned};
3099+
PrefetchVirtualMemory(GetCurrentProcess(), 1, &entry, 0);
3100+
#else
3101+
madvise(start, size_aligned, MADV_WILLNEED);
3102+
#endif
3103+
}
3104+
3105+
// Unpack hook for images whose heap is stored uncompressed: look up the image
// symbols in the shared library `handle`, point `image` directly at the
// embedded data, and prefetch it.
JL_DLLEXPORT void jl_image_unpack_uncomp(void *handle, jl_image_buf_t *image)
{
    size_t *size_sym;

    // The final argument (1) is passed unchanged to jl_dlsym for every lookup.
    jl_dlsym(handle, "jl_system_image_size", (void **)&size_sym, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&image->data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void **)&image->pointers, 1);

    // The size symbol is a variable holding the byte count of the data symbol.
    image->size = *size_sym;

    jl_prefetch_system_image(image->data, image->size);
}
3114+
3115+
// Unpack hook for images whose heap was compressed with zstd: decompress the
// frame embedded in the shared library `handle` into freshly allocated memory,
// point `image` at the result, and then release the pages that held the
// compressed copy.
// Any failure (bad frame header, allocation failure, decompression error) is
// fatal: a message is printed to stderr and the process exits.
JL_DLLEXPORT void jl_image_unpack_zstd(void *handle, jl_image_buf_t *image)
{
    size_t *plen;
    const char *data;
    jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
    jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
    jl_dlsym(handle, "jl_image_pointers", (void **)&image->pointers, 1);
    jl_prefetch_system_image(data, *plen);

    // The decompressed size is read from the zstd frame header. The call
    // reports failure through two sentinel values, which must not be used as
    // an allocation size.
    unsigned long long content_size = ZSTD_getFrameContentSize(data, *plen);
    if (content_size == ZSTD_CONTENTSIZE_UNKNOWN || content_size == ZSTD_CONTENTSIZE_ERROR ||
        (unsigned long long)(size_t)content_size != content_size) {
        jl_printf(JL_STDERR, "ERROR: system image is not a valid zstd frame\n");
        jl_exit(1);
    }
    image->size = (size_t)content_size;

    size_t page_size = jl_getpagesize(); /* jl_page_size is not set yet when loading sysimg */
    size_t aligned_size = LLT_ALIGN(image->size, page_size);
    int fail = 0;
#if defined(_OS_WINDOWS_)
    // GetLargePageMinimum() returns 0 when large pages are unsupported, and a
    // large-page allocation can fail (it requires a privilege), so large pages
    // are only an optimization: fall back to regular pages if they fail.
    size_t large_page_size = GetLargePageMinimum();
    image->data = NULL;
    if (large_page_size > 0 && image->size > 4 * large_page_size) {
        size_t large_aligned_size = LLT_ALIGN(image->size, large_page_size);
        image->data = (char *)VirtualAlloc(
            NULL, large_aligned_size, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE);
    }
    if (!image->data) {
        image->data = (char *)VirtualAlloc(NULL, aligned_size, MEM_COMMIT | MEM_RESERVE,
                                           PAGE_READWRITE);
    }
    fail = !image->data;
#else
    image->data =
        (char *)mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    fail = image->data == (char *)MAP_FAILED;
#endif
    if (fail) {
        jl_printf(JL_STDERR, "ERROR: failed to allocate space for system image\n");
        jl_exit(1);
    }

    // A short or failed decompression would otherwise leave a partially
    // filled heap that is then loaded as if it were valid.
    size_t nwritten = ZSTD_decompress((void *)image->data, image->size, data, *plen);
    if (ZSTD_isError(nwritten) || nwritten != image->size) {
        jl_printf(JL_STDERR, "ERROR: failed to decompress system image\n");
        jl_exit(1);
    }

    // Release the whole pages occupied by the compressed data; a trailing
    // partial page is kept since it may be shared with other data.
    size_t len = (*plen) & ~(page_size - 1);
    if (len) {
#ifdef _OS_WINDOWS_
        // NOTE(review): `data` is not a VirtualAlloc base address and MEM_RELEASE
        // is documented to require a size of 0, so this call presumably fails
        // without effect -- confirm and replace with a working mechanism.
        VirtualFree((void *)data, len, MEM_RELEASE);
#else
        munmap((void *)data, len);
#endif
    }
}
30923155

3156+
// From a shared library handle, verify consistency and return a jl_image_buf_t
3157+
static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
3158+
{
30933159
// verify that the linker resolved the symbols in this image against ourselves (libjulia-internal)
30943160
void** (*get_jl_RTLD_DEFAULT_handle_addr)(void) = NULL;
30953161
if (handle != jl_RTLD_DEFAULT_handle) {
@@ -3098,39 +3164,42 @@ static jl_image_buf_t get_image_buf(void *handle, int is_pkgimage)
30983164
jl_error("Image file failed consistency check: maybe opened the wrong version?");
30993165
}
31003166

3167+
jl_image_unpack_func_t **unpack;
3168+
jl_image_buf_t image = {
3169+
.kind = JL_IMAGE_KIND_SO,
3170+
.handle = handle,
3171+
.pointers = NULL,
3172+
.data = NULL,
3173+
.size = 0,
3174+
.base = 0,
3175+
};
3176+
31013177
// verification passed, lookup the buffer pointers
31023178
if (jl_system_image_size == 0 || is_pkgimage) {
31033179
// in the usual case, the sysimage was not statically linked to libjulia-internal
31043180
// look up the external sysimage symbols via the dynamic linker
3105-
jl_dlsym(handle, "jl_system_image_size", (void **)&plen, 1);
3106-
jl_dlsym(handle, "jl_system_image_data", (void **)&data, 1);
3107-
jl_dlsym(handle, "jl_image_pointers", (void**)&pointers, 1);
3108-
} else {
3181+
jl_dlsym(handle, "jl_image_unpack", (void **)&unpack, 1);
3182+
(*unpack)(handle, &image);
3183+
}
3184+
else {
31093185
// the sysimage was statically linked directly against libjulia-internal
31103186
// use the internal symbols
3111-
plen = &jl_system_image_size;
3112-
pointers = &jl_image_pointers;
3113-
data = &jl_system_image_data;
3187+
image.size = jl_system_image_size;
3188+
image.pointers = &jl_image_pointers;
3189+
image.data = &jl_system_image_data;
31143190
}
31153191

31163192
#ifdef _OS_WINDOWS_
3117-
base = (intptr_t)handle;
3193+
image.base = (intptr_t)handle;
31183194
#else
31193195
Dl_info dlinfo;
3120-
if (dladdr((void*)pointers, &dlinfo) != 0)
3121-
base = (intptr_t)dlinfo.dli_fbase;
3196+
if (dladdr((void*)image.pointers, &dlinfo) != 0)
3197+
image.base = (intptr_t)dlinfo.dli_fbase;
31223198
else
3123-
base = 0;
3199+
image.base = 0;
31243200
#endif
31253201

3126-
return (jl_image_buf_t) {
3127-
.kind = JL_IMAGE_KIND_SO,
3128-
.handle = handle,
3129-
.pointers = pointers,
3130-
.data = data,
3131-
.size = *plen,
3132-
.base = base,
3133-
};
3202+
return image;
31343203
}
31353204

31363205
// Allow passing in a module handle directly, rather than a path

0 commit comments

Comments
 (0)