@@ -149,6 +149,10 @@ struct llama_model {
149149 // the model memory buffer
150150 std::vector<uint8_t > buf;
151151
152+ // model memory mapped file
153+ void * mm_addr;
154+ size_t mm_length;
155+
152156 // tensors
153157 int n_loaded;
154158 std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() {
296300// model loading
297301//
298302
// Map the file `fname` read-only into memory.
//
// Outputs:
//   mm_addr   - start of the mapping, or NULL when the file could not be mapped
//   mm_length - size of the mapping in bytes, or 0 when mm_addr is NULL
//
// Both outputs are always written, on every path (including builds without
// POSIX mmap), so callers can rely on `mm_addr == NULL` to detect failure and
// fall back to regular file reads. A successful mapping must later be released
// with munmap_file(mm_addr, mm_length).
static void mmap_file(const char * fname, void * &mm_addr, size_t &mm_length) {
    // start from the "not mapped" state so early returns leave consistent outputs
    mm_addr   = NULL;
    mm_length = 0;

#if defined(MAP_FAILED)
    // POSIX
    const int fd = open(fname, O_RDONLY);
    if (fd < 0) {
        perror("open failed");
        return;
    }

    // seeking to the end yields the file size; a negative result is an error
    const off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size < 0) {
        perror("lseek failed");
        close(fd);
        return;
    }

    void * addr = mmap(NULL, static_cast<size_t>(file_size), PROT_READ, MAP_SHARED, fd, 0);
    if (addr == MAP_FAILED) {
        // report before close() so errno still belongs to mmap()
        perror(" mmap failed");
        close(fd);
        return;
    }

    // the mapping stays valid after the descriptor is closed
    close(fd);

    mm_addr   = addr;
    mm_length = static_cast<size_t>(file_size);
#else
    // TODO: windows support
    (void)(fname); // suppress warnings
#endif
}
320+
// Release a mapping previously obtained from mmap_file().
//
//   addr   - start address of the mapping; NULL is accepted and ignored
//   length - length of the mapping in bytes; 0 is accepted and ignored
//
// A failing munmap() is reported on stderr instead of being silently dropped.
static void munmap_file(void * addr, size_t length) {
#if defined(MAP_FAILED)
    // POSIX
    if (addr == NULL || length == 0) {
        // nothing was mapped, so there is nothing to release
        return;
    }
    if (munmap(addr, length) != 0) {
        perror("munmap failed");
    }
#else
    // TODO: windows support
    (void)(addr); // suppress warnings
    (void)(length);
#endif
}
317331
@@ -480,12 +494,15 @@ static bool llama_model_load(
480494 bool use_mmap = (n_parts == 1 );
481495
482496 // try to memory map the model file
483- void * mm_addr = NULL ;
497+ void * mm_addr = NULL ;
484498 if (use_mmap) {
485- mm_addr = mmap_file (fname.c_str ());
486- if (mm_addr == NULL ) {
499+ mmap_file (fname.c_str (), model. mm_addr , model. mm_length );
500+ if (model. mm_addr == NULL ) {
487501 use_mmap = false ;
488502 }
503+ else {
504+ mm_addr = model.mm_addr ;
505+ }
489506 }
490507
491508 auto & ctx = model.ctx ;
@@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) {
17501767 ggml_free (ctx->model .ctx );
17511768 }
17521769
1770+ if (ctx->model .mm_addr ) {
1771+ munmap_file (ctx->model .mm_addr , ctx->model .mm_length );
1772+ }
1773+
17531774 delete ctx;
17541775}
17551776
0 commit comments