From 5b7ee850094e700b92cc78d47f4eaaeef4806bb5 Mon Sep 17 00:00:00 2001 From: Anshul Madan Date: Mon, 18 Nov 2013 12:21:01 -0500 Subject: [PATCH] Enhancements to the proposed SNIA APIs to flush data cached in the CPU cache to NVM. The enhancements allows a programmer to persist changes described with an array of discontinuous ranges, rather than just persist changes that are to a single continuous range of memory. Thus alleviating the overhead of enforcing memory barriers after each flush. --- LINUX_PMEM_API.txt | 55 +++++++++++++++- basic/basic.c | 78 ++++++++++++++++++++-- libpmem/pmem.c | 79 ++++++++++++++++++++++ libpmem/pmem.h | 36 +++++++++++ libpmem/pmem_cl.c | 118 ++++++++++++++++++++++++++++++++- libpmem/pmem_fit.c | 151 +++++++++++++++++++++++++++++++++++++++++-- libpmem/pmem_msync.c | 149 ++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 649 insertions(+), 17 deletions(-) diff --git a/LINUX_PMEM_API.txt b/LINUX_PMEM_API.txt index ae403f8..c8cf8fd 100644 --- a/LINUX_PMEM_API.txt +++ b/LINUX_PMEM_API.txt @@ -26,9 +26,17 @@ SYNOPSIS /* make changes persistent */ msync(addr, nbytes, MS_SYNC); - /* optional, instead of mmap line above: */ + /* optional, instead of msync line above: */ pmem_persist(addr, nbytes, 0); + /* optional, instead of msync line above: */ + int pmem_persist_iov(const struct iovec *addrs, size_t count, + int flags); + + /* optional, instead of msync line above: */ + int pmem_persist_iov_verify(const struct iovec *addrs, + size_t count, int flags); + /* other interfaces described in this document... */ pmem_flush_cache(addr, len, flags); pmem_fence(); @@ -187,6 +195,51 @@ DESCRIPTION persistent -- they can become persistent at any time before pmem_persist() is called. + void pmem_persist_iov(const struct iovec *addrs, size_t count, + int flags); + + Force any changes in an array of (discontinuous) ranges + [addr, addr+len) to be stored durably in Persistent Memory. 
+ This is an alternative to msync(2) and pmem_persist described + above. This might be even more efficient than pmem_persist above + as it will alleviate the overhead of enforcing memory barriers + after each flush if possible. + + No flags have been defined for this call yet. + + WARNING: Like msync(2) described above, there is nothing + atomic or transactional about this call. Any + unwritten stores in the given range(s) will be written, + but some stores may have already been written by + virtue of normal cache eviction/replacement policies. + Correctly written code must not depend on stores + waiting until pmem_persist_iov() is called to become + persistent -- they can become persistent at any time + before pmem_persist_iov() is called. + + int pmem_persist_iov_verify(const struct iovec *addrs, size_t count, + int flags); + + Force any changes in an array of (discontinuous) ranges + [addr, addr+len) to be stored durably in Persistent Memory + with Posix synchronized I/O data integrity completion, i.e. + O_SYNC-like behavior. This is an alternative to msync(2), + pmem_persist and pmem_persist_iov described above. This + ensures data integrity of writes unlike pmem_persist and + pmem_persist_iov. + + No flags have been defined for this call yet. + + WARNING: Like msync(2) described above, there is nothing + atomic or transactional about this call. Any + unwritten stores in the given range(s) will be written, + but some stores may have already been written by + virtue of normal cache eviction/replacement policies. + Correctly written code must not depend on stores + waiting until pmem_persist_iov_verify() is called to become + persistent -- they can become persistent at any time + before pmem_persist_iov_verify() is called. 
+ void pmem_flush_cache(void *addr, size_t len, int flags); void pmem_fence(void); void pmem_drain_pm_stores(void); diff --git a/basic/basic.c b/basic/basic.c index ae4c8f0..5159ac3 100644 --- a/basic/basic.c +++ b/basic/basic.c @@ -30,10 +30,41 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.. 
+ */ + /* * basic.c -- illustrate basic load/store operations on PM * - * usage: basic [-FMd] [-i icount] path [strings...] + * usage: basic [-FMRVd] [-i icount] path [strings...] * * Where is a file on a Persistent Memory aware file system * like PMFS. If path doesn't exist, this program will create it @@ -51,6 +82,14 @@ * Use this when you don't have a PM-aware file system and you're * running on traditional memory-mapped files instead. * + * The -R flag enables the multi-range version of pmem_persist. + * Use this when you want to persist multiple dirty regions + * together. + * + * The -V flag enables the multi-range version of pmem_persist with verify. + * Use this when you want to persist multiple dirty regions + * together with Posix synchronized I/O data integrity completion. + * * The -d flag turns on debugging prints. * * The -i flag, if given, turns on instruction counting for the @@ -76,13 +115,13 @@ #include "icount/icount.h" #include "libpmem/pmem.h" -char Usage[] = "[-FMd] [-i icount] path [strings...]"; /* for USAGE() */ +char Usage[] = "[-FMRVd] [-i icount] path [strings...]"; /* for USAGE() */ int main(int argc, char *argv[]) { int opt; - int iflag = 0; + int iflag = 0, iov_flag = 0, iov_verify_flag = 0; unsigned long icount; const char *path; struct stat stbuf; @@ -91,7 +130,7 @@ main(int argc, char *argv[]) char *pmaddr; Myname = argv[0]; - while ((opt = getopt(argc, argv, "FMdi:")) != -1) { + while ((opt = getopt(argc, argv, "FMRVdi:")) != -1) { switch (opt) { case 'F': pmem_fit_mode(); @@ -101,6 +140,14 @@ main(int argc, char *argv[]) pmem_msync_mode(); break; + case 'R': + iov_flag++; + break; + + case 'V': + iov_verify_flag++; + break; + case 'd': Debug++; break; @@ -149,10 +196,19 @@ main(int argc, char *argv[]) if (optind < argc) { /* strings supplied as arguments? 
*/ int i; char *ptr = pmaddr; + struct iovec *dirty_addrs; + size_t dirty_count = 0; + int dirty_rc = 0; if (iflag) icount_start(icount); /* start instruction count */ + + if (iov_flag || iov_verify_flag) { + dirty_addrs = (struct iovec *) malloc ((argc - optind) + * sizeof (struct iovec)); + } + for (i = optind; i < argc; i++) { size_t len = strlen(argv[i]) + 1; /* includes '\0' */ @@ -163,12 +219,24 @@ main(int argc, char *argv[]) strcpy(ptr, argv[i]); /* make that change durable */ - pmem_persist(ptr, len, 0); + if (iov_flag || iov_verify_flag) { + pmem_add_dirty_range(dirty_addrs, &dirty_count, + ptr, len); + } else + pmem_persist(ptr, len, 0); ptr += len; size -= len; } + if (iov_flag) { + dirty_rc = pmem_persist_iov(dirty_addrs, + dirty_count, 0); + } else if (iov_verify_flag) { + dirty_rc = pmem_persist_iov_verify(dirty_addrs, + dirty_count, 0); + } + if (iflag) { icount_stop(); /* end instruction count */ diff --git a/libpmem/pmem.c b/libpmem/pmem.c index 1088abf..cdfc8b0 100644 --- a/libpmem/pmem.c +++ b/libpmem/pmem.c @@ -30,6 +30,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.. + */ + /* * pmem.c -- entry points for libpmem */ @@ -41,14 +72,23 @@ /* dispatch tables for the various versions of libpmem */ void *pmem_map_cl(int fd, size_t len); void pmem_persist_cl(void *addr, size_t len, int flags); +int pmem_persist_iov_cl(const struct iovec *addrs, size_t count, int flags); +int pmem_persist_iov_verify_cl(const struct iovec *addrs, size_t count, + int flags); void pmem_flush_cache_cl(void *addr, size_t len, int flags); void pmem_drain_pm_stores_cl(void); void *pmem_map_msync(int fd, size_t len); void pmem_persist_msync(void *addr, size_t len, int flags); +int pmem_persist_iov_msync(const struct iovec *addrs, size_t count, int flags); +int pmem_persist_iov_verify_msync(const struct iovec *addrs, size_t count, + int flags); void pmem_flush_cache_msync(void *addr, size_t len, int flags); void pmem_drain_pm_stores_msync(void); void *pmem_map_fit(int fd, size_t len); void pmem_persist_fit(void *addr, size_t len, int flags); +int pmem_persist_iov_fit(const struct iovec *addrs, size_t count, int flags); +int pmem_persist_iov_verify_fit(const struct iovec *addrs, size_t count, + int flags); void pmem_flush_cache_fit(void *addr, size_t len, int flags); void 
pmem_drain_pm_stores_fit(void); #define PMEM_CL_INDEX 0 @@ -58,6 +98,13 @@ static void *(*Map[])(int fd, size_t len) = { pmem_map_cl, pmem_map_msync, pmem_map_fit }; static void (*Persist[])(void *addr, size_t len, int flags) = { pmem_persist_cl, pmem_persist_msync, pmem_persist_fit }; +static int (*PersistIOV[])(const struct iovec *addrs, size_t count, int flags) = + { pmem_persist_iov_cl, pmem_persist_iov_msync, + pmem_persist_iov_fit }; +static int (*PersistIOVVerify[])(const struct iovec *addrs, size_t count, + int flags) = { pmem_persist_iov_verify_cl, + pmem_persist_iov_verify_msync, + pmem_persist_iov_verify_fit }; static void (*Flush[])(void *addr, size_t len, int flags) = { pmem_flush_cache_cl, pmem_flush_cache_msync, pmem_flush_cache_fit }; @@ -106,6 +153,38 @@ pmem_persist(void *addr, size_t len, int flags) (*Persist[Mode])(addr, len, flags); } +/* + * pmem_persist_iov -- make any cached changes to an array of (discontinuous) + * ranges of PM persistent + */ +int +pmem_persist_iov(const struct iovec *addrs, size_t count, int flags) +{ + return (*PersistIOV[Mode])(addrs, count, flags); +} + +/* + * pmem_persist_iov_verify -- make any cached changes to an array of + * (discontinuous) ranges of PM persistent with Posix synchronized I/O data + * integrity completion, i.e. 
O_SYNC-like behavior + */ +int +pmem_persist_iov_verify(const struct iovec *addrs, size_t count, int flags) +{ + return (*PersistIOVVerify[Mode])(addrs, count, flags); +} + +/* + * pmem_add_dirty_range -- Helper function to track dirty address ranges + */ +void pmem_add_dirty_range(struct iovec *addrs, size_t* count, void *addr, + size_t len) +{ + addrs[*count].iov_base = addr; + addrs[*count].iov_len = len; + *count = (*count) + 1; +} + /* * pmem_flush_cache -- flush processor cache for the given range */ diff --git a/libpmem/pmem.h b/libpmem/pmem.h index d5ae94d..1fe9e85 100644 --- a/libpmem/pmem.h +++ b/libpmem/pmem.h @@ -30,9 +30,41 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /* * pmem.h -- definitions of libpmem entry points */ +#include <sys/uio.h> void pmem_msync_mode(void); /* for testing on non-PM memory-mapped files */ void pmem_fit_mode(void); /* for fault injection testing */ @@ -40,6 +72,10 @@ void pmem_fit_mode(void); /* for fault injection testing */ /* commonly-used functions for Persistent Memory */ void *pmem_map(int fd, size_t len); void pmem_persist(void *addr, size_t len, int flags); +int pmem_persist_iov(const struct iovec *addrs, size_t count, int flags); +int pmem_persist_iov_verify(const struct iovec *addrs, size_t count, int flags); +void pmem_add_dirty_range(struct iovec *addrs, size_t* count, + void *addr, size_t len); /* for advanced users -- functions that do portions of pmem_persist() */ void pmem_flush_cache(void *addr, size_t len, int flags); diff --git a/libpmem/pmem_cl.c b/libpmem/pmem_cl.c index 9556d88..4272c52 100644 --- a/libpmem/pmem_cl.c +++ b/libpmem/pmem_cl.c @@ -30,6 +30,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.. + */ + /* * pmem_cl.c -- cache-line-based implementation of libpmem * @@ -43,7 +74,9 @@ #include #include -#define ALIGN 64 /* assumes 64B cache line size */ +#include + +#define ALIGN 64 /* assumes 64B cache line size */ /* * pmem_map -- map the Persistent Memory @@ -102,6 +135,23 @@ pmem_flush_cache_cl(void *addr, size_t len, int flags) __builtin_ia32_clflush((void *)uptr); } +/* + * pmem_load_cache -- load given range into the processor cache + * + * This is the cache-line-based version. 
+ */ +void +pmem_load_cache_cl(void *addr, size_t len, int flags) +{ + uintptr_t uptr; + + /* loop through 64B-aligned chunks covering the given range */ + for (uptr = (uintptr_t)addr & ~(ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += 64) + __asm__ __volatile__ + ( "movdqa %0, %%xmm0\n" : : "m"(*(char *)uptr) ); +} + /* * pmem_persist -- make any cached changes to a range of PM persistent * @@ -114,3 +164,69 @@ pmem_persist_cl(void *addr, size_t len, int flags) __builtin_ia32_sfence(); pmem_drain_pm_stores_cl(); } + +/* + * pm_persist_iov --- make any cached changes to an array of (discontinuous) + * ranges of PM persistent + * + * This is the cache-line-based version. + */ +int pmem_persist_iov_cl(const struct iovec *addrs, size_t count, int flags) +{ + int rc = 0; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + pmem_flush_cache_cl(range->iov_base, range->iov_len, flags); + } + __builtin_ia32_sfence(); + pmem_drain_pm_stores_cl(); + + /* + * While this implementation cannot encounter an error condition, + * other implementations may. Hence this example code template + * provides a return code. + */ + return rc; +} + +/* + * pm_persist_iov_verify --- make any cached changes to an array of + * (discontinuous) ranges of PM persistent with Posix synchronized I/O data + * integrity completion, i.e. O_SYNC-like behavior + * + * This is the cache-line-based version. + */ +int pmem_persist_iov_verify_cl(const struct iovec *addrs, size_t count, + int flags) +{ + int rc = 0; + uint64_t addr; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + pmem_flush_cache_cl(range->iov_base, range->iov_len, flags); + } + __builtin_ia32_mfence(); + pmem_drain_pm_stores_cl(); + + /* + * Verify that all ranges have either been successfully transferred or + * diagnosed as unsuccessful + * + * TODO: any read error should be caught and returned as EIO. 
+ */ + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + pmem_load_cache_cl(range->iov_base, range->iov_len, flags); + } + + /* + * TODO: On any read error, return -1 and set errno to EIO. + */ + return rc; +} diff --git a/libpmem/pmem_fit.c b/libpmem/pmem_fit.c index 8a4857b..2586a41 100644 --- a/libpmem/pmem_fit.c +++ b/libpmem/pmem_fit.c @@ -30,6 +30,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.. + */ + /* * pmem_fit.h -- implementation of libpmem for fault injection testing * @@ -47,6 +78,10 @@ #include "util/util.h" +#include + +#include + #define ALIGN 64 /* assumes 64B cache line size */ static int PM_fd; @@ -90,10 +125,11 @@ pmem_drain_pm_stores_fit(void) * * This is the fit version (fault injection test) that uses copy-on-write. */ -void +int pmem_flush_cache_fit(void *addr, size_t len, int flags) { uintptr_t uptr; + int rc = 0; if (!PM_base) FATAL("pmem_map hasn't been called"); @@ -104,10 +140,40 @@ pmem_flush_cache_fit(void *addr, size_t len, int flags) * chunks that cover the given range. */ for (uptr = (uintptr_t)addr & ~(ALIGN - 1); - uptr < (uintptr_t)addr + len; uptr += ALIGN) - if (pwrite(PM_fd, (void *)uptr, ALIGN, uptr - PM_base) < 0) - FATALSYS("pwrite len %d offset %lu", len, - addr - PM_base); + uptr < (uintptr_t)addr + len; uptr += ALIGN) { + rc = pwrite(PM_fd, (void *)uptr, ALIGN, uptr - PM_base); + if (rc < 0) + return rc; + } + return rc; +} + +/* + * pmem_load_cache -- load given range into processor cache + * + * This is the fit version (fault injection test) that uses copy-on-write. 
+ */ +int +pmem_load_cache_fit(void *addr, size_t len, int flags) +{ + uintptr_t uptr; + int rc = 0; + + if (!PM_base) + FATAL("pmem_map hasn't been called"); + + /* + * even though pread() can take any random byte addresses and + * lengths, we simulate cache loading by reading the full 64B + * chunks that cover the given range. + */ + for (uptr = (uintptr_t)addr & ~(ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += ALIGN) { + rc = pread(PM_fd, (void *)uptr, ALIGN, uptr - PM_base); + if (rc < 0) + return rc; + } + return rc; } /* @@ -118,7 +184,80 @@ pmem_flush_cache_fit(void *addr, size_t len, int flags) void pmem_persist_fit(void *addr, size_t len, int flags) { - pmem_flush_cache_fit(addr, len, flags); + int rc = 0; + rc = pmem_flush_cache_fit(addr, len, flags); + if (rc < 0) { + FATALSYS("pwrite len %d offset %lu", len, + addr - PM_base); + } + __builtin_ia32_sfence(); + pmem_drain_pm_stores_fit(); +} + +/* + * pm_persist_iov --- make any cached changes to an array of (discontinuous) + * ranges of PM persistent + * + * This is the fit version (fault injection test) that uses copy-on-write. + */ +int pmem_persist_iov_fit(const struct iovec *addrs, size_t count, int flags) +{ + int rc = 0; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_flush_cache_fit(range->iov_base, range->iov_len, flags); + if (rc < 0) { + return -1; + } + } __builtin_ia32_sfence(); pmem_drain_pm_stores_fit(); + + /* + * While this implementation cannot encounter an error condition, + * other implementations may. Hence this example code template + * provides a return code. + */ + return rc; +} + +/* + * pm_persist_iov_verify --- make any cached changes to an array of + * (discontinuous) ranges of PM persistent with Posix synchronized I/O data + * integrity completion, i.e. O_SYNC-like behavior + * + * This is the fit-line-based version. 
+ */ +int pmem_persist_iov_verify_fit(const struct iovec *addrs, size_t count, + int flags) +{ + int rc = 0; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_flush_cache_fit(range->iov_base, range->iov_len, flags); + if (rc < 0) + return rc; + } + __builtin_ia32_sfence(); + pmem_drain_pm_stores_fit(); + + /* + * Verify that all ranges have either been successfully transferred or. + * diagnosed as unsuccessful. + */ + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_load_cache_fit(range->iov_base, range->iov_len, flags); + if (rc < 0) { + return -1; + } + } + + return rc; } diff --git a/libpmem/pmem_msync.c b/libpmem/pmem_msync.c index a343ee4..3c23c6e 100644 --- a/libpmem/pmem_msync.c +++ b/libpmem/pmem_msync.c @@ -30,6 +30,37 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/* + * Copyright (c) 2013, NetApp, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the NetApp, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.. + */ + /* * pmem_msync.c -- msync-based implementation of libpmem */ @@ -42,8 +73,13 @@ #include "util/util.h" +#include + #define ALIGN 4096 /* assumes 4k page size for use with msync() */ +static int PM_fd; +static uintptr_t PM_base; + /* * pmem_map -- map the Persistent Memory * @@ -61,6 +97,9 @@ pmem_map_msync(int fd, size_t len) fd, 0)) == MAP_FAILED) return NULL; + PM_base = (uintptr_t)base; + PM_fd = dup(fd); + return base; } @@ -82,10 +121,11 @@ pmem_drain_pm_stores_msync(void) * * This is the msync-based version. */ -void +int pmem_flush_cache_msync(void *addr, size_t len, int flags) { uintptr_t uptr; + int rc = 0; /* * msync requires len to be a multiple of pagesize, so @@ -102,8 +142,36 @@ pmem_flush_cache_msync(void *addr, size_t len, int flags) /* round len up to multiple of page size */ len = (len + (ALIGN - 1)) & ~(ALIGN - 1); - if (msync((void *)uptr, len, MS_SYNC) < 0) - FATALSYS("msync"); + rc = msync((void *)uptr, len, MS_SYNC); + return rc; +} + +/* + * pmem_load_cache -- load given range into processor cache + * + * This is the msync-based version. + */ +int +pmem_load_cache_msync(void *addr, size_t len, int flags) +{ + uintptr_t uptr; + int rc = 0; + + if (!PM_base) + FATAL("pmem_map hasn't been called"); + + /* + * even though pread() can take any random byte addresses and + * lengths, we simulate cache loading by reading the full 64B + * chunks that cover the given range. 
+ */ + for (uptr = (uintptr_t)addr & ~(ALIGN - 1); + uptr < (uintptr_t)addr + len; uptr += ALIGN) { + rc = pread(PM_fd, (void *)uptr, ALIGN, uptr - PM_base); + if (rc < 0) + return rc; + } + return rc; } /* @@ -114,7 +182,80 @@ pmem_flush_cache_msync(void *addr, size_t len, int flags) void pmem_persist_msync(void *addr, size_t len, int flags) { - pmem_flush_cache_msync(addr, len, flags); + int rc = 0; + rc = pmem_flush_cache_msync(addr, len, flags); + if (rc < 0) { + FATALSYS("msync"); + } __builtin_ia32_sfence(); pmem_drain_pm_stores_msync(); } + +/* + * pm_persist_iov --- make any cached changes to an array of (discontinuous) + * ranges of PM persistent + * + * This is the msync-based version. + */ +int pmem_persist_iov_msync(const struct iovec *addrs, size_t count, int flags) +{ + int rc = 0; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_flush_cache_msync(range->iov_base, + range->iov_len, flags); + if (rc < 0) { + return -1; + } + } + __builtin_ia32_sfence(); + pmem_drain_pm_stores_msync(); + + /* + * While this implementation cannot encounter an error condition, + * other implementations may. Hence this example code template + * provides a return code. + */ + return rc; +} + +/* + * pm_persist_iov_verify --- make any cached changes to an array of + * (discontinuous) ranges of PM persistent with Posix synchronized I/O data + * integrity completion, i.e. O_SYNC-like behavior + * + * This is the msync-line-based version. + */ +int pmem_persist_iov_verify_msync(const struct iovec *addrs, size_t count, + int flags) +{ + int rc = 0; + unsigned i; + struct iovec *range; + + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_flush_cache_msync(range->iov_base, range->iov_len, flags); + if (rc < 0) + return rc; + } + __builtin_ia32_sfence(); + pmem_drain_pm_stores_msync(); + + /* + * Verify that all ranges have either been successfully transferred or. 
+ * diagnosed as unsuccessful. + */ + for (i = 0; i < count; i += 1) { + range = (struct iovec *)(addrs + i); + rc = pmem_load_cache_msync(range->iov_base, range->iov_len, flags); + if (rc < 0) { + return -1; + } + } + + return rc; +}